diff --git a/intl/icu/source/common/utext.cpp b/intl/icu/source/common/utext.cpp
index a89e7a62b..994b5dac5 100644
--- a/intl/icu/source/common/utext.cpp
+++ b/intl/icu/source/common/utext.cpp
@@ -847,11 +847,18 @@ U_CDECL_END
//------------------------------------------------------------------------------
// Chunk size.
-// Must be less than 85, because of byte mapping from UChar indexes to native indexes.
-// Worst case is three native bytes to one UChar. (Supplemenaries are 4 native bytes
-// to two UChars.)
+// Must be less than 42 (256/6), because of byte mapping from UChar indexes to native indexes.
+// Worst case there are six UTF-8 bytes per UChar.
+// obsolete 6 byte form fd + 5 trails maps to fffd
+// obsolete 5 byte form fc + 4 trails maps to fffd
+// non-shortest 4 byte forms maps to fffd
+// normal supplementaries map to a pair of utf-16, two utf8 bytes per utf-16 unit
+// mapToUChars array size must allow for the worst case, 6.
+// This could be brought down to 4, by treating fd and fc as pure illegal,
+// rather than obsolete lead bytes. But that is not compatible with the utf-8 access macros.
//
enum { UTF8_TEXT_CHUNK_SIZE=32 };
+enum { SIZEOF_MAPTOUCHARS=UTF8_TEXT_CHUNK_SIZE*6+6 };
//
// UTF8Buf Two of these structs will be set up in the UText's extra allocated space.
@@ -889,7 +896,7 @@ struct UTF8Buf {
// Requires two extra slots,
// one for a supplementary starting in the last normal position,
// and one for an entry for the buffer limit position.
- uint8_t mapToUChars[UTF8_TEXT_CHUNK_SIZE*3+6]; // Map native offset from bufNativeStart to
+ uint8_t mapToUChars[SIZEOF_MAPTOUCHARS]; // Map native offset from bufNativeStart to
// correspoding offset in filled part of buf.
int32_t align;
};
@@ -1032,6 +1039,7 @@ utf8TextAccess(UText *ut, int64_t index, UBool forward) {
// Requested index is in this buffer.
u8b = (UTF8Buf *)ut->p; // the current buffer
mapIndex = ix - u8b->toUCharsMapStart;
+ U_ASSERT(mapIndex < (int32_t)SIZEOF_MAPTOUCHARS);
ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;
return TRUE;
@@ -1298,6 +1306,10 @@ fillReverse:
// Can only do this if the incoming index is somewhere in the interior of the string.
// If index is at the end, there is no character there to look at.
if (ix != ut->b) {
+ // Note: this function will only move the index back if it is on a trail byte
+ // and there is a preceding lead byte and the sequence from the lead
+ // through this trail could be part of a valid UTF-8 sequence
+ // Otherwise the index remains unchanged.
U8_SET_CP_START(s8, 0, ix);
}
@@ -1311,7 +1323,10 @@ fillReverse:
UChar *buf = u8b->buf;
uint8_t *mapToNative = u8b->mapToNative;
uint8_t *mapToUChars = u8b->mapToUChars;
- int32_t toUCharsMapStart = ix - (UTF8_TEXT_CHUNK_SIZE*3 + 1);
+ int32_t toUCharsMapStart = ix - SIZEOF_MAPTOUCHARS + 1;
+ // Note that toUCharsMapStart can be negative. Happens when the remaining
+ // text from current position to the beginning is less than the buffer size.
+ // + 1 because mapToUChars must have a slot at the end for the bufNativeLimit entry.
int32_t destIx = UTF8_TEXT_CHUNK_SIZE+2; // Start in the overflow region
// at end of buffer to leave room
// for a surrogate pair at the
@@ -1338,6 +1353,7 @@ fillReverse:
if (c<0x80) {
// Special case ASCII range for speed.
buf[destIx] = (UChar)c;
+ U_ASSERT(toUCharsMapStart <= srcIx);
mapToUChars[srcIx - toUCharsMapStart] = (uint8_t)destIx;
mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
} else {
@@ -1367,6 +1383,7 @@ fillReverse:
do {
mapToUChars[sIx-- - toUCharsMapStart] = (uint8_t)destIx;
} while (sIx >= srcIx);
+ U_ASSERT(toUCharsMapStart <= (srcIx+1));
// Set native indexing limit to be the current position.
// We are processing a non-ascii, non-native-indexing char now;
@@ -1541,6 +1558,7 @@ utf8TextMapIndexToUTF16(const UText *ut, int64_t index64) {
U_ASSERT(index>=ut->chunkNativeStart+ut->nativeIndexingLimit);
U_ASSERT(index<=ut->chunkNativeLimit);
int32_t mapIndex = index - u8b->toUCharsMapStart;
+ U_ASSERT(mapIndex < (int32_t)SIZEOF_MAPTOUCHARS);
int32_t offset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;
U_ASSERT(offset>=0 && offset<=ut->chunkLength);
return offset;
diff --git a/intl/icu/source/i18n/gregoimp.cpp b/intl/icu/source/i18n/gregoimp.cpp
index 1b313602d..204c3ff81 100644
--- a/intl/icu/source/i18n/gregoimp.cpp
+++ b/intl/icu/source/i18n/gregoimp.cpp
@@ -27,6 +27,11 @@ int32_t ClockMath::floorDivide(int32_t numerator, int32_t denominator) {
numerator / denominator : ((numerator + 1) / denominator) - 1;
}
+int64_t ClockMath::floorDivide(int64_t numerator, int64_t denominator) {
+ return (numerator >= 0) ?
+ numerator / denominator : ((numerator + 1) / denominator) - 1;
+}
+
int32_t ClockMath::floorDivide(double numerator, int32_t denominator,
int32_t& remainder) {
double quotient;
diff --git a/intl/icu/source/i18n/gregoimp.h b/intl/icu/source/i18n/gregoimp.h
index 27303a6ac..f6c730aef 100644
--- a/intl/icu/source/i18n/gregoimp.h
+++ b/intl/icu/source/i18n/gregoimp.h
@@ -40,6 +40,17 @@ class ClockMath {
*/
static int32_t floorDivide(int32_t numerator, int32_t denominator);
+ /**
+ * Divide two integers, returning the floor of the quotient.
+ * Unlike the built-in division, this is mathematically
+ * well-behaved. E.g., -1/4 => 0 but
+ * floorDivide(-1,4) => -1.
+ * @param numerator the numerator
+ * @param denominator a divisor which must be != 0
+ * @return the floor of the quotient
+ */
+ static int64_t floorDivide(int64_t numerator, int64_t denominator);
+
/**
* Divide two numbers, returning the floor of the quotient.
* Unlike the built-in division, this is mathematically
diff --git a/intl/icu/source/i18n/persncal.cpp b/intl/icu/source/i18n/persncal.cpp
index 278273fb9..6f46dd9c7 100644
--- a/intl/icu/source/i18n/persncal.cpp
+++ b/intl/icu/source/i18n/persncal.cpp
@@ -213,7 +213,7 @@ void PersianCalendar::handleComputeFields(int32_t julianDay, UErrorCode &/*statu
int32_t year, month, dayOfMonth, dayOfYear;
int32_t daysSinceEpoch = julianDay - PERSIAN_EPOCH;
- year = 1 + ClockMath::floorDivide(33 * daysSinceEpoch + 3, 12053);
+ year = 1 + (int32_t)ClockMath::floorDivide(33 * (int64_t)daysSinceEpoch + 3, (int64_t)12053);
int32_t farvardin1 = 365 * (year - 1) + ClockMath::floorDivide(8 * year + 21, 33);
dayOfYear = (daysSinceEpoch - farvardin1); // 0-based
diff --git a/intl/icu/source/i18n/zonemeta.cpp b/intl/icu/source/i18n/zonemeta.cpp
index fdf333c37..dd3bf202f 100644
--- a/intl/icu/source/i18n/zonemeta.cpp
+++ b/intl/icu/source/i18n/zonemeta.cpp
@@ -683,7 +683,6 @@ ZoneMeta::createMetazoneMappings(const UnicodeString &tzid) {
mzMappings = new UVector(deleteOlsonToMetaMappingEntry, NULL, status);
if (U_FAILURE(status)) {
delete mzMappings;
- deleteOlsonToMetaMappingEntry(entry);
uprv_free(entry);
break;
}