diff --git a/intl/icu/source/common/utext.cpp b/intl/icu/source/common/utext.cpp index a89e7a62b..994b5dac5 100644 --- a/intl/icu/source/common/utext.cpp +++ b/intl/icu/source/common/utext.cpp @@ -847,11 +847,18 @@ U_CDECL_END //------------------------------------------------------------------------------ // Chunk size. -// Must be less than 85, because of byte mapping from UChar indexes to native indexes. -// Worst case is three native bytes to one UChar. (Supplemenaries are 4 native bytes -// to two UChars.) +// Must be less than 42 (256/6), because of byte mapping from UChar indexes to native indexes. +// Worst case there are six UTF-8 bytes per UChar. +// obsolete 6 byte form fd + 5 trails maps to fffd +// obsolete 5 byte form fc + 4 trails maps to fffd +// non-shortest 4 byte forms maps to fffd +// normal supplementaries map to a pair of utf-16, two utf8 bytes per utf-16 unit +// mapToUChars array size must allow for the worst case, 6. +// This could be brought down to 4, by treating fd and fc as pure illegal, +// rather than obsolete lead bytes. But that is not compatible with the utf-8 access macros. // enum { UTF8_TEXT_CHUNK_SIZE=32 }; +enum { SIZEOF_MAPTOUCHARS=UTF8_TEXT_CHUNK_SIZE*6+6 }; // // UTF8Buf Two of these structs will be set up in the UText's extra allocated space. @@ -889,7 +896,7 @@ struct UTF8Buf { // Requires two extra slots, // one for a supplementary starting in the last normal position, // and one for an entry for the buffer limit position. - uint8_t mapToUChars[UTF8_TEXT_CHUNK_SIZE*3+6]; // Map native offset from bufNativeStart to + uint8_t mapToUChars[SIZEOF_MAPTOUCHARS]; // Map native offset from bufNativeStart to // correspoding offset in filled part of buf. int32_t align; }; @@ -1032,6 +1039,7 @@ utf8TextAccess(UText *ut, int64_t index, UBool forward) { // Requested index is in this buffer. u8b = (UTF8Buf *)ut->p; // the current buffer mapIndex = ix - u8b->toUCharsMapStart; + U_ASSERT(mapIndex < (int32_t)SIZEOF_MAPTOUCHARS); ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx; return TRUE; @@ -1298,6 +1306,10 @@ fillReverse: // Can only do this if the incoming index is somewhere in the interior of the string. // If index is at the end, there is no character there to look at. if (ix != ut->b) { + // Note: this function will only move the index back if it is on a trail byte + // and there is a preceding lead byte and the sequence from the lead + // through this trail could be part of a valid UTF-8 sequence + // Otherwise the index remains unchanged. U8_SET_CP_START(s8, 0, ix); } @@ -1311,7 +1323,10 @@ fillReverse: UChar *buf = u8b->buf; uint8_t *mapToNative = u8b->mapToNative; uint8_t *mapToUChars = u8b->mapToUChars; - int32_t toUCharsMapStart = ix - (UTF8_TEXT_CHUNK_SIZE*3 + 1); + int32_t toUCharsMapStart = ix - SIZEOF_MAPTOUCHARS + 1; + // Note that toUCharsMapStart can be negative. Happens when the remaining + // text from current position to the beginning is less than the buffer size. + // + 1 because mapToUChars must have a slot at the end for the bufNativeLimit entry. int32_t destIx = UTF8_TEXT_CHUNK_SIZE+2; // Start in the overflow region // at end of buffer to leave room // for a surrogate pair at the @@ -1338,6 +1353,7 @@ fillReverse: if (c<0x80) { // Special case ASCII range for speed. buf[destIx] = (UChar)c; + U_ASSERT(toUCharsMapStart <= srcIx); mapToUChars[srcIx - toUCharsMapStart] = (uint8_t)destIx; mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart); } else { @@ -1367,6 +1383,7 @@ fillReverse: do { mapToUChars[sIx-- - toUCharsMapStart] = (uint8_t)destIx; } while (sIx >= srcIx); + U_ASSERT(toUCharsMapStart <= (srcIx+1)); // Set native indexing limit to be the current position. // We are processing a non-ascii, non-native-indexing char now; @@ -1541,6 +1558,7 @@ utf8TextMapIndexToUTF16(const UText *ut, int64_t index64) { U_ASSERT(index>=ut->chunkNativeStart+ut->nativeIndexingLimit); U_ASSERT(index<=ut->chunkNativeLimit); int32_t mapIndex = index - u8b->toUCharsMapStart; + U_ASSERT(mapIndex < (int32_t)SIZEOF_MAPTOUCHARS); int32_t offset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx; U_ASSERT(offset>=0 && offset<=ut->chunkLength); return offset; diff --git a/intl/icu/source/i18n/gregoimp.cpp b/intl/icu/source/i18n/gregoimp.cpp index 1b313602d..204c3ff81 100644 --- a/intl/icu/source/i18n/gregoimp.cpp +++ b/intl/icu/source/i18n/gregoimp.cpp @@ -27,6 +27,11 @@ int32_t ClockMath::floorDivide(int32_t numerator, int32_t denominator) { numerator / denominator : ((numerator + 1) / denominator) - 1; } +int64_t ClockMath::floorDivide(int64_t numerator, int64_t denominator) { + return (numerator >= 0) ? + numerator / denominator : ((numerator + 1) / denominator) - 1; +} + int32_t ClockMath::floorDivide(double numerator, int32_t denominator, int32_t& remainder) { double quotient; diff --git a/intl/icu/source/i18n/gregoimp.h b/intl/icu/source/i18n/gregoimp.h index 27303a6ac..f6c730aef 100644 --- a/intl/icu/source/i18n/gregoimp.h +++ b/intl/icu/source/i18n/gregoimp.h @@ -40,6 +40,17 @@ class ClockMath { */ static int32_t floorDivide(int32_t numerator, int32_t denominator); + /** + * Divide two integers, returning the floor of the quotient. + * Unlike the built-in division, this is mathematically + * well-behaved. E.g., -1/4 => 0 but + * floorDivide(-1,4) => -1. + * @param numerator the numerator + * @param denominator a divisor which must be != 0 + * @return the floor of the quotient + */ + static int64_t floorDivide(int64_t numerator, int64_t denominator); + /** * Divide two numbers, returning the floor of the quotient. * Unlike the built-in division, this is mathematically diff --git a/intl/icu/source/i18n/persncal.cpp b/intl/icu/source/i18n/persncal.cpp index 278273fb9..6f46dd9c7 100644 --- a/intl/icu/source/i18n/persncal.cpp +++ b/intl/icu/source/i18n/persncal.cpp @@ -213,7 +213,7 @@ void PersianCalendar::handleComputeFields(int32_t julianDay, UErrorCode &/*statu int32_t year, month, dayOfMonth, dayOfYear; int32_t daysSinceEpoch = julianDay - PERSIAN_EPOCH; - year = 1 + ClockMath::floorDivide(33 * daysSinceEpoch + 3, 12053); + year = 1 + (int32_t)ClockMath::floorDivide(33 * (int64_t)daysSinceEpoch + 3, (int64_t)12053); int32_t farvardin1 = 365 * (year - 1) + ClockMath::floorDivide(8 * year + 21, 33); dayOfYear = (daysSinceEpoch - farvardin1); // 0-based diff --git a/intl/icu/source/i18n/zonemeta.cpp b/intl/icu/source/i18n/zonemeta.cpp index fdf333c37..dd3bf202f 100644 --- a/intl/icu/source/i18n/zonemeta.cpp +++ b/intl/icu/source/i18n/zonemeta.cpp @@ -683,7 +683,6 @@ ZoneMeta::createMetazoneMappings(const UnicodeString &tzid) { mzMappings = new UVector(deleteOlsonToMetaMappingEntry, NULL, status); if (U_FAILURE(status)) { delete mzMappings; - deleteOlsonToMetaMappingEntry(entry); uprv_free(entry); break; }