Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
home:vizhestkov:xs114b:deps
icu
icu-CVE-2016-6293-2017-7867-2017-7868-2017-1495...
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File icu-CVE-2016-6293-2017-7867-2017-7868-2017-14952-2017-15422-2017-17484.patch of Package icu
diff -Nura icu/source/common/cmemory.h icu_new/source/common/cmemory.h --- icu/source/common/cmemory.h 2013-10-05 04:49:16.000000000 +0800 +++ icu_new/source/common/cmemory.h 2018-05-04 18:08:23.333412933 +0800 @@ -59,6 +59,14 @@ #endif /* U_DEBUG */ +/** + * \def UPRV_LENGTHOF + * Convenience macro to determine the length of a fixed array at compile-time. + * @param array A fixed length array + * @return The length of the array, in elements + * @internal + */ +#define UPRV_LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) #define uprv_memset(buffer, mark, size) U_STANDARD_CPP_NAMESPACE memset(buffer, mark, size) #define uprv_memcmp(buffer1, buffer2, size) U_STANDARD_CPP_NAMESPACE memcmp(buffer1, buffer2,size) diff -Nura icu/source/common/ucnv_err.c icu_new/source/common/ucnv_err.c --- icu/source/common/ucnv_err.c 2013-10-05 04:49:16.000000000 +0800 +++ icu_new/source/common/ucnv_err.c 2018-05-04 18:08:23.333412933 +0800 @@ -50,6 +50,76 @@ #define UCNV_PRV_ESCAPE_CSS2 'S' #define UCNV_PRV_STOP_ON_ILLEGAL 'i' +/* + * IS_DEFAULT_IGNORABLE_CODE_POINT + * This is to check if a code point has the default ignorable unicode property. + * As such, this list needs to be updated if the ignorable code point list ever + * changes. + * To avoid dependency on other code, this list is hard coded here. + * When an ignorable code point is found and is unmappable, the default callbacks + * will ignore them. + * (c == 0x00AD) || \ (Latin-1 Punctuation and Symbols) + * (c == 0x034F) || \ (Combining Diacritical Marks Grapheme Joiner) + * (c == 0x061C) || \ (Arabic Format Character) + * (c == 0x115F) || \ (Hangul Jamo Old Initial Consonants) + * (c == 0x1160) || \ (Hangul Jamo Medial Vowels) + * (0x17B4 <= c && c <= 0x17B5) || \ (Khmer Inherent Vowels) + * (0x180B <= c && c <= 0x180E) || \ (Mongolian Format Controls) + * (0x200B <= c && c <= 0x200F) || \ (General Punctuation Format Characters) + * (0x202A <= c && c <= 0x202E) || \ (General Punctuation Format Characters) + * (c == 0x2060) || \ (General Punctuation Format Characters) + * (0x2066 <= c && c <= 0x2069) || \ (General Punctuation Format Characters) + * (0x2061 <= c && c <= 0x2064) || \ (General Punctuation Invisible Operators) + * (0x206A <= c && c <= 0x206F) || \ (General Punctuation Deprecated) + * (c == 0x3164) || \ (Hangul Compatibility Jamo) + * (0x0FE00 <= c && c <= 0x0FE0F) || \ (Variation Selectors) + * (c == 0x0FEFF) || \ (Arabic Presentation Forms B) + * (c == 0x0FFA0) || \ (Halfwidth and Fullwidth Forms) + * (0x01BCA0 <= c && c <= 0x01BCA3) || \ (Shorthand Format Controls) + * (0x01D173 <= c && c <= 0x01D17A) || \ (Musical Symbols) + * (c == 0x0E0001) || \ (Tag Identifiers) + * (0x0E0020 <= c && c <= 0x0E007F) || \ (Tag Components) + * (0x0E0100 <= c && c <= 0x0E01EF) || \ (Variation Selectors Supplement) + * (c == 0x2065) || \ (Unassigned) + * (0x0FFF0 <= c && c <= 0x0FFF8) || \ (Unassigned) + * (c == 0x0E0000) || \ (Unassigned) + * (0x0E0002 <= c && c <= 0x0E001F) || \ (Unassigned) + * (0x0E0080 <= c && c <= 0x0E00FF) || \ (Unassigned) + * (0x0E01F0 <= c && c <= 0x0E0FFF) \ (Unassigned) + */ + +#define IS_DEFAULT_IGNORABLE_CODE_POINT(c) (\ + (c == 0x00AD) || \ + (c == 0x034F) || \ + (c == 0x061C) || \ + (c == 0x115F) || \ + (c == 0x1160) || \ + (0x17B4 <= c && c <= 0x17B5) || \ + (0x180B <= c && c <= 0x180E) || \ + (0x200B <= c && c <= 0x200F) || \ + (0x202A <= c && c <= 0x202E) || \ + (c == 0x2060) || \ + (0x2066 <= c && c <= 0x2069) || \ + (0x2061 <= c && c <= 0x2064) || \ + (0x206A <= c && c <= 0x206F) || \ + (c == 0x3164) || \ + (0x0FE00 <= c && c <= 0x0FE0F) || \ + (c == 0x0FEFF) || \ + (c == 0x0FFA0) || \ + (0x01BCA0 <= c && c <= 0x01BCA3) || \ + (0x01D173 <= c && c <= 0x01D17A) || \ + (c == 0x0E0001) || \ + (0x0E0020 <= c && c <= 0x0E007F) || \ + (0x0E0100 <= c && c <= 0x0E01EF) || \ + (c == 0x2065) || \ + (0x0FFF0 <= c && c <= 0x0FFF8) || \ + (c == 0x0E0000) || \ + (0x0E0002 <= c && c <= 0x0E001F) || \ + (0x0E0080 <= c && c <= 0x0E00FF) || \ + (0x0E01F0 <= c && c <= 0x0E0FFF) \ + ) + + /*Function Pointer STOPS at the ILLEGAL_SEQUENCE */ U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP ( @@ -61,6 +131,13 @@ UConverterCallbackReason reason, UErrorCode * err) { + if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint)) + { + /* + * Skip if the codepoint has unicode property of default ignorable. + */ + *err = U_ZERO_ERROR; + } /* the caller must have set the error code accordingly */ return; } @@ -92,7 +169,14 @@ { if (reason <= UCNV_IRREGULAR) { - if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED)) + if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint)) + { + /* + * Skip if the codepoint has unicode property of default ignorable. + */ + *err = U_ZERO_ERROR; + } + else if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED)) { *err = U_ZERO_ERROR; } @@ -113,7 +197,14 @@ { if (reason <= UCNV_IRREGULAR) { - if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED)) + if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint)) + { + /* + * Skip if the codepoint has unicode property of default ignorable. + */ + *err = U_ZERO_ERROR; + } + else if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED)) { *err = U_ZERO_ERROR; ucnv_cbFromUWriteSub(fromArgs, 0, err); @@ -155,6 +246,14 @@ { return; } + else if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint)) + { + /* + * Skip if the codepoint has unicode property of default ignorable. + */ + *err = U_ZERO_ERROR; + return; + } ucnv_setFromUCallBack (fromArgs->converter, (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE, diff -Nura icu/source/common/ucnv_u8.c icu_new/source/common/ucnv_u8.c --- icu/source/common/ucnv_u8.c 2013-10-05 04:49:18.000000000 +0800 +++ icu_new/source/common/ucnv_u8.c 2018-05-04 18:08:23.333412933 +0800 @@ -26,9 +26,11 @@ #include "unicode/utf.h" #include "unicode/utf8.h" #include "unicode/utf16.h" +#include "uassert.h" #include "ucnv_bld.h" #include "ucnv_cnv.h" #include "cmemory.h" +#include "ustr_imp.h" /* Prototypes --------------------------------------------------------------- */ @@ -748,7 +750,7 @@ utf8_offsets[7]={ 0, 0, 0x3080, 0xE2080, 0x3C82080 }; /* "Convert" UTF-8 to UTF-8: Validate and copy. Modified from ucnv_DBCSFromUTF8(). */ -static void +static void U_CALLCONV ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs, UConverterToUnicodeArgs *pToUArgs, UErrorCode *pErrorCode) { @@ -793,39 +795,37 @@ *pErrorCode=U_USING_DEFAULT_WARNING; return; } else { - /* - * Use a single counter for source and target, counting the minimum of - * the source length and the target capacity. - * As a result, the source length is checked only once per multi-byte - * character instead of twice. - * - * Make sure that the last byte sequence is complete, or else - * stop just before it. - * (The longest legal byte sequence has 3 trail bytes.) - * Count oldToULength (number of source bytes from a previous buffer) - * into the source length but reduce the source index by toULimit - * while going back over trail bytes in order to not go back into - * the bytes that will be read for finishing a partial - * sequence from the previous buffer. - * Let the standard converter handle edge cases. - */ - int32_t i; - + // Use a single counter for source and target, counting the minimum of + // the source length and the target capacity. + // Let the standard converter handle edge cases. + const uint8_t *limit=sourceLimit; if(count>targetCapacity) { + limit-=(count-targetCapacity); count=targetCapacity; } - i=0; - while(i<3 && i<(count-toULimit)) { - b=source[count-oldToULength-i-1]; - if(U8_IS_TRAIL(b)) { - ++i; - } else { - if(i<U8_COUNT_TRAIL_BYTES(b)) { - /* stop converting before the lead byte if there are not enough trail bytes for it */ - count-=i+1; + // The conversion loop checks count>0 only once per 1/2/3-byte character. + // If the buffer ends with a truncated 2- or 3-byte sequence, + // then we reduce the count to stop before that, + // and collect the remaining bytes after the conversion loop. + { + // Do not go back into the bytes that will be read for finishing a partial + // sequence from the previous buffer. + int32_t length=count-toULimit; + if(length>0) { + uint8_t b1=*(limit-1); + if(U8_IS_SINGLE(b1)) { + // common ASCII character + } else if(U8_IS_TRAIL(b1) && length>=2) { + uint8_t b2=*(limit-2); + if(0xe0<=b2 && b2<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b2, b1)) { + // truncated 3-byte sequence + count-=2; + } + } else if(0xc2<=b1 && b1<0xf0) { + // truncated 2- or 3-byte sequence + --count; } - break; } } } @@ -940,7 +940,7 @@ } /* copy the legal byte sequence to the target */ - { + if(count>=toULength) { int8_t i; for(i=0; i<oldToULength; ++i) { @@ -951,9 +951,18 @@ *target++=*source++; } count-=toULength; + } else { + // A supplementary character that does not fit into the target. + // Let the standard converter handle this. + source-=(toULength-oldToULength); + pToUArgs->source=(char *)source; + pFromUArgs->target=(char *)target; + *pErrorCode=U_USING_DEFAULT_WARNING; + return; } } } + U_ASSERT(count>=0); if(U_SUCCESS(*pErrorCode) && source<sourceLimit) { if(target==(const uint8_t *)pFromUArgs->targetLimit) { diff -Nura icu/source/common/uloc.cpp icu_new/source/common/uloc.cpp --- icu/source/common/uloc.cpp 2013-10-05 04:49:26.000000000 +0800 +++ icu_new/source/common/uloc.cpp 2018-05-04 18:08:23.334412945 +0800 @@ -46,6 +46,8 @@ #include <stdio.h> /* for sprintf */ +using namespace icu; + /* ### Declarations **************************************************/ /* Locale stuff from locid.cpp */ @@ -2239,7 +2241,7 @@ typedef struct { float q; int32_t dummy; /* to avoid uninitialized memory copy from qsort */ - char *locale; + char locale[ULOC_FULLNAME_CAPACITY+1]; } _acceptLangItem; static int32_t U_CALLCONV @@ -2281,9 +2283,7 @@ UEnumeration* availableLocales, UErrorCode *status) { - _acceptLangItem *j; - _acceptLangItem smallBuffer[30]; - char **strs; + MaybeStackArray<_acceptLangItem, 4> items; // Struct for collecting items. char tmp[ULOC_FULLNAME_CAPACITY +1]; int32_t n = 0; const char *itemEnd; @@ -2293,11 +2293,7 @@ int32_t res; int32_t i; int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage); - int32_t jSize; - char *tempstr; /* Use for null pointer check */ - - j = smallBuffer; - jSize = sizeof(smallBuffer)/sizeof(smallBuffer[0]); + if(U_FAILURE(*status)) { return -1; } @@ -2325,27 +2321,29 @@ while(isspace(*t)) { t++; } - j[n].q = (float)_uloc_strtod(t,NULL); + items[n].q = (float)_uloc_strtod(t,NULL); } else { /* no semicolon - it's 1.0 */ - j[n].q = 1.0f; + items[n].q = 1.0f; paramEnd = itemEnd; } - j[n].dummy=0; + items[n].dummy=0; /* eat spaces prior to semi */ for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--) ; - /* Check for null pointer from uprv_strndup */ - tempstr = uprv_strndup(s,(int32_t)((t+1)-s)); - if (tempstr == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - return -1; - } - j[n].locale = tempstr; - uloc_canonicalize(j[n].locale,tmp,sizeof(tmp)/sizeof(tmp[0]),status); - if(strcmp(j[n].locale,tmp)) { - uprv_free(j[n].locale); - j[n].locale=uprv_strdup(tmp); + int32_t slen = ((t+1)-s); + if(slen > ULOC_FULLNAME_CAPACITY) { + *status = U_BUFFER_OVERFLOW_ERROR; + return -1; // too big + } + uprv_strncpy(items[n].locale, s, slen); + items[n].locale[slen]=0; // terminate + int32_t clen = uloc_canonicalize(items[n].locale, tmp, UPRV_LENGTHOF(tmp)-1, status); + if(U_FAILURE(*status)) return -1; + if((clen!=slen) || (uprv_strncmp(items[n].locale, tmp, slen))) { + // canonicalization had an effect- copy back + uprv_strncpy(items[n].locale, tmp, clen); + items[n].locale[clen] = 0; // terminate } #if defined(ULOC_DEBUG) /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/ @@ -2355,42 +2353,22 @@ while(*s==',') { /* eat duplicate commas */ s++; } - if(n>=jSize) { - if(j==smallBuffer) { /* overflowed the small buffer. */ - j = static_cast<_acceptLangItem *>(uprv_malloc(sizeof(j[0])*(jSize*2))); - if(j!=NULL) { - uprv_memcpy(j,smallBuffer,sizeof(j[0])*jSize); - } -#if defined(ULOC_DEBUG) - fprintf(stderr,"malloced at size %d\n", jSize); -#endif - } else { - j = static_cast<_acceptLangItem *>(uprv_realloc(j, sizeof(j[0])*jSize*2)); -#if defined(ULOC_DEBUG) - fprintf(stderr,"re-alloced at size %d\n", jSize); -#endif - } - jSize *= 2; - if(j==NULL) { + if(n>=items.getCapacity()) { // If we need more items + if(NULL == items.resize(items.getCapacity()*2, items.getCapacity())) { *status = U_MEMORY_ALLOCATION_ERROR; return -1; } - } - } - uprv_sortArray(j, n, sizeof(j[0]), uloc_acceptLanguageCompare, NULL, TRUE, status); - if(U_FAILURE(*status)) { - if(j != smallBuffer) { #if defined(ULOC_DEBUG) - fprintf(stderr,"freeing j %p\n", j); + fprintf(stderr,"malloced at size %d\n", items.getCapacity()); #endif - uprv_free(j); } + } + uprv_sortArray(items.getAlias(), n, sizeof(items[0]), uloc_acceptLanguageCompare, NULL, TRUE, status); + if(U_FAILURE(*status)) { return -1; } - strs = static_cast<char **>(uprv_malloc((size_t)(sizeof(strs[0])*n))); - /* Check for null pointer */ - if (strs == NULL) { - uprv_free(j); /* Free to avoid memory leak */ + LocalMemory<const char*> strs(NULL); + if (strs.allocateInsteadAndReset(n) == NULL) { *status = U_MEMORY_ALLOCATION_ERROR; return -1; } @@ -2398,20 +2376,10 @@ #if defined(ULOC_DEBUG) /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/ #endif - strs[i]=j[i].locale; + strs[i]=items[i].locale; } res = uloc_acceptLanguage(result, resultAvailable, outResult, - (const char**)strs, n, availableLocales, status); - for(i=0;i<n;i++) { - uprv_free(strs[i]); - } - uprv_free(strs); - if(j != smallBuffer) { -#if defined(ULOC_DEBUG) - fprintf(stderr,"freeing j %p\n", j); -#endif - uprv_free(j); - } + strs.getAlias(), n, availableLocales, status); return res; } diff -Nura icu/source/common/unicode/utf8.h icu_new/source/common/unicode/utf8.h --- icu/source/common/unicode/utf8.h 2013-10-05 04:49:08.000000000 +0800 +++ icu_new/source/common/unicode/utf8.h 2018-05-04 18:08:23.334412945 +0800 @@ -106,6 +106,40 @@ #define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1) /** + * Internal bit vector for 3-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD3_AND_T1. + * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence. + * Lead byte E0..EF bits 3..0 are used as byte index, + * first trail byte bits 7..5 are used as bit index into that byte. + * @see U8_IS_VALID_LEAD3_AND_T1 + * @internal + */ +#define U8_LEAD3_T1_BITS "\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30" + +/** + * Internal 3-byte UTF-8 validity check. + * Non-zero if lead byte E0..EF and first trail byte 00..FF start a valid sequence. + * @internal + */ +#define U8_IS_VALID_LEAD3_AND_T1(lead, t1) (U8_LEAD3_T1_BITS[(lead)&0xf]&(1<<((uint8_t)(t1)>>5))) + +/** + * Internal bit vector for 4-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD4_AND_T1. + * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence. + * First trail byte bits 7..4 are used as byte index, + * lead byte F0..F4 bits 2..0 are used as bit index into that byte. + * @see U8_IS_VALID_LEAD4_AND_T1 + * @internal + */ +#define U8_LEAD4_T1_BITS "\x00\x00\x00\x00\x00\x00\x00\x00\x1E\x0F\x0F\x0F\x00\x00\x00\x00" + +/** + * Internal 4-byte UTF-8 validity check. + * Non-zero if lead byte F0..F4 and first trail byte 00..FF start a valid sequence. + * @internal + */ +#define U8_IS_VALID_LEAD4_AND_T1(lead, t1) (U8_LEAD4_T1_BITS[(uint8_t)(t1)>>4]&(1<<((lead)&7))) + +/** * Function for handling "next code point" with error-checking. * * This is internal since it is not meant to be called directly by external clients; diff -Nura icu/source/common/utext.cpp icu_new/source/common/utext.cpp --- icu/source/common/utext.cpp 2013-10-05 04:49:22.000000000 +0800 +++ icu_new/source/common/utext.cpp 2018-05-04 18:08:23.335412957 +0800 @@ -831,9 +831,15 @@ //------------------------------------------------------------------------------ // Chunk size. -// Must be less than 85, because of byte mapping from UChar indexes to native indexes. -// Worst case is three native bytes to one UChar. (Supplemenaries are 4 native bytes -// to two UChars.) +// Must be less than 42 (256/6), because of byte mapping from UChar indexes to native indexes. +// Worst case there are six UTF-8 bytes per UChar. +// obsolete 6 byte form fd + 5 trails maps to fffd +// obsolete 5 byte form fc + 4 trails maps to fffd +// non-shortest 4 byte forms maps to fffd +// normal supplementaries map to a pair of utf-16, two utf8 bytes per utf-16 unit +// mapToUChars array size must allow for the worst case, 6. +// This could be brought down to 4, by treating fd and fc as pure illegal, +// rather than obsolete lead bytes. But that is not compatible with the utf-8 access macros. // enum { UTF8_TEXT_CHUNK_SIZE=32 }; @@ -873,7 +879,7 @@ // Requires two extra slots, // one for a supplementary starting in the last normal position, // and one for an entry for the buffer limit position. - uint8_t mapToUChars[UTF8_TEXT_CHUNK_SIZE*3+6]; // Map native offset from bufNativeStart to + uint8_t mapToUChars[UTF8_TEXT_CHUNK_SIZE*6+6]; // Map native offset from bufNativeStart to // correspoding offset in filled part of buf. int32_t align; }; @@ -1016,6 +1022,7 @@ // Requested index is in this buffer. u8b = (UTF8Buf *)ut->p; // the current buffer mapIndex = ix - u8b->toUCharsMapStart; + U_ASSERT(mapIndex < (int32_t)sizeof(UTF8Buf::mapToUChars)); ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx; return TRUE; @@ -1282,6 +1289,10 @@ // Can only do this if the incoming index is somewhere in the interior of the string. // If index is at the end, there is no character there to look at. if (ix != ut->b) { + // Note: this function will only move the index back if it is on a trail byte + // and there is a preceding lead byte and the sequence from the lead + // through this trail could be part of a valid UTF-8 sequence + // Otherwise the index remains unchanged. U8_SET_CP_START(s8, 0, ix); } @@ -1295,7 +1306,10 @@ UChar *buf = u8b->buf; uint8_t *mapToNative = u8b->mapToNative; uint8_t *mapToUChars = u8b->mapToUChars; - int32_t toUCharsMapStart = ix - (UTF8_TEXT_CHUNK_SIZE*3 + 1); + int32_t toUCharsMapStart = ix - sizeof(UTF8Buf::mapToUChars) + 1; + // Note that toUCharsMapStart can be negative. Happens when the remaining + // text from current position to the beginning is less than the buffer size. + // + 1 because mapToUChars must have a slot at the end for the bufNativeLimit entry. int32_t destIx = UTF8_TEXT_CHUNK_SIZE+2; // Start in the overflow region // at end of buffer to leave room // for a surrogate pair at the @@ -1322,6 +1336,7 @@ if (c<0x80) { // Special case ASCII range for speed. buf[destIx] = (UChar)c; + U_ASSERT(toUCharsMapStart <= srcIx); mapToUChars[srcIx - toUCharsMapStart] = (uint8_t)destIx; mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart); } else { @@ -1351,6 +1366,7 @@ do { mapToUChars[sIx-- - toUCharsMapStart] = (uint8_t)destIx; } while (sIx >= srcIx); + U_ASSERT(toUCharsMapStart <= (srcIx+1)); // Set native indexing limit to be the current position. // We are processing a non-ascii, non-native-indexing char now; @@ -1525,6 +1541,7 @@ U_ASSERT(index>=ut->chunkNativeStart+ut->nativeIndexingLimit); U_ASSERT(index<=ut->chunkNativeLimit); int32_t mapIndex = index - u8b->toUCharsMapStart; + U_ASSERT(mapIndex < (int32_t)sizeof(UTF8Buf::mapToUChars)); int32_t offset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx; U_ASSERT(offset>=0 && offset<=ut->chunkLength); return offset; diff -Nura icu/source/i18n/gregoimp.cpp icu_new/source/i18n/gregoimp.cpp --- icu/source/i18n/gregoimp.cpp 2013-10-05 04:48:52.000000000 +0800 +++ icu_new/source/i18n/gregoimp.cpp 2018-05-04 18:08:23.335412957 +0800 @@ -29,6 +29,11 @@ numerator / denominator : ((numerator + 1) / denominator) - 1; } +int64_t ClockMath::floorDivide(int64_t numerator, int64_t denominator) { + return (numerator >= 0) ? + numerator / denominator : ((numerator + 1) / denominator) - 1; +} + int32_t ClockMath::floorDivide(double numerator, int32_t denominator, int32_t& remainder) { double quotient; diff -Nura icu/source/i18n/gregoimp.h icu_new/source/i18n/gregoimp.h --- icu/source/i18n/gregoimp.h 2013-10-05 04:48:56.000000000 +0800 +++ icu_new/source/i18n/gregoimp.h 2018-05-04 18:08:23.335412957 +0800 @@ -39,6 +39,17 @@ static int32_t floorDivide(int32_t numerator, int32_t denominator); /** + * Divide two integers, returning the floor of the quotient. + * Unlike the built-in division, this is mathematically + * well-behaved. E.g., <code>-1/4</code> => 0 but + * <code>floorDivide(-1,4)</code> => -1. + * @param numerator the numerator + * @param denominator a divisor which must be != 0 + * @return the floor of the quotient + */ + static int64_t floorDivide(int64_t numerator, int64_t denominator); + + /** * Divide two numbers, returning the floor of the quotient. * Unlike the built-in division, this is mathematically * well-behaved. E.g., <code>-1/4</code> => 0 but diff -Nura icu/source/i18n/persncal.cpp icu_new/source/i18n/persncal.cpp --- icu/source/i18n/persncal.cpp 2013-10-05 04:48:52.000000000 +0800 +++ icu_new/source/i18n/persncal.cpp 2018-05-04 18:08:23.335412957 +0800 @@ -211,7 +211,7 @@ int32_t year, month, dayOfMonth, dayOfYear; int32_t daysSinceEpoch = julianDay - PERSIAN_EPOCH; - year = 1 + ClockMath::floorDivide(33 * daysSinceEpoch + 3, 12053); + year = 1 + (int32_t)ClockMath::floorDivide(33 * (int64_t)daysSinceEpoch + 3, (int64_t)12053); int32_t farvardin1 = 365 * (year - 1) + ClockMath::floorDivide(8 * year + 21, 33); dayOfYear = (daysSinceEpoch - farvardin1); // 0-based diff -Nura icu/source/i18n/zonemeta.cpp icu_new/source/i18n/zonemeta.cpp --- icu/source/i18n/zonemeta.cpp 2013-10-05 04:48:44.000000000 +0800 +++ icu_new/source/i18n/zonemeta.cpp 2018-05-04 18:08:23.335412957 +0800 @@ -685,7 +685,6 @@ mzMappings = new UVector(deleteOlsonToMetaMappingEntry, NULL, status); if (U_FAILURE(status)) { delete mzMappings; - deleteOlsonToMetaMappingEntry(entry); uprv_free(entry); break; } diff -Nura icu/source/test/cintltst/cloctst.c icu_new/source/test/cintltst/cloctst.c --- icu/source/test/cintltst/cloctst.c 2013-10-05 04:47:36.000000000 +0800 +++ icu_new/source/test/cintltst/cloctst.c 2018-05-04 18:08:23.336412969 +0800 @@ -2687,16 +2687,20 @@ const char *icuSet; /**< ? */ const char *expect; /**< The expected locale result */ UAcceptResult res; /**< The expected error code */ + UErrorCode expectStatus; /**< expected status */ } tests[] = { - /*0*/{ 0, NULL, "mt_MT", ULOC_ACCEPT_VALID }, - /*1*/{ 1, NULL, "en", ULOC_ACCEPT_VALID }, - /*2*/{ 2, NULL, "en", ULOC_ACCEPT_FALLBACK }, - /*3*/{ 3, NULL, "", ULOC_ACCEPT_FAILED }, - /*4*/{ 4, NULL, "es", ULOC_ACCEPT_VALID }, - - /*5*/{ 5, NULL, "en", ULOC_ACCEPT_VALID }, /* XF */ - /*6*/{ 6, NULL, "ja", ULOC_ACCEPT_FALLBACK }, /* XF */ - /*7*/{ 7, NULL, "zh", ULOC_ACCEPT_FALLBACK }, /* XF */ + /*0*/{ 0, NULL, "mt_MT", ULOC_ACCEPT_VALID, U_ZERO_ERROR}, + /*1*/{ 1, NULL, "en", ULOC_ACCEPT_VALID, U_ZERO_ERROR}, + /*2*/{ 2, NULL, "en", ULOC_ACCEPT_FALLBACK, U_ZERO_ERROR}, + /*3*/{ 3, NULL, "", ULOC_ACCEPT_FAILED, U_ZERO_ERROR}, + /*4*/{ 4, NULL, "es", ULOC_ACCEPT_VALID, U_ZERO_ERROR}, + /*5*/{ 5, NULL, "en", ULOC_ACCEPT_VALID, U_ZERO_ERROR}, /* XF */ + /*6*/{ 6, NULL, "ja", ULOC_ACCEPT_FALLBACK, U_ZERO_ERROR}, /* XF */ + /*7*/{ 7, NULL, "zh", ULOC_ACCEPT_FALLBACK, U_ZERO_ERROR}, /* XF */ + /*8*/{ 8, NULL, "", ULOC_ACCEPT_FAILED, U_ZERO_ERROR }, /* */ + /*9*/{ 9, NULL, "", ULOC_ACCEPT_FAILED, U_ZERO_ERROR }, /* */ + /*10*/{10, NULL, "", ULOC_ACCEPT_FAILED, U_BUFFER_OVERFLOW_ERROR }, /* */ + /*11*/{11, NULL, "", ULOC_ACCEPT_FAILED, U_BUFFER_OVERFLOW_ERROR }, /* */ }; const int32_t numTests = sizeof(tests)/sizeof(tests[0]); static const char *http[] = { @@ -2711,11 +2715,26 @@ "xxx-yyy;q=.01, xxx-yyy;q=.01, xxx-yyy;q=.01, xxx-yyy;q=.01, xxx-yyy;q=.01, " "xxx-yyy;q=.01, xxx-yyy;q=.01, xxx-yyy;q=.01, xxx-yyy;q=.01, xxx-yyy;q=.01, " "xxx-yyy;q=.01, xxx-yyy;q=.01, xxx-yyy;q=.01, xx-yy;q=.1, " - "es", - + "es", /*5*/ "zh-xx;q=0.9, en;q=0.6", /*6*/ "ja-JA", /*7*/ "zh-xx;q=0.9", + /*08*/ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", // 156 + /*09*/ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB", // 157 (this hits U_STRING_NOT_TERMINATED_WARNING ) + /*10*/ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABC", // 158 + /*11*/ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", // 163 bytes }; for(i=0;i<numTests;i++) { @@ -2730,6 +2749,10 @@ (void)rc; /* Suppress set but not used warning. */ uenum_close(available); log_verbose(" got %s, %s [%s]\n", tmp[0]?tmp:"(EMPTY)", acceptResult(outResult), u_errorName(status)); + if(status != tests[i].expectStatus) { + log_err_status(status, "FAIL: expected status %s but got %s\n", u_errorName(tests[i].expectStatus), u_errorName(status)); + } else if(U_SUCCESS(tests[i].expectStatus)) { + /* don't check content if expected failure */ if(outResult != tests[i].res) { log_err_status(status, "FAIL: #%d: expected outResult of %s but got %s\n", i, acceptResult( tests[i].res), @@ -2741,6 +2764,7 @@ log_err_status(status, "FAIL: #%d: expected %s but got %s\n", i, tests[i].expect, tmp); log_info("test #%d: http[%s], ICU[%s], expect %s, %s\n", i, http[tests[i].httpSet], tests[i].icuSet, tests[i].expect, acceptResult(tests[i].res)); + } } } } diff -Nura icu/source/test/intltest/calregts.cpp icu_new/source/test/intltest/calregts.cpp --- icu/source/test/intltest/calregts.cpp 2013-10-05 04:47:58.000000000 +0800 +++ icu_new/source/test/intltest/calregts.cpp 2018-05-04 18:08:23.337412982 +0800 @@ -10,6 +10,7 @@ #include "calregts.h" +#include "unicode/calendar.h" #include "unicode/gregocal.h" #include "unicode/simpletz.h" #include "unicode/smpdtfmt.h" @@ -88,6 +89,7 @@ CASE(48,TestT8596); CASE(49,Test9019); CASE(50,TestT9452); + CASE(52,TestPersianCalOverflow); default: name = ""; break; } } @@ -2944,4 +2946,34 @@ } } +/** + * @bug ticket 13454 + */ +void CalendarRegressionTest::TestPersianCalOverflow(void) { + const char* localeID = "bs_Cyrl@calendar=persian"; + UErrorCode status = U_ZERO_ERROR; + Calendar* cal = Calendar::createInstance(Locale(localeID), status); + if(U_FAILURE(status)) { + dataerrln("FAIL: Calendar::createInstance for localeID %s: %s", localeID, u_errorName(status)); + } else { + int32_t maxMonth = cal->getMaximum(UCAL_MONTH); + int32_t maxDayOfMonth = cal->getMaximum(UCAL_DATE); + int32_t jd, month, dayOfMonth; + for (jd = 67023580; jd <= 67023584; jd++) { // year 178171, int32_t overflow if jd >= 67023582 + status = U_ZERO_ERROR; + cal->clear(); + cal->set(UCAL_JULIAN_DAY, jd); + month = cal->get(UCAL_MONTH, status); + dayOfMonth = cal->get(UCAL_DATE, status); + if ( U_FAILURE(status) ) { + errln("FAIL: Calendar->get MONTH/DATE for localeID %s, julianDay %d, status %s\n", localeID, jd, u_errorName(status)); + } else if (month > maxMonth || dayOfMonth > maxDayOfMonth) { + errln("FAIL: localeID %s, julianDay %d; maxMonth %d, got month %d; maxDayOfMonth %d, got dayOfMonth %d\n", + localeID, jd, maxMonth, month, maxDayOfMonth, dayOfMonth); + } + } + delete cal; + } +} + #endif /* #if !UCONFIG_NO_FORMATTING */ diff -Nura icu/source/test/intltest/calregts.h icu_new/source/test/intltest/calregts.h --- icu/source/test/intltest/calregts.h 2013-10-05 04:47:56.000000000 +0800 +++ icu_new/source/test/intltest/calregts.h 2018-05-04 18:08:23.337412982 +0800 @@ -75,6 +75,7 @@ void TestT8596(void); void Test9019(void); void TestT9452(void); + void TestPersianCalOverflow(void); void printdate(GregorianCalendar *cal, const char *string); void dowTest(UBool lenient) ; diff -Nura icu/source/test/intltest/convtest.cpp icu_new/source/test/intltest/convtest.cpp --- icu/source/test/intltest/convtest.cpp 2013-10-05 04:47:56.000000000 +0800 +++ icu_new/source/test/intltest/convtest.cpp 2018-05-07 15:54:38.740421990 +0800 @@ -6,7 +6,7 @@ * ******************************************************************************* * file name: convtest.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -35,13 +35,13 @@ #include "unicode/uniset.h" #include "unicode/ustring.h" #include "unicode/ures.h" +#include "unicode/utf16.h" #include "convtest.h" +#include "cmemory.h" #include "unicode/tstdtmod.h" #include <string.h> #include <stdlib.h> -#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) - enum { // characters used in test data for callbacks SUB_CB='?', @@ -66,19 +66,16 @@ void ConversionTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) { if (exec) logln("TestSuite ConversionTest: "); - switch (index) { + TESTCASE_AUTO_BEGIN; #if !UCONFIG_NO_FILE_IO - case 0: name="TestToUnicode"; if (exec) TestToUnicode(); break; - case 1: name="TestFromUnicode"; if (exec) TestFromUnicode(); break; - case 2: name="TestGetUnicodeSet"; if (exec) TestGetUnicodeSet(); break; -#else - case 0: - case 1: - case 2: name="skip"; break; + TESTCASE_AUTO(TestToUnicode); + TESTCASE_AUTO(TestFromUnicode); + TESTCASE_AUTO(TestGetUnicodeSet); #endif - case 3: name="TestGetUnicodeSet2"; if (exec) TestGetUnicodeSet2(); break; - default: name=""; break; //needed to end loop - } + TESTCASE_AUTO(TestGetUnicodeSet2); + TESTCASE_AUTO(TestDefaultIgnorableCallback); + TESTCASE_AUTO(TestUTF8ToUTF8Overflow); + TESTCASE_AUTO_END; } // test data interface ----------------------------------------------------- *** @@ -289,7 +286,7 @@ // read a substitution string, separated by an equal sign p=s.getBuffer()+index+1; length=s.length()-(index+1); - if(length<0 || length>=LENGTHOF(cc.subString)) { + if(length<0 || length>=UPRV_LENGTHOF(cc.subString)) { errorCode=U_ILLEGAL_ARGUMENT_ERROR; } else { u_memcpy(cc.subString, p, length); @@ -443,7 +440,7 @@ if(!diffSet.isEmpty()) { diffSet.toPattern(s, TRUE); if(s.length()>100) { - s.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis)); + s.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis)); } errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - conversion/getUnicodeSet test case %d", charset, i); @@ -455,7 +452,7 @@ if(!diffSet.isEmpty()) { diffSet.toPattern(s, TRUE); if(s.length()>100) { - s.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis)); + s.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis)); } errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - conversion/getUnicodeSet test case %d", charset, i); @@ -553,7 +550,7 @@ LocalUConverterPointer cnv; char buffer[1024]; int32_t i; - for(i=0; i<LENGTHOF(cnvNames); ++i) { + for(i=0; i<UPRV_LENGTHOF(cnvNames); ++i) { UErrorCode errorCode=U_ZERO_ERROR; cnv.adoptInstead(cnv_open(cnvNames[i], errorCode)); if(U_FAILURE(errorCode)) { @@ -623,7 +620,7 @@ if(!diffSet.isEmpty()) { diffSet.toPattern(out, TRUE); if(out.length()>100) { - out.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis)); + out.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis)); } errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - which set: %d", cnvNames[i], which); @@ -635,7 +632,7 @@ if(!diffSet.isEmpty()) { diffSet.toPattern(out, TRUE); if(out.length()>100) { - out.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis)); + out.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis)); } errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - which set: %d", cnvNames[i], which); @@ -648,6 +645,151 @@ delete [] s0; } +// Test all codepoints which has the default ignorable Unicode property are ignored if they have no mapping +// If there are any failures, the hard coded list (IS_DEFAULT_IGNORABLE_CODE_POINT) in ucnv_err.c should be updated +void +ConversionTest::TestDefaultIgnorableCallback() { + UErrorCode status = U_ZERO_ERROR; + const char *cnv_name = "euc-jp-2007"; + const char *pattern_ignorable = "[:Default_Ignorable_Code_Point:]"; + const char *pattern_not_ignorable = "[:^Default_Ignorable_Code_Point:]"; + + UnicodeSet *set_ignorable = new UnicodeSet(pattern_ignorable, status); + if (U_FAILURE(status)) { + dataerrln("Unable to create Unicodeset: %s - %s\n", pattern_ignorable, u_errorName(status)); + return; + } + + UnicodeSet *set_not_ignorable = new UnicodeSet(pattern_not_ignorable, status); + if (U_FAILURE(status)) { + dataerrln("Unable to create Unicodeset: %s - %s\n", pattern_not_ignorable, u_errorName(status)); + return; + } + + UConverter *cnv = cnv_open(cnv_name, status); + if (U_FAILURE(status)) { + dataerrln("Unable to open converter: %s - %s\n", cnv_name, u_errorName(status)); + return; + } + + // set callback for the converter + ucnv_setFromUCallBack(cnv, UCNV_FROM_U_CALLBACK_SUBSTITUTE, NULL, NULL, NULL, &status); + + UChar32 input[1]; + char output[10]; + int32_t outputLength; + + // test default ignorables are ignored + int size = set_ignorable->size(); + for (int i = 0; i < size; i++) { + status = U_ZERO_ERROR; + outputLength= 0; + + input[0] = set_ignorable->charAt(i); + + outputLength = ucnv_fromUChars(cnv, output, 10, UnicodeString::fromUTF32(input, 1).getTerminatedBuffer(), -1, &status); + if (U_FAILURE(status) || outputLength != 0) { + errln("Ignorable code point: U+%04X not skipped as expected - %s", input[0], u_errorName(status)); + } + } + + // test non-ignorables are not ignored + size = set_not_ignorable->size(); + for (int i = 0; i < size; i++) { + status = U_ZERO_ERROR; + outputLength= 0; + + input[0] = set_not_ignorable->charAt(i); + + if (input[0] == 0) { + continue; + } + + ucnv_fromUChars(cnv, output, 10, UnicodeString::fromUTF32(input, 1).getTerminatedBuffer(), -1, &status); + if (U_FAILURE(status)) { + errln("Non-ignorable code point: U+%04X skipped unexpectedly - %s", input[0], u_errorName(status)); + } + } + + ucnv_close(cnv); + delete set_not_ignorable; + delete set_ignorable; +} + +void +ConversionTest::TestUTF8ToUTF8Overflow() { + IcuTestErrorCode errorCode(*this, "TestUTF8ToUTF8Overflow"); + LocalUConverterPointer cnv1(ucnv_open("UTF-8", errorCode)); + LocalUConverterPointer cnv2(ucnv_open("UTF-8", errorCode)); + static const char *text = "aä"; // ä: 2 bytes + const char *source = text; + const char *sourceLimit = text + strlen(text); + char result[20]; + char *target = result; + const char *targetLimit = result + sizeof(result); + UChar buffer16[20]; + UChar *pivotSource = buffer16; + UChar *pivotTarget = buffer16; + const UChar *pivotLimit = buffer16 + UPRV_LENGTHOF(buffer16); + + // Convert with insufficient target capacity. + result[2] = 5; + ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(), + &target, result + 2, &source, sourceLimit, + buffer16, &pivotSource, &pivotTarget, pivotLimit, + FALSE, FALSE, errorCode); + assertEquals("overflow", U_BUFFER_OVERFLOW_ERROR, errorCode.reset()); + int32_t length = (int32_t)(target - result); + assertEquals("number of bytes written", 2, length); + assertEquals("next byte not clobbered", 5, result[2]); + + // Convert the rest and flush. + ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(), + &target, targetLimit, &source, sourceLimit, + buffer16, &pivotSource, &pivotTarget, pivotLimit, + FALSE, TRUE, errorCode); + + assertSuccess("UTF-8->UTF-8", errorCode); + length = (int32_t)(target - result); + assertEquals("3 bytes", 3, length); + if (length == 3) { + assertTrue("result same as input", memcmp(text, result, length) == 0); + } + + ucnv_reset(cnv1.getAlias()); + ucnv_reset(cnv2.getAlias()); + memset(result, 0, sizeof(result)); + static const char *text2 = "a🚲"; // U+1F6B2 bicycle: 4 bytes + source = text2; + sourceLimit = text2 + strlen(text2); + target = result; + pivotSource = pivotTarget = buffer16; + + // Convert with insufficient target capacity. + result[3] = 5; + ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(), + &target, result + 3, &source, sourceLimit, + buffer16, &pivotSource, &pivotTarget, pivotLimit, + FALSE, FALSE, errorCode); + assertEquals("text2 overflow", U_BUFFER_OVERFLOW_ERROR, errorCode.reset()); + length = (int32_t)(target - result); + assertEquals("text2 number of bytes written", 3, length); + assertEquals("text2 next byte not clobbered", 5, result[3]); + + // Convert the rest and flush. + ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(), + &target, targetLimit, &source, sourceLimit, + buffer16, &pivotSource, &pivotTarget, pivotLimit, + FALSE, TRUE, errorCode); + + assertSuccess("text2 UTF-8->UTF-8", errorCode); + length = (int32_t)(target - result); + assertEquals("text2 5 bytes", 5, length); + if (length == 5) { + assertTrue("text2 result same as input", memcmp(text2, result, length) == 0); + } +} + // open testdata or ICU data converter ------------------------------------- *** UConverter * @@ -949,6 +1091,7 @@ // open the converter IcuTestErrorCode errorCode(*this, "ToUnicodeCase"); LocalUConverterPointer cnv(cnv_open(cc.charset, errorCode)); + // with no data, the above crashes with "pointer being freed was not allocated" for charset "x11-compound-text", see #13078 if(errorCode.isFailure()) { errcheckln(errorCode, "toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s", cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, errorCode.errorName()); @@ -990,7 +1133,7 @@ int32_t i, step; ok=TRUE; - for(i=0; i<LENGTHOF(steps) && ok; ++i) { + for(i=0; i<UPRV_LENGTHOF(steps) && ok; ++i) { step=steps[i].step; if(step<0 && !cc.finalFlush) { // skip ucnv_getNextUChar() if !finalFlush because @@ -1002,12 +1145,12 @@ cc.offsets=NULL; } else { - memset(resultOffsets, -1, LENGTHOF(resultOffsets)); + memset(resultOffsets, -1, UPRV_LENGTHOF(resultOffsets)); } - memset(result, -1, LENGTHOF(result)); + memset(result, -1, UPRV_LENGTHOF(result)); errorCode.reset(); resultLength=stepToUnicode(cc, cnv.getAlias(), - result, LENGTHOF(result), + result, UPRV_LENGTHOF(result), step==0 ? resultOffsets : NULL, step, errorCode); ok=checkToUnicode( @@ -1037,7 +1180,7 @@ errorCode.reset(); resultLength=ucnv_toUChars(cnv.getAlias(), - result, LENGTHOF(result), + result, UPRV_LENGTHOF(result), (const char *)cc.bytes, cc.bytesLength, errorCode); ok=checkToUnicode( @@ -1184,7 +1327,7 @@ targetLimit=resultLimit; flush=cc.finalFlush; - pivotLimit=pivotBuffer+LENGTHOF(pivotBuffer); + pivotLimit=pivotBuffer+UPRV_LENGTHOF(pivotBuffer); } else { // start with empty partial buffers sourceLimit=source; @@ -1403,7 +1546,7 @@ // convert unicode to utf8 char utf8[256]; cc.utf8=utf8; - u_strToUTF8(utf8, LENGTHOF(utf8), &cc.utf8Length, + u_strToUTF8(utf8, UPRV_LENGTHOF(utf8), &cc.utf8Length, cc.unicode, cc.unicodeLength, &errorCode); if(U_FAILURE(errorCode)) { @@ -1430,13 +1573,13 @@ int32_t i, step; ok=TRUE; - for(i=0; i<LENGTHOF(steps) && ok; ++i) { + for(i=0; i<UPRV_LENGTHOF(steps) && ok; ++i) { step=steps[i].step; - memset(resultOffsets, -1, LENGTHOF(resultOffsets)); - memset(result, -1, LENGTHOF(result)); + memset(resultOffsets, -1, UPRV_LENGTHOF(resultOffsets)); + memset(result, -1, UPRV_LENGTHOF(result)); errorCode=U_ZERO_ERROR; resultLength=stepFromUnicode(cc, cnv, - result, LENGTHOF(result), + result, UPRV_LENGTHOF(result), step==0 ? resultOffsets : NULL, step, &errorCode); ok=checkFromUnicode( @@ -1465,7 +1608,7 @@ if(cc.utf8Length>=0) { errorCode=U_ZERO_ERROR; resultLength=stepFromUTF8(cc, utf8Cnv, cnv, - result, LENGTHOF(result), + result, UPRV_LENGTHOF(result), step, &errorCode); ok=checkFromUnicode( cc, cnv, steps[i].utf8Name, @@ -1488,7 +1631,7 @@ errorCode=U_ZERO_ERROR; resultLength=ucnv_fromUChars(cnv, - result, LENGTHOF(result), + result, UPRV_LENGTHOF(result), cc.unicode, cc.unicodeLength, &errorCode); ok=checkFromUnicode( @@ -1537,7 +1680,7 @@ msg=NULL; errorCode=U_ZERO_ERROR; - resultInvalidLength=LENGTHOF(resultInvalidUChars); + resultInvalidLength=UPRV_LENGTHOF(resultInvalidUChars); ucnv_getInvalidUChars(cnv, resultInvalidUChars, &resultInvalidLength, &errorCode); if(U_FAILURE(errorCode)) { errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidUChars() failed - %s", diff -Nura icu/source/test/intltest/convtest.h icu_new/source/test/intltest/convtest.h --- icu/source/test/intltest/convtest.h 2013-10-05 04:47:50.000000000 +0800 +++ icu_new/source/test/intltest/convtest.h 2018-05-04 18:08:23.337412982 +0800 @@ -6,7 +6,7 @@ * ******************************************************************************* * file name: convtest.h - * encoding: US-ASCII + * encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -73,6 +73,8 @@ void TestFromUnicode(); void TestGetUnicodeSet(); void TestGetUnicodeSet2(); + void TestDefaultIgnorableCallback(); + void TestUTF8ToUTF8Overflow(); private: UBool diff -Nura icu/source/test/intltest/utxttest.cpp icu_new/source/test/intltest/utxttest.cpp --- icu/source/test/intltest/utxttest.cpp 2013-10-05 04:47:58.000000000 +0800 +++ icu_new/source/test/intltest/utxttest.cpp 2018-05-04 18:08:23.338412994 +0800 @@ -57,6 +57,8 @@ if (exec) Ticket5560(); break; case 4: name = "Ticket6847"; if (exec) Ticket6847(); break; + case 8: name = "Ticket12888"; + if (exec) Ticket12888(); break; default: name = ""; break; } } @@ -1452,3 +1454,62 @@ utext_close(ut); } +// Ticket 12888: bad handling of illegal utf-8 containing many instances of the archaic, now illegal, +// six byte utf-8 forms. Original implementation had an assumption that +// there would be at most three utf-8 bytes per UTF-16 code unit. +// The five and six byte sequences map to a single replacement character. + +void UTextTest::Ticket12888() { + const char *badString = + "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80" + "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80" + "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80" + "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80" + "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80" + "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80" + "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80" + "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80" + "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80" + "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80" + "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80" + "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80" + "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80" + "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80" + "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80" + "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80" + "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80" + "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80" + "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80" + "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"; + + UErrorCode status = U_ZERO_ERROR; + LocalUTextPointer ut(utext_openUTF8(NULL, badString, -1, &status)); + TEST_SUCCESS(status); + for (;;) { + UChar32 c = utext_next32(ut.getAlias()); + if (c == U_SENTINEL) { + break; + } + } + int32_t endIdx = utext_getNativeIndex(ut.getAlias()); + if (endIdx != (int32_t)strlen(badString)) { + errln("%s:%d expected=%d, actual=%d", __FILE__, __LINE__, strlen(badString), endIdx); + return; + } + + for (int32_t prevIndex = endIdx; prevIndex>0;) { + UChar32 c = utext_previous32(ut.getAlias()); + int32_t currentIndex = utext_getNativeIndex(ut.getAlias()); + if (c != 0xfffd) { + errln("%s:%d (expected, actual, index) = (%d, %d, %d)\n", + __FILE__, __LINE__, 0xfffd, c, currentIndex); + break; + } + if (currentIndex != prevIndex - 6) { + errln("%s:%d: wrong index. Expected, actual = %d, %d", + __FILE__, __LINE__, prevIndex - 6, currentIndex); + break; + } + prevIndex = currentIndex; + } +} diff -Nura icu/source/test/intltest/utxttest.h icu_new/source/test/intltest/utxttest.h --- icu/source/test/intltest/utxttest.h 2013-10-05 04:47:56.000000000 +0800 +++ icu_new/source/test/intltest/utxttest.h 2018-05-04 18:08:23.338412994 +0800 @@ -33,6 +33,7 @@ void FreezeTest(); void Ticket5560(); void Ticket6847(); + void Ticket12888(); private: struct m { // Map between native indices & code points.
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor