File icu-CVE-2016-6293-2017-7867-2017-7868-2017-1495... of Package icu

Overview Repositories Revisions Requests Users Attributes Meta

File icu-CVE-2016-6293-2017-7867-2017-7868-2017-14952-2017-15422-2017-17484.patch of Package icu

diff -Nura icu/source/common/cmemory.h icu_new/source/common/cmemory.h
--- icu/source/common/cmemory.h	2013-10-05 04:49:16.000000000 +0800
+++ icu_new/source/common/cmemory.h	2018-05-04 18:08:23.333412933 +0800
@@ -59,6 +59,14 @@
 
 #endif  /* U_DEBUG */
 
+/**
+ * \def UPRV_LENGTHOF
+ * Convenience macro to determine the length of a fixed array at compile-time.
+ * @param array A fixed length array
+ * @return The length of the array, in elements
+ * @internal
+ */
+#define UPRV_LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
 #define uprv_memset(buffer, mark, size) U_STANDARD_CPP_NAMESPACE memset(buffer, mark, size)
 #define uprv_memcmp(buffer1, buffer2, size) U_STANDARD_CPP_NAMESPACE memcmp(buffer1, buffer2,size)
 
diff -Nura icu/source/common/ucnv_err.c icu_new/source/common/ucnv_err.c
--- icu/source/common/ucnv_err.c	2013-10-05 04:49:16.000000000 +0800
+++ icu_new/source/common/ucnv_err.c	2018-05-04 18:08:23.333412933 +0800
@@ -50,6 +50,76 @@
 #define UCNV_PRV_ESCAPE_CSS2        'S'
 #define UCNV_PRV_STOP_ON_ILLEGAL    'i'
 
+/*
+ * IS_DEFAULT_IGNORABLE_CODE_POINT
+ * This is to check if a code point has the default ignorable unicode property.
+ * As such, this list needs to be updated if the ignorable code point list ever
+ * changes.
+ * To avoid dependency on other code, this list is hard coded here.
+ * When an ignorable code point is found and is unmappable, the default callbacks
+ * will ignore them.
+ *    (c == 0x00AD) || \                     (Latin-1 Punctuation and Symbols)
+ *    (c == 0x034F) || \                     (Combining Diacritical Marks Grapheme Joiner)
+ *    (c == 0x061C) || \                     (Arabic Format Character)
+ *    (c == 0x115F) || \                     (Hangul Jamo Old Initial Consonants)
+ *    (c == 0x1160) || \                     (Hangul Jamo Medial Vowels)
+ *    (0x17B4 <= c && c <= 0x17B5) || \      (Khmer Inherent Vowels)
+ *    (0x180B <= c && c <= 0x180E) || \      (Mongolian Format Controls)
+ *    (0x200B <= c && c <= 0x200F) || \      (General Punctuation Format Characters)
+ *    (0x202A <= c && c <= 0x202E) || \      (General Punctuation Format Characters)
+ *    (c == 0x2060) || \                     (General Punctuation Format Characters)
+ *    (0x2066 <= c && c <= 0x2069) || \      (General Punctuation Format Characters)
+ *    (0x2061 <= c && c <= 0x2064) || \      (General Punctuation Invisible Operators)
+ *    (0x206A <= c && c <= 0x206F) || \      (General Punctuation Deprecated)
+ *    (c == 0x3164) || \                     (Hangul Compatibility Jamo)
+ *    (0x0FE00 <= c && c <= 0x0FE0F) || \    (Variation Selectors)
+ *    (c == 0x0FEFF) || \                    (Arabic Presentation Forms B)
+ *    (c == 0x0FFA0) || \                    (Halfwidth and Fullwidth Forms)
+ *    (0x01BCA0  <= c && c <= 0x01BCA3) || \ (Shorthand Format Controls)
+ *    (0x01D173 <= c && c <= 0x01D17A) || \  (Musical Symbols)
+ *    (c == 0x0E0001) || \                   (Tag Identifiers)
+ *    (0x0E0020 <= c && c <= 0x0E007F) || \  (Tag Components)
+ *    (0x0E0100 <= c && c <= 0x0E01EF) || \  (Variation Selectors Supplement)
+ *    (c == 0x2065) || \                     (Unassigned)
+ *    (0x0FFF0 <= c && c <= 0x0FFF8) || \    (Unassigned)
+ *    (c == 0x0E0000) || \                   (Unassigned)
+ *    (0x0E0002 <= c && c <= 0x0E001F) || \  (Unassigned)
+ *    (0x0E0080 <= c && c <= 0x0E00FF) || \  (Unassigned)
+ *    (0x0E01F0 <= c && c <= 0x0E0FFF) \     (Unassigned)
+ */
+
+#define IS_DEFAULT_IGNORABLE_CODE_POINT(c) (\
+    (c == 0x00AD) || \
+    (c == 0x034F) || \
+    (c == 0x061C) || \
+    (c == 0x115F) || \
+    (c == 0x1160) || \
+    (0x17B4 <= c && c <= 0x17B5) || \
+    (0x180B <= c && c <= 0x180E) || \
+    (0x200B <= c && c <= 0x200F) || \
+    (0x202A <= c && c <= 0x202E) || \
+    (c == 0x2060) || \
+    (0x2066 <= c && c <= 0x2069) || \
+    (0x2061 <= c && c <= 0x2064) || \
+    (0x206A <= c && c <= 0x206F) || \
+    (c == 0x3164) || \
+    (0x0FE00 <= c && c <= 0x0FE0F) || \
+    (c == 0x0FEFF) || \
+    (c == 0x0FFA0) || \
+    (0x01BCA0  <= c && c <= 0x01BCA3) || \
+    (0x01D173 <= c && c <= 0x01D17A) || \
+    (c == 0x0E0001) || \
+    (0x0E0020 <= c && c <= 0x0E007F) || \
+    (0x0E0100 <= c && c <= 0x0E01EF) || \
+    (c == 0x2065) || \
+    (0x0FFF0 <= c && c <= 0x0FFF8) || \
+    (c == 0x0E0000) || \
+    (0x0E0002 <= c && c <= 0x0E001F) || \
+    (0x0E0080 <= c && c <= 0x0E00FF) || \
+    (0x0E01F0 <= c && c <= 0x0E0FFF) \
+    )
+
+
 /*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
 U_CAPI void    U_EXPORT2
 UCNV_FROM_U_CALLBACK_STOP (
@@ -61,6 +131,13 @@
                   UConverterCallbackReason reason,
                   UErrorCode * err)
 {
+    if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint))
+    {
+        /*
+         * Skip if the codepoint has unicode property of default ignorable.
+         */
+        *err = U_ZERO_ERROR;
+    }
     /* the caller must have set the error code accordingly */
     return;
 }
@@ -92,7 +169,14 @@
 {
     if (reason <= UCNV_IRREGULAR)
     {
-        if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
+        if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint))
+        {
+            /*
+             * Skip if the codepoint has unicode property of default ignorable.
+             */
+            *err = U_ZERO_ERROR;
+        }
+        else if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
         {
             *err = U_ZERO_ERROR;
         }
@@ -113,7 +197,14 @@
 {
     if (reason <= UCNV_IRREGULAR)
     {
-        if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
+        if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint))
+        {
+            /*
+             * Skip if the codepoint has unicode property of default ignorable.
+             */
+            *err = U_ZERO_ERROR;
+        }
+        else if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
         {
             *err = U_ZERO_ERROR;
             ucnv_cbFromUWriteSub(fromArgs, 0, err);
@@ -155,6 +246,14 @@
   {
       return;
   }
+  else if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint))
+  {
+      /*
+       * Skip if the codepoint has unicode property of default ignorable.
+       */
+      *err = U_ZERO_ERROR;
+      return;
+  }
 
   ucnv_setFromUCallBack (fromArgs->converter,
                      (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE,
diff -Nura icu/source/common/ucnv_u8.c icu_new/source/common/ucnv_u8.c
--- icu/source/common/ucnv_u8.c	2013-10-05 04:49:18.000000000 +0800
+++ icu_new/source/common/ucnv_u8.c	2018-05-04 18:08:23.333412933 +0800
@@ -26,9 +26,11 @@
 #include "unicode/utf.h"
 #include "unicode/utf8.h"
 #include "unicode/utf16.h"
+#include "uassert.h"
 #include "ucnv_bld.h"
 #include "ucnv_cnv.h"
 #include "cmemory.h"
+#include "ustr_imp.h"
 
 /* Prototypes --------------------------------------------------------------- */
 
@@ -748,7 +750,7 @@
 utf8_offsets[7]={ 0, 0, 0x3080, 0xE2080, 0x3C82080 };
 
 /* "Convert" UTF-8 to UTF-8: Validate and copy. Modified from ucnv_DBCSFromUTF8(). */
-static void
+static void U_CALLCONV
 ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
                   UConverterToUnicodeArgs *pToUArgs,
                   UErrorCode *pErrorCode) {
@@ -793,39 +795,37 @@
         *pErrorCode=U_USING_DEFAULT_WARNING;
         return;
     } else {
-        /*
-         * Use a single counter for source and target, counting the minimum of
-         * the source length and the target capacity.
-         * As a result, the source length is checked only once per multi-byte
-         * character instead of twice.
-         *
-         * Make sure that the last byte sequence is complete, or else
-         * stop just before it.
-         * (The longest legal byte sequence has 3 trail bytes.)
-         * Count oldToULength (number of source bytes from a previous buffer)
-         * into the source length but reduce the source index by toULimit
-         * while going back over trail bytes in order to not go back into
-         * the bytes that will be read for finishing a partial
-         * sequence from the previous buffer.
-         * Let the standard converter handle edge cases.
-         */
-        int32_t i;
-
+        // Use a single counter for source and target, counting the minimum of
+        // the source length and the target capacity.
+        // Let the standard converter handle edge cases.
+        const uint8_t *limit=sourceLimit;
         if(count>targetCapacity) {
+            limit-=(count-targetCapacity);
             count=targetCapacity;
         }
 
-        i=0;
-        while(i<3 && i<(count-toULimit)) {
-            b=source[count-oldToULength-i-1];
-            if(U8_IS_TRAIL(b)) {
-                ++i;
-            } else {
-                if(i<U8_COUNT_TRAIL_BYTES(b)) {
-                    /* stop converting before the lead byte if there are not enough trail bytes for it */
-                    count-=i+1;
+        // The conversion loop checks count>0 only once per 1/2/3-byte character.
+        // If the buffer ends with a truncated 2- or 3-byte sequence,
+        // then we reduce the count to stop before that,
+        // and collect the remaining bytes after the conversion loop.
+        {
+            // Do not go back into the bytes that will be read for finishing a partial
+            // sequence from the previous buffer.
+            int32_t length=count-toULimit;
+            if(length>0) {
+                uint8_t b1=*(limit-1);
+                if(U8_IS_SINGLE(b1)) {
+                    // common ASCII character
+                } else if(U8_IS_TRAIL(b1) && length>=2) {
+                    uint8_t b2=*(limit-2);
+                    if(0xe0<=b2 && b2<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
+                        // truncated 3-byte sequence
+                        count-=2;
+                }
+                } else if(0xc2<=b1 && b1<0xf0) {
+                    // truncated 2- or 3-byte sequence
+                    --count;
                 }
-                break;
             }
         }
     }
@@ -940,7 +940,7 @@
             }
 
             /* copy the legal byte sequence to the target */
-            {
+            if(count>=toULength) {
                 int8_t i;
 
                 for(i=0; i<oldToULength; ++i) {
@@ -951,9 +951,18 @@
                     *target++=*source++;
                 }
                 count-=toULength;
+            } else {
+                // A supplementary character that does not fit into the target.
+                // Let the standard converter handle this.
+                source-=(toULength-oldToULength);
+                pToUArgs->source=(char *)source;
+                pFromUArgs->target=(char *)target;
+                *pErrorCode=U_USING_DEFAULT_WARNING;
+                return;
             }
         }
     }
+    U_ASSERT(count>=0);
 
     if(U_SUCCESS(*pErrorCode) && source<sourceLimit) {
         if(target==(const uint8_t *)pFromUArgs->targetLimit) {
diff -Nura icu/source/common/uloc.cpp icu_new/source/common/uloc.cpp
--- icu/source/common/uloc.cpp	2013-10-05 04:49:26.000000000 +0800
+++ icu_new/source/common/uloc.cpp	2018-05-04 18:08:23.334412945 +0800
@@ -46,6 +46,8 @@
 
 #include <stdio.h> /* for sprintf */
 
+using namespace icu;
+
 /* ### Declarations **************************************************/
 
 /* Locale stuff from locid.cpp */
@@ -2239,7 +2241,7 @@
 typedef struct { 
     float q;
     int32_t dummy;  /* to avoid uninitialized memory copy from qsort */
-    char *locale;
+    char locale[ULOC_FULLNAME_CAPACITY+1];
 } _acceptLangItem;
 
 static int32_t U_CALLCONV
@@ -2281,9 +2283,7 @@
                             UEnumeration* availableLocales,
                             UErrorCode *status)
 {
-    _acceptLangItem *j;
-    _acceptLangItem smallBuffer[30];
-    char **strs;
+  MaybeStackArray<_acceptLangItem, 4> items; // Struct for collecting items. 
     char tmp[ULOC_FULLNAME_CAPACITY +1];
     int32_t n = 0;
     const char *itemEnd;
@@ -2293,11 +2293,7 @@
     int32_t res;
     int32_t i;
     int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
-    int32_t jSize;
-    char *tempstr; /* Use for null pointer check */
-
-    j = smallBuffer;
-    jSize = sizeof(smallBuffer)/sizeof(smallBuffer[0]);
+ 
     if(U_FAILURE(*status)) {
         return -1;
     }
@@ -2325,27 +2321,29 @@
             while(isspace(*t)) {
                 t++;
             }
-            j[n].q = (float)_uloc_strtod(t,NULL);
+            items[n].q = (float)_uloc_strtod(t,NULL);
         } else {
             /* no semicolon - it's 1.0 */
-            j[n].q = 1.0f;
+            items[n].q = 1.0f;
             paramEnd = itemEnd;
         }
-        j[n].dummy=0;
+        items[n].dummy=0;
         /* eat spaces prior to semi */
         for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
             ;
-        /* Check for null pointer from uprv_strndup */
-        tempstr = uprv_strndup(s,(int32_t)((t+1)-s));
-        if (tempstr == NULL) {
-            *status = U_MEMORY_ALLOCATION_ERROR;
-            return -1;
-        }
-        j[n].locale = tempstr;
-        uloc_canonicalize(j[n].locale,tmp,sizeof(tmp)/sizeof(tmp[0]),status);
-        if(strcmp(j[n].locale,tmp)) {
-            uprv_free(j[n].locale);
-            j[n].locale=uprv_strdup(tmp);
+        int32_t slen = ((t+1)-s);
+        if(slen > ULOC_FULLNAME_CAPACITY) {
+          *status = U_BUFFER_OVERFLOW_ERROR;
+          return -1; // too big
+        }
+        uprv_strncpy(items[n].locale, s, slen);
+        items[n].locale[slen]=0; // terminate
+        int32_t clen = uloc_canonicalize(items[n].locale, tmp, UPRV_LENGTHOF(tmp)-1, status);
+        if(U_FAILURE(*status)) return -1;
+        if((clen!=slen) || (uprv_strncmp(items[n].locale, tmp, slen))) {
+            // canonicalization had an effect- copy back
+            uprv_strncpy(items[n].locale, tmp, clen);
+            items[n].locale[clen] = 0; // terminate
         }
 #if defined(ULOC_DEBUG)
         /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
@@ -2355,42 +2353,22 @@
         while(*s==',') { /* eat duplicate commas */
             s++;
         }
-        if(n>=jSize) {
-            if(j==smallBuffer) {  /* overflowed the small buffer. */
-                j = static_cast<_acceptLangItem *>(uprv_malloc(sizeof(j[0])*(jSize*2)));
-                if(j!=NULL) {
-                    uprv_memcpy(j,smallBuffer,sizeof(j[0])*jSize);
-                }
-#if defined(ULOC_DEBUG)
-                fprintf(stderr,"malloced at size %d\n", jSize);
-#endif
-            } else {
-                j = static_cast<_acceptLangItem *>(uprv_realloc(j, sizeof(j[0])*jSize*2));
-#if defined(ULOC_DEBUG)
-                fprintf(stderr,"re-alloced at size %d\n", jSize);
-#endif
-            }
-            jSize *= 2;
-            if(j==NULL) {
+        if(n>=items.getCapacity()) { // If we need more items
+          if(NULL == items.resize(items.getCapacity()*2, items.getCapacity())) {
                 *status = U_MEMORY_ALLOCATION_ERROR;
                 return -1;
             }
-        }
-    }
-    uprv_sortArray(j, n, sizeof(j[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
-    if(U_FAILURE(*status)) {
-        if(j != smallBuffer) {
 #if defined(ULOC_DEBUG)
-            fprintf(stderr,"freeing j %p\n", j);
+          fprintf(stderr,"malloced at size %d\n", items.getCapacity());
 #endif
-            uprv_free(j);
         }
+    }
+    uprv_sortArray(items.getAlias(), n, sizeof(items[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
+    if(U_FAILURE(*status)) {
         return -1;
     }
-    strs = static_cast<char **>(uprv_malloc((size_t)(sizeof(strs[0])*n)));
-    /* Check for null pointer */
-    if (strs == NULL) {
-        uprv_free(j); /* Free to avoid memory leak */
+    LocalMemory<const char*> strs(NULL); // owns the pointer array only; the strings live in items[]
+    if (n > 0 && strs.allocateInsteadAndReset(n) == NULL) { // n==0 is not an OOM: allocateInsteadAndReset yields NULL for a non-positive capacity
         *status = U_MEMORY_ALLOCATION_ERROR;
         return -1;
     }
@@ -2398,20 +2376,10 @@
 #if defined(ULOC_DEBUG)
         /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
 #endif
-        strs[i]=j[i].locale;
+        strs[i]=items[i].locale;
     }
     res =  uloc_acceptLanguage(result, resultAvailable, outResult, 
-        (const char**)strs, n, availableLocales, status);
-    for(i=0;i<n;i++) {
-        uprv_free(strs[i]);
-    }
-    uprv_free(strs);
-    if(j != smallBuffer) {
-#if defined(ULOC_DEBUG)
-        fprintf(stderr,"freeing j %p\n", j);
-#endif
-        uprv_free(j);
-    }
+                               strs.getAlias(), n, availableLocales, status);
     return res;
 }
 
diff -Nura icu/source/common/unicode/utf8.h icu_new/source/common/unicode/utf8.h
--- icu/source/common/unicode/utf8.h	2013-10-05 04:49:08.000000000 +0800
+++ icu_new/source/common/unicode/utf8.h	2018-05-04 18:08:23.334412945 +0800
@@ -106,6 +106,40 @@
 #define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
 
 /**
+ * Internal bit vector for 3-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD3_AND_T1.
+ * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence.
+ * Lead byte E0..EF bits 3..0 are used as byte index,
+ * first trail byte bits 7..5 are used as bit index into that byte.
+ * @see U8_IS_VALID_LEAD3_AND_T1
+ * @internal
+ */
+#define U8_LEAD3_T1_BITS "\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30"
+
+/**
+ * Internal 3-byte UTF-8 validity check.
+ * Non-zero if lead byte E0..EF and first trail byte 00..FF start a valid sequence.
+ * @internal
+ */
+#define U8_IS_VALID_LEAD3_AND_T1(lead, t1) (U8_LEAD3_T1_BITS[(lead)&0xf]&(1<<((uint8_t)(t1)>>5)))
+
+/**
+ * Internal bit vector for 4-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD4_AND_T1.
+ * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence.
+ * First trail byte bits 7..4 are used as byte index,
+ * lead byte F0..F4 bits 2..0 are used as bit index into that byte.
+ * @see U8_IS_VALID_LEAD4_AND_T1
+ * @internal
+ */
+#define U8_LEAD4_T1_BITS "\x00\x00\x00\x00\x00\x00\x00\x00\x1E\x0F\x0F\x0F\x00\x00\x00\x00"
+
+/**
+ * Internal 4-byte UTF-8 validity check.
+ * Non-zero if lead byte F0..F4 and first trail byte 00..FF start a valid sequence.
+ * @internal
+ */
+#define U8_IS_VALID_LEAD4_AND_T1(lead, t1) (U8_LEAD4_T1_BITS[(uint8_t)(t1)>>4]&(1<<((lead)&7)))
+
+/**
  * Function for handling "next code point" with error-checking.
  *
  * This is internal since it is not meant to be called directly by external clients;
diff -Nura icu/source/common/utext.cpp icu_new/source/common/utext.cpp
--- icu/source/common/utext.cpp	2013-10-05 04:49:22.000000000 +0800
+++ icu_new/source/common/utext.cpp	2018-05-04 18:08:23.335412957 +0800
@@ -831,9 +831,15 @@
 //------------------------------------------------------------------------------
 
 // Chunk size.
-//     Must be less than 85, because of byte mapping from UChar indexes to native indexes.
-//     Worst case is three native bytes to one UChar.  (Supplemenaries are 4 native bytes
-//     to two UChars.)
+//     Must be less than 42  (256/6), because of byte mapping from UChar indexes to native indexes.
+//     Worst case there are six UTF-8 bytes per UChar.
+//         obsolete 6 byte form fd + 5 trails maps to fffd
+//         obsolete 5 byte form fc + 4 trails maps to fffd
+//         non-shortest 4 byte forms maps to fffd
+//         normal supplementaries map to a pair of utf-16, two utf8 bytes per utf-16 unit
+//     mapToUChars array size must allow for the worst case, 6.
+//     This could be brought down to 4, by treating fd and fc as pure illegal,
+//     rather than obsolete lead bytes. But that is not compatible with the utf-8 access macros.
 //
 enum { UTF8_TEXT_CHUNK_SIZE=32 };
 
@@ -873,7 +879,7 @@
                                                      //  Requires two extra slots,
                                                      //    one for a supplementary starting in the last normal position,
                                                      //    and one for an entry for the buffer limit position.
-    uint8_t   mapToUChars[UTF8_TEXT_CHUNK_SIZE*3+6]; // Map native offset from bufNativeStart to
+    uint8_t   mapToUChars[UTF8_TEXT_CHUNK_SIZE*6+6]; // Map native offset from bufNativeStart to
                                                      //   correspoding offset in filled part of buf.
     int32_t   align;
 };
@@ -1016,6 +1022,7 @@
             // Requested index is in this buffer.
             u8b = (UTF8Buf *)ut->p;   // the current buffer
             mapIndex = ix - u8b->toUCharsMapStart;
+            U_ASSERT(mapIndex < (int32_t)sizeof(UTF8Buf::mapToUChars));
             ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;
             return TRUE;
 
@@ -1282,6 +1289,10 @@
         // Can only do this if the incoming index is somewhere in the interior of the string.
         //   If index is at the end, there is no character there to look at.
         if (ix != ut->b) {
+            // Note: this function will only move the index back if it is on a trail byte
+            //       and there is a preceding lead byte and the sequence from the lead 
+            //       through this trail could be part of a valid UTF-8 sequence
+            //       Otherwise the index remains unchanged.
             U8_SET_CP_START(s8, 0, ix);
         }
 
@@ -1295,7 +1306,10 @@
         UChar   *buf = u8b->buf;
         uint8_t *mapToNative = u8b->mapToNative;
         uint8_t *mapToUChars = u8b->mapToUChars;
-        int32_t  toUCharsMapStart = ix - (UTF8_TEXT_CHUNK_SIZE*3 + 1);
+        int32_t  toUCharsMapStart = ix - sizeof(UTF8Buf::mapToUChars) + 1;
+        // Note that toUCharsMapStart can be negative. Happens when the remaining
+        // text from current position to the beginning is less than the buffer size.
+        // + 1 because mapToUChars must have a slot at the end for the bufNativeLimit entry.
         int32_t  destIx = UTF8_TEXT_CHUNK_SIZE+2;   // Start in the overflow region
                                                     //   at end of buffer to leave room
                                                     //   for a surrogate pair at the
@@ -1322,6 +1336,7 @@
             if (c<0x80) {
                 // Special case ASCII range for speed.
                 buf[destIx] = (UChar)c;
+                U_ASSERT(toUCharsMapStart <= srcIx);
                 mapToUChars[srcIx - toUCharsMapStart] = (uint8_t)destIx;
                 mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
             } else {
@@ -1351,6 +1366,7 @@
                 do {
                     mapToUChars[sIx-- - toUCharsMapStart] = (uint8_t)destIx;
                 } while (sIx >= srcIx);
+                U_ASSERT(toUCharsMapStart <= (srcIx+1));
 
                 // Set native indexing limit to be the current position.
                 //   We are processing a non-ascii, non-native-indexing char now;
@@ -1525,6 +1541,7 @@
     U_ASSERT(index>=ut->chunkNativeStart+ut->nativeIndexingLimit);
     U_ASSERT(index<=ut->chunkNativeLimit);
     int32_t mapIndex = index - u8b->toUCharsMapStart;
+    U_ASSERT(mapIndex < (int32_t)sizeof(UTF8Buf::mapToUChars));
     int32_t offset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;
     U_ASSERT(offset>=0 && offset<=ut->chunkLength);
     return offset;
diff -Nura icu/source/i18n/gregoimp.cpp icu_new/source/i18n/gregoimp.cpp
--- icu/source/i18n/gregoimp.cpp	2013-10-05 04:48:52.000000000 +0800
+++ icu_new/source/i18n/gregoimp.cpp	2018-05-04 18:08:23.335412957 +0800
@@ -29,6 +29,11 @@
         numerator / denominator : ((numerator + 1) / denominator) - 1;
 }
 
+int64_t ClockMath::floorDivide(int64_t numerator, int64_t denominator) {
+    return (numerator >= 0) ?
+        numerator / denominator : ((numerator + 1) / denominator) - 1;
+}
+
 int32_t ClockMath::floorDivide(double numerator, int32_t denominator,
                           int32_t& remainder) {
     double quotient;
diff -Nura icu/source/i18n/gregoimp.h icu_new/source/i18n/gregoimp.h
--- icu/source/i18n/gregoimp.h	2013-10-05 04:48:56.000000000 +0800
+++ icu_new/source/i18n/gregoimp.h	2018-05-04 18:08:23.335412957 +0800
@@ -39,6 +39,17 @@
     static int32_t floorDivide(int32_t numerator, int32_t denominator);
 
     /**
+     * Divide two integers, returning the floor of the quotient.
+     * Unlike the built-in division, this is mathematically
+     * well-behaved.  E.g., <code>-1/4</code> => 0 but
+     * <code>floorDivide(-1,4)</code> => -1.
+     * @param numerator the numerator
+     * @param denominator a divisor which must be != 0
+     * @return the floor of the quotient
+     */
+    static int64_t floorDivide(int64_t numerator, int64_t denominator);
+
+    /**
      * Divide two numbers, returning the floor of the quotient.
      * Unlike the built-in division, this is mathematically
      * well-behaved.  E.g., <code>-1/4</code> => 0 but
diff -Nura icu/source/i18n/persncal.cpp icu_new/source/i18n/persncal.cpp
--- icu/source/i18n/persncal.cpp	2013-10-05 04:48:52.000000000 +0800
+++ icu_new/source/i18n/persncal.cpp	2018-05-04 18:08:23.335412957 +0800
@@ -211,7 +211,7 @@
     int32_t year, month, dayOfMonth, dayOfYear;
 
     int32_t daysSinceEpoch = julianDay - PERSIAN_EPOCH;
-    year = 1 + ClockMath::floorDivide(33 * daysSinceEpoch + 3, 12053);
+    year = 1 + (int32_t)ClockMath::floorDivide(33 * (int64_t)daysSinceEpoch + 3, (int64_t)12053);
 
     int32_t farvardin1 = 365 * (year - 1) + ClockMath::floorDivide(8 * year + 21, 33);
     dayOfYear = (daysSinceEpoch - farvardin1); // 0-based
diff -Nura icu/source/i18n/zonemeta.cpp icu_new/source/i18n/zonemeta.cpp
--- icu/source/i18n/zonemeta.cpp	2013-10-05 04:48:44.000000000 +0800
+++ icu_new/source/i18n/zonemeta.cpp	2018-05-04 18:08:23.335412957 +0800
@@ -685,7 +685,6 @@
                     mzMappings = new UVector(deleteOlsonToMetaMappingEntry, NULL, status);
                     if (U_FAILURE(status)) {
                         delete mzMappings;
-                        deleteOlsonToMetaMappingEntry(entry);
                         uprv_free(entry);
                         break;
                     }
diff -Nura icu/source/test/cintltst/cloctst.c icu_new/source/test/cintltst/cloctst.c
--- icu/source/test/cintltst/cloctst.c	2013-10-05 04:47:36.000000000 +0800
+++ icu_new/source/test/cintltst/cloctst.c	2018-05-04 18:08:23.336412969 +0800
@@ -2687,16 +2687,20 @@
         const char *icuSet;    /**< ? */
         const char *expect;    /**< The expected locale result */
         UAcceptResult res;     /**< The expected error code */
+        UErrorCode expectStatus; /**< expected status */
     } tests[] = { 
-        /*0*/{ 0, NULL, "mt_MT", ULOC_ACCEPT_VALID },
-        /*1*/{ 1, NULL, "en", ULOC_ACCEPT_VALID },
-        /*2*/{ 2, NULL, "en", ULOC_ACCEPT_FALLBACK },
-        /*3*/{ 3, NULL, "", ULOC_ACCEPT_FAILED },
-        /*4*/{ 4, NULL, "es", ULOC_ACCEPT_VALID },
-        
-        /*5*/{ 5, NULL, "en", ULOC_ACCEPT_VALID },  /* XF */
-        /*6*/{ 6, NULL, "ja", ULOC_ACCEPT_FALLBACK },  /* XF */
-        /*7*/{ 7, NULL, "zh", ULOC_ACCEPT_FALLBACK },  /* XF */
+        /*0*/{ 0, NULL, "mt_MT", ULOC_ACCEPT_VALID, U_ZERO_ERROR},
+        /*1*/{ 1, NULL, "en", ULOC_ACCEPT_VALID, U_ZERO_ERROR},
+        /*2*/{ 2, NULL, "en", ULOC_ACCEPT_FALLBACK, U_ZERO_ERROR},
+        /*3*/{ 3, NULL, "", ULOC_ACCEPT_FAILED, U_ZERO_ERROR},
+        /*4*/{ 4, NULL, "es", ULOC_ACCEPT_VALID, U_ZERO_ERROR},
+        /*5*/{ 5, NULL, "en", ULOC_ACCEPT_VALID, U_ZERO_ERROR},  /* XF */
+        /*6*/{ 6, NULL, "ja", ULOC_ACCEPT_FALLBACK, U_ZERO_ERROR},  /* XF */
+        /*7*/{ 7, NULL, "zh", ULOC_ACCEPT_FALLBACK, U_ZERO_ERROR},  /* XF */
+        /*8*/{ 8, NULL, "", ULOC_ACCEPT_FAILED, U_ZERO_ERROR },  /*  */
+        /*9*/{ 9, NULL, "", ULOC_ACCEPT_FAILED, U_ZERO_ERROR },  /*  */
+       /*10*/{10, NULL, "", ULOC_ACCEPT_FAILED, U_BUFFER_OVERFLOW_ERROR },  /*  */
+       /*11*/{11, NULL, "", ULOC_ACCEPT_FAILED, U_BUFFER_OVERFLOW_ERROR },  /*  */
     };
     const int32_t numTests = sizeof(tests)/sizeof(tests[0]);
     static const char *http[] = {
@@ -2711,11 +2715,26 @@
               "xxx-yyy;q=.01, xxx-yyy;q=.01, xxx-yyy;q=.01, xxx-yyy;q=.01, xxx-yyy;q=.01, "
               "xxx-yyy;q=.01, xxx-yyy;q=.01, xxx-yyy;q=.01, xxx-yyy;q=.01, xxx-yyy;q=.01, "
               "xxx-yyy;q=.01, xxx-yyy;q=.01, xxx-yyy;q=.01, xx-yy;q=.1, "
-              "es",
-              
+              "es",              
         /*5*/ "zh-xx;q=0.9, en;q=0.6",
         /*6*/ "ja-JA",
         /*7*/ "zh-xx;q=0.9",
+       /*08*/ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+              "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+              "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+              "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", // 156
+       /*09*/ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+              "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+              "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+              "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB", // 157 (this hits U_STRING_NOT_TERMINATED_WARNING )
+       /*10*/ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+              "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+              "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+              "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABC", // 158
+       /*11*/ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+              "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+              "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+              "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", // 163 bytes
     };
 
     for(i=0;i<numTests;i++) {
@@ -2730,6 +2749,10 @@
         (void)rc;    /* Suppress set but not used warning. */
         uenum_close(available);
         log_verbose(" got %s, %s [%s]\n", tmp[0]?tmp:"(EMPTY)", acceptResult(outResult), u_errorName(status));
+        if(status != tests[i].expectStatus) {
+          log_err_status(status, "FAIL: expected status %s but got %s\n", u_errorName(tests[i].expectStatus), u_errorName(status));
+        } else if(U_SUCCESS(tests[i].expectStatus)) {
+            /* don't check content if expected failure */
         if(outResult != tests[i].res) {
             log_err_status(status, "FAIL: #%d: expected outResult of %s but got %s\n", i, 
                 acceptResult( tests[i].res), 
@@ -2741,6 +2764,7 @@
             log_err_status(status, "FAIL: #%d: expected %s but got %s\n", i, tests[i].expect, tmp);
             log_info("test #%d: http[%s], ICU[%s], expect %s, %s\n", 
                 i, http[tests[i].httpSet], tests[i].icuSet, tests[i].expect, acceptResult(tests[i].res));
+            }
         }
     }
 }
diff -Nura icu/source/test/intltest/calregts.cpp icu_new/source/test/intltest/calregts.cpp
--- icu/source/test/intltest/calregts.cpp	2013-10-05 04:47:58.000000000 +0800
+++ icu_new/source/test/intltest/calregts.cpp	2018-05-04 18:08:23.337412982 +0800
@@ -10,6 +10,7 @@
 
 #include "calregts.h"
 
+#include "unicode/calendar.h"
 #include "unicode/gregocal.h"
 #include "unicode/simpletz.h"
 #include "unicode/smpdtfmt.h"
@@ -88,6 +89,7 @@
         CASE(48,TestT8596);
         CASE(49,Test9019);
         CASE(50,TestT9452);
+        CASE(51,TestPersianCalOverflow); // indices must be contiguous: a gap hits default (name = "") and ends the test run loop
     default: name = ""; break;
     }
 }
@@ -2944,4 +2946,34 @@
     }
 }
 
+/**
+ * @bug ticket 13454 - MONTH and DATE must stay within their maxima for very large Julian days.
+ */
+void CalendarRegressionTest::TestPersianCalOverflow(void) {
+    const char* localeID = "bs_Cyrl@calendar=persian";
+    UErrorCode status = U_ZERO_ERROR;
+    Calendar* cal = Calendar::createInstance(Locale(localeID), status);
+    if(U_FAILURE(status)) {
+        dataerrln("FAIL: Calendar::createInstance for localeID %s: %s", localeID, u_errorName(status));
+    } else {
+        int32_t maxMonth = cal->getMaximum(UCAL_MONTH);
+        int32_t maxDayOfMonth = cal->getMaximum(UCAL_DATE);
+        int32_t jd, month, dayOfMonth;
+        for (jd = 67023580; jd <= 67023584; jd++) { // year 178171, int32_t overflow if jd >= 67023582
+            status = U_ZERO_ERROR;
+            cal->clear();
+            cal->set(UCAL_JULIAN_DAY, jd);
+            month = cal->get(UCAL_MONTH, status);
+            dayOfMonth = cal->get(UCAL_DATE, status);
+            if ( U_FAILURE(status) ) {
+                errln("FAIL: Calendar->get MONTH/DATE for localeID %s, julianDay %d, status %s\n", localeID, jd, u_errorName(status)); 
+            } else if (month > maxMonth || dayOfMonth > maxDayOfMonth) {
+                errln("FAIL: localeID %s, julianDay %d; maxMonth %d, got month %d; maxDayOfMonth %d, got dayOfMonth %d\n",
+                        localeID, jd, maxMonth, month, maxDayOfMonth, dayOfMonth); 
+            }
+        }
+        delete cal;
+    }
+}
+
 #endif /* #if !UCONFIG_NO_FORMATTING */
diff -Nura icu/source/test/intltest/calregts.h icu_new/source/test/intltest/calregts.h
--- icu/source/test/intltest/calregts.h	2013-10-05 04:47:56.000000000 +0800
+++ icu_new/source/test/intltest/calregts.h	2018-05-04 18:08:23.337412982 +0800
@@ -75,6 +75,7 @@
     void TestT8596(void);
     void Test9019(void);
     void TestT9452(void);
+  void TestPersianCalOverflow(void);
 
     void printdate(GregorianCalendar *cal, const char *string);
     void dowTest(UBool lenient) ;
diff -Nura icu/source/test/intltest/convtest.cpp icu_new/source/test/intltest/convtest.cpp
--- icu/source/test/intltest/convtest.cpp	2013-10-05 04:47:56.000000000 +0800
+++ icu_new/source/test/intltest/convtest.cpp	2018-05-07 15:54:38.740421990 +0800
@@ -6,7 +6,7 @@
 *
 *******************************************************************************
 *   file name:  convtest.cpp
-*   encoding:   US-ASCII
+*   encoding:   UTF-8
 *   tab size:   8 (not used)
 *   indentation:4
 *
@@ -35,13 +35,13 @@
 #include "unicode/uniset.h"
 #include "unicode/ustring.h"
 #include "unicode/ures.h"
+#include "unicode/utf16.h"
 #include "convtest.h"
+#include "cmemory.h"
 #include "unicode/tstdtmod.h"
 #include <string.h>
 #include <stdlib.h>
 
-#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
-
 enum {
     // characters used in test data for callbacks
     SUB_CB='?',
@@ -66,19 +66,16 @@
 void
 ConversionTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
     if (exec) logln("TestSuite ConversionTest: ");
-    switch (index) {
+    TESTCASE_AUTO_BEGIN;
 #if !UCONFIG_NO_FILE_IO
-        case 0: name="TestToUnicode"; if (exec) TestToUnicode(); break;
-        case 1: name="TestFromUnicode"; if (exec) TestFromUnicode(); break;
-        case 2: name="TestGetUnicodeSet"; if (exec) TestGetUnicodeSet(); break;
-#else
-        case 0:
-        case 1:
-        case 2: name="skip"; break;
+    TESTCASE_AUTO(TestToUnicode);
+    TESTCASE_AUTO(TestFromUnicode);
+    TESTCASE_AUTO(TestGetUnicodeSet);
 #endif
-        case 3: name="TestGetUnicodeSet2"; if (exec) TestGetUnicodeSet2(); break;
-        default: name=""; break; //needed to end loop
-    }
+    TESTCASE_AUTO(TestGetUnicodeSet2);
+    TESTCASE_AUTO(TestDefaultIgnorableCallback);
+    TESTCASE_AUTO(TestUTF8ToUTF8Overflow);
+    TESTCASE_AUTO_END;
 }
 
 // test data interface ----------------------------------------------------- ***
@@ -289,7 +286,7 @@
                     // read a substitution string, separated by an equal sign
                     p=s.getBuffer()+index+1;
                     length=s.length()-(index+1);
-                    if(length<0 || length>=LENGTHOF(cc.subString)) {
+                    if(length<0 || length>=UPRV_LENGTHOF(cc.subString)) {
                         errorCode=U_ILLEGAL_ARGUMENT_ERROR;
                     } else {
                         u_memcpy(cc.subString, p, length);
@@ -443,7 +440,7 @@
                 if(!diffSet.isEmpty()) {
                     diffSet.toPattern(s, TRUE);
                     if(s.length()>100) {
-                        s.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis));
+                        s.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));
                     }
                     errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - conversion/getUnicodeSet test case %d",
                             charset, i);
@@ -455,7 +452,7 @@
                 if(!diffSet.isEmpty()) {
                     diffSet.toPattern(s, TRUE);
                     if(s.length()>100) {
-                        s.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis));
+                        s.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));
                     }
                     errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - conversion/getUnicodeSet test case %d",
                             charset, i);
@@ -553,7 +550,7 @@
     LocalUConverterPointer cnv;
     char buffer[1024];
     int32_t i;
-    for(i=0; i<LENGTHOF(cnvNames); ++i) {
+    for(i=0; i<UPRV_LENGTHOF(cnvNames); ++i) {
         UErrorCode errorCode=U_ZERO_ERROR;
         cnv.adoptInstead(cnv_open(cnvNames[i], errorCode));
         if(U_FAILURE(errorCode)) {
@@ -623,7 +620,7 @@
                 if(!diffSet.isEmpty()) {
                     diffSet.toPattern(out, TRUE);
                     if(out.length()>100) {
-                        out.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis));
+                        out.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));
                     }
                     errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - which set: %d",
                             cnvNames[i], which);
@@ -635,7 +632,7 @@
                 if(!diffSet.isEmpty()) {
                     diffSet.toPattern(out, TRUE);
                     if(out.length()>100) {
-                        out.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis));
+                        out.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));
                     }
                     errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - which set: %d",
                             cnvNames[i], which);
@@ -648,6 +645,151 @@
     delete [] s0;
 }
 
+// Test that all code points which have the Default_Ignorable_Code_Point property are ignored if they have no mapping.
+// If there are any failures, the hard coded list (IS_DEFAULT_IGNORABLE_CODE_POINT) in ucnv_err.c should be updated.
+void
+ConversionTest::TestDefaultIgnorableCallback() {
+    UErrorCode status = U_ZERO_ERROR;
+    const char *cnv_name = "euc-jp-2007";
+    const char *pattern_ignorable = "[:Default_Ignorable_Code_Point:]";
+    const char *pattern_not_ignorable = "[:^Default_Ignorable_Code_Point:]";
+
+    // The sets and the converter live in owning wrappers so that every return path releases them.
+    LocalPointer<UnicodeSet> set_ignorable(new UnicodeSet(pattern_ignorable, status));
+    if (U_FAILURE(status)) {
+        dataerrln("Unable to create Unicodeset: %s - %s\n", pattern_ignorable, u_errorName(status));
+        return;
+    }
+
+    LocalPointer<UnicodeSet> set_not_ignorable(new UnicodeSet(pattern_not_ignorable, status));
+    if (U_FAILURE(status)) {
+        dataerrln("Unable to create Unicodeset: %s - %s\n", pattern_not_ignorable, u_errorName(status));
+        return;
+    }
+
+    LocalUConverterPointer cnv(cnv_open(cnv_name, status));
+    if (U_FAILURE(status)) {
+        dataerrln("Unable to open converter: %s - %s\n", cnv_name, u_errorName(status));
+        return;
+    }
+
+    // Substitute unmappable code points; default ignorables are expected to be skipped instead.
+    ucnv_setFromUCallBack(cnv.getAlias(), UCNV_FROM_U_CALLBACK_SUBSTITUTE, NULL, NULL, NULL, &status);
+    if (U_FAILURE(status)) {
+        errln("Unable to set the substitute callback on converter: %s - %s", cnv_name, u_errorName(status));
+        return;
+    }
+
+    UChar32 input[1];
+    char output[10];
+    int32_t outputLength;
+
+    // test default ignorables are ignored
+    int size = set_ignorable->size();
+    for (int i = 0; i < size; i++) {
+        status = U_ZERO_ERROR;
+        outputLength= 0;
+
+        input[0] = set_ignorable->charAt(i);
+
+        outputLength = ucnv_fromUChars(cnv.getAlias(), output, 10, UnicodeString::fromUTF32(input, 1).getTerminatedBuffer(), -1, &status);
+        if (U_FAILURE(status) || outputLength != 0) {
+            errln("Ignorable code point: U+%04X not skipped as expected - %s", input[0], u_errorName(status));
+        }
+    }
+
+    // test non-ignorables are not ignored
+    size = set_not_ignorable->size();
+    for (int i = 0; i < size; i++) {
+        status = U_ZERO_ERROR;
+
+        input[0] = set_not_ignorable->charAt(i);
+
+        if (input[0] == 0) {
+            continue;
+        }
+
+        ucnv_fromUChars(cnv.getAlias(), output, 10, UnicodeString::fromUTF32(input, 1).getTerminatedBuffer(), -1, &status);
+        if (U_FAILURE(status)) {
+            errln("Non-ignorable code point: U+%04X skipped unexpectedly - %s", input[0], u_errorName(status));
+        }
+    }
+}
+
+void
+ConversionTest::TestUTF8ToUTF8Overflow() {  // UTF-8 to UTF-8 via ucnv_convertEx(): overflow must stop at the target limit, then resume
+    IcuTestErrorCode errorCode(*this, "TestUTF8ToUTF8Overflow");
+    LocalUConverterPointer cnv1(ucnv_open("UTF-8", errorCode));
+    LocalUConverterPointer cnv2(ucnv_open("UTF-8", errorCode));
+    static const char *text = "aä";  // ä: 2 bytes
+    const char *source = text;
+    const char *sourceLimit = text + strlen(text);
+    char result[20];
+    char *target = result;
+    const char *targetLimit = result + sizeof(result);
+    UChar buffer16[20];
+    UChar *pivotSource = buffer16;
+    UChar *pivotTarget = buffer16;
+    const UChar *pivotLimit = buffer16 + UPRV_LENGTHOF(buffer16);
+
+    // Convert with insufficient target capacity: the target limit is result + 2, one byte short of the 3-byte text.
+    result[2] = 5;  // sentinel at the truncated target limit; checked below to be untouched
+    ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
+                   &target, result + 2, &source, sourceLimit,
+                   buffer16, &pivotSource, &pivotTarget, pivotLimit,
+                   FALSE, FALSE, errorCode);
+    assertEquals("overflow", U_BUFFER_OVERFLOW_ERROR, errorCode.reset());
+    int32_t length = (int32_t)(target - result);
+    assertEquals("number of bytes written", 2, length);
+    assertEquals("next byte not clobbered", 5, result[2]);
+
+    // Convert the rest and flush, now with the full result buffer as the target.
+    ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
+                   &target, targetLimit, &source, sourceLimit,
+                   buffer16, &pivotSource, &pivotTarget, pivotLimit,
+                   FALSE, TRUE, errorCode);
+
+    assertSuccess("UTF-8->UTF-8", errorCode);
+    length = (int32_t)(target - result);
+    assertEquals("3 bytes", 3, length);
+    if (length == 3) {
+        assertTrue("result same as input", memcmp(text, result, length) == 0);
+    }
+
+    ucnv_reset(cnv1.getAlias());
+    ucnv_reset(cnv2.getAlias());
+    memset(result, 0, sizeof(result));
+    static const char *text2 = "a🚲";  // U+1F6B2 bicycle: 4 bytes
+    source = text2;
+    sourceLimit = text2 + strlen(text2);
+    target = result;
+    pivotSource = pivotTarget = buffer16;
+
+    // Convert with insufficient target capacity: the target limit is result + 3, two bytes short of the 5-byte text2.
+    result[3] = 5;  // sentinel at the truncated target limit; checked below to be untouched
+    ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
+                   &target, result + 3, &source, sourceLimit,
+                   buffer16, &pivotSource, &pivotTarget, pivotLimit,
+                   FALSE, FALSE, errorCode);
+    assertEquals("text2 overflow", U_BUFFER_OVERFLOW_ERROR, errorCode.reset());
+    length = (int32_t)(target - result);
+    assertEquals("text2 number of bytes written", 3, length);
+    assertEquals("text2 next byte not clobbered", 5, result[3]);
+
+    // Convert the rest and flush, now with the full result buffer as the target.
+    ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
+                   &target, targetLimit, &source, sourceLimit,
+                   buffer16, &pivotSource, &pivotTarget, pivotLimit,
+                   FALSE, TRUE, errorCode);
+
+    assertSuccess("text2 UTF-8->UTF-8", errorCode);
+    length = (int32_t)(target - result);
+    assertEquals("text2 5 bytes", 5, length);
+    if (length == 5) {
+        assertTrue("text2 result same as input", memcmp(text2, result, length) == 0);
+    }
+}
+
 // open testdata or ICU data converter ------------------------------------- ***
 
 UConverter *
@@ -949,6 +1091,7 @@
     // open the converter
     IcuTestErrorCode errorCode(*this, "ToUnicodeCase");
     LocalUConverterPointer cnv(cnv_open(cc.charset, errorCode));
+    // with no data, the above crashes with "pointer being freed was not allocated" for charset "x11-compound-text", see #13078
     if(errorCode.isFailure()) {
         errcheckln(errorCode, "toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s",
                 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, errorCode.errorName());
@@ -990,7 +1133,7 @@
     int32_t i, step;
 
     ok=TRUE;
-    for(i=0; i<LENGTHOF(steps) && ok; ++i) {
+    for(i=0; i<UPRV_LENGTHOF(steps) && ok; ++i) {
         step=steps[i].step;
         if(step<0 && !cc.finalFlush) {
             // skip ucnv_getNextUChar() if !finalFlush because
@@ -1002,12 +1145,12 @@
             cc.offsets=NULL;
         }
         else {
-            memset(resultOffsets, -1, LENGTHOF(resultOffsets));
+            memset(resultOffsets, -1, sizeof(resultOffsets));  // memset() takes a byte count, not an element count
         }
-        memset(result, -1, LENGTHOF(result));
+        memset(result, -1, sizeof(result));  // memset() takes a byte count, not an element count
         errorCode.reset();
         resultLength=stepToUnicode(cc, cnv.getAlias(),
-                                result, LENGTHOF(result),
+                                result, UPRV_LENGTHOF(result),
                                 step==0 ? resultOffsets : NULL,
                                 step, errorCode);
         ok=checkToUnicode(
@@ -1037,7 +1180,7 @@
 
         errorCode.reset();
         resultLength=ucnv_toUChars(cnv.getAlias(),
-                        result, LENGTHOF(result),
+                        result, UPRV_LENGTHOF(result),
                         (const char *)cc.bytes, cc.bytesLength,
                         errorCode);
         ok=checkToUnicode(
@@ -1184,7 +1327,7 @@
         targetLimit=resultLimit;
         flush=cc.finalFlush;
 
-        pivotLimit=pivotBuffer+LENGTHOF(pivotBuffer);
+        pivotLimit=pivotBuffer+UPRV_LENGTHOF(pivotBuffer);
     } else {
         // start with empty partial buffers
         sourceLimit=source;
@@ -1403,7 +1546,7 @@
     // convert unicode to utf8
     char utf8[256];
     cc.utf8=utf8;
-    u_strToUTF8(utf8, LENGTHOF(utf8), &cc.utf8Length,
+    u_strToUTF8(utf8, UPRV_LENGTHOF(utf8), &cc.utf8Length,
                 cc.unicode, cc.unicodeLength,
                 &errorCode);
     if(U_FAILURE(errorCode)) {
@@ -1430,13 +1573,13 @@
     int32_t i, step;
 
     ok=TRUE;
-    for(i=0; i<LENGTHOF(steps) && ok; ++i) {
+    for(i=0; i<UPRV_LENGTHOF(steps) && ok; ++i) {
         step=steps[i].step;
-        memset(resultOffsets, -1, LENGTHOF(resultOffsets));
-        memset(result, -1, LENGTHOF(result));
+        memset(resultOffsets, -1, sizeof(resultOffsets));  // memset() takes a byte count, not an element count
+        memset(result, -1, sizeof(result));  // memset() takes a byte count, not an element count
         errorCode=U_ZERO_ERROR;
         resultLength=stepFromUnicode(cc, cnv,
-                                result, LENGTHOF(result),
+                                result, UPRV_LENGTHOF(result),
                                 step==0 ? resultOffsets : NULL,
                                 step, &errorCode);
         ok=checkFromUnicode(
@@ -1465,7 +1608,7 @@
         if(cc.utf8Length>=0) {
             errorCode=U_ZERO_ERROR;
             resultLength=stepFromUTF8(cc, utf8Cnv, cnv,
-                                    result, LENGTHOF(result),
+                                    result, UPRV_LENGTHOF(result),
                                     step, &errorCode);
             ok=checkFromUnicode(
                     cc, cnv, steps[i].utf8Name,
@@ -1488,7 +1631,7 @@
 
         errorCode=U_ZERO_ERROR;
         resultLength=ucnv_fromUChars(cnv,
-                        result, LENGTHOF(result),
+                        result, UPRV_LENGTHOF(result),
                         cc.unicode, cc.unicodeLength,
                         &errorCode);
         ok=checkFromUnicode(
@@ -1537,7 +1680,7 @@
     msg=NULL;
 
     errorCode=U_ZERO_ERROR;
-    resultInvalidLength=LENGTHOF(resultInvalidUChars);
+    resultInvalidLength=UPRV_LENGTHOF(resultInvalidUChars);
     ucnv_getInvalidUChars(cnv, resultInvalidUChars, &resultInvalidLength, &errorCode);
     if(U_FAILURE(errorCode)) {
         errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidUChars() failed - %s",
diff -Nura icu/source/test/intltest/convtest.h icu_new/source/test/intltest/convtest.h
--- icu/source/test/intltest/convtest.h	2013-10-05 04:47:50.000000000 +0800
+++ icu_new/source/test/intltest/convtest.h	2018-05-04 18:08:23.337412982 +0800
@@ -6,7 +6,7 @@
  *
  *******************************************************************************
  *   file name:  convtest.h
- *   encoding:   US-ASCII
+ *   encoding:   UTF-8
  *   tab size:   8 (not used)
  *   indentation:4
  *
@@ -73,6 +73,8 @@
     void TestFromUnicode();
     void TestGetUnicodeSet();
     void TestGetUnicodeSet2();
+    void TestDefaultIgnorableCallback();
+    void TestUTF8ToUTF8Overflow();
 
 private:
     UBool
diff -Nura icu/source/test/intltest/utxttest.cpp icu_new/source/test/intltest/utxttest.cpp
--- icu/source/test/intltest/utxttest.cpp	2013-10-05 04:47:58.000000000 +0800
+++ icu_new/source/test/intltest/utxttest.cpp	2018-05-04 18:08:23.338412994 +0800
@@ -57,6 +57,8 @@
             if (exec) Ticket5560();  break;
         case 4: name = "Ticket6847";
             if (exec) Ticket6847();  break;
+        case 5: name = "Ticket12888";  // must directly follow case 4: a gap falls into default (name "") and ends the test loop
+            if (exec) Ticket12888(); break;
         default: name = "";          break;
     }
 }
@@ -1452,3 +1454,62 @@
     utext_close(ut);
 }
 
+// Ticket 12888: bad handling of illegal UTF-8 containing many instances of the archaic, now illegal,
+//               six-byte UTF-8 forms. The original implementation assumed that
+//               there would be at most three UTF-8 bytes per UTF-16 code unit.
+//               This test expects each six-byte sequence to map to a single U+FFFD replacement character.
+
+void UTextTest::Ticket12888() {
+    const char *badString = 
+            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+            "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80";
+
+    UErrorCode status = U_ZERO_ERROR;
+    LocalUTextPointer ut(utext_openUTF8(NULL, badString, -1, &status));
+    TEST_SUCCESS(status);
+    for (;;) {
+        UChar32 c = utext_next32(ut.getAlias());
+        if (c == U_SENTINEL) {
+            break;
+        }
+    }
+    int32_t endIdx = utext_getNativeIndex(ut.getAlias());
+    if (endIdx != (int32_t)strlen(badString)) {
+        errln("%s:%d expected=%d, actual=%d", __FILE__, __LINE__, (int32_t)strlen(badString), endIdx);  // %d needs int, not size_t
+        return;
+    }
+
+    for (int32_t prevIndex = endIdx; prevIndex>0;) {
+        UChar32 c = utext_previous32(ut.getAlias());
+        int32_t currentIndex = utext_getNativeIndex(ut.getAlias());
+        if (c != 0xfffd) {
+            errln("%s:%d (expected, actual, index) = (%d, %d, %d)\n",
+                    __FILE__, __LINE__, 0xfffd, c, currentIndex);
+            break;
+        }
+        if (currentIndex != prevIndex - 6) {
+            errln("%s:%d: wrong index. Expected, actual = %d, %d",
+                    __FILE__, __LINE__, prevIndex - 6, currentIndex);
+            break;
+        }
+        prevIndex = currentIndex;
+    }
+}
diff -Nura icu/source/test/intltest/utxttest.h icu_new/source/test/intltest/utxttest.h
--- icu/source/test/intltest/utxttest.h	2013-10-05 04:47:56.000000000 +0800
+++ icu_new/source/test/intltest/utxttest.h	2018-05-04 18:08:23.338412994 +0800
@@ -33,6 +33,7 @@
     void FreezeTest();
     void Ticket5560();
     void Ticket6847();
+    void Ticket12888();
 
 private:
     struct m {                              // Map between native indices & code points.

Places

File icu-CVE-2016-6293-2017-7867-2017-7868-2017-14952-2017-15422-2017-17484.patch of Package icu

Places