Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
SUSE:SLE-12-SP1:GA
expat.22988
expat-CVE-2022-25235.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File expat-CVE-2022-25235.patch of Package expat.22988
From ee2a5b50e7d1940ba8745715b62ceb9efd3a96da Mon Sep 17 00:00:00 2001 From: Sebastian Pipping <sebastian@pipping.org> Date: Tue, 8 Feb 2022 17:37:14 +0100 Subject: [PATCH 1/5] lib: Drop unused macro UTF8_GET_NAMING --- expat/lib/xmltok.c | 5 ----- 1 file changed, 5 deletions(-) Index: expat-2.1.0/lib/xmltok.c =================================================================== --- expat-2.1.0.orig/lib/xmltok.c +++ expat-2.1.0/lib/xmltok.c @@ -71,13 +71,6 @@ + ((((byte)[2]) >> 5) & 1)] \ & (1 << (((byte)[2]) & 0x1F))) -#define UTF8_GET_NAMING(pages, p, n) \ - ((n) == 2 \ - ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \ - : ((n) == 3 \ - ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \ - : 0)) - /* Detection of invalid UTF-8 sequences is based on Table 3.1B of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/ with the additional restriction of not allowing the Unicode Index: expat-2.1.0/lib/xmltok_impl.c =================================================================== --- expat-2.1.0.orig/lib/xmltok_impl.c +++ expat-2.1.0/lib/xmltok_impl.c @@ -34,7 +34,7 @@ case BT_LEAD ## n: \ if (end - ptr < n) \ return XML_TOK_PARTIAL_CHAR; \ - if (!IS_NAME_CHAR(enc, ptr, n)) { \ + if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NAME_CHAR(enc, ptr, n)) { \ *nextTokPtr = ptr; \ return XML_TOK_INVALID; \ } \ @@ -62,7 +62,7 @@ case BT_LEAD ## n: \ if (end - ptr < n) \ return XML_TOK_PARTIAL_CHAR; \ - if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \ + if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NMSTRT_CHAR(enc, ptr, n)) { \ *nextTokPtr = ptr; \ return XML_TOK_INVALID; \ } \ @@ -1097,6 +1097,10 @@ PREFIX(prologTok)(const ENCODING *enc, c case BT_LEAD ## n: \ if (end - ptr < n) \ return XML_TOK_PARTIAL_CHAR; \ + if (IS_INVALID_CHAR(enc, ptr, n)) { \ + *nextTokPtr = ptr; \ + return XML_TOK_INVALID; \ + } \ if (IS_NMSTRT_CHAR(enc, ptr, n)) { \ ptr += n; \ tok = XML_TOK_NAME; \ @@ -1210,7 +1214,9 @@ PREFIX(attributeValueTok)(const ENCODING while (ptr < end) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ - case BT_LEAD ## n: ptr += n; break; + case BT_LEAD ## n: \ + ptr += n; /* NOTE: The encoding has already been validated. */ \ + break; LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) #undef LEAD_CASE case BT_AMP: @@ -1268,7 +1274,9 @@ PREFIX(entityValueTok)(const ENCODING *e while (ptr < end) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ - case BT_LEAD ## n: ptr += n; break; + case BT_LEAD ## n: \ + ptr += n; /* NOTE: The encoding has already been validated. */ \ + break; LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) #undef LEAD_CASE case BT_AMP: @@ -1447,7 +1455,8 @@ PREFIX(getAtts)(const ENCODING *enc, con state = inName; \ } #define LEAD_CASE(n) \ - case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break; + case BT_LEAD ## n: /* NOTE: The encoding has already been validated. */ \ + START_NAME ptr += (n - MINBPC(enc)); break; LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) #undef LEAD_CASE case BT_NONASCII: @@ -1702,8 +1711,10 @@ PREFIX(nameLength)(const ENCODING *enc, for (;;) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ - case BT_LEAD ## n: ptr += n; break; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) + case BT_LEAD ## n: \ + ptr += n; /* NOTE: The encoding has already been validated. */ \ + break; + LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) #undef LEAD_CASE case BT_NONASCII: case BT_NMSTRT: @@ -1748,7 +1759,7 @@ PREFIX(updatePosition)(const ENCODING *e switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: \ - ptr += n; \ + ptr += n; /* NOTE: The encoding has already been validated. */ \ break; LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) #undef LEAD_CASE Index: expat-2.1.0/tests/runtests.c =================================================================== --- expat-2.1.0.orig/tests/runtests.c +++ expat-2.1.0/tests/runtests.c @@ -13,6 +13,7 @@ #include <stdio.h> #include <string.h> #include <stdint.h> +#include <stdbool.h> #include <limits.h> // INT_MAX #include "expat.h" @@ -29,6 +30,29 @@ #define XML_FMT_INT_MOD "l" #endif +#ifdef XML_UNICODE_WCHAR_T +# define XML_FMT_CHAR "lc" +# define XML_FMT_STR "ls" +# include <wchar.h> +# define xcstrlen(s) wcslen(s) +# define xcstrcmp(s, t) wcscmp((s), (t)) +# define xcstrncmp(s, t, n) wcsncmp((s), (t), (n)) +# define XCS(s) _XCS(s) +# define _XCS(s) L ## s +#else +# ifdef XML_UNICODE +# error "No support for UTF-16 character without wchar_t in tests" +# else +# define XML_FMT_CHAR "c" +# define XML_FMT_STR "s" +# define xcstrlen(s) strlen(s) +# define xcstrcmp(s, t) strcmp((s), (t)) +# define xcstrncmp(s, t, n) strncmp((s), (t), (n)) +# define XCS(s) s +# endif /* XML_UNICODE */ +#endif /* XML_UNICODE_WCHAR_T */ + + static XML_Parser parser; @@ -149,6 +173,9 @@ dummy_start_element(void *userData, const XML_Char *name, const XML_Char **atts) {} +static void XMLCALL +dummy_end_element(void *userData, const XML_Char *name) +{} /* * Character & encoding tests. @@ -320,6 +347,8 @@ START_TEST(test_utf8_false_rejection) } END_TEST + + /* Regression test for SF bug #477667. This test assures that any 8-bit character followed by a 7-bit character will not be mistakenly interpreted as a valid UTF-8 @@ -346,6 +375,105 @@ START_TEST(test_illegal_utf8) } END_TEST +START_TEST(test_utf8_in_start_tags) { + struct test_case { + bool goodName; + bool goodNameStart; + const char *tagName; + }; + + // The idea with the tests below is this: + // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences + // go to isNever and are hence not a concern. + // + // We start with a character that is a valid name character + // (or even name-start character, see XML 1.0r4 spec) and then we flip + // single bits at places where (1) the result leaves the UTF-8 encoding space + // and (2) we stay in the same n-byte sequence family. + // + // The flipped bits are highlighted in angle brackets in comments, + // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped + // the most significant bit to 1 to leave UTF-8 encoding space. + struct test_case cases[] = { + // 1-byte UTF-8: [0xxx xxxx] + {true, true, "\x3A"}, // [0011 1010] = ASCII colon ':' + {false, false, "\xBA"}, // [<1>011 1010] + {true, false, "\x39"}, // [0011 1001] = ASCII nine '9' + {false, false, "\xB9"}, // [<1>011 1001] + + // 2-byte UTF-8: [110x xxxx] [10xx xxxx] + {true, true, "\xDB\xA5"}, // [1101 1011] [1010 0101] = + // Arabic small waw U+06E5 + {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101] + {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101] + {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101] + {true, false, "\xCC\x81"}, // [1100 1100] [1000 0001] = + // combining char U+0301 + {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001] + {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001] + {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001] + + // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx] + {true, true, "\xE0\xA4\x85"}, // [1110 0000] [1010 0100] [1000 0101] = + // Devanagari Letter A U+0905 + {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101] + {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101] + {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101] + {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101] + {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101] + {true, false, "\xE0\xA4\x81"}, // [1110 0000] [1010 0100] [1000 0001] = + // combining char U+0901 + {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001] + {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001] + {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001] + {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001] + {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001] + }; + const bool atNameStart[] = {true, false}; + + size_t i = 0; + char doc[1024]; + size_t failCount = 0; + + for (; i < sizeof(cases) / sizeof(cases[0]); i++) { + size_t j = 0; + for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) { + const bool expectedSuccess + = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName; + sprintf(doc, "<%s%s><!--", atNameStart[j] ? "" : "a", cases[i].tagName); + XML_Parser parser = XML_ParserCreate(NULL); + + const enum XML_Status status + = XML_Parse(parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE); + + bool success = true; + if ((status == XML_STATUS_OK) != expectedSuccess) { + success = false; + } + if ((status == XML_STATUS_ERROR) + && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) { + success = false; + } + + if (! success) { + fprintf( + stderr, + "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n", + (unsigned)i + 1u, atNameStart[j] ? " " : "not ", + (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser)); + failCount++; + } + + XML_ParserFree(parser); + } + } + + if (failCount > 0) { + fail("UTF-8 regression detected"); + } +} +END_TEST + START_TEST(test_utf16) { /* <?xml version="1.0" encoding="UTF-16"?> @@ -1138,6 +1266,15 @@ START_TEST(test_suspend_parser_between_c END_TEST +START_TEST(test_bad_doctype_utf8) { + char *text = "<!DOCTYPE \xDB\x25" + "doc><doc/>"; // [1101 1011] [<0>010 0101] + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "Invalid UTF-8 in DOCTYPE not faulted"); +} +END_TEST + + /* * Namespaces tests. */ @@ -1618,6 +1755,7 @@ make_suite(void) tcase_add_test(tc_basic, test_bom_utf16_be); tcase_add_test(tc_basic, test_bom_utf16_le); tcase_add_test(tc_basic, test_illegal_utf8); + tcase_add_test(tc_basic, test_utf8_in_start_tags); tcase_add_test(tc_basic, test_utf16); tcase_add_test(tc_basic, test_utf16_le_epilog_newline); tcase_add_test(tc_basic, test_latin1_umlauts); @@ -1657,6 +1795,7 @@ make_suite(void) #ifdef XML_DTD tcase_add_test(tc_basic, test_misc_deny_internal_entity_closing_doctype_issue_317); #endif + tcase_add_test(tc_basic, test_bad_doctype_utf8); suite_add_tcase(s, tc_namespace); tcase_add_checked_fixture(tc_namespace,
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor