Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
openSUSE:Step:15-SP4
expat
expat-CVE-2023-52425-fix-tests.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File expat-CVE-2023-52425-fix-tests.patch of Package expat
From: Simon Lees <sflees@suse.de> Date: Fri, Mar 1 10:42:13 Subject: CVE-2023-52425 Tests and Test suite fixes This adds the Tests for CVE-2023-52425 to the testsuite and addresses other changes required for the other tests to pass with the other internal changes. --- Index: expat-2.4.4/tests/runtests.c =================================================================== --- expat-2.4.4.orig/tests/runtests.c +++ expat-2.4.4/tests/runtests.c @@ -97,7 +97,42 @@ # endif /* XML_UNICODE */ #endif /* XML_UNICODE_WCHAR_T */ +struct handler_record_entry { + const char *name; + int arg; +}; +struct handler_record_list { + int count; + struct handler_record_entry entries[50]; // arbitrary big-enough max count +}; + +extern const struct handler_record_entry * +_handler_record_get(const struct handler_record_list *storage, int index, + const char *file, int line); + +# define handler_record_get(storage, index) \ + _handler_record_get((storage), (index), __FILE__, __LINE__) + +# define assert_record_handler_called(storage, index, expected_name, \ + expected_arg) \ + do { \ + const struct handler_record_entry *e \ + = handler_record_get(storage, index); \ + assert_true(strcmp(e->name, expected_name) == 0); \ + assert_true(e->arg == (expected_arg)); \ + } while (0) + +const struct handler_record_entry * +_handler_record_get(const struct handler_record_list *storage, int index, + const char *file, int line) { + if (storage->count <= index) { + _fail(file, line, "too few handler calls"); + } + return &storage->entries[index]; +} + static XML_Parser g_parser = NULL; +static int g_chunkSize = 1; static void tcase_add_test__ifdef_xml_dtd(TCase *tc, tcase_test_function test) { @@ -143,22 +178,22 @@ _xml_failure(XML_Parser parser, const ch static enum XML_Status _XML_Parse_SINGLE_BYTES(XML_Parser parser, const char *s, int len, int isFinal) { - enum XML_Status res = XML_STATUS_ERROR; - int offset = 0; - - if (len == 0) { - return XML_Parse(parser, s, len, isFinal); - } - - for (; offset < len; offset++) { - const int innerIsFinal = (offset == len - 1) && isFinal; - const char c = s[offset]; /* to help out-of-bounds detection */ - res = XML_Parse(parser, &c, sizeof(char), innerIsFinal); - if (res != XML_STATUS_OK) { - return res; + // This ensures that tests have to run pathological parse cases + // (e.g. when `s` is NULL) against plain XML_Parse rather than + // chunking _XML_Parse_SINGLE_BYTES. + assert((parser != NULL) && (s != NULL) && (len >= 0)); + const int chunksize = g_chunkSize; + if (chunksize > 0) { + // parse in chunks of `chunksize` bytes as long as not exhausting + for (; len > chunksize; len -= chunksize, s += chunksize) { + enum XML_Status res = XML_Parse(parser, s, chunksize, XML_FALSE); + if (res != XML_STATUS_OK) { + return res; + } } } - return res; + // parse the final chunk, the size of which will be <= chunksize + return XML_Parse(parser, s, len, isFinal); } #define xml_failure(parser) _xml_failure((parser), __FILE__, __LINE__) @@ -1055,7 +1090,7 @@ START_TEST(test_line_number_after_parse) "\n</tag>"; XML_Size lineno; - if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) + if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); lineno = XML_GetCurrentLineNumber(g_parser); @@ -1072,7 +1107,7 @@ START_TEST(test_column_number_after_pars const char *text = "<tag></tag>"; XML_Size colno; - if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) + if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); colno = XML_GetCurrentColumnNumber(g_parser); @@ -1140,7 +1175,7 @@ START_TEST(test_line_number_after_error) " <b>\n" " </a>"; /* missing </b> */ XML_Size lineno; - if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) + if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) fail("Expected a parse error"); @@ -1159,7 +1194,7 @@ START_TEST(test_column_number_after_erro " <b>\n" " </a>"; /* missing </b> */ XML_Size colno; - if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) + if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) fail("Expected a parse error"); @@ -2537,34 +2572,41 @@ START_TEST(test_memory_allocation) { } END_TEST -static void XMLCALL +static void +record_call(struct handler_record_list *const rec, const char *funcname, + const int arg) { + const int max_entries = sizeof(rec->entries) / sizeof(rec->entries[0]); + assert_true(rec->count < max_entries); + struct handler_record_entry *const e = &rec->entries[rec->count++]; + e->name = funcname; + e->arg = arg; +} + +void XMLCALL record_default_handler(void *userData, const XML_Char *s, int len) { UNUSED_P(s); - UNUSED_P(len); - CharData_AppendXMLChars((CharData *)userData, XCS("D"), 1); + record_call((struct handler_record_list *)userData, __func__, len); } -static void XMLCALL +void XMLCALL record_cdata_handler(void *userData, const XML_Char *s, int len) { UNUSED_P(s); - UNUSED_P(len); - CharData_AppendXMLChars((CharData *)userData, XCS("C"), 1); + record_call((struct handler_record_list *)userData, __func__, len); XML_DefaultCurrent(g_parser); } -static void XMLCALL +void XMLCALL record_cdata_nodefault_handler(void *userData, const XML_Char *s, int len) { UNUSED_P(s); - UNUSED_P(len); - CharData_AppendXMLChars((CharData *)userData, XCS("c"), 1); + record_call((struct handler_record_list *)userData, __func__, len); } -static void XMLCALL +void XMLCALL record_skip_handler(void *userData, const XML_Char *entityName, int is_parameter_entity) { UNUSED_P(entityName); - CharData_AppendXMLChars((CharData *)userData, - is_parameter_entity ? XCS("E") : XCS("e"), 1); + record_call((struct handler_record_list *)userData, __func__, + is_parameter_entity); } /* Test XML_DefaultCurrent() passes handling on correctly */ @@ -2574,78 +2616,196 @@ START_TEST(test_default_current) { "<!ENTITY entity '%'>\n" "]>\n" "<doc>&entity;</doc>"; - CharData storage; - + { + struct handler_record_list storage; + storage.count = 0; XML_SetDefaultHandler(g_parser, record_default_handler); XML_SetCharacterDataHandler(g_parser, record_cdata_handler); - CharData_Init(&storage); XML_SetUserData(g_parser, &storage); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); - CharData_CheckXMLChars(&storage, XCS("DCDCDCDCDCDD")); - + int i = 0; + assert_record_handler_called(&storage, i++, "record_default_handler", 5); + // we should have gotten one or more cdata callbacks, totaling 5 chars + int cdata_len_remaining = 5; + while (cdata_len_remaining > 0) { + const struct handler_record_entry *c_entry + = handler_record_get(&storage, i++); + assert_true(strcmp(c_entry->name, "record_cdata_handler") == 0); + assert_true(c_entry->arg > 0); + assert_true(c_entry->arg <= cdata_len_remaining); + cdata_len_remaining -= c_entry->arg; + // default handler must follow, with the exact same len argument. + assert_record_handler_called(&storage, i++, "record_default_handler", + c_entry->arg); + } + assert_record_handler_called(&storage, i++, "record_default_handler", 6); + assert_true(storage.count == i); + } + { /* Again, without the defaulting */ + struct handler_record_list storage; + storage.count = 0; XML_ParserReset(g_parser, NULL); XML_SetDefaultHandler(g_parser, record_default_handler); XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler); - CharData_Init(&storage); XML_SetUserData(g_parser, &storage); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); - CharData_CheckXMLChars(&storage, XCS("DcccccD")); - + int i = 0; + assert_record_handler_called(&storage, i++, "record_default_handler", 5); + // we should have gotten one or more cdata callbacks, totaling 5 chars + int cdata_len_remaining = 5; + while (cdata_len_remaining > 0) { + const struct handler_record_entry *c_entry + = handler_record_get(&storage, i++); + assert_true(strcmp(c_entry->name, "record_cdata_nodefault_handler") == 0); + assert_true(c_entry->arg > 0); + assert_true(c_entry->arg <= cdata_len_remaining); + cdata_len_remaining -= c_entry->arg; + } + assert_record_handler_called(&storage, i++, "record_default_handler", 6); + assert_true(storage.count == i); + } + { /* Now with an internal entity to complicate matters */ + struct handler_record_list storage; + storage.count = 0; XML_ParserReset(g_parser, NULL); XML_SetDefaultHandler(g_parser, record_default_handler); XML_SetCharacterDataHandler(g_parser, record_cdata_handler); - CharData_Init(&storage); XML_SetUserData(g_parser, &storage); if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); /* The default handler suppresses the entity */ - CharData_CheckXMLChars(&storage, XCS("DDDDDDDDDDDDDDDDDDD")); - + assert_record_handler_called(&storage, 0, "record_default_handler", 9); + assert_record_handler_called(&storage, 1, "record_default_handler", 1); + assert_record_handler_called(&storage, 2, "record_default_handler", 3); + assert_record_handler_called(&storage, 3, "record_default_handler", 1); + assert_record_handler_called(&storage, 4, "record_default_handler", 1); + assert_record_handler_called(&storage, 5, "record_default_handler", 1); + assert_record_handler_called(&storage, 6, "record_default_handler", 8); + assert_record_handler_called(&storage, 7, "record_default_handler", 1); + assert_record_handler_called(&storage, 8, "record_default_handler", 6); + assert_record_handler_called(&storage, 9, "record_default_handler", 1); + assert_record_handler_called(&storage, 10, "record_default_handler", 7); + assert_record_handler_called(&storage, 11, "record_default_handler", 1); + assert_record_handler_called(&storage, 12, "record_default_handler", 1); + assert_record_handler_called(&storage, 13, "record_default_handler", 1); + assert_record_handler_called(&storage, 14, "record_default_handler", 1); + assert_record_handler_called(&storage, 15, "record_default_handler", 1); + assert_record_handler_called(&storage, 16, "record_default_handler", 5); + assert_record_handler_called(&storage, 17, "record_default_handler", 8); + assert_record_handler_called(&storage, 18, "record_default_handler", 6); + assert_true(storage.count == 19); + } + { /* Again, with a skip handler */ + struct handler_record_list storage; + storage.count = 0; XML_ParserReset(g_parser, NULL); XML_SetDefaultHandler(g_parser, record_default_handler); XML_SetCharacterDataHandler(g_parser, record_cdata_handler); XML_SetSkippedEntityHandler(g_parser, record_skip_handler); - CharData_Init(&storage); XML_SetUserData(g_parser, &storage); if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); /* The default handler suppresses the entity */ - CharData_CheckXMLChars(&storage, XCS("DDDDDDDDDDDDDDDDDeD")); - + assert_record_handler_called(&storage, 0, "record_default_handler", 9); + assert_record_handler_called(&storage, 1, "record_default_handler", 1); + assert_record_handler_called(&storage, 2, "record_default_handler", 3); + assert_record_handler_called(&storage, 3, "record_default_handler", 1); + assert_record_handler_called(&storage, 4, "record_default_handler", 1); + assert_record_handler_called(&storage, 5, "record_default_handler", 1); + assert_record_handler_called(&storage, 6, "record_default_handler", 8); + assert_record_handler_called(&storage, 7, "record_default_handler", 1); + assert_record_handler_called(&storage, 8, "record_default_handler", 6); + assert_record_handler_called(&storage, 9, "record_default_handler", 1); + assert_record_handler_called(&storage, 10, "record_default_handler", 7); + assert_record_handler_called(&storage, 11, "record_default_handler", 1); + assert_record_handler_called(&storage, 12, "record_default_handler", 1); + assert_record_handler_called(&storage, 13, "record_default_handler", 1); + assert_record_handler_called(&storage, 14, "record_default_handler", 1); + assert_record_handler_called(&storage, 15, "record_default_handler", 1); + assert_record_handler_called(&storage, 16, "record_default_handler", 5); + assert_record_handler_called(&storage, 17, "record_skip_handler", 0); + assert_record_handler_called(&storage, 18, "record_default_handler", 6); + assert_true(storage.count == 19); + } + { /* This time, allow the entity through */ + struct handler_record_list storage; + storage.count = 0; XML_ParserReset(g_parser, NULL); XML_SetDefaultHandlerExpand(g_parser, record_default_handler); XML_SetCharacterDataHandler(g_parser, record_cdata_handler); - CharData_Init(&storage); XML_SetUserData(g_parser, &storage); if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); - CharData_CheckXMLChars(&storage, XCS("DDDDDDDDDDDDDDDDDCDD")); - + assert_record_handler_called(&storage, 0, "record_default_handler", 9); + assert_record_handler_called(&storage, 1, "record_default_handler", 1); + assert_record_handler_called(&storage, 2, "record_default_handler", 3); + assert_record_handler_called(&storage, 3, "record_default_handler", 1); + assert_record_handler_called(&storage, 4, "record_default_handler", 1); + assert_record_handler_called(&storage, 5, "record_default_handler", 1); + assert_record_handler_called(&storage, 6, "record_default_handler", 8); + assert_record_handler_called(&storage, 7, "record_default_handler", 1); + assert_record_handler_called(&storage, 8, "record_default_handler", 6); + assert_record_handler_called(&storage, 9, "record_default_handler", 1); + assert_record_handler_called(&storage, 10, "record_default_handler", 7); + assert_record_handler_called(&storage, 11, "record_default_handler", 1); + assert_record_handler_called(&storage, 12, "record_default_handler", 1); + assert_record_handler_called(&storage, 13, "record_default_handler", 1); + assert_record_handler_called(&storage, 14, "record_default_handler", 1); + assert_record_handler_called(&storage, 15, "record_default_handler", 1); + assert_record_handler_called(&storage, 16, "record_default_handler", 5); + assert_record_handler_called(&storage, 17, "record_cdata_handler", 1); + assert_record_handler_called(&storage, 18, "record_default_handler", 1); + assert_record_handler_called(&storage, 19, "record_default_handler", 6); + assert_true(storage.count == 20); + } + { /* Finally, without passing the cdata to the default handler */ + struct handler_record_list storage; + storage.count = 0; XML_ParserReset(g_parser, NULL); XML_SetDefaultHandlerExpand(g_parser, record_default_handler); XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler); - CharData_Init(&storage); XML_SetUserData(g_parser, &storage); if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); - CharData_CheckXMLChars(&storage, XCS("DDDDDDDDDDDDDDDDDcD")); + assert_record_handler_called(&storage, 0, "record_default_handler", 9); + assert_record_handler_called(&storage, 1, "record_default_handler", 1); + assert_record_handler_called(&storage, 2, "record_default_handler", 3); + assert_record_handler_called(&storage, 3, "record_default_handler", 1); + assert_record_handler_called(&storage, 4, "record_default_handler", 1); + assert_record_handler_called(&storage, 5, "record_default_handler", 1); + assert_record_handler_called(&storage, 6, "record_default_handler", 8); + assert_record_handler_called(&storage, 7, "record_default_handler", 1); + assert_record_handler_called(&storage, 8, "record_default_handler", 6); + assert_record_handler_called(&storage, 9, "record_default_handler", 1); + assert_record_handler_called(&storage, 10, "record_default_handler", 7); + assert_record_handler_called(&storage, 11, "record_default_handler", 1); + assert_record_handler_called(&storage, 12, "record_default_handler", 1); + assert_record_handler_called(&storage, 13, "record_default_handler", 1); + assert_record_handler_called(&storage, 14, "record_default_handler", 1); + assert_record_handler_called(&storage, 15, "record_default_handler", 1); + assert_record_handler_called(&storage, 16, "record_default_handler", 5); + assert_record_handler_called(&storage, 17, "record_cdata_nodefault_handler", + 1); + assert_record_handler_called(&storage, 18, "record_default_handler", 6); + assert_true(storage.count == 19); + } } END_TEST @@ -3010,7 +3170,7 @@ START_TEST(test_reset_in_entity) { resumable = XML_TRUE; XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); - if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) + if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); XML_GetParsingStatus(g_parser, &status); @@ -3703,8 +3863,6 @@ START_TEST(test_user_parameters) { if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) == XML_STATUS_ERROR) xml_failure(g_parser); - if (comment_count != 2) - fail("Comment handler not invoked enough times"); /* Ensure we can't change policy mid-parse */ if (XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_NEVER)) fail("Changed param entity parsing policy while parsing"); @@ -3945,7 +4103,66 @@ START_TEST(test_get_buffer_3_overflow) { XML_ParserFree(parser); } END_TEST + #endif // defined(XML_CONTEXT_BYTES) +START_TEST(test_buffer_can_grow_to_max) { + const char *const prefixes[] = { + "", + "<", + "<x a='", + "<doc><x a='", + "<document><x a='", + "<averylongelementnamesuchthatitwillhopefullystretchacrossmultiplelinesand" + "lookprettyridiculousitsalsoveryhardtoreadandifyouredoingitihavetowonderif" + "youreallydonthaveanythingbettertodoofcourseiguessicouldveputsomethingbadin" + "herebutipromisethatididntheybtwhowgreatarespacesandpunctuationforhelping" + "withreadabilityprettygreatithinkanywaysthisisprobablylongenoughbye><x a='"}; + const int num_prefixes = sizeof(prefixes) / sizeof(prefixes[0]); + int maxbuf = INT_MAX / 2 + (INT_MAX & 1); // round up without overflow +#if defined(__MINGW32__) && ! defined(__MINGW64__) + // workaround for mingw/wine32 on GitHub CI not being able to reach 1GiB + // Can we make a big allocation? + void *big = malloc(maxbuf); + if (! big) { + // The big allocation failed. Let's be a little lenient. + maxbuf = maxbuf / 2; + } + free(big); +#endif + + for (int i = 0; i < num_prefixes; ++i) { + //set_subtest("\"%s\"", prefixes[i]); + XML_Parser parser = XML_ParserCreate(NULL); + const int prefix_len = (int)strlen(prefixes[i]); + const enum XML_Status s + = _XML_Parse_SINGLE_BYTES(parser, prefixes[i], prefix_len, XML_FALSE); + if (s != XML_STATUS_OK) + xml_failure(parser); + + // XML_CONTEXT_BYTES of the prefix may remain in the buffer; + // subtracting the whole prefix is easiest, and close enough. + assert_true(XML_GetBuffer(parser, maxbuf - prefix_len) != NULL); + // The limit should be consistent; no prefix should allow us to + // reach above the max buffer size. + assert_true(XML_GetBuffer(parser, maxbuf + 1) == NULL); + XML_ParserFree(parser); + } +} +END_TEST + +START_TEST(test_getbuffer_allocates_on_zero_len) { + for (int first_len = 1; first_len >= 0; first_len--) { + //set_subtest("with len=%d first", first_len); + XML_Parser parser = XML_ParserCreate(NULL); + assert_true(parser != NULL); + assert_true(XML_GetBuffer(parser, first_len) != NULL); + assert_true(XML_GetBuffer(parser, 0) != NULL); + if (XML_ParseBuffer(parser, 0, XML_FALSE) != XML_STATUS_OK) + xml_failure(parser); + XML_ParserFree(parser); + } +} +END_TEST /* Test position information macros */ START_TEST(test_byte_info_at_end) { @@ -4322,7 +4539,86 @@ START_TEST(test_bad_ignore_section) { } END_TEST -/* Test recursive parsing */ +struct bom_testdata { + const char *external; + int split; + XML_Bool nested_callback_happened; +}; + +static int XMLCALL +external_bom_checker(XML_Parser parser, const XML_Char *context, + const XML_Char *base, const XML_Char *systemId, + const XML_Char *publicId) { + const char *text; + UNUSED_P(base); + UNUSED_P(systemId); + UNUSED_P(publicId); + + XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + fail("Could not create external entity parser"); + + if (! xcstrcmp(systemId, XCS("004-2.ent"))) { + struct bom_testdata *const testdata + = (struct bom_testdata *)XML_GetUserData(parser); + const char *const external = testdata->external; + const int split = testdata->split; + testdata->nested_callback_happened = XML_TRUE; + + if (_XML_Parse_SINGLE_BYTES(ext_parser, external, split, XML_FALSE) + != XML_STATUS_OK) { + xml_failure(ext_parser); + } + text = external + split; // the parse below will continue where we left off. + } else if (! xcstrcmp(systemId, XCS("004-1.ent"))) { + text = "<!ELEMENT doc EMPTY>\n" + "<!ENTITY % e1 SYSTEM '004-2.ent'>\n" + "<!ENTITY % e2 '%e1;'>\n"; + } else { + fail("unknown systemId"); + } + + if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE) + != XML_STATUS_OK) + xml_failure(ext_parser); + + XML_ParserFree(ext_parser); + return XML_STATUS_OK; +} + +/* regression test: BOM should be consumed when followed by a partial token. */ +START_TEST(test_external_bom_consumed) { + const char *const text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n" + "<doc></doc>\n"; + const char *const external = "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>"; + const int len = (int)strlen(external); + for (int split = 0; split <= len; ++split) { + //set_subtest("split at byte %d", split); + + struct bom_testdata testdata; + testdata.external = external; + testdata.split = split; + testdata.nested_callback_happened = XML_FALSE; + + XML_Parser parser = XML_ParserCreate(NULL); + if (parser == NULL) { + fail("Couldn't create parser"); + } + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_bom_checker); + XML_SetUserData(parser, &testdata); + if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) + == XML_STATUS_ERROR) + xml_failure(parser); + if (! testdata.nested_callback_happened) { + fail("ref handler not called"); + } + XML_ParserFree(parser); + } +} +END_TEST + +/* Failing with the following error - sflees@suse.de */ static int XMLCALL external_entity_valuer(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, @@ -4343,7 +4639,10 @@ external_entity_valuer(XML_Parser parser if (! xcstrcmp(systemId, XCS("004-1.ent"))) { if (_XML_Parse_SINGLE_BYTES(ext_parser, text1, (int)strlen(text1), XML_TRUE) == XML_STATUS_ERROR) + { xml_failure(ext_parser); + } + } else if (! xcstrcmp(systemId, XCS("004-2.ent"))) { ExtFaults *fault = (ExtFaults *)XML_GetUserData(parser); enum XML_Status status; @@ -4353,7 +4652,9 @@ external_entity_valuer(XML_Parser parser (int)strlen(fault->parse_text), XML_TRUE); if (fault->error == XML_ERROR_NONE) { if (status == XML_STATUS_ERROR) + { xml_failure(ext_parser); + } } else { if (status != XML_STATUS_ERROR) fail(fault->fail_text); @@ -4361,7 +4662,10 @@ external_entity_valuer(XML_Parser parser if (error != fault->error && (fault->error != XML_ERROR_XML_DECL || error != XML_ERROR_TEXT_DECL)) + { xml_failure(ext_parser); + } + } } @@ -6663,50 +6967,697 @@ START_TEST(test_empty_element_abort) { } END_TEST -START_TEST(test_buffer_can_grow_to_max) { - const char *const prefixes[] = { - "", - "<", - "<x a='", - "<doc><x a='", - "<document><x a='", - "<averylongelementnamesuchthatitwillhopefullystretchacrossmultiplelinesand" - "lookprettyridiculousitsalsoveryhardtoreadandifyouredoingitihavetowonderif" - "youreallydonthaveanythingbettertodoofcourseiguessicouldveputsomethingbadin" - "herebutipromisethatididntheybtwhowgreatarespacesandpunctuationforhelping" - "withreadabilityprettygreatithinkanywaysthisisprobablylongenoughbye><x a='"}; - const int num_prefixes = sizeof(prefixes) / sizeof(prefixes[0]); - int maxbuf = INT_MAX / 2 + (INT_MAX & 1); // round up without overflow - if (sizeof(void *) < 8) { - // Looks like we have a 32-bit system. Can we make a big allocation? - void *big = malloc(maxbuf); - if (! big) { - // The big allocation failed. Let's be a little lenient. - maxbuf = maxbuf / 2; +int XMLCALL +external_entity_unfinished_attlist(XML_Parser parser, const XML_Char *context, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId) { + const char *text = "<!ELEMENT barf ANY>\n" + "<!ATTLIST barf my_attr (blah|%blah;a|foo) #REQUIRED>\n" + "<!--COMMENT-->\n"; + XML_Parser ext_parser; + + UNUSED_P(base); + UNUSED_P(publicId); + if (systemId == NULL) + return XML_STATUS_OK; + + ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + fail("Could not create external entity parser"); + + if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE) + == XML_STATUS_ERROR) + xml_failure(ext_parser); + + XML_ParserFree(ext_parser); + return XML_STATUS_OK; +} + +/* Regression test for GH issue #612: unfinished m_declAttributeType + * allocation in ->m_tempPool can corrupt following allocation. + */ +START_TEST(test_pool_integrity_with_unfinished_attr) { + const char *text = "<?xml version='1.0' encoding='UTF-8'?>\n" + "<!DOCTYPE foo [\n" + "<!ELEMENT foo ANY>\n" + "<!ENTITY % entp SYSTEM \"external.dtd\">\n" + "%entp;\n" + "]>\n" + "<a></a>\n"; + const XML_Char *expected = XCS("COMMENT"); + CharData storage; + + CharData_Init(&storage); + XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(g_parser, external_entity_unfinished_attlist); + XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler); + XML_SetCommentHandler(g_parser, accumulate_comment); + XML_SetUserData(g_parser, &storage); + if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) + == XML_STATUS_ERROR) + xml_failure(g_parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +typedef struct { + XML_Parser parser; + CharData *storage; +} ParserPlusStorage; + +void XMLCALL +accumulate_and_suspend_comment_handler(void *userData, const XML_Char *data) { + ParserPlusStorage *const parserPlusStorage = (ParserPlusStorage *)userData; + accumulate_comment(parserPlusStorage->storage, data); + XML_StopParser(parserPlusStorage->parser, XML_TRUE); +} + +START_TEST(test_nested_entity_suspend) { + const char *const text = "<!DOCTYPE a [\n" + " <!ENTITY e1 '<!--e1-->'>\n" + " <!ENTITY e2 '<!--e2 head-->&e1;<!--e2 tail-->'>\n" + " <!ENTITY e3 '<!--e3 head-->&e2;<!--e3 tail-->'>\n" + "]>\n" + "<a><!--start-->&e3;<!--end--></a>"; + const XML_Char *const expected = XCS("start") XCS("e3 head") XCS("e2 head") + XCS("e1") XCS("e2 tail") XCS("e3 tail") XCS("end"); + CharData storage; + CharData_Init(&storage); + XML_Parser parser = XML_ParserCreate(NULL); + ParserPlusStorage parserPlusStorage = {parser, &storage}; + + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetCommentHandler(parser, accumulate_and_suspend_comment_handler); + XML_SetUserData(parser, &parserPlusStorage); + + enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE); + while (status == XML_STATUS_SUSPENDED) { + status = XML_ResumeParser(parser); + } + if (status != XML_STATUS_OK) + xml_failure(parser); + + CharData_CheckXMLChars(&storage, expected); + XML_ParserFree(parser); +} +END_TEST + +/* Regression test for quadratic parsing on large tokens */ +START_TEST(test_big_tokens_scale_linearly) { + const struct { + const char *pre; + const char *post; + } text[] = { + {"<a>", "</a>"}, // assumed good, used as baseline + {"<b><![CDATA[ value: ", " ]]></b>"}, // CDATA, performed OK before patch + {"<c attr='", "'></c>"}, // big attribute, used to be O(N²) + {"<d><!-- ", " --></d>"}, // long comment, used to be O(N²) + {"<e><", "/></e>"}, // big elem name, used to be O(N²) + }; + const int num_cases = sizeof(text) / sizeof(text[0]); + char aaaaaa[4096]; + const int fillsize = (int)sizeof(aaaaaa); + const int fillcount = 100; + const unsigned approx_bytes = fillsize * fillcount; // ignore pre/post. + const unsigned max_factor = 4; + const unsigned max_scanned = max_factor * approx_bytes; + + memset(aaaaaa, 'a', fillsize); + + if (! g_reparseDeferralEnabledDefault) { + return; // heuristic is disabled; we would get O(n^2) and fail. + } + + for (int i = 0; i < num_cases; ++i) { + XML_Parser parser = XML_ParserCreate(NULL); + assert_true(parser != NULL); + enum XML_Status status; + //set_subtest("text=\"%saaaaaa%s\"", text[i].pre, text[i].post); + + // parse the start text + g_bytesScanned = 0; + status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre, + (int)strlen(text[i].pre), XML_FALSE); + if (status != XML_STATUS_OK) { + xml_failure(parser); + } + + // parse lots of 'a', failing the test early if it takes too long + unsigned past_max_count = 0; + for (int f = 0; f < fillcount; ++f) { + status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE); + if (status != XML_STATUS_OK) { + xml_failure(parser); + } + if (g_bytesScanned > max_scanned) { + // We're not done, and have already passed the limit -- the test will + // definitely fail. This block allows us to save time by failing early. + const unsigned pushed + = (unsigned)strlen(text[i].pre) + (f + 1) * fillsize; + fprintf( + stderr, + "after %d/%d loops: pushed=%u scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n", + f + 1, fillcount, pushed, g_bytesScanned, + g_bytesScanned / (double)pushed, max_scanned, max_factor); + past_max_count++; + // We are failing, but allow a few log prints first. If we don't reach + // a count of five, the test will fail after the loop instead. + assert_true(past_max_count < 5); + } + } + + // parse the end text + status = _XML_Parse_SINGLE_BYTES(parser, text[i].post, + (int)strlen(text[i].post), XML_TRUE); + if (status != XML_STATUS_OK) { + xml_failure(parser); + } + + assert_true(g_bytesScanned > approx_bytes); // or the counter isn't working + if (g_bytesScanned > max_scanned) { + fprintf( + stderr, + "after all input: scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n", + g_bytesScanned, g_bytesScanned / (double)approx_bytes, max_scanned, + max_factor); + fail("scanned too many bytes"); } - free(big); + + XML_ParserFree(parser); } +} +END_TEST + +START_TEST(test_set_reparse_deferral) { + const char *const pre = "<d>"; + const char *const start = "<x attr='"; + const char *const end = "'></x>"; + char eeeeee[100]; + const int fillsize = (int)sizeof(eeeeee); + memset(eeeeee, 'e', fillsize); + + for (int enabled = 0; enabled <= 1; enabled += 1) { + //set_subtest("deferral=%d", enabled); - for (int i = 0; i < num_prefixes; ++i) { - // set_subtest("\"%s\"", prefixes[i]); XML_Parser parser = XML_ParserCreate(NULL); - const int prefix_len = (int)strlen(prefixes[i]); - const enum XML_Status s - = _XML_Parse_SINGLE_BYTES(parser, prefixes[i], prefix_len, XML_FALSE); - if (s != XML_STATUS_OK) + assert_true(parser != NULL); + assert_true(XML_SetReparseDeferralEnabled(parser, enabled)); + // pre-grow the buffer to avoid reparsing due to almost-fullness + assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL); + + CharData storage; + CharData_Init(&storage); + XML_SetUserData(parser, &storage); + XML_SetStartElementHandler(parser, start_element_event_handler); + + enum XML_Status status; + // parse the start text + status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE); + if (status != XML_STATUS_OK) { xml_failure(parser); + } + CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done + + // ..and the start of the token + status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE); + if (status != XML_STATUS_OK) { + xml_failure(parser); + } + CharData_CheckXMLChars(&storage, XCS("d")); // still just the first one + + // try to parse lots of 'e', but the token isn't finished + for (int c = 0; c < 100; ++c) { + status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE); + if (status != XML_STATUS_OK) { + xml_failure(parser); + } + } + CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one + + // end the <x> token. + status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE); + if (status != XML_STATUS_OK) { + xml_failure(parser); + } + + if (enabled) { + // In general, we may need to push more data to trigger a reparse attempt, + // but in this test, the data is constructed to always require it. + CharData_CheckXMLChars(&storage, XCS("d")); // or the test is incorrect + // 2x the token length should suffice; the +1 covers the start and end. + for (int c = 0; c < 101; ++c) { + status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE); + if (status != XML_STATUS_OK) { + xml_failure(parser); + } + } + } + CharData_CheckXMLChars(&storage, XCS("dx")); // the <x> should be done - // XML_CONTEXT_BYTES of the prefix may remain in the buffer; - // subtracting the whole prefix is easiest, and close enough. - assert_true(XML_GetBuffer(parser, maxbuf - prefix_len) != NULL); - // The limit should be consistent; no prefix should allow us to - // reach above the max buffer size. - assert_true(XML_GetBuffer(parser, maxbuf + 1) == NULL); XML_ParserFree(parser); } } END_TEST +struct element_decl_data { + XML_Parser parser; + int count; +}; + +static void +element_decl_counter(void *userData, const XML_Char *name, XML_Content *model) { + UNUSED_P(name); + struct element_decl_data *testdata = (struct element_decl_data *)userData; + testdata->count += 1; + XML_FreeContentModel(testdata->parser, model); +} + +static int +external_inherited_parser(XML_Parser p, const XML_Char *context, + const XML_Char *base, const XML_Char *systemId, + const XML_Char *publicId) { + UNUSED_P(base); + UNUSED_P(systemId); + UNUSED_P(publicId); + const char *const pre = "<!ELEMENT document ANY>\n"; + const char *const start = "<!ELEMENT "; + const char *const end = " ANY>\n"; + const char *const post = "<!ELEMENT xyz ANY>\n"; + const int enabled = *(int *)XML_GetUserData(p); + char eeeeee[100]; + char spaces[100]; + const int fillsize = (int)sizeof(eeeeee); + assert_true(fillsize == (int)sizeof(spaces)); + memset(eeeeee, 'e', fillsize); + memset(spaces, ' ', fillsize); + + XML_Parser parser = XML_ExternalEntityParserCreate(p, context, NULL); + assert_true(parser != NULL); + // pre-grow the buffer to avoid reparsing due to almost-fullness + assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL); + + struct element_decl_data testdata; + testdata.parser = parser; + testdata.count = 0; + XML_SetUserData(parser, &testdata); + XML_SetElementDeclHandler(parser, element_decl_counter); + + enum XML_Status status; + // parse the initial text + status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE); + if (status != XML_STATUS_OK) { + xml_failure(parser); + } + assert_true(testdata.count == 1); // first element should be done + + // ..and the start of the big token + status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE); + if (status != XML_STATUS_OK) { + xml_failure(parser); + } + assert_true(testdata.count == 1); // still just the first one + + // try to parse lots of 'e', but the token isn't finished + for (int c = 0; c < 100; ++c) { + status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE); + if (status != XML_STATUS_OK) { + xml_failure(parser); + } + } + assert_true(testdata.count == 1); // *still* just the first one + + // end the big token. + status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE); + if (status != XML_STATUS_OK) { + xml_failure(parser); + } + + if (enabled) { + // In general, we may need to push more data to trigger a reparse attempt, + // but in this test, the data is constructed to always require it. + assert_true(testdata.count == 1); // or the test is incorrect + // 2x the token length should suffice; the +1 covers the start and end. + for (int c = 0; c < 101; ++c) { + status = XML_Parse(parser, spaces, fillsize, XML_FALSE); + if (status != XML_STATUS_OK) { + xml_failure(parser); + } + } + } + assert_true(testdata.count == 2); // the big token should be done + + // parse the final text + status = XML_Parse(parser, post, (int)strlen(post), XML_TRUE); + if (status != XML_STATUS_OK) { + xml_failure(parser); + } + assert_true(testdata.count == 3); // after isFinal=XML_TRUE, all must be done + + XML_ParserFree(parser); + return XML_STATUS_OK; +} + +START_TEST(test_reparse_deferral_is_inherited) { + const char *const text + = "<!DOCTYPE document SYSTEM 'something.ext'><document/>"; + for (int enabled = 0; enabled <= 1; ++enabled) { + //set_subtest("deferral=%d", enabled); + + XML_Parser parser = XML_ParserCreate(NULL); + assert_true(parser != NULL); + XML_SetUserData(parser, (void *)&enabled); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + // this handler creates a sub-parser and checks that its deferral behavior + // is what we expected, based on the value of `enabled` (in userdata). + XML_SetExternalEntityRefHandler(parser, external_inherited_parser); + assert_true(XML_SetReparseDeferralEnabled(parser, enabled)); + if (XML_Parse(parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK) + xml_failure(parser); + + XML_ParserFree(parser); + } +} +END_TEST + +START_TEST(test_set_reparse_deferral_on_null_parser) { + assert_true(XML_SetReparseDeferralEnabled(NULL, 0) == XML_FALSE); + assert_true(XML_SetReparseDeferralEnabled(NULL, 1) == XML_FALSE); + assert_true(XML_SetReparseDeferralEnabled(NULL, 10) == XML_FALSE); + assert_true(XML_SetReparseDeferralEnabled(NULL, 100) == XML_FALSE); + assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MIN) + == XML_FALSE); + assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MAX) + == XML_FALSE); +} +END_TEST + +START_TEST(test_set_reparse_deferral_on_the_fly) { + const char *const pre = "<d><x attr='"; + const char *const end = "'></x>"; + char iiiiii[100]; + const int fillsize = (int)sizeof(iiiiii); + memset(iiiiii, 'i', fillsize); + + XML_Parser parser = XML_ParserCreate(NULL); + assert_true(parser != NULL); + assert_true(XML_SetReparseDeferralEnabled(parser, XML_TRUE)); + + CharData storage; + CharData_Init(&storage); + XML_SetUserData(parser, &storage); + XML_SetStartElementHandler(parser, start_element_event_handler); + + enum XML_Status status; + // parse the start text + status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE); + if (status != XML_STATUS_OK) { + xml_failure(parser); + } + CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done + + // try to parse some 'i', but the token isn't finished + status = XML_Parse(parser, iiiiii, fillsize, XML_FALSE); + if (status != XML_STATUS_OK) { + xml_failure(parser); + } + CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one + + // end the <x> token. + status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE); + if (status != XML_STATUS_OK) { + xml_failure(parser); + } + CharData_CheckXMLChars(&storage, XCS("d")); // not yet. + + // now change the heuristic setting and add *no* data + assert_true(XML_SetReparseDeferralEnabled(parser, XML_FALSE)); + // we avoid isFinal=XML_TRUE, because that would force-bypass the heuristic. + status = XML_Parse(parser, "", 0, XML_FALSE); + if (status != XML_STATUS_OK) { + xml_failure(parser); + } + CharData_CheckXMLChars(&storage, XCS("dx")); + + XML_ParserFree(parser); +} +END_TEST + +START_TEST(test_set_bad_reparse_option) { + XML_Parser parser = XML_ParserCreate(NULL); + assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 2)); + assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 3)); + assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 99)); + assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 127)); + assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 128)); + assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 129)); + assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 255)); + assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 0)); + assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 1)); + XML_ParserFree(parser); +} +END_TEST + +static size_t g_totalAlloc = 0; +static size_t g_biggestAlloc = 0; + +static void * +counting_realloc(void *ptr, size_t size) { + g_totalAlloc += size; + if (size > g_biggestAlloc) { + g_biggestAlloc = size; + } + return realloc(ptr, size); +} + +static void * +counting_malloc(size_t size) { + return counting_realloc(NULL, size); +} + +START_TEST(test_bypass_heuristic_when_close_to_bufsize) { + if (g_chunkSize != 0) { + // this test does not use SINGLE_BYTES, because it depends on very precise + // buffer fills. + return; + } + if (! g_reparseDeferralEnabledDefault) { + return; // this test is irrelevant when the deferral heuristic is disabled. + } + + const int document_length = 65536; + char *const document = (char *)malloc(document_length); + + const XML_Memory_Handling_Suite memfuncs = { + counting_malloc, + counting_realloc, + free, + }; + + const int leading_list[] = {0, 3, 61, 96, 400, 401, 4000, 4010, 4099, -1}; + const int bigtoken_list[] = {3000, 4000, 4001, 4096, 4099, 5000, 20000, -1}; + const int fillsize_list[] = {131, 256, 399, 400, 401, 1025, 4099, 4321, -1}; + + for (const int *leading = leading_list; *leading >= 0; leading++) { + for (const int *bigtoken = bigtoken_list; *bigtoken >= 0; bigtoken++) { + for (const int *fillsize = fillsize_list; *fillsize >= 0; fillsize++) { + //set_subtest("leading=%d bigtoken=%d fillsize=%d", *leading, *bigtoken, + // *fillsize); + // start by checking that the test looks reasonably valid + assert_true(*leading + *bigtoken <= document_length); + + // put 'x' everywhere; some will be overwritten by elements. + memset(document, 'x', document_length); + // maybe add an initial tag + if (*leading) { + assert_true(*leading >= 3); // or the test case is invalid + memcpy(document, "<a>", 3); + } + // add the large token + document[*leading + 0] = '<'; + document[*leading + 1] = 'b'; + memset(&document[*leading + 2], ' ', *bigtoken - 2); // a spacy token + document[*leading + *bigtoken - 1] = '>'; + + // 1 for 'b', plus 1 or 0 depending on the presence of 'a' + const int expected_elem_total = 1 + (*leading ? 1 : 0); + + XML_Parser parser = XML_ParserCreate_MM(NULL, &memfuncs, NULL); + assert_true(parser != NULL); + + CharData storage; + CharData_Init(&storage); + XML_SetUserData(parser, &storage); + XML_SetStartElementHandler(parser, start_element_event_handler); + + g_biggestAlloc = 0; + g_totalAlloc = 0; + int offset = 0; + // fill data until the big token is covered (but not necessarily parsed) + while (offset < *leading + *bigtoken) { + assert_true(offset + *fillsize <= document_length); + const enum XML_Status status + = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE); + if (status != XML_STATUS_OK) { + xml_failure(parser); + } + offset += *fillsize; + } + // Now, check that we've had a buffer allocation that could fit the + // context bytes and our big token. In order to detect a special case, + // we need to know how many bytes of our big token were included in the + // first push that contained _any_ bytes of the big token: + const int bigtok_first_chunk_bytes = *fillsize - (*leading % *fillsize); + if (bigtok_first_chunk_bytes >= *bigtoken && XML_CONTEXT_BYTES == 0) { + // Special case: we aren't saving any context, and the whole big token + // was covered by a single fill, so Expat may have parsed directly + // from our input pointer, without allocating an internal buffer. + } else if (*leading < XML_CONTEXT_BYTES) { + assert_true(g_biggestAlloc >= *leading + (size_t)*bigtoken); + } else { + assert_true(g_biggestAlloc >= XML_CONTEXT_BYTES + (size_t)*bigtoken); + } + // fill data until the big token is actually parsed + while (storage.count < expected_elem_total) { + const size_t alloc_before = g_totalAlloc; + assert_true(offset + *fillsize <= document_length); + const enum XML_Status status + = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE); + if (status != XML_STATUS_OK) { + xml_failure(parser); + } + offset += *fillsize; + // since all the bytes of the big token are already in the buffer, + // the bufsize ceiling should make us finish its parsing without any + // further buffer allocations. We assume that there will be no other + // large allocations in this test. + assert_true(g_totalAlloc - alloc_before < 4096); + } + // test-the-test: was our alloc even called? + assert_true(g_totalAlloc > 0); + // test-the-test: there shouldn't be any extra start elements + assert_true(storage.count == expected_elem_total); + + XML_ParserFree(parser); + } + } + } + free(document); +} +END_TEST + +START_TEST(test_varying_buffer_fills) { + const int KiB = 1024; + const int MiB = 1024 * KiB; + const int document_length = 16 * MiB; + const int big = 7654321; // arbitrarily chosen between 4 and 8 MiB + + if (g_chunkSize != 0) { + return; // this test is slow, and doesn't use _XML_Parse_SINGLE_BYTES(). + } + + char *const document = (char *)malloc(document_length); + assert_true(document != NULL); + memset(document, 'x', document_length); + document[0] = '<'; + document[1] = 't'; + memset(&document[2], ' ', big - 2); // a very spacy token + document[big - 1] = '>'; + + // Each testcase is a list of buffer fill sizes, terminated by a value < 0. + // When reparse deferral is enabled, the final (negated) value is the expected + // maximum number of bytes scanned in parse attempts. + const int testcases[][30] = { + {8 * MiB, -8 * MiB}, + {4 * MiB, 4 * MiB, -12 * MiB}, // try at 4MB, then 8MB = 12 MB total + // zero-size fills shouldn't trigger the bypass + {4 * MiB, 0, 4 * MiB, -12 * MiB}, + {4 * MiB, 0, 0, 4 * MiB, -12 * MiB}, + {4 * MiB, 0, 1 * MiB, 0, 3 * MiB, -12 * MiB}, + // try to hit the buffer ceiling only once (at the end) + {4 * MiB, 2 * MiB, 1 * MiB, 512 * KiB, 256 * KiB, 256 * KiB, -12 * MiB}, + // try to hit the same buffer ceiling multiple times + {4 * MiB + 1, 2 * MiB, 1 * MiB, 512 * KiB, -25 * MiB}, + + // try to hit every ceiling, by always landing 1K shy of the buffer size + {1 * KiB, 2 * KiB, 4 * KiB, 8 * KiB, 16 * KiB, 32 * KiB, 64 * KiB, + 128 * KiB, 256 * KiB, 512 * KiB, 1 * MiB, 2 * MiB, 4 * MiB, -16 * MiB}, + + // try to avoid every ceiling, by always landing 1B past the buffer size + // the normal 2x heuristic threshold still forces parse attempts. + {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1 + 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2 + 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3 + 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4 + 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5 + 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6 + 2 * MiB, 4 * MiB, // will attempt 8MiB + 1 ==> total 10M + 682K + 7 + -(10 * MiB + 682 * KiB + 7)}, + // try to avoid every ceiling again, except on our last fill. + {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1 + 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2 + 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3 + 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4 + 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5 + 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6 + 2 * MiB, 4 * MiB - 1, // will attempt 8MiB ==> total 10M + 682K + 6 + -(10 * MiB + 682 * KiB + 6)}, + + // try to hit ceilings on the way multiple times + {512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 1 MiB buffer + 512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 2 MiB buffer + 1 * MiB + 1, 512 * KiB, 256 * KiB, 256 * KiB - 1, // 4 MiB buffer + 2 * MiB + 1, 1 * MiB, 512 * KiB, // 8 MiB buffer + // we'll make a parse attempt at every parse call + -(45 * MiB + 12)}, + }; + const int testcount = sizeof(testcases) / sizeof(testcases[0]); + for (int test_i = 0; test_i < testcount; test_i++) { + const int *fillsize = testcases[test_i]; + //set_subtest("#%d {%d %d %d %d ...}", test_i, fillsize[0], fillsize[1], + // fillsize[2], fillsize[3]); + XML_Parser parser = XML_ParserCreate(NULL); + assert_true(parser != NULL); + + CharData storage; + CharData_Init(&storage); + XML_SetUserData(parser, &storage); + XML_SetStartElementHandler(parser, start_element_event_handler); + + g_bytesScanned = 0; + int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call) + int offset = 0; + while (*fillsize >= 0) { + assert_true(offset + *fillsize <= document_length); // or test is invalid + const enum XML_Status status + = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE); + if (status != XML_STATUS_OK) { + xml_failure(parser); + } + offset += *fillsize; + fillsize++; + assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow + worstcase_bytes += offset; // we might've tried to parse all pending bytes + } + assert_true(storage.count == 1); // the big token should've been parsed + assert_true(g_bytesScanned > 0); // test-the-test: does our counter work? + if (g_reparseDeferralEnabledDefault) { + // heuristic is enabled; some XML_Parse calls may have deferred reparsing + const unsigned max_bytes_scanned = -*fillsize; + if (g_bytesScanned > max_bytes_scanned) { + fprintf(stderr, + "bytes scanned in parse attempts: actual=%u limit=%u \n", + g_bytesScanned, max_bytes_scanned); + fail("too many bytes scanned in parse attempts"); + } + } + assert_true(g_bytesScanned <= (unsigned)worstcase_bytes); + + XML_ParserFree(parser); + } + free(document); +} +END_TEST /* * Namespaces tests. @@ -6780,13 +7731,13 @@ START_TEST(test_return_ns_triplet) { if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) == XML_STATUS_ERROR) xml_failure(g_parser); - if (! triplet_start_flag) - fail("triplet_start_checker not invoked"); /* Check that unsetting "return triplets" fails while still parsing */ XML_SetReturnNSTriplet(g_parser, XML_FALSE); if (_XML_Parse_SINGLE_BYTES(g_parser, epilog, (int)strlen(epilog), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); + if (! triplet_start_flag) + fail("triplet_start_checker not invoked"); if (! triplet_end_flag) fail("triplet_end_checker not invoked"); if (dummy_handler_flags @@ -11643,9 +12594,12 @@ START_TEST(test_accounting_precision) { /* Processing instructions */ {"<?xml-stylesheet type=\"text/xsl\" href=\"https://domain.invalid/\" media=\"all\"?><e/>", NULL, NULL, 0, filled_later}, + {"<?pi0?><?pi1 ?><?pi2 ?><r/><?pi4?>", NULL, NULL, 0, filled_later}, +# ifdef XML_DTD {"<?pi0?><?pi1 ?><?pi2 ?><!DOCTYPE r SYSTEM 'first.ent'><r/>", "<?pi3?><!ENTITY % e1 SYSTEM 'second.ent'><?pi4?>%e1;<?pi5?>", "<?pi6?>", 0, filled_later}, +# endif /* XML_DTD */ /* CDATA */ {"<e><![CDATA[one two three]]></e>", NULL, NULL, 0, filled_later}, @@ -11660,6 +12614,7 @@ START_TEST(test_accounting_precision) { NULL, NULL, sizeof(XML_Char) * strlen("111<![CDATA[2 <= 2]]>333"), filled_later}, +# ifdef XML_DTD /* Conditional sections */ {"<!DOCTYPE r [\n" "<!ENTITY % draft 'INCLUDE'>\n" @@ -11672,6 +12627,7 @@ START_TEST(test_accounting_precision) { "<![%final;[<!--22-->]]>", NULL, sizeof(XML_Char) * (strlen("INCLUDE") + strlen("IGNORE")), filled_later}, +# endif /* XML_DTD */ /* General entities */ {"<!DOCTYPE root [\n" @@ -11697,8 +12653,14 @@ START_TEST(test_accounting_precision) { " <!ENTITY five SYSTEM 'first.ent'>\n" "]>\n" "<r>&five;</r>", - "12345", NULL, 0, filled_later}, + "12345", NULL, 0}, + {"<!DOCTYPE r [\n" + " <!ENTITY five SYSTEM 'first.ent'>\n" + "]>\n" + "<r>&five;</r>", + "\xEF\xBB\xBF" /* UTF-8 BOM */, NULL, 0, filled_later}, +# ifdef XML_DTD /* Parameter entities */ {"<!DOCTYPE r [\n" "<!ENTITY % comment \"<!---->\">\n" @@ -11784,24 +12746,16 @@ START_TEST(test_accounting_precision) { "%e1;\n", "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>" /* UTF-8 BOM */, strlen("\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>"), filled_later}, - {"<!DOCTYPE r [\n" - " <!ENTITY five SYSTEM 'first.ent'>\n" - "]>\n" - "<r>&five;</r>", - "\xEF\xBB\xBF" /* UTF-8 BOM */, NULL, 0, filled_later}, +# endif /* XML_DTD */ }; const size_t countCases = sizeof(cases) / sizeof(cases[0]); size_t u = 0; for (; u < countCases; u++) { - size_t v = 0; - for (; v < 2; v++) { - const XML_Bool singleBytesWanted = (v == 0) ? XML_FALSE : XML_TRUE; const unsigned long long expectedCountBytesDirect = strlen(cases[u].primaryText); const unsigned long long expectedCountBytesIndirect - = (cases[u].firstExternalText ? strlen(cases[u].firstExternalText) - : 0) + = (cases[u].firstExternalText ? strlen(cases[u].firstExternalText) : 0) + (cases[u].secondExternalText ? strlen(cases[u].secondExternalText) : 0) + cases[u].expectedCountBytesIndirectExtra; @@ -11812,14 +12766,10 @@ START_TEST(test_accounting_precision) { XML_SetExternalEntityRefHandler(parser, accounting_external_entity_ref_handler); XML_SetUserData(parser, (void *)&cases[u]); - cases[u].singleBytesWanted = singleBytesWanted; } - const XmlParseFunction xmlParseFunction - = singleBytesWanted ? _XML_Parse_SINGLE_BYTES : XML_Parse; - enum XML_Status status - = xmlParseFunction(parser, cases[u].primaryText, + = _XML_Parse_SINGLE_BYTES(parser, cases[u].primaryText, (int)strlen(cases[u].primaryText), XML_TRUE); if (status != XML_STATUS_OK) { _xml_failure(parser, __FILE__, __LINE__); @@ -11835,22 +12785,20 @@ START_TEST(test_accounting_precision) { if (actualCountBytesDirect != expectedCountBytesDirect) { fprintf( stderr, - "Document " EXPAT_FMT_SIZE_T("") " of " EXPAT_FMT_SIZE_T("") ", %s: Expected " EXPAT_FMT_ULL( + "Document " EXPAT_FMT_SIZE_T("") " of " EXPAT_FMT_SIZE_T("") ": Expected " EXPAT_FMT_ULL( "") " count direct bytes, got " EXPAT_FMT_ULL("") " instead.\n", - u + 1, countCases, singleBytesWanted ? "single bytes" : "chunks", - expectedCountBytesDirect, actualCountBytesDirect); + u + 1, countCases, expectedCountBytesDirect, actualCountBytesDirect); fail("Count of direct bytes is off"); } if (actualCountBytesIndirect != expectedCountBytesIndirect) { fprintf( stderr, - "Document " EXPAT_FMT_SIZE_T("") " of " EXPAT_FMT_SIZE_T("") ", %s: Expected " EXPAT_FMT_ULL( + "Document " EXPAT_FMT_SIZE_T("") " of " EXPAT_FMT_SIZE_T("") ": Expected " EXPAT_FMT_ULL( "") " count indirect bytes, got " EXPAT_FMT_ULL("") " instead.\n", - u + 1, countCases, singleBytesWanted ? "single bytes" : "chunks", - expectedCountBytesIndirect, actualCountBytesIndirect); + u + 1, countCases, expectedCountBytesIndirect, + actualCountBytesIndirect); fail("Count of indirect bytes is off"); - } } } } @@ -12031,7 +12979,7 @@ make_suite(void) { tcase_add_test(tc_basic, test_stop_parser_between_cdata_calls); tcase_add_test(tc_basic, test_suspend_parser_between_cdata_calls); tcase_add_test(tc_basic, test_memory_allocation); - tcase_add_test(tc_basic, test_default_current); + tcase_add_test(tc_basic, test_default_current); tcase_add_test(tc_basic, test_dtd_elements); tcase_add_test(tc_basic, test_dtd_elements_nesting); tcase_add_test__ifdef_xml_dtd(tc_basic, test_set_foreign_dtd); @@ -12065,6 +13013,8 @@ make_suite(void) { #if defined(XML_CONTEXT_BYTES) tcase_add_test(tc_basic, test_get_buffer_3_overflow); #endif + tcase_add_test(tc_basic, test_buffer_can_grow_to_max); + tcase_add_test(tc_basic, test_getbuffer_allocates_on_zero_len); tcase_add_test(tc_basic, test_byte_info_at_end); tcase_add_test(tc_basic, test_byte_info_at_error); tcase_add_test(tc_basic, test_byte_info_at_cdata); @@ -12075,6 +13025,7 @@ make_suite(void) { tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16); tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16_be); tcase_add_test__ifdef_xml_dtd(tc_basic, test_bad_ignore_section); + tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_bom_consumed); tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_entity_values); tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_not_standalone); tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_value_abort); @@ -12178,7 +13129,17 @@ make_suite(void) { tcase_add_test(tc_basic, test_bad_notation); tcase_add_test(tc_basic, test_default_doctype_handler); tcase_add_test(tc_basic, test_empty_element_abort); - tcase_add_test(tc_basic, test_buffer_can_grow_to_max); + tcase_add_test__ifdef_xml_dtd(tc_basic, + test_pool_integrity_with_unfinished_attr); + tcase_add_test(tc_basic, test_nested_entity_suspend); + tcase_add_test(tc_basic, test_big_tokens_scale_linearly); + tcase_add_test(tc_basic, test_set_reparse_deferral); + tcase_add_test(tc_basic, test_reparse_deferral_is_inherited); + tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser); + tcase_add_test(tc_basic, test_set_reparse_deferral_on_the_fly); + tcase_add_test(tc_basic, test_set_bad_reparse_option); + tcase_add_test(tc_basic, test_bypass_heuristic_when_close_to_bufsize); + tcase_add_test(tc_basic, test_varying_buffer_fills); suite_add_tcase(s, tc_namespace); tcase_add_checked_fixture(tc_namespace, namespace_setup, namespace_teardown); @@ -12320,7 +13281,6 @@ make_suite(void) { tcase_add_test(tc_nsalloc, test_nsalloc_long_default_in_ext); tcase_add_test(tc_nsalloc, test_nsalloc_long_systemid_in_ext); tcase_add_test(tc_nsalloc, test_nsalloc_prefixed_element); - #if defined(XML_DTD) suite_add_tcase(s, tc_accounting); tcase_add_test(tc_accounting, test_accounting_precision); Index: expat-2.4.4/lib/internal.h =================================================================== --- expat-2.4.4.orig/lib/internal.h +++ expat-2.4.4/lib/internal.h @@ -160,7 +160,7 @@ const char *unsignedCharToPrintable(unsi #endif extern XML_Bool g_reparseDeferralEnabledDefault; // written ONLY in runtests.c -extern unsigned int g_parseAttempts; // used for testing only +extern unsigned int g_bytesScanned; // used for testing only #ifdef __cplusplus } Index: expat-2.4.4/lib/xmlparse.c =================================================================== --- expat-2.4.4.orig/lib/xmlparse.c +++ expat-2.4.4/lib/xmlparse.c @@ -605,7 +605,7 @@ static unsigned long getDebugLevel(const : ((*((pool)->ptr)++ = c), 1)) XML_Bool g_reparseDeferralEnabledDefault = XML_TRUE; // write ONLY in runtests.c -unsigned int g_parseAttempts = 0; // used for testing only +unsigned int g_bytesScanned = 0; // used for testing only struct XML_ParserStruct { /* The first member must be m_userData so that the XML_GetUserData @@ -983,7 +983,7 @@ callProcessor(XML_Parser parser, const c return XML_ERROR_NONE; } } - g_parseAttempts += 1; + g_bytesScanned += (unsigned)have_now; const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr); if (ret == XML_ERROR_NONE) { // if we consumed nothing, remember what we had on this parse attempt. @@ -1352,7 +1352,7 @@ XML_ExternalEntityParserCreate(XML_Parse XML_Bool oldReparseDeferralEnabled; /* Validate the oldParser parameter before we pull everything out of it */ - if (oldParser == NULL) + if (oldParser == NULL) return NULL; /* Stash the original parser contents on the stack */ @@ -1921,7 +1921,7 @@ XML_Parse(XML_Parser parser, const char parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; parser->m_errorCode - = callProcessor((parser, s, parser->m_parseEndPtr = s + len, &end); + = callProcessor(parser, s, parser->m_parseEndPtr = s + len, &end); if (parser->m_errorCode != XML_ERROR_NONE) { parser->m_eventEndPtr = parser->m_eventPtr;
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor