Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
Please login to access the resource
openSUSE:Step:15
python-libxml2-python.31449
libxml2-Avoid-quadratic-checking-of-identity-co...
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File libxml2-Avoid-quadratic-checking-of-identity-constraints.patch of Package python-libxml2-python.31449
From 5aab6473018269c10bedf70aaa183c55c20b7ec2 Mon Sep 17 00:00:00 2001 From: Michael Matz <matz@suse.de> Date: Sat, 21 Nov 2020 01:21:56 +0100 Subject: [PATCH] Avoid quadratic checking of identity-constraints key/unique/keyref schema attributes currently use qudratic loops to check their various constraints (that keys are unique and that keyrefs refer to existing keys). That becomes extremely slow if there are many elements with keys. This happens in the wild with e.g. the OVAL XML descriptions of security patches. You need the openscap schemata, and then an example xml file: % zypper in openscap-utils % wget ftp://ftp.suse.com/pub/projects/security/oval/opensuse.leap.15.1.xml % time xmllint --schema /usr/share/openscap/schemas/oval/5.5/oval-definitions-schema.xsd opensuse.leap.15.1.xml > /dev/null opensuse.leap.15.1.xml validates real 16m59,857s user 16m55,787s sys 0m1,060s This patch makes libxml use a hash table to avoid the quadratic behaviour. The existing hash table only accepts strings as keys, so we're mostly reusing the canonical representation of key values to derive such strings (with the caveat given in a comment). The alternative would be to rework the hash table code to accept either numbers or free functions as hash workers, but the code is fast enough as is. With the patch we have this then: % time LD_LIBRARY_PATH=./libxml2/.libs/ ./libxml2/.libs/xmllint --schema /usr/share/openscap/schemas/oval/5.5/oval-definitions-schema.xsd opensuse.leap.15.1.xml > /dev/null opensuse.leap.15.1.xml validates real 0m3,531s user 0m3,427s sys 0m0,103s So, a ~300x speedup. This patch survives 'make check' and 'make tests'. --- xmlschemas.c | 189 +++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 167 insertions(+), 22 deletions(-) diff --git a/xmlschemas.c b/xmlschemas.c index cc200636..c455b4a3 100644 --- a/xmlschemas.c +++ b/xmlschemas.c @@ -860,6 +860,7 @@ struct _xmlSchemaIDCMatcher { int sizeKeySeqs; xmlSchemaItemListPtr targets; /* list of target-node (xmlSchemaPSVIIDCNodePtr) entries */ + xmlHashTablePtr htab; }; /* @@ -1055,6 +1056,18 @@ struct _xmlSchemaSubstGroup { xmlSchemaItemListPtr members; }; +/** + * xmlIDCHashEntry: + * + * an entry in hash tables to quickly look up keys/uniques + */ +typedef struct _xmlIDCHashEntry xmlIDCHashEntry; +typedef xmlIDCHashEntry *xmlIDCHashEntryPtr; +struct _xmlIDCHashEntry { + xmlIDCHashEntryPtr next; /* next item with same hash */ + int index; /* index into associated item list */ +}; + /************************************************************************ * * * Some predeclarations * @@ -1478,6 +1491,7 @@ xmlSchemaWildcardPCToString(int pc) * @val: the precomputed value * @retValue: the returned value * @ws: the whitespace type of the value + * @for_hash: non-zero if this is supposed to generate a string for hashing * * Get a the canonical representation of the value. * The caller has to free the returned retValue. @@ -1486,9 +1500,10 @@ xmlSchemaWildcardPCToString(int pc) * API errors or if the value type is not supported yet. */ static int -xmlSchemaGetCanonValueWhtspExt(xmlSchemaValPtr val, - xmlSchemaWhitespaceValueType ws, - xmlChar **retValue) +xmlSchemaGetCanonValueWhtspExt_1(xmlSchemaValPtr val, + xmlSchemaWhitespaceValueType ws, + xmlChar **retValue, + int for_hash) { int list; xmlSchemaValType valType; @@ -1522,6 +1537,20 @@ xmlSchemaGetCanonValueWhtspExt(xmlSchemaValPtr val, xmlFree((xmlChar *) value2); goto internal_error; } + if (for_hash && valType == XML_SCHEMAS_DECIMAL) { + /* We can mostly use the canonical value for hashing, + except in the case of decimal. There the canonical + representation requires a trailing '.0' even for + non-fractional numbers, but for the derived integer + types it forbids any decimal point. Nevertheless they + compare equal if the value is equal. We need to generate + the same hash value for this to work, and it's easiest + to just cut off the useless '.0' suffix for the + decimal type. */ + int len = xmlStrlen(value2); + if (len > 2 && value2[len-1] == '0' && value2[len-2] == '.') + ((xmlChar*)value2)[len-2] = 0; + } value = value2; } if (*retValue == NULL) @@ -1548,6 +1577,22 @@ internal_error: return (-1); } +static int +xmlSchemaGetCanonValueWhtspExt(xmlSchemaValPtr val, + xmlSchemaWhitespaceValueType ws, + xmlChar **retValue) +{ + return xmlSchemaGetCanonValueWhtspExt_1(val, ws, retValue, 0); +} + +static int +xmlSchemaGetCanonValueHash(xmlSchemaValPtr val, + xmlChar **retValue) +{ + return xmlSchemaGetCanonValueWhtspExt_1(val, XML_SCHEMA_WHITESPACE_COLLAPSE, + retValue, 1); +} + /** * xmlSchemaFormatItemForReport: * @buf: the string buffer @@ -22311,6 +22356,17 @@ xmlSchemaIDCFreeIDCTable(xmlSchemaPSVIIDCBindingPtr bind) } } +static void +xmlFreeIDCHashEntry (void *payload, const xmlChar *name ATTRIBUTE_UNUSED) +{ + xmlIDCHashEntryPtr e = payload, n; + while (e) { + n = e->next; + xmlFree(e); + e = n; + } +} + /** * xmlSchemaIDCFreeMatcherList: * @matcher: the first IDC matcher in the list @@ -22349,6 +22405,8 @@ xmlSchemaIDCFreeMatcherList(xmlSchemaIDCMatcherPtr matcher) } xmlSchemaItemListFree(matcher->targets); } + if (matcher->htab != NULL) + xmlHashFree(matcher->htab, xmlFreeIDCHashEntry); xmlFree(matcher); matcher = next; } @@ -22399,6 +22457,10 @@ xmlSchemaIDCReleaseMatcherList(xmlSchemaValidCtxtPtr vctxt, xmlSchemaItemListFree(matcher->targets); matcher->targets = NULL; } + if (matcher->htab != NULL) { + xmlHashFree(matcher->htab, xmlFreeIDCHashEntry); + matcher->htab = NULL; + } matcher->next = NULL; /* * Cache the matcher. @@ -22633,10 +22695,10 @@ next_sto: } static const xmlChar * -xmlSchemaFormatIDCKeySequence(xmlSchemaValidCtxtPtr vctxt, - xmlChar **buf, - xmlSchemaPSVIIDCKeyPtr *seq, - int count) +xmlSchemaFormatIDCKeySequence_1(xmlSchemaValidCtxtPtr vctxt, + xmlChar **buf, + xmlSchemaPSVIIDCKeyPtr *seq, + int count, int for_hash) { int i, res; xmlChar *value = NULL; @@ -22644,9 +22706,13 @@ xmlSchemaFormatIDCKeySequence(xmlSchemaValidCtxtPtr vctxt, *buf = xmlStrdup(BAD_CAST "["); for (i = 0; i < count; i++) { *buf = xmlStrcat(*buf, BAD_CAST "'"); - res = xmlSchemaGetCanonValueWhtspExt(seq[i]->val, - xmlSchemaGetWhiteSpaceFacetValue(seq[i]->type), - &value); + if (!for_hash) + res = xmlSchemaGetCanonValueWhtspExt(seq[i]->val, + xmlSchemaGetWhiteSpaceFacetValue(seq[i]->type), + &value); + else { + res = xmlSchemaGetCanonValueHash(seq[i]->val, &value); + } if (res == 0) *buf = xmlStrcat(*buf, BAD_CAST value); else { @@ -22668,6 +22734,24 @@ xmlSchemaFormatIDCKeySequence(xmlSchemaValidCtxtPtr vctxt, return (BAD_CAST *buf); } +static const xmlChar * +xmlSchemaFormatIDCKeySequence(xmlSchemaValidCtxtPtr vctxt, + xmlChar **buf, + xmlSchemaPSVIIDCKeyPtr *seq, + int count) +{ + return xmlSchemaFormatIDCKeySequence_1(vctxt, buf, seq, count, 0); +} + +static const xmlChar * +xmlSchemaHashKeySequence(xmlSchemaValidCtxtPtr vctxt, + xmlChar **buf, + xmlSchemaPSVIIDCKeyPtr *seq, + int count) +{ + return xmlSchemaFormatIDCKeySequence_1(vctxt, buf, seq, count, 1); +} + /** * xmlSchemaXPathPop: * @vctxt: the WXS validation context @@ -23029,15 +23113,25 @@ create_key: if ((idc->type != XML_SCHEMA_TYPE_IDC_KEYREF) && (targets->nbItems != 0)) { xmlSchemaPSVIIDCKeyPtr ckey, bkey, *bkeySeq; + xmlIDCHashEntryPtr e; - i = 0; res = 0; + + if (!matcher->htab) + e = NULL; + else { + xmlChar *value = NULL; + xmlSchemaHashKeySequence(vctxt, &value, *keySeq, nbKeys); + e = xmlHashLookup(matcher->htab, value); + FREE_AND_NULL(value); + } + /* * Compare the key-sequences, key by key. */ - do { + for (;e; e = e->next) { bkeySeq = - ((xmlSchemaPSVIIDCNodePtr) targets->items[i])->keys; + ((xmlSchemaPSVIIDCNodePtr) targets->items[e->index])->keys; for (j = 0; j < nbKeys; j++) { ckey = (*keySeq)[j]; bkey = bkeySeq[j]; @@ -23058,9 +23152,8 @@ create_key: */ break; } - i++; - } while (i < targets->nbItems); - if (i != targets->nbItems) { + } + if (e) { xmlChar *str = NULL, *strB = NULL; /* * TODO: Try to report the key-sequence. @@ -23138,6 +23231,24 @@ create_key: } return (-1); } + if (idc->type != XML_SCHEMA_TYPE_IDC_KEYREF) { + xmlChar *value = NULL; + xmlIDCHashEntryPtr r, e; + if (!matcher->htab) + matcher->htab = xmlHashCreate(4); + xmlSchemaHashKeySequence(vctxt, &value, ntItem->keys, nbKeys); + e = xmlMalloc(sizeof *e); + e->index = targets->nbItems - 1; + r = xmlHashLookup(matcher->htab, value); + if (r) { + e->next = r->next; + r->next = e; + } else { + e->next = NULL; + xmlHashAddEntry(matcher->htab, value, e); + } + FREE_AND_NULL(value); + } goto selector_leave; selector_key_error: @@ -23394,6 +23505,10 @@ xmlSchemaIDCFillNodeTables(xmlSchemaValidCtxtPtr vctxt, matcher->targets->items = NULL; matcher->targets->sizeItems = 0; matcher->targets->nbItems = 0; + if (matcher->htab) { + xmlHashFree(matcher->htab, xmlFreeIDCHashEntry); + matcher->htab = NULL; + } } else { /* * Compare the key-sequences and add to the IDC node-table. @@ -23841,6 +23956,7 @@ xmlSchemaCheckCVCIDCKeyRef(xmlSchemaValidCtxtPtr vctxt) int i, j, k, res, nbFields, hasDupls; xmlSchemaPSVIIDCKeyPtr *refKeys, *keys; xmlSchemaPSVIIDCNodePtr refNode = NULL; + xmlHashTablePtr table = NULL; nbFields = matcher->aidc->def->nbFields; @@ -23858,26 +23974,52 @@ xmlSchemaCheckCVCIDCKeyRef(xmlSchemaValidCtxtPtr vctxt) /* * Search for a matching key-sequences. */ + if (bind) { + table = xmlHashCreate(bind->nbNodes * 2); + for (j = 0; j < bind->nbNodes; j++) { + xmlChar *value; + xmlIDCHashEntryPtr r, e; + keys = bind->nodeTable[j]->keys; + xmlSchemaHashKeySequence(vctxt, &value, keys, nbFields); + e = xmlMalloc(sizeof *e); + e->index = j; + r = xmlHashLookup(table, value); + if (r) { + e->next = r->next; + r->next = e; + } else { + e->next = NULL; + xmlHashAddEntry(table, value, e); + } + FREE_AND_NULL(value); + } + } for (i = 0; i < matcher->targets->nbItems; i++) { res = 0; refNode = matcher->targets->items[i]; if (bind != NULL) { + xmlChar *value; + xmlIDCHashEntryPtr e; refKeys = refNode->keys; - for (j = 0; j < bind->nbNodes; j++) { - keys = bind->nodeTable[j]->keys; + xmlSchemaHashKeySequence(vctxt, &value, refKeys, nbFields); + e = xmlHashLookup(table, value); + FREE_AND_NULL(value); + res = 0; + for (;e; e = e->next) { + keys = bind->nodeTable[e->index]->keys; for (k = 0; k < nbFields; k++) { res = xmlSchemaAreValuesEqual(keys[k]->val, - refKeys[k]->val); + refKeys[k]->val); if (res == 0) - break; + break; else if (res == -1) { return (-1); } } if (res == 1) { /* - * Match found. - */ + * Match found. + */ break; } } @@ -23932,6 +24074,9 @@ xmlSchemaCheckCVCIDCKeyRef(xmlSchemaValidCtxtPtr vctxt) FREE_AND_NULL(strB); } } + if (table) { + xmlHashFree(table, xmlFreeIDCHashEntry); + } } matcher = matcher->next; } -- GitLab
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor