Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
openSUSE:Step:15-SP4
yum.8574
yum-3.4.3-speedup-bnc810074.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File yum-3.4.3-speedup-bnc810074.patch of Package yum.8574
commit 22586dce865ee6509daf94d3930ee53a20fc2a0e Author: Zdeněk Pavlas <zpavlas@redhat.com> Date: Fri Nov 16 11:21:00 2012 +0100 clean up misc.to_xml(), make it faster, add tests. BZ 716235. diff --git a/test/misc-tests.py b/test/misc-tests.py index d34c161..4bf0821 100644 --- a/test/misc-tests.py +++ b/test/misc-tests.py @@ -114,6 +114,39 @@ class MiscTests(DepsolveTests): res, msg = solver.buildTransaction() return self.res[res], msg + def testXML(self): + import yum.misc + for i in ( + +# valid utf8 and unicode +('\xc4\x9b\xc5\xa1\xc4\x8d', '\xc4\x9b\xc5\xa1\xc4\x8d'), +(u'\u011b\u0161\u010d', '\xc4\x9b\xc5\xa1\xc4\x8d'), + +# invalid utf8 +('\xc3\x28', '\xc3\x83\x28'), +('\xa0\xa1', '\xc2\xa0\xc2\xa1'), +('Skytt\xe4', 'Skytt\xc3\xa4'), + +# entity expansion +('&<>', '&amp;&lt;&gt;'), + +# removal of invalid bytes +('\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f', '\t\n\r'), + +# attr flag +('&"\'', '&amp;"\''), +('&"\'', True, '&amp;&quot;&apos;'), + +# weirdness +(None, ''), +('abc ', 'abc'), + + ): + i = list(i); ok = i.pop() + ret = yum.misc.to_xml(*i) + self.assertEqual(type(ret), str) + self.assertEqual(ret, ok) + def setup_logging(): logging.basicConfig() plainformatter = logging.Formatter("%(message)s") diff --git a/yum/misc.py b/yum/misc.py index a0bac7b..6c3c349 100644 --- a/yum/misc.py +++ b/yum/misc.py @@ -897,67 +897,39 @@ def seq_max_split(seq, max_entries): ret.append(seq[beg:]) return ret -def _ugly_utf8_string_hack(item): - """hands back a unicoded string""" - # this is backward compat for handling non-utf8 filenames - # and content inside packages. 
:( - # content that xml can cope with but isn't really kosher - - # if we're anything obvious - do them first - if item is None: - return '' - elif isinstance(item, unicode): - return item - - # this handles any bogon formats we see - du = False - try: - x = unicode(item, 'ascii') - du = True - except UnicodeError: - encodings = ['utf-8', 'iso-8859-1', 'iso-8859-15', 'iso-8859-2'] - for enc in encodings: - try: - x = unicode(item, enc) - except UnicodeError: - pass - - else: - if x.encode(enc) == item: - if enc != 'utf-8': - print '\n%s encoding on %s\n' % (enc, item) - return x.encode('utf-8') - - - # Kill bytes (or libxml will die) not in the small byte portion of: - # http://www.w3.org/TR/REC-xml/#NT-Char - # we allow high bytes, if it passed the utf8 check above. Eg. - # good chars = #x9 | #xA | #xD | [#x20-...] - newitem = '' - bad_small_bytes = range(0, 8) + [11, 12] + range(14, 32) - for char in item: - if ord(char) in bad_small_bytes: - pass # Just ignore these bytes... - elif not du and ord(char) > 127: - newitem = newitem + '?' # byte by byte equiv of escape - else: - newitem = newitem + char - return newitem +_deletechars = ''.join(chr(i) for i in range(32) if i not in (9, 10, 13)) -__cached_saxutils = None def to_xml(item, attrib=False): - global __cached_saxutils - if __cached_saxutils is None: - import xml.sax.saxutils - __cached_saxutils = xml.sax.saxutils + """ Returns xml-friendly utf-8 encoded string. + Accepts utf-8, iso-8859-1, or unicode. + """ + if type(item) is str: + # check if valid utf8 + try: unicode(item, 'utf-8') + except UnicodeDecodeError: + # assume iso-8859-1 + item = unicode(item, 'iso-8859-1').encode('utf-8') + elif type(item) is unicode: + item = item.encode('utf-8') + elif item is None: + return '' + else: + raise ValueError, 'String expected, got %s' % repr(item) - item = _ugly_utf8_string_hack(item) - item = to_utf8(item) + # compat cruft... 
 item = item.rstrip() + + # kill ivalid low bytes + item = item.translate(None, _deletechars) + + # quote reserved XML characters + item = item.replace('&', '&amp;') + item = item.replace('<', '&lt;') + item = item.replace('>', '&gt;') if attrib: - item = __cached_saxutils.escape(item, entities={'"':"&quot;"}) - else: - item = __cached_saxutils.escape(item) + item = item.replace('"', '&quot;') + item = item.replace("'", '&apos;') + return item def unlink_f(filename):
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor