Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
SUSE:SLE-15-SP2:Update
python-ujson
CVE-2022-31116-surrogate-chars.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File CVE-2022-31116-surrogate-chars.patch of Package python-ujson
From e0e5db9a46decfea1174217382486e06bbab4743 Mon Sep 17 00:00:00 2001 From: JustAnotherArchivist <JustAnotherArchivist@users.noreply.github.com> Date: Thu, 9 Jun 2022 17:23:15 +0000 Subject: [PATCH] Fix handling of surrogates on decoding This implements surrogate handling on decoding as it is in the standard library. Lone escaped surrogates and any raw surrogates in the input result in surrogates in the output, and escaped surrogate pairs get decoded into non-BMP characters. Note that raw surrogate pairs get treated differently on platforms/compilers with 16-bit `wchar_t`, e.g. Microsoft Windows. --- lib/ultrajsondec.c | 46 ++++++++++++++++++++-------------------------- python/JSONtoObj.c | 6 +++++- tests/tests.py | 2 ++ 3 files changed, 27 insertions(+), 27 deletions(-) --- a/lib/ultrajsondec.c +++ b/lib/ultrajsondec.c @@ -424,13 +424,15 @@ static const JSUINT8 g_decoderLookup[256 FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_string ( struct DecoderState *ds) { - JSUTF16 sur[2] = { 0 }; - int iSur = 0; int index; wchar_t *escOffset; wchar_t *escStart; size_t escLen = (ds->escEnd - ds->escStart); JSUINT8 *inputOffset; + JSUTF16 ch = 0; +#if WCHAR_MAX >= 0x10FFFF + JSUINT8 *lastHighSurrogate = NULL; +#endif JSUINT8 oct; JSUTF32 ucs; ds->lastType = JT_INVALID; @@ -530,7 +532,7 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode case '7': case '8': case '9': - sur[iSur] = (sur[iSur] << 4) + (JSUTF16) (*inputOffset - '0'); + ch = (ch << 4) + (JSUTF16) (*inputOffset - '0'); break; case 'a': @@ -539,7 +541,7 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode case 'd': case 'e': case 'f': - sur[iSur] = (sur[iSur] << 4) + 10 + (JSUTF16) (*inputOffset - 'a'); + ch = (ch << 4) + 10 + (JSUTF16) (*inputOffset - 'a'); break; case 'A': @@ -548,39 +550,31 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode case 'D': case 'E': case 'F': - sur[iSur] = (sur[iSur] << 4) + 10 + (JSUTF16) (*inputOffset - 'A'); + ch = (ch << 4) + 10 + (JSUTF16) (*inputOffset - 'A'); break; } inputOffset ++; } - if (iSur == 0) 
+#if WCHAR_MAX >= 0x10FFFF + if ((ch & 0xfc00) == 0xdc00 && lastHighSurrogate == inputOffset - 6 * sizeof(*inputOffset)) { - if((sur[iSur] & 0xfc00) == 0xd800) - { - // First of a surrogate pair, continue parsing - iSur ++; - break; - } - (*escOffset++) = (wchar_t) sur[iSur]; - iSur = 0; + // Low surrogate immediately following a high surrogate + // Overwrite existing high surrogate with combined character + *(escOffset-1) = (((*(escOffset-1) - 0xd800) <<10) | (ch - 0xdc00)) + 0x10000; } else - { - // Decode pair - if ((sur[1] & 0xfc00) != 0xdc00) - { - return SetError (ds, -1, "Unpaired high surrogate when decoding 'string'"); - } -#if WCHAR_MAX == 0xffff - (*escOffset++) = (wchar_t) sur[0]; - (*escOffset++) = (wchar_t) sur[1]; -#else - (*escOffset++) = (wchar_t) 0x10000 + (((sur[0] - 0xd800) << 10) | (sur[1] - 0xdc00)); #endif - iSur = 0; + { + *(escOffset++) = (wchar_t) ch; } +#if WCHAR_MAX >= 0x10FFFF + if ((ch & 0xfc00) == 0xd800) + { + lastHighSurrogate = inputOffset; + } +#endif break; } --- a/python/JSONtoObj.c +++ b/python/JSONtoObj.c @@ -161,7 +161,11 @@ PyObject* JSONToObj(PyObject* self, PyOb else if (PyUnicode_Check(arg)) { - sarg = PyUnicode_AsUTF8String(arg); +#if PY_MAJOR_VERSION >= 3 + sarg = PyUnicode_AsEncodedString(arg, NULL, "surrogatepass"); +#else + sarg = PyUnicode_AsEncodedString(arg, NULL, "ignore"); +#endif if (sarg == NULL) { //Exception raised above us by codec according to docs --- a/tests/tests.py +++ b/tests/tests.py @@ -224,6 +224,7 @@ class UltraJSONTests(unittest.TestCase): # Characters outside of Basic Multilingual Plane(larger than # 16 bits) are represented as \UXXXXXXXX in python but should be encoded # as \uXXXX\uXXXX in json. 
+ @unittest.skipIf(six.PY2, "Doesn't work with Python 2") def testEncodeUnicodeBMP(self): s = '\U0001f42e\U0001f42e\U0001F42D\U0001F42D' # 🐮🐮🐭🐭 encoded = ujson.dumps(s) @@ -251,6 +252,7 @@ class UltraJSONTests(unittest.TestCase): decoded = ujson.loads(encoded) self.assertEqual(s, decoded) + @unittest.skipIf(six.PY2, "Doesn't work with Python 2") def testEncodeSymbols(self): s = '\u273f\u2661\u273f' # ✿♡✿ encoded = ujson.dumps(s)
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor