Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
openSUSE:Leap:15.4:Update
python-ujson.28874
CVE-2021-45958-fix-buffer-overflows.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File CVE-2021-45958-fix-buffer-overflows.patch of Package python-ujson.28874
From 60929c281b356fd71b8061921cb2cd1ac4f54b09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Br=C3=A9nainn=20Woodsend?= <bwoodsend@gmail.com> Date: Wed, 9 Feb 2022 20:44:01 +0000 Subject: [PATCH 01/11] Fix unchecked buffer overflows (CVE-2021-45958). Add a few extra memory reserve calls to account for the extra space that indentation needs. These kinds of memory issues are hard to spot because the buffer is resized in powers of 2, meaning that a miscalculation would only show any symptoms if the required buffer size is estimated to be just below a power of 2 but is actually just above it. Add a debug mode which replaces the power-of-2 scheme with reserving only the memory explicitly requested and adds some overflow checks. --- .github/workflows/test.yml | 8 + lib/ultrajsonenc.c | 64 ++- tests/334-reproducer.json | 857 +++++++++++++++++++++++++++++++++++++ tests/test_ujson.py | 24 ++ 4 files changed, 944 insertions(+), 9 deletions(-) create mode 100644 tests/334-reproducer.json Index: ujson-1.35/lib/ultrajsonenc.c =================================================================== --- ujson-1.35.orig/lib/ultrajsonenc.c +++ ujson-1.35/lib/ultrajsonenc.c @@ -41,6 +41,7 @@ http://www.opensource.apple.com/source/t #include <assert.h> #include <string.h> #include <stdlib.h> +#include <stddef.h> #include <math.h> #include <float.h> @@ -114,14 +115,25 @@ FIXME: Keep track of how big these get a That way we won't run our head into the wall each call */ void Buffer_Realloc (JSONObjectEncoder *enc, size_t cbNeeded) { + size_t free_space = enc->end - enc->offset; + if (free_space >= cbNeeded) + { + return; + } size_t curSize = enc->end - enc->start; - size_t newSize = curSize * 2; + size_t newSize = curSize; size_t offset = enc->offset - enc->start; +#ifdef DEBUG + // In debug mode, allocate only what is requested so that any miscalculation + // shows up plainly as a crash. 
+ newSize = (enc->offset - enc->start) + cbNeeded; +#else while (newSize < curSize + cbNeeded) { newSize *= 2; } +#endif if (enc->heap) { @@ -148,6 +160,12 @@ void Buffer_Realloc (JSONObjectEncoder * enc->end = enc->start + newSize; } +#define Buffer_Reserve(__enc, __len) \ + if ( (size_t) ((__enc)->end - (__enc)->offset) < (size_t) (__len)) \ + { \ + Buffer_Realloc((__enc), (__len));\ + } \ + FASTCALL_ATTR INLINE_PREFIX void FASTCALL_MSVC Buffer_AppendShortHexUnchecked (char *outputOffset, unsigned short value) { *(outputOffset++) = g_hexChars[(value & 0xf000) >> 12]; @@ -267,6 +285,13 @@ int Buffer_EscapeStringValidated (JSOBJ for (;;) { +#ifdef DEBUG + // 6 is the maximum length of a single character (cf. RESERVE_STRING). + if ((io < end) && (enc->end - of < 6)) { + fprintf(stderr, "Ran out of buffer space during Buffer_EscapeStringValidated()\n"); + abort(); + } +#endif JSUINT8 utflen = g_asciiOutputTable[(unsigned char) *io]; switch (utflen) @@ -488,15 +513,28 @@ int Buffer_EscapeStringValidated (JSOBJ } } -#define Buffer_Reserve(__enc, __len) \ - if ( (size_t) ((__enc)->end - (__enc)->offset) < (size_t) (__len)) \ - { \ - Buffer_Realloc((__enc), (__len));\ - } \ - -#define Buffer_AppendCharUnchecked(__enc, __chr) \ - *((__enc)->offset++) = __chr; \ +static FASTCALL_ATTR INLINE_PREFIX void FASTCALL_MSVC Buffer_AppendCharUnchecked(JSONObjectEncoder *enc, char chr) +{ +#ifdef DEBUG + if (enc->end <= enc->offset) + { + fprintf(stderr, "Overflow writing byte %d '%c'. 
The last few characters were:\n'''", chr, chr); + char * recent = enc->offset - 1000; + if (enc->start > recent) + { + recent = enc->start; + } + for (; recent < enc->offset; recent++) + { + fprintf(stderr, "%c", *recent); + } + fprintf(stderr, "'''\n"); + abort(); + } +#endif + *(enc->offset++) = chr; +} FASTCALL_ATTR INLINE_PREFIX void FASTCALL_MSVC strreverse(char* begin, char* end) { @@ -729,14 +767,6 @@ void encode(JSOBJ obj, JSONObjectEncoder return; } - /* - This reservation must hold - - length of _name as encoded worst case + - maxLength of double to string OR maxLength of JSLONG to string - */ - - Buffer_Reserve(enc, 256 + RESERVE_STRING(cbName)); if (enc->errorMsg) { return; @@ -744,6 +774,8 @@ void encode(JSOBJ obj, JSONObjectEncoder if (name) { + // 2 extra for the colon and optional space after it + Buffer_Reserve(enc, RESERVE_STRING(cbName) + 2); Buffer_AppendCharUnchecked(enc, '\"'); if (enc->forceASCII) @@ -764,14 +796,22 @@ void encode(JSOBJ obj, JSONObjectEncoder Buffer_AppendCharUnchecked(enc, '\"'); Buffer_AppendCharUnchecked (enc, ':'); -#ifndef JSON_NO_EXTRA_WHITESPACE - Buffer_AppendCharUnchecked (enc, ' '); -#endif } tc.encoder_prv = enc->prv; enc->beginTypeContext(obj, &tc, enc); + /* + This reservation covers any additions on non-variable parts below, specifically: + - Opening brackets for JT_ARRAY and JT_OBJECT + - Number representation for JT_LONG, JT_ULONG, JT_INT, and JT_DOUBLE + - Constant value for JT_TRUE, JT_FALSE, JT_NULL + + The length of 128 is the worst case length of the Buffer_AppendDoubleDconv addition. + The other types above all have smaller representations. 
+ */ + Buffer_Reserve (enc, 128); + switch (tc.type) { case JT_INVALID: @@ -788,12 +828,11 @@ void encode(JSOBJ obj, JSONObjectEncoder while (enc->iterNext(obj, &tc)) { + Buffer_Reserve (enc, enc->indent * (enc->level + 1) + 2); + if (count > 0) { Buffer_AppendCharUnchecked (enc, ','); -#ifndef JSON_NO_EXTRA_WHITESPACE - Buffer_AppendCharUnchecked (buffer, ' '); -#endif Buffer_AppendIndentNewlineUnchecked (enc); } @@ -806,8 +845,10 @@ void encode(JSOBJ obj, JSONObjectEncoder } enc->iterEnd(obj, &tc); + Buffer_Reserve (enc, enc->indent * enc->level + 1); Buffer_AppendIndentNewlineUnchecked (enc); Buffer_AppendIndentUnchecked (enc, enc->level); + Buffer_Reserve (enc, 1); Buffer_AppendCharUnchecked (enc, ']'); break; } @@ -821,12 +862,10 @@ void encode(JSOBJ obj, JSONObjectEncoder while (enc->iterNext(obj, &tc)) { + Buffer_Reserve (enc, enc->indent * (enc->level + 1) + 2); if (count > 0) { Buffer_AppendCharUnchecked (enc, ','); -#ifndef JSON_NO_EXTRA_WHITESPACE - Buffer_AppendCharUnchecked (enc, ' '); -#endif Buffer_AppendIndentNewlineUnchecked (enc); } @@ -840,8 +879,10 @@ void encode(JSOBJ obj, JSONObjectEncoder } enc->iterEnd(obj, &tc); + Buffer_Reserve (enc, enc->indent * enc->level + 1); Buffer_AppendIndentNewlineUnchecked (enc); Buffer_AppendIndentUnchecked (enc, enc->level); + Buffer_Reserve (enc, 1); Buffer_AppendCharUnchecked (enc, '}'); break; } @@ -953,7 +994,7 @@ void encode(JSOBJ obj, JSONObjectEncoder return; } - Buffer_Reserve(enc, RESERVE_STRING(szlen)); + Buffer_Reserve(enc, szlen); if (enc->errorMsg) { enc->endTypeContext(obj, &tc); Index: ujson-1.35/tests/tests.py =================================================================== --- ujson-1.35.orig/tests/tests.py +++ ujson-1.35/tests/tests.py @@ -952,6 +952,25 @@ def test_decodeStringUTF8(self): input = "someutfcharacters" raise NotImplementedError("Implement this test!") +@pytest.mark.parametrize("indent", list(range(65537, 65542))) +def test_dump_huge_indent(indent): + ujson.encode({"a": True}, 
indent=indent) + + +@pytest.mark.parametrize("first_length", list(range(2, 7))) +@pytest.mark.parametrize("second_length", list(range(10919, 10924))) +def test_dump_long_string(first_length, second_length): + ujson.dumps(["a" * first_length, "\x00" * second_length]) + + +def test_dump_indented_nested_list(): + a = _a = [] + for i in range(20): + _a.append(list(range(i))) + _a = _a[-1] + ujson.dumps(a, indent=i) + + """ if __name__ == "__main__": Index: ujson-1.35/tests/fuzz.py =================================================================== --- /dev/null +++ ujson-1.35/tests/fuzz.py @@ -0,0 +1,169 @@ +""" +A brute force fuzzer for detecting memory issues in ujson.dumps(). To use, first +compile ujson in debug mode: + + CFLAGS='-DDEBUG' python setup.py -q build_ext --inplace -f + +Then run without arguments: + + python tests/fuzz.py + +If it crashes, the last line of output is the arguments to reproduce the +failure. + + python tests/fuzz.py {{ last line of output before crash }} + +Adding --dump-python or --dump-json will print the object it intends to +serialise as either a Python literal or in JSON. 
+ +""" + +import argparse +import itertools +import json +import math +import random +import re +from pprint import pprint + +import ujson + + +class FuzzGenerator: + """A random JSON serialisable object generator.""" + + def __init__(self, seed=None): + self._randomizer = random.Random(seed) + self._shrink = 1 + + def key(self): + key_types = [self.int, self.float, self.string, self.null, self.bool] + return self._randomizer.choice(key_types)() + + def item(self): + if self._randomizer.random() > 0.8: + return self.key() + return self._randomizer.choice([self.list, self.dict])() + + def int(self): + return int(self.float()) + + def float(self): + sign = self._randomizer.choice([-1, 1, 0]) + return sign * math.exp(self._randomizer.uniform(-40, 40)) + + def string(self): + characters = ["\x00", "\t", "a", "\U0001f680", "<></>", "\u1234"] + return self._randomizer.choice(characters) * self.length() + + def bool(self): + return self._randomizer.random() < 0.5 + + def null(self): + return None + + def list(self): + return [self.item() for i in range(self.length())] + + def dict(self): + return {self.key(): self.item() for i in range(self.length())} + + def length(self): + self._shrink *= 0.99 + return int(math.exp(self._randomizer.uniform(-0.5, 5)) * self._shrink) + + +def random_object(seed=None): + return FuzzGenerator(seed).item() + + +class RangeOption(argparse.Action): + def __call__(self, parser, namespace, values, option_string=None): + values = re.findall("[^: ]+", values) + if len(values) == 1: + values = (int(values[0]),) + else: + values = range(*map(int, values)) + setattr(namespace, self.dest, values) + + +class ListOption(argparse.Action): + def __call__(self, parser, namespace, values, option_string=None): + values = tuple(map(int, re.findall("[^, ]+", values))) + setattr(namespace, self.dest, values) + + +parser = argparse.ArgumentParser( + epilog=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter +) +parser.add_argument( + "--seed", + 
default=range(100), + action=RangeOption, + dest="seeds", + help="A seed or range of seeds (in the form start:end[:step]) " + "to initialise the randomizer.", +) +parser.add_argument( + "--indent", + default=(0, 1, 2, 3, 4, 5, 12, 100, 1000), + action=ListOption, + help="A comma separated sequence of indentation lengths to test.", +) +parser.add_argument( + "--ensure_ascii", + default=(0, 1), + action=ListOption, + help="Sets the ensure_ascii option to ujson.dumps(). " + "May be 0 or 1 or 0,1 to testboth.", +) +parser.add_argument( + "--encode_html_chars", + default=(0, 1), + action=ListOption, + help="Sets the encode_html_chars option to ujson.dumps(). " + "May be 0 or 1 or 0,1 to test both.", +) +parser.add_argument( + "--escape_forward_slashes", + default=(0, 1), + action=ListOption, + help="Sets the escape_forward_slashes option to ujson.dumps(). " + "May be 0 or 1 or 0,1 to test both.", +) +parser.add_argument( + "--dump-python", + action="store_true", + help="Print the randomly generated object as a Python literal and exit.", +) +parser.add_argument( + "--dump-json", + action="store_true", + help="Print the randomly generated object in JSON format and exit.", +) + + +def cli(args=None): + options = dict(parser.parse_args(args)._get_kwargs()) + if options.pop("dump_json"): + print(json.dumps(random_object(options["seeds"][0]), indent=2)) + elif options.pop("dump_python"): + pprint(random_object(options["seeds"][0])) + else: + fuzz(**options) + + +def fuzz(seeds, **options): + try: + for seed in seeds: + data = random_object(seed) + for permutation in itertools.product(*options.values()): + _options = dict(zip(options.keys(), permutation)) + print(f"--seed {seed}", *(f"--{k} {v}" for (k, v) in _options.items())) + ujson.dumps(data, **_options) + except KeyboardInterrupt: + pass + + +if __name__ == "__main__": + cli() Index: ujson-1.35/lib/ultrajson.h =================================================================== --- ujson-1.35.orig/lib/ultrajson.h +++ 
ujson-1.35/lib/ultrajson.h @@ -56,9 +56,6 @@ tree doesn't have cyclic references. #include <stdio.h> #include <wchar.h> -// Don't output any extra whitespaces when encoding -#define JSON_NO_EXTRA_WHITESPACE - // Max decimals to encode double floating point numbers with #ifndef JSON_DOUBLE_MAX_DECIMALS #define JSON_DOUBLE_MAX_DECIMALS 15
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an openSUSE project.
Sign Up
Log In
Places
Places
All Projects
Status Monitor