File CVE-2021-45958-fix-buffer-overflows.patch of Package python-ujson.28874

Overview Repositories Revisions Requests Users Attributes Meta

File CVE-2021-45958-fix-buffer-overflows.patch of Package python-ujson.28874

From 60929c281b356fd71b8061921cb2cd1ac4f54b09 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Br=C3=A9nainn=20Woodsend?= <bwoodsend@gmail.com>
Date: Wed, 9 Feb 2022 20:44:01 +0000
Subject: [PATCH 01/11] Fix unchecked buffer overflows (CVE-2021-45958).

Add a few extra memory reserve calls to account for the extra space that
indentation needs.

These kinds of memory issues are hard to spot because the buffer is resized in
powers of 2, meaning that a miscalculation only shows symptoms if the required
buffer size is estimated to be just below a power of 2 but is actually just
above it. Add a debug mode which replaces the power-of-2 scheme with reserving
only the memory explicitly requested, and which adds some overflow checks.
---
 .github/workflows/test.yml |   8 +
 lib/ultrajsonenc.c         |  64 ++-
 tests/334-reproducer.json  | 857 +++++++++++++++++++++++++++++++++++++
 tests/test_ujson.py        |  24 ++
 4 files changed, 944 insertions(+), 9 deletions(-)
 create mode 100644 tests/334-reproducer.json

Index: ujson-1.35/lib/ultrajsonenc.c
===================================================================
--- ujson-1.35.orig/lib/ultrajsonenc.c
+++ ujson-1.35/lib/ultrajsonenc.c
@@ -41,6 +41,7 @@ http://www.opensource.apple.com/source/t
 #include <assert.h>
 #include <string.h>
 #include <stdlib.h>
+#include <stddef.h>
 #include <math.h>
 
 #include <float.h>
@@ -114,14 +115,25 @@ FIXME: Keep track of how big these get a
 That way we won't run our head into the wall each call */
 void Buffer_Realloc (JSONObjectEncoder *enc, size_t cbNeeded)
 {
+  size_t free_space = enc->end - enc->offset;
+  if (free_space >= cbNeeded)
+  {
+    return;
+  }
   size_t curSize = enc->end - enc->start;
-  size_t newSize = curSize * 2;
+  size_t newSize = curSize;
   size_t offset = enc->offset - enc->start;
 
+#ifdef DEBUG
+  // In debug mode, allocate only what is requested so that any miscalculation
+  // shows up plainly as a crash.
+  newSize = (enc->offset - enc->start) + cbNeeded;
+#else
   while (newSize < curSize + cbNeeded)
   {
     newSize *= 2;
   }
+#endif
 
   if (enc->heap)
   {
@@ -148,6 +160,12 @@ void Buffer_Realloc (JSONObjectEncoder *
   enc->end = enc->start + newSize;
 }
 
+#define Buffer_Reserve(__enc, __len) \
+    if ( (size_t) ((__enc)->end - (__enc)->offset) < (size_t) (__len))  \
+    {   \
+      Buffer_Realloc((__enc), (__len));\
+    }   \
+
 FASTCALL_ATTR INLINE_PREFIX void FASTCALL_MSVC Buffer_AppendShortHexUnchecked (char *outputOffset, unsigned short value)
 {
   *(outputOffset++) = g_hexChars[(value & 0xf000) >> 12];
@@ -267,6 +285,13 @@ int Buffer_EscapeStringValidated (JSOBJ
 
   for (;;)
   {
+#ifdef DEBUG
+  // 6 is the maximum length of a single character (cf. RESERVE_STRING).
+  if ((io < end) && (enc->end - of < 6)) {
+    fprintf(stderr, "Ran out of buffer space during Buffer_EscapeStringValidated()\n");
+    abort();
+  }
+#endif
     JSUINT8 utflen = g_asciiOutputTable[(unsigned char) *io];
 
     switch (utflen)
@@ -488,15 +513,28 @@ int Buffer_EscapeStringValidated (JSOBJ
   }
 }
 
-#define Buffer_Reserve(__enc, __len) \
-    if ( (size_t) ((__enc)->end - (__enc)->offset) < (size_t) (__len))  \
-    {   \
-      Buffer_Realloc((__enc), (__len));\
-    }   \
-
 
-#define Buffer_AppendCharUnchecked(__enc, __chr) \
-                *((__enc)->offset++) = __chr; \
+static FASTCALL_ATTR INLINE_PREFIX void FASTCALL_MSVC Buffer_AppendCharUnchecked(JSONObjectEncoder *enc, char chr)
+{
+#ifdef DEBUG
+  if (enc->end <= enc->offset)
+  {
+    fprintf(stderr, "Overflow writing byte %d '%c'. The last few characters were:\n'''", chr, chr);
+    char * recent = enc->offset - 1000;
+    if (enc->start > recent)
+    {
+      recent = enc->start;
+    }
+    for (; recent < enc->offset; recent++)
+    {
+      fprintf(stderr, "%c", *recent);
+    }
+    fprintf(stderr, "'''\n");
+    abort();
+  }
+#endif
+  *(enc->offset++) = chr;
+}
 
 FASTCALL_ATTR INLINE_PREFIX void FASTCALL_MSVC strreverse(char* begin, char* end)
 {
@@ -729,14 +767,6 @@ void encode(JSOBJ obj, JSONObjectEncoder
     return;
   }
 
-  /*
-  This reservation must hold
-
-  length of _name as encoded worst case +
-  maxLength of double to string OR maxLength of JSLONG to string
-  */
-
-  Buffer_Reserve(enc, 256 + RESERVE_STRING(cbName));
   if (enc->errorMsg)
   {
     return;
@@ -744,6 +774,8 @@ void encode(JSOBJ obj, JSONObjectEncoder
 
   if (name)
   {
+    // 2 extra for the colon and optional space after it
+    Buffer_Reserve(enc, RESERVE_STRING(cbName) + 2);
     Buffer_AppendCharUnchecked(enc, '\"');
 
     if (enc->forceASCII)
@@ -764,14 +796,22 @@ void encode(JSOBJ obj, JSONObjectEncoder
     Buffer_AppendCharUnchecked(enc, '\"');
 
     Buffer_AppendCharUnchecked (enc, ':');
-#ifndef JSON_NO_EXTRA_WHITESPACE
-    Buffer_AppendCharUnchecked (enc, ' ');
-#endif
     }
 
     tc.encoder_prv = enc->prv;
     enc->beginTypeContext(obj, &tc, enc);
 
+    /*
+    This reservation covers any additions on non-variable parts below, specifically:
+    - Opening brackets for JT_ARRAY and JT_OBJECT
+    - Number representation for JT_LONG, JT_ULONG, JT_INT, and JT_DOUBLE
+    - Constant value for JT_TRUE, JT_FALSE, JT_NULL
+   
+    The length of 128 is the worst case length of the Buffer_AppendDoubleDconv addition.
+    The other types above all have smaller representations.
+    */
+    Buffer_Reserve (enc, 128);
+
     switch (tc.type)
     {
       case JT_INVALID:
@@ -788,12 +828,11 @@ void encode(JSOBJ obj, JSONObjectEncoder
 
         while (enc->iterNext(obj, &tc))
         {
+	  Buffer_Reserve (enc, enc->indent * (enc->level + 1) + 2);
+
           if (count > 0)
           {
             Buffer_AppendCharUnchecked (enc, ',');
-#ifndef JSON_NO_EXTRA_WHITESPACE
-            Buffer_AppendCharUnchecked (buffer, ' ');
-#endif
             Buffer_AppendIndentNewlineUnchecked (enc);
           }
 
@@ -806,8 +845,10 @@ void encode(JSOBJ obj, JSONObjectEncoder
       }
 
       enc->iterEnd(obj, &tc);
+      Buffer_Reserve (enc, enc->indent * enc->level + 1);
       Buffer_AppendIndentNewlineUnchecked (enc);
       Buffer_AppendIndentUnchecked (enc, enc->level);
+      Buffer_Reserve (enc, 1);
       Buffer_AppendCharUnchecked (enc, ']');
       break;
   }
@@ -821,12 +862,10 @@ void encode(JSOBJ obj, JSONObjectEncoder
 
     while (enc->iterNext(obj, &tc))
     {
+      Buffer_Reserve (enc, enc->indent * (enc->level + 1) + 2);
       if (count > 0)
       {
         Buffer_AppendCharUnchecked (enc, ',');
-#ifndef JSON_NO_EXTRA_WHITESPACE
-        Buffer_AppendCharUnchecked (enc, ' ');
-#endif
         Buffer_AppendIndentNewlineUnchecked (enc);
       }
 
@@ -840,8 +879,10 @@ void encode(JSOBJ obj, JSONObjectEncoder
     }
 
     enc->iterEnd(obj, &tc);
+    Buffer_Reserve (enc, enc->indent * enc->level + 1);
     Buffer_AppendIndentNewlineUnchecked (enc);
     Buffer_AppendIndentUnchecked (enc, enc->level);
+    Buffer_Reserve (enc, 1);
     Buffer_AppendCharUnchecked (enc, '}');
     break;
   }
@@ -953,7 +994,7 @@ void encode(JSOBJ obj, JSONObjectEncoder
         return;
       }
 
-      Buffer_Reserve(enc, RESERVE_STRING(szlen));
+      Buffer_Reserve(enc, szlen);
       if (enc->errorMsg)
       {
         enc->endTypeContext(obj, &tc);
Index: ujson-1.35/tests/tests.py
===================================================================
--- ujson-1.35.orig/tests/tests.py
+++ ujson-1.35/tests/tests.py
@@ -952,6 +952,25 @@ def test_decodeStringUTF8(self):
 input = "someutfcharacters"
 raise NotImplementedError("Implement this test!")
 
+@pytest.mark.parametrize("indent", list(range(65537, 65542)))
+def test_dump_huge_indent(indent):
+    ujson.encode({"a": True}, indent=indent)
+
+
+@pytest.mark.parametrize("first_length", list(range(2, 7)))
+@pytest.mark.parametrize("second_length", list(range(10919, 10924)))
+def test_dump_long_string(first_length, second_length):
+    ujson.dumps(["a" * first_length, "\x00" * second_length])
+
+
+def test_dump_indented_nested_list():
+    a = _a = []
+    for i in range(20):
+        _a.append(list(range(i)))
+        _a = _a[-1]
+        ujson.dumps(a, indent=i)
+
+
 """
 
 if __name__ == "__main__":
Index: ujson-1.35/tests/fuzz.py
===================================================================
--- /dev/null
+++ ujson-1.35/tests/fuzz.py
@@ -0,0 +1,169 @@
+"""
+A brute force fuzzer for detecting memory issues in ujson.dumps(). To use, first
+compile ujson in debug mode:
+
+    CFLAGS='-DDEBUG' python setup.py -q build_ext --inplace -f
+
+Then run without arguments:
+
+    python tests/fuzz.py
+
+If it crashes, the last line of output is the arguments to reproduce the
+failure.
+
+    python tests/fuzz.py {{ last line of output before crash }}
+
+Adding --dump-python or --dump-json will print the object it intends to
+serialise as either a Python literal or in JSON.
+
+"""
+
+import argparse
+import itertools
+import json
+import math
+import random
+import re
+from pprint import pprint
+
+import ujson
+
+
+class FuzzGenerator:
+    """A random JSON serialisable object generator."""
+
+    def __init__(self, seed=None):
+        self._randomizer = random.Random(seed)
+        self._shrink = 1
+
+    def key(self):
+        key_types = [self.int, self.float, self.string, self.null, self.bool]
+        return self._randomizer.choice(key_types)()
+
+    def item(self):
+        if self._randomizer.random() > 0.8:
+            return self.key()
+        return self._randomizer.choice([self.list, self.dict])()
+
+    def int(self):
+        return int(self.float())
+
+    def float(self):
+        sign = self._randomizer.choice([-1, 1, 0])
+        return sign * math.exp(self._randomizer.uniform(-40, 40))
+
+    def string(self):
+        characters = ["\x00", "\t", "a", "\U0001f680", "<></>", "\u1234"]
+        return self._randomizer.choice(characters) * self.length()
+
+    def bool(self):
+        return self._randomizer.random() < 0.5
+
+    def null(self):
+        return None
+
+    def list(self):
+        return [self.item() for i in range(self.length())]
+
+    def dict(self):
+        return {self.key(): self.item() for i in range(self.length())}
+
+    def length(self):
+        self._shrink *= 0.99
+        return int(math.exp(self._randomizer.uniform(-0.5, 5)) * self._shrink)
+
+
+def random_object(seed=None):
+    return FuzzGenerator(seed).item()
+
+
+class RangeOption(argparse.Action):
+    def __call__(self, parser, namespace, values, option_string=None):
+        values = re.findall("[^: ]+", values)
+        if len(values) == 1:
+            values = (int(values[0]),)
+        else:
+            values = range(*map(int, values))
+        setattr(namespace, self.dest, values)
+
+
+class ListOption(argparse.Action):
+    def __call__(self, parser, namespace, values, option_string=None):
+        values = tuple(map(int, re.findall("[^, ]+", values)))
+        setattr(namespace, self.dest, values)
+
+
+parser = argparse.ArgumentParser(
+    epilog=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
+)
+parser.add_argument(
+    "--seed",
+    default=range(100),
+    action=RangeOption,
+    dest="seeds",
+    help="A seed or range of seeds (in the form start:end[:step]) "
+    "to initialise the randomizer.",
+)
+parser.add_argument(
+    "--indent",
+    default=(0, 1, 2, 3, 4, 5, 12, 100, 1000),
+    action=ListOption,
+    help="A comma separated sequence of indentation lengths to test.",
+)
+parser.add_argument(
+    "--ensure_ascii",
+    default=(0, 1),
+    action=ListOption,
+    help="Sets the ensure_ascii option to ujson.dumps(). "
+    "May be 0 or 1 or 0,1 to testboth.",
+)
+parser.add_argument(
+    "--encode_html_chars",
+    default=(0, 1),
+    action=ListOption,
+    help="Sets the encode_html_chars option to ujson.dumps(). "
+    "May be 0 or 1 or 0,1 to test both.",
+)
+parser.add_argument(
+    "--escape_forward_slashes",
+    default=(0, 1),
+    action=ListOption,
+    help="Sets the escape_forward_slashes option to ujson.dumps(). "
+    "May be 0 or 1 or 0,1 to test both.",
+)
+parser.add_argument(
+    "--dump-python",
+    action="store_true",
+    help="Print the randomly generated object as a Python literal and exit.",
+)
+parser.add_argument(
+    "--dump-json",
+    action="store_true",
+    help="Print the randomly generated object in JSON format and exit.",
+)
+
+
+def cli(args=None):
+    options = dict(parser.parse_args(args)._get_kwargs())
+    if options.pop("dump_json"):
+        print(json.dumps(random_object(options["seeds"][0]), indent=2))
+    elif options.pop("dump_python"):
+        pprint(random_object(options["seeds"][0]))
+    else:
+        fuzz(**options)
+
+
+def fuzz(seeds, **options):
+    try:
+        for seed in seeds:
+            data = random_object(seed)
+            for permutation in itertools.product(*options.values()):
+                _options = dict(zip(options.keys(), permutation))
+                print(f"--seed {seed}", *(f"--{k} {v}" for (k, v) in _options.items()))
+                ujson.dumps(data, **_options)
+    except KeyboardInterrupt:
+        pass
+
+
+if __name__ == "__main__":
+    cli()
Index: ujson-1.35/lib/ultrajson.h
===================================================================
--- ujson-1.35.orig/lib/ultrajson.h
+++ ujson-1.35/lib/ultrajson.h
@@ -56,9 +56,6 @@ tree doesn't have cyclic references.
 #include <stdio.h>
 #include <wchar.h>
 
-// Don't output any extra whitespaces when encoding
-#define JSON_NO_EXTRA_WHITESPACE
-
 // Max decimals to encode double floating point numbers with
 #ifndef JSON_DOUBLE_MAX_DECIMALS
 #define JSON_DOUBLE_MAX_DECIMALS 15

Places

File CVE-2021-45958-fix-buffer-overflows.patch of Package python-ujson.28874

Places