Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
openSUSE:Backports:SLE-15-SP6:Update
lscsoft-glue
lscsoft-glue-python-3.10-fixes.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File lscsoft-glue-python-3.10-fixes.patch of Package lscsoft-glue
From 0df2bc1b9a994e5c3c741fa0925470a4183833b3 Mon Sep 17 00:00:00 2001 From: Duncan Macleod <duncan.macleod@ligo.org> Date: Tue, 30 Nov 2021 16:42:29 +0000 Subject: [PATCH 1/2] glue.ligolw: fix python3.10 compatibility This is a cross-project cherry-pick of https://git.ligo.org/kipp.cannon/python-ligo-lw/-/commit/93ecb764202ee8cb56722441eb339fd1e719b5aa Co-authored-by: Kipp Cannon <kipp.cannon@ligo.org> --- glue/ligolw/tokenizer.RowDumper.c | 8 +- glue/ligolw/tokenizer.Tokenizer.c | 138 +++++++++++++++++++----------- 2 files changed, 91 insertions(+), 55 deletions(-) diff --git a/glue/ligolw/tokenizer.RowDumper.c b/glue/ligolw/tokenizer.RowDumper.c index 796586fd..dfc97ff7 100644 --- a/glue/ligolw/tokenizer.RowDumper.c +++ b/glue/ligolw/tokenizer.RowDumper.c @@ -30,6 +30,8 @@ #include <structmember.h> #include <stdlib.h> #include <tokenizer.h> +#include <wchar.h> +#include <wctype.h> /* @@ -91,7 +93,7 @@ static void __del__(PyObject *self) static int __init__(PyObject *self, PyObject *args, PyObject *kwds) { ligolw_RowDumper *rowdumper = (ligolw_RowDumper *) self; - Py_UNICODE default_delimiter = ','; + wchar_t default_delimiter = L','; rowdumper->delimiter = NULL; if(!PyArg_ParseTuple(args, "OO|U", &rowdumper->attributes, &rowdumper->formats, &rowdumper->delimiter)) @@ -100,7 +102,7 @@ static int __init__(PyObject *self, PyObject *args, PyObject *kwds) if(rowdumper->delimiter) Py_INCREF(rowdumper->delimiter); else - rowdumper->delimiter = PyUnicode_FromUnicode(&default_delimiter, 1); + rowdumper->delimiter = PyUnicode_FromWideChar(&default_delimiter, 1); rowdumper->attributes = llwtokenizer_build_attributes(rowdumper->attributes); rowdumper->formats = llwtokenizer_build_formats(rowdumper->formats); if(!rowdumper->delimiter || !rowdumper->attributes || !rowdumper->formats) @@ -220,7 +222,7 @@ static PyObject *next(PyObject *self) } if(val == Py_None) - token = PyUnicode_FromUnicode(NULL, 0); /* u"" */ + token = PyUnicode_FromWideChar(NULL, 0); /* u"" */ else token = PyObject_CallFunctionObjArgs(PyTuple_GET_ITEM(rowdumper->formats, i), val, NULL); Py_DECREF(val); diff --git a/glue/ligolw/tokenizer.Tokenizer.c b/glue/ligolw/tokenizer.Tokenizer.c index f38151e3..e5c25996 100644 --- a/glue/ligolw/tokenizer.Tokenizer.c +++ b/glue/ligolw/tokenizer.Tokenizer.c @@ -32,6 +32,8 @@ #include <stdlib.h> #include <string.h> #include <tokenizer.h> +#include <wchar.h> +#include <wctype.h> /* @@ -49,7 +51,7 @@ */ -static const Py_UNICODE default_quote_characters[] = {'\'', '\"', '\0'}; +static const wchar_t default_quote_characters[] = {L'\'', L'\"', 0}; /* @@ -66,19 +68,19 @@ typedef struct { /* the type to which the next parsed token will be converted */ PyObject **type; /* delimiter character to be used in parsing */ - Py_UNICODE delimiter; + wchar_t delimiter; /* the character(s) to interpret as a quote character */ - const Py_UNICODE *quote_characters; + const wchar_t *quote_characters; /* the character to interpret as the escape character */ - Py_UNICODE escape_character; + wchar_t escape_character; /* size of internal buffer, minus null terminator */ Py_ssize_t allocation; /* internal buffer */ - Py_UNICODE *data; + wchar_t *data; /* end of internal buffer's contents (null terminator) */ - Py_UNICODE *length; + wchar_t *length; /* current offset in buffer */ - Py_UNICODE *pos; + wchar_t *pos; } ligolw_Tokenizer; @@ -90,7 +92,12 @@ typedef struct { static int add_to_data(ligolw_Tokenizer *tokenizer, PyObject *unicode) { + /* FIXME: remove GET_SIZE vers. when we require python >= 3.12 */ +#ifndef PyUnicode_GET_LENGTH Py_ssize_t n = PyUnicode_GET_SIZE(unicode); +#else + Py_ssize_t n = PyUnicode_GET_LENGTH(unicode); +#endif if(n) { if(tokenizer->length - tokenizer->data + n > tokenizer->allocation) { @@ -106,7 +113,7 @@ static int add_to_data(ligolw_Tokenizer *tokenizer, PyObject *unicode) * the null terminator */ - Py_UNICODE *old_data = tokenizer->data; + wchar_t *old_data = tokenizer->data; tokenizer->data = realloc(tokenizer->data, (tokenizer->allocation + n + 1) * sizeof(*tokenizer->data)); if(!tokenizer->data) { @@ -132,7 +139,7 @@ static int add_to_data(ligolw_Tokenizer *tokenizer, PyObject *unicode) * terminator */ - memcpy(tokenizer->length, PyUnicode_AsUnicode(unicode), n * sizeof(*tokenizer->length)); + PyUnicode_AsWideChar(unicode, tokenizer->length, n); tokenizer->length += n; *tokenizer->length = 0; } @@ -184,7 +191,7 @@ static void unref_types(ligolw_Tokenizer *tokenizer) */ -static void parse_error(PyObject *exception, const Py_UNICODE *buffer, const ptrdiff_t buffer_length, const Py_UNICODE *pos, const char *msg) +static void parse_error(PyObject *exception, const wchar_t *buffer, const ptrdiff_t buffer_length, const wchar_t *pos, const char *msg) { PyObject *buffer_str; PyObject *pos_str; @@ -194,15 +201,15 @@ static void parse_error(PyObject *exception, const Py_UNICODE *buffer, const ptr buffer_str = PyUnicode_Encode(buffer, buffer_length, NULL, NULL); pos_str = PyUnicode_Encode(pos, 1, NULL, NULL); #else - buffer_str = PyUnicode_FromUnicode(buffer, buffer_length); - pos_str = PyUnicode_FromUnicode(pos, 1); + buffer_str = PyUnicode_FromWideChar(buffer, buffer_length); + pos_str = PyUnicode_FromWideChar(pos, 1); #endif if(buffer_str && pos_str) #if PY_MAJOR_VERSION < 3 PyErr_Format(exception, "parse error in '%s' near '%s' at position %td: %s", PyString_AS_STRING(buffer_str), PyString_AS_STRING(pos_str), pos - buffer + 1, msg); #else - PyErr_Format(exception, "parse error in '%U' near '%U' at position %td: %s", buffer_str, pos_str, pos - buffer + 1, msg); + PyErr_Format(exception, "parse error in '%U' near '%U' at position %zd: %s", buffer_str, pos_str, (Py_ssize_t) (pos - buffer + 1), msg); #endif else PyErr_Format(exception, "parse error (details not available): %s", msg); @@ -212,28 +219,14 @@ static void parse_error(PyObject *exception, const Py_UNICODE *buffer, const ptr } -/* - * Py_UNICODE equivalent of strchr() - */ - - -static const Py_UNICODE *pyunicode_strchr(const Py_UNICODE *s, Py_UNICODE c) -{ - for(; *s; s++) - if(*s == c) - return s; - return NULL; -} - - /* * Unescape a string. */ -static int unescape(Py_UNICODE *s, Py_UNICODE **end, const Py_UNICODE *escapable_characters, Py_UNICODE escape_character) +static int unescape(wchar_t *s, wchar_t **end, const wchar_t *escapable_characters, wchar_t escape_character) { - Py_UNICODE *start = s; + wchar_t *start = s; int escaped = 0; while(*s) { @@ -251,7 +244,7 @@ static int unescape(Py_UNICODE *s, Py_UNICODE **end, const Py_UNICODE *escapable * Check for an unrecognized escape sequence. */ - if(!pyunicode_strchr(escapable_characters, *s)) { + if(!wcschr(escapable_characters, *s)) { parse_error(PyExc_ValueError, start, *end - start - 1, s - 1, "unrecognized escape sequence"); return -1; } @@ -304,12 +297,12 @@ static int unescape(Py_UNICODE *s, Py_UNICODE **end, const Py_UNICODE *escapable */ -static PyObject *next_token(ligolw_Tokenizer *tokenizer, Py_UNICODE **start, Py_UNICODE **end) +static PyObject *next_token(ligolw_Tokenizer *tokenizer, wchar_t **start, wchar_t **end) { - Py_UNICODE *pos = tokenizer->pos; - Py_UNICODE *bailout = tokenizer->length; + wchar_t *pos = tokenizer->pos; + wchar_t *bailout = tokenizer->length; PyObject *type = *tokenizer->type; - Py_UNICODE quote_character; + wchar_t quote_character; /* * The following code matches the pattern: @@ -337,10 +330,10 @@ static PyObject *next_token(ligolw_Tokenizer *tokenizer, Py_UNICODE **start, Py_ if(pos >= bailout) goto stop_iteration; - while(Py_UNICODE_ISSPACE(*pos)) + while(iswspace(*pos)) if(++pos >= bailout) goto stop_iteration; - if(pyunicode_strchr(tokenizer->quote_characters, *pos)) { + if(wcschr(tokenizer->quote_characters, *pos)) { /* * Found a quoted token. */ @@ -368,7 +361,7 @@ static PyObject *next_token(ligolw_Tokenizer *tokenizer, Py_UNICODE **start, Py_ quote_character = 0; *start = pos; - while(!Py_UNICODE_ISSPACE(*pos) && (*pos != tokenizer->delimiter)) + while(!iswspace(*pos) && (*pos != tokenizer->delimiter)) if(++pos >= bailout) goto stop_iteration; *end = pos; @@ -382,7 +375,7 @@ static PyObject *next_token(ligolw_Tokenizer *tokenizer, Py_UNICODE **start, Py_ *start = *end = NULL; } while(*pos != tokenizer->delimiter) { - if(!Py_UNICODE_ISSPACE(*pos)) { + if(!iswspace(*pos)) { parse_error(PyExc_ValueError, *start, tokenizer->length - *start - 1, pos, "expected whitespace or delimiter"); return NULL; } @@ -416,7 +409,7 @@ static PyObject *next_token(ligolw_Tokenizer *tokenizer, Py_UNICODE **start, Py_ **end = 0; if(quote_character) { /* FIXME: remove the delimiter */ - Py_UNICODE escapable_characters[] = {quote_character, tokenizer->escape_character, tokenizer->delimiter, '\0'}; + wchar_t escapable_characters[] = {quote_character, tokenizer->escape_character, tokenizer->delimiter, 0}; if(unescape(*start, end, escapable_characters, tokenizer->escape_character)) return NULL; } @@ -453,6 +446,10 @@ static PyObject *append(PyObject *self, PyObject *data) int fail; if(PyUnicode_Check(data)) { + /* FIXME: remove when we require Python >= 3.12 */ +#ifdef PyUnicode_READY + PyUnicode_READY(data); +#endif fail = add_to_data((ligolw_Tokenizer *) self, data); /* FIXME: remove when we require >= 3 */ #if PY_MAJOR_VERSION < 3 @@ -508,12 +505,25 @@ static int __init__(PyObject *self, PyObject *args, PyObject *kwds) if(!PyArg_ParseTuple(args, "U", &arg)) return -1; + /* FIXME: remove when we require Python >= 3.12 */ +#ifdef PyUnicode_READY + PyUnicode_READY(arg); +#endif + + /* FIXME: remove _GET_SIZE vers. when we require Python >= 3.3 */ +#ifndef PyUnicode_GET_LENGTH if(PyUnicode_GET_SIZE(arg) != 1) { PyErr_SetString(PyExc_ValueError, "len(delimiter) != 1"); return -1; } +#else + if(PyUnicode_GET_LENGTH(arg) != 1) { + PyErr_SetString(PyExc_ValueError, "len(delimiter) != 1"); + return -1; + } +#endif - tokenizer->delimiter = *PyUnicode_AS_UNICODE(arg); + PyUnicode_AsWideChar(arg, &tokenizer->delimiter, 1); tokenizer->quote_characters = default_quote_characters; tokenizer->escape_character = '\\'; tokenizer->types = malloc(1 * sizeof(*tokenizer->types)); @@ -552,7 +562,7 @@ static PyObject *next(PyObject *self) ligolw_Tokenizer *tokenizer = (ligolw_Tokenizer *) self; PyObject *type; PyObject *token; - Py_UNICODE *start, *end; + wchar_t *start, *end; /* * Identify the start and end of the next token. @@ -576,23 +586,25 @@ static PyObject *next(PyObject *self) Py_INCREF(Py_None); token = Py_None; } else if(type == (PyObject *) &PyFloat_Type) { - char ascii_buffer[end - start + 1]; - char *ascii_end; - if(PyUnicode_EncodeDecimal(start, end - start, ascii_buffer, NULL)) - return NULL; - token = PyFloat_FromDouble(strtod(ascii_buffer, &ascii_end)); - if(ascii_end == ascii_buffer || *ascii_end != 0) { + wchar_t buffer[end - start + 1]; + wchar_t *buffer_end; + memcpy(buffer, start, (void *) end - (void *) start); + buffer[end - start] = 0; + token = PyFloat_FromDouble(wcstod(buffer, &buffer_end)); + if(buffer_end == buffer || *buffer_end != 0) { /* - * strtod() couldn't convert the token, emulate + * wcstod() couldn't convert the token, emulate * float()'s error message */ Py_XDECREF(token); - PyErr_Format(PyExc_ValueError, "invalid literal for float(): '%s'", ascii_buffer); + token = PyUnicode_FromWideChar(buffer, -1); + PyErr_Format(PyExc_ValueError, "invalid literal for float(): '%U'", token); + Py_DECREF(token); token = NULL; } } else if(type == (PyObject *) &PyUnicode_Type) { - token = PyUnicode_FromUnicode(start, end - start); + token = PyUnicode_FromWideChar(start, end - start); /* FIXME: remove when we require >= 3 */ #if PY_MAJOR_VERSION < 3 } else if(type == (PyObject *) &PyString_Type) { @@ -601,7 +613,29 @@ static PyObject *next(PyObject *self) token = PyInt_FromUnicode(start, end - start, 0); #endif } else if(type == (PyObject *) &PyLong_Type) { - token = PyLong_FromUnicode(start, end - start, 0); + wchar_t buffer[end - start + 1]; + wchar_t *buffer_end; + memcpy(buffer, start, (void *) end - (void *) start); + buffer[end - start] = 0; + /* FIXME: although Python supports arbitrary precision + * integers, this can only handle numbers that fit into a C + * long long. in practice, since we invariably + * interoperate with C codes, that should be sufficient, + * but it's a limitation of the library and should probably + * be fixed */ + token = PyLong_FromLongLong(wcstoll(buffer, &buffer_end, 0)); + if(buffer_end == buffer || *buffer_end != 0) { + /* + * wcstoll() couldn't convert the token, emulate + * long()'s error message + */ + + Py_XDECREF(token); + token = PyUnicode_FromWideChar(buffer, -1); + PyErr_Format(PyExc_ValueError, "invalid literal for long(): '%U'", token); + Py_DECREF(token); + token = NULL; + } } else { token = PyObject_CallFunction(type, "u#", start, end - start); } @@ -679,7 +713,7 @@ static PyObject *attribute_get_data(PyObject *obj, void *data) { ligolw_Tokenizer *tokenizer = (ligolw_Tokenizer *) obj; - return PyUnicode_FromUnicode(tokenizer->pos, tokenizer->length - tokenizer->pos); + return PyUnicode_FromWideChar(tokenizer->pos, tokenizer->length - tokenizer->pos); } -- GitLab
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor