From 60438b168e0faaf3a55845a169fe231db2cc78e0 Mon Sep 17 00:00:00 2001 From: lipengyu Date: Wed, 3 Sep 2025 01:18:05 +0800 Subject: [PATCH] fix CVE-2025-4516 --- backport-CVE-2025-4516.patch | 441 +++++++++++++++++++++++++++++++++++ python3.spec | 9 +- 2 files changed, 449 insertions(+), 1 deletion(-) create mode 100644 backport-CVE-2025-4516.patch diff --git a/backport-CVE-2025-4516.patch b/backport-CVE-2025-4516.patch new file mode 100644 index 0000000..10bf37d --- /dev/null +++ b/backport-CVE-2025-4516.patch @@ -0,0 +1,441 @@ +From 8d35fd1b34935221aff23a1ab69a429dd156be77 Mon Sep 17 00:00:00 2001 +From: Serhiy Storchaka +Date: Mon, 2 Jun 2025 18:58:01 +0300 +Subject: [PATCH] [3.9] gh-133767: Fix use-after-free in the unicode-escape + decoder with an error handler (GH-129648) (GH-133944) (#134346) + +* [3.9] gh-133767: Fix use-after-free in the unicode-escape decoder with an error handler (GH-129648) (GH-133944) + +If the error handler is used, a new bytes object is created to set as +the object attribute of UnicodeDecodeError, and that bytes object then +replaces the original data. A pointer to the decoded data will become invalid +after destroying that temporary bytes object. So we need another way to return +the first invalid escape from _PyUnicode_DecodeUnicodeEscapeInternal(). + +_PyBytes_DecodeEscape() does not have such an issue, because it does not +use the error handlers registry, but it should be changed for compatibility +with _PyUnicode_DecodeUnicodeEscapeInternal(). 
+(cherry picked from commit 9f69a58623bd01349a18ba0c7a9cb1dad6a51e8e) +(cherry picked from commit 6279eb8c076d89d3739a6edb393e43c7929b429d) +(cherry picked from commit a75953b347716fff694aa59a7c7c2489fa50d1f5) +(cherry picked from commit 0c33e5baedf18ebcb04bc41dff7cfc614d5ea5fe) +(cherry picked from commit 8b528cacbbde60504f6ac62784d04889d285f18b) + +Co-authored-by: Serhiy Storchaka +--- + Include/cpython/bytesobject.h | 4 ++ + Include/cpython/unicodeobject.h | 13 ++++++ + Lib/test/test_codeccallbacks.py | 36 ++++++++++++++- + Lib/test/test_codecs.py | 39 ++++++++++++---- + ...-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst | 2 + + Objects/bytesobject.c | 40 ++++++++++++----- + Objects/unicodeobject.c | 45 ++++++++++++++----- + Parser/pegen/parse_string.c | 26 ++++++----- + 8 files changed, 164 insertions(+), 41 deletions(-) + create mode 100644 Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst + +diff --git a/Include/cpython/bytesobject.h b/Include/cpython/bytesobject.h +index f284c5835df..a17a1af907b 100644 +--- a/Include/cpython/bytesobject.h ++++ b/Include/cpython/bytesobject.h +@@ -25,6 +25,10 @@ PyAPI_FUNC(PyObject*) _PyBytes_FromHex( + int use_bytearray); + + /* Helper for PyBytes_DecodeEscape that detects invalid escape chars. */ ++PyAPI_FUNC(PyObject*) _PyBytes_DecodeEscape2(const char *, Py_ssize_t, ++ const char *, ++ int *, const char **); ++// Export for binary compatibility. + PyAPI_FUNC(PyObject *) _PyBytes_DecodeEscape(const char *, Py_ssize_t, + const char *, const char **); + +diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h +index 1b460c9f189..7c0eaf73327 100644 +--- a/Include/cpython/unicodeobject.h ++++ b/Include/cpython/unicodeobject.h +@@ -866,6 +866,19 @@ PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeStateful( + ); + /* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape + chars. 
*/ ++PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal2( ++ const char *string, /* Unicode-Escape encoded string */ ++ Py_ssize_t length, /* size of string */ ++ const char *errors, /* error handling */ ++ Py_ssize_t *consumed, /* bytes consumed */ ++ int *first_invalid_escape_char, /* on return, if not -1, contain the first ++ invalid escaped char (<= 0xff) or invalid ++ octal escape (> 0xff) in string. */ ++ const char **first_invalid_escape_ptr); /* on return, if not NULL, may ++ point to the first invalid escaped ++ char in string. ++ May be NULL if errors is not NULL. */ ++// Export for binary compatibility. + PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal( + const char *string, /* Unicode-Escape encoded string */ + Py_ssize_t length, /* size of string */ +diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py +index 4991330489d..73b63770716 100644 +--- a/Lib/test/test_codeccallbacks.py ++++ b/Lib/test/test_codeccallbacks.py +@@ -1124,7 +1124,7 @@ class CodecCallbackTest(unittest.TestCase): + text = 'abcghi'*n + text.translate(charmap) + +- def test_mutatingdecodehandler(self): ++ def test_mutating_decode_handler(self): + baddata = [ + ("ascii", b"\xff"), + ("utf-7", b"++"), +@@ -1159,6 +1159,40 @@ class CodecCallbackTest(unittest.TestCase): + for (encoding, data) in baddata: + self.assertEqual(data.decode(encoding, "test.mutating"), "\u4242") + ++ def test_mutating_decode_handler_unicode_escape(self): ++ decode = codecs.unicode_escape_decode ++ def mutating(exc): ++ if isinstance(exc, UnicodeDecodeError): ++ r = data.get(exc.object[:exc.end]) ++ if r is not None: ++ exc.object = r[0] + exc.object[exc.end:] ++ return ('\u0404', r[1]) ++ raise AssertionError("don't know how to handle %r" % exc) ++ ++ codecs.register_error('test.mutating2', mutating) ++ data = { ++ br'\x0': (b'\\', 0), ++ br'\x3': (b'xxx\\', 3), ++ br'\x5': (b'x\\', 1), ++ } ++ def check(input, expected, msg): ++ with 
self.assertWarns(DeprecationWarning) as cm: ++ self.assertEqual(decode(input, 'test.mutating2'), (expected, len(input))) ++ self.assertIn(msg, str(cm.warning)) ++ ++ check(br'\x0n\z', '\u0404\n\\z', r"invalid escape sequence '\z'") ++ check(br'\x0z', '\u0404\\z', r"invalid escape sequence '\z'") ++ ++ check(br'\x3n\zr', '\u0404\n\\zr', r"invalid escape sequence '\z'") ++ check(br'\x3zr', '\u0404\\zr', r"invalid escape sequence '\z'") ++ check(br'\x3z5', '\u0404\\z5', r"invalid escape sequence '\z'") ++ check(memoryview(br'\x3z5x')[:-1], '\u0404\\z5', r"invalid escape sequence '\z'") ++ check(memoryview(br'\x3z5xy')[:-2], '\u0404\\z5', r"invalid escape sequence '\z'") ++ ++ check(br'\x5n\z', '\u0404\n\\z', r"invalid escape sequence '\z'") ++ check(br'\x5z', '\u0404\\z', r"invalid escape sequence '\z'") ++ check(memoryview(br'\x5zy')[:-1], '\u0404\\z', r"invalid escape sequence '\z'") ++ + # issue32583 + def test_crashing_decode_handler(self): + # better generating one more character to fill the extra space slot +diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py +index 3520cc00a1a..74250ac3444 100644 +--- a/Lib/test/test_codecs.py ++++ b/Lib/test/test_codecs.py +@@ -1178,20 +1178,32 @@ class EscapeDecodeTest(unittest.TestCase): + check(br"[\501]", b"[A]") + check(br"[\x41]", b"[A]") + check(br"[\x410]", b"[A0]") ++ ++ def test_warnings(self): ++ decode = codecs.escape_decode ++ check = coding_checker(self, decode) + for i in range(97, 123): + b = bytes([i]) + if b not in b'abfnrtvx': +- with self.assertWarns(DeprecationWarning): ++ with self.assertWarnsRegex(DeprecationWarning, ++ r"invalid escape sequence '\\%c'" % i): + check(b"\\" + b, b"\\" + b) +- with self.assertWarns(DeprecationWarning): ++ with self.assertWarnsRegex(DeprecationWarning, ++ r"invalid escape sequence '\\%c'" % (i-32)): + check(b"\\" + b.upper(), b"\\" + b.upper()) +- with self.assertWarns(DeprecationWarning): ++ with self.assertWarnsRegex(DeprecationWarning, ++ r"invalid escape 
sequence '\\8'"): + check(br"\8", b"\\8") + with self.assertWarns(DeprecationWarning): + check(br"\9", b"\\9") +- with self.assertWarns(DeprecationWarning): ++ with self.assertWarnsRegex(DeprecationWarning, ++ r"invalid escape sequence '\\\xfa'") as cm: + check(b"\\\xfa", b"\\\xfa") + ++ with self.assertWarnsRegex(DeprecationWarning, ++ r"invalid escape sequence '\\z'"): ++ self.assertEqual(decode(br'\x\z', 'ignore'), (b'\\z', 4)) ++ + def test_errors(self): + decode = codecs.escape_decode + self.assertRaises(ValueError, decode, br"\x") +@@ -2393,20 +2405,31 @@ class UnicodeEscapeTest(ReadTest, unittest.TestCase): + check(br"[\x410]", "[A0]") + check(br"\u20ac", "\u20ac") + check(br"\U0001d120", "\U0001d120") ++ ++ def test_decode_warnings(self): ++ decode = codecs.unicode_escape_decode ++ check = coding_checker(self, decode) + for i in range(97, 123): + b = bytes([i]) + if b not in b'abfnrtuvx': +- with self.assertWarns(DeprecationWarning): ++ with self.assertWarnsRegex(DeprecationWarning, ++ r"invalid escape sequence '\\%c'" % i): + check(b"\\" + b, "\\" + chr(i)) + if b.upper() not in b'UN': +- with self.assertWarns(DeprecationWarning): ++ with self.assertWarnsRegex(DeprecationWarning, ++ r"invalid escape sequence '\\%c'" % (i-32)): + check(b"\\" + b.upper(), "\\" + chr(i-32)) +- with self.assertWarns(DeprecationWarning): ++ with self.assertWarnsRegex(DeprecationWarning, ++ r"invalid escape sequence '\\8'"): + check(br"\8", "\\8") + with self.assertWarns(DeprecationWarning): + check(br"\9", "\\9") +- with self.assertWarns(DeprecationWarning): ++ with self.assertWarnsRegex(DeprecationWarning, ++ r"invalid escape sequence '\\\xfa'") as cm: + check(b"\\\xfa", "\\\xfa") ++ with self.assertWarnsRegex(DeprecationWarning, ++ r"invalid escape sequence '\\z'"): ++ self.assertEqual(decode(br'\x\z', 'ignore'), ('\\z', 4)) + + def test_decode_errors(self): + decode = codecs.unicode_escape_decode +diff --git 
a/Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst b/Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst +new file mode 100644 +index 00000000000..39d2f1e1a89 +--- /dev/null ++++ b/Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst +@@ -0,0 +1,2 @@ ++Fix use-after-free in the "unicode-escape" decoder with a non-"strict" error ++handler. +diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c +index 25d9814dd6d..f684e2eb336 100644 +--- a/Objects/bytesobject.c ++++ b/Objects/bytesobject.c +@@ -1060,10 +1060,11 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, + } + + /* Unescape a backslash-escaped string. */ +-PyObject *_PyBytes_DecodeEscape(const char *s, ++PyObject *_PyBytes_DecodeEscape2(const char *s, + Py_ssize_t len, + const char *errors, +- const char **first_invalid_escape) ++ int *first_invalid_escape_char, ++ const char **first_invalid_escape_ptr) + { + int c; + char *p; +@@ -1077,7 +1078,8 @@ PyObject *_PyBytes_DecodeEscape(const char *s, + return NULL; + writer.overallocate = 1; + +- *first_invalid_escape = NULL; ++ *first_invalid_escape_char = -1; ++ *first_invalid_escape_ptr = NULL; + + end = s + len; + while (s < end) { +@@ -1152,9 +1154,10 @@ PyObject *_PyBytes_DecodeEscape(const char *s, + break; + + default: +- if (*first_invalid_escape == NULL) { +- *first_invalid_escape = s-1; /* Back up one char, since we've +- already incremented s. */ ++ if (*first_invalid_escape_char == -1) { ++ *first_invalid_escape_char = (unsigned char)s[-1]; ++ /* Back up one char, since we've already incremented s. */ ++ *first_invalid_escape_ptr = s - 1; + } + *p++ = '\\'; + s--; +@@ -1168,21 +1171,36 @@ PyObject *_PyBytes_DecodeEscape(const char *s, + return NULL; + } + ++// Export for binary compatibility. 
++PyObject *_PyBytes_DecodeEscape(const char *s, ++ Py_ssize_t len, ++ const char *errors, ++ const char **first_invalid_escape) ++{ ++ int first_invalid_escape_char; ++ return _PyBytes_DecodeEscape2( ++ s, len, errors, ++ &first_invalid_escape_char, ++ first_invalid_escape); ++} ++ + PyObject *PyBytes_DecodeEscape(const char *s, + Py_ssize_t len, + const char *errors, + Py_ssize_t Py_UNUSED(unicode), + const char *Py_UNUSED(recode_encoding)) + { +- const char* first_invalid_escape; +- PyObject *result = _PyBytes_DecodeEscape(s, len, errors, +- &first_invalid_escape); ++ int first_invalid_escape_char; ++ const char *first_invalid_escape_ptr; ++ PyObject *result = _PyBytes_DecodeEscape2(s, len, errors, ++ &first_invalid_escape_char, ++ &first_invalid_escape_ptr); + if (result == NULL) + return NULL; +- if (first_invalid_escape != NULL) { ++ if (first_invalid_escape_char != -1) { + if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, + "invalid escape sequence '\\%c'", +- (unsigned char)*first_invalid_escape) < 0) { ++ first_invalid_escape_char) < 0) { + Py_DECREF(result); + return NULL; + } +diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c +index bd08b198781..cd1a0130149 100644 +--- a/Objects/unicodeobject.c ++++ b/Objects/unicodeobject.c +@@ -6278,20 +6278,23 @@ PyUnicode_AsUTF16String(PyObject *unicode) + static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL; + + PyObject * +-_PyUnicode_DecodeUnicodeEscapeInternal(const char *s, ++_PyUnicode_DecodeUnicodeEscapeInternal2(const char *s, + Py_ssize_t size, + const char *errors, + Py_ssize_t *consumed, +- const char **first_invalid_escape) ++ int *first_invalid_escape_char, ++ const char **first_invalid_escape_ptr) + { + const char *starts = s; ++ const char *initial_starts = starts; + _PyUnicodeWriter writer; + const char *end; + PyObject *errorHandler = NULL; + PyObject *exc = NULL; + + // so we can remember if we've seen an invalid escape char or not +- *first_invalid_escape = NULL; ++ 
*first_invalid_escape_char = -1; ++ *first_invalid_escape_ptr = NULL; + + if (size == 0) { + if (consumed) { +@@ -6474,9 +6477,12 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s, + goto error; + + default: +- if (*first_invalid_escape == NULL) { +- *first_invalid_escape = s-1; /* Back up one char, since we've +- already incremented s. */ ++ if (*first_invalid_escape_char == -1) { ++ *first_invalid_escape_char = c; ++ if (starts == initial_starts) { ++ /* Back up one char, since we've already incremented s. */ ++ *first_invalid_escape_ptr = s - 1; ++ } + } + WRITE_ASCII_CHAR('\\'); + WRITE_CHAR(c); +@@ -6515,22 +6521,39 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s, + return NULL; + } + ++// Export for binary compatibility. ++PyObject * ++_PyUnicode_DecodeUnicodeEscapeInternal(const char *s, ++ Py_ssize_t size, ++ const char *errors, ++ Py_ssize_t *consumed, ++ const char **first_invalid_escape) ++{ ++ int first_invalid_escape_char; ++ return _PyUnicode_DecodeUnicodeEscapeInternal2( ++ s, size, errors, consumed, ++ &first_invalid_escape_char, ++ first_invalid_escape); ++} ++ + PyObject * + _PyUnicode_DecodeUnicodeEscapeStateful(const char *s, + Py_ssize_t size, + const char *errors, + Py_ssize_t *consumed) + { +- const char *first_invalid_escape; +- PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal(s, size, errors, ++ int first_invalid_escape_char; ++ const char *first_invalid_escape_ptr; ++ PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal2(s, size, errors, + consumed, +- &first_invalid_escape); ++ &first_invalid_escape_char, ++ &first_invalid_escape_ptr); + if (result == NULL) + return NULL; +- if (first_invalid_escape != NULL) { ++ if (first_invalid_escape_char != -1) { + if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, + "invalid escape sequence '\\%c'", +- (unsigned char)*first_invalid_escape) < 0) { ++ first_invalid_escape_char) < 0) { + Py_DECREF(result); + return NULL; + } +diff --git a/Parser/pegen/parse_string.c 
b/Parser/pegen/parse_string.c +index 15a132b4e05..9df1313c103 100644 +--- a/Parser/pegen/parse_string.c ++++ b/Parser/pegen/parse_string.c +@@ -119,12 +119,15 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t) + len = p - buf; + s = buf; + +- const char *first_invalid_escape; +- v = _PyUnicode_DecodeUnicodeEscapeInternal(s, len, NULL, NULL, &first_invalid_escape); +- +- if (v != NULL && first_invalid_escape != NULL) { +- if (warn_invalid_escape_sequence(parser, *first_invalid_escape, t) < 0) { +- /* We have not decref u before because first_invalid_escape points ++ int first_invalid_escape_char; ++ const char *first_invalid_escape_ptr; ++ v = _PyUnicode_DecodeUnicodeEscapeInternal2(s, (Py_ssize_t)len, NULL, NULL, ++ &first_invalid_escape_char, ++ &first_invalid_escape_ptr); ++ ++ if (v != NULL && first_invalid_escape_ptr != NULL) { ++ if (warn_invalid_escape_sequence(parser, *first_invalid_escape_ptr, t) < 0) { ++ /* We have not decref u before because first_invalid_escape_ptr points + inside u. 
*/ + Py_XDECREF(u); + Py_DECREF(v); +@@ -138,14 +141,17 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t) + static PyObject * + decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t) + { +- const char *first_invalid_escape; +- PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, &first_invalid_escape); ++ int first_invalid_escape_char; ++ const char *first_invalid_escape_ptr; ++ PyObject *result = _PyBytes_DecodeEscape2(s, len, NULL, ++ &first_invalid_escape_char, ++ &first_invalid_escape_ptr); + if (result == NULL) { + return NULL; + } + +- if (first_invalid_escape != NULL) { +- if (warn_invalid_escape_sequence(p, *first_invalid_escape, t) < 0) { ++ if (first_invalid_escape_ptr != NULL) { ++ if (warn_invalid_escape_sequence(p, *first_invalid_escape_ptr, t) < 0) { + Py_DECREF(result); + return NULL; + } +-- +2.47.0.windows.2 + diff --git a/python3.spec b/python3.spec index 7fcd354..16e141c 100644 --- a/python3.spec +++ b/python3.spec @@ -3,7 +3,7 @@ Summary: Interpreter of the Python3 programming language URL: https://www.python.org/ Version: 3.9.9 -Release: 39 +Release: 40 License: Python-2.0 %global branchversion 3.9 @@ -128,6 +128,7 @@ Patch6034: backport-CVE-2024-11168-3.9-gh-103848-Adds-checks-to-ensure-that-brac Patch6035: backport-CVE-2025-0938.patch Patch6036: backport-CVE-2025-8194.patch Patch6037: backport-CVE-2025-1795.patch +Patch6038: backport-CVE-2025-4516.patch Patch9000: add-the-sm3-method-for-obtaining-the-salt-value.patch Patch9001: python3-Add-sw64-architecture.patch @@ -838,6 +839,12 @@ export BEP_GTDLIST="$BEP_GTDLIST_TMP" %{_mandir}/*/* %changelog +* Thu Sep 04 2025 lipengyu - 3.9.9-40 +- Type:CVE +- CVE:CVE-2025-4516 +- SUG:NA +- DESC:fix CVE-2025-4516 + * Mon Sep 01 2025 lipengyu - 3.9.9-39 - Type:CVE - CVE:CVE-2025-1795 -- Gitee