From 60438b168e0faaf3a55845a169fe231db2cc78e0 Mon Sep 17 00:00:00 2001
From: lipengyu <lipengyu@kylinos.cn>
Date: Wed, 3 Sep 2025 01:18:05 +0800
Subject: [PATCH] fix CVE-2025-4516

---
 backport-CVE-2025-4516.patch | 441 +++++++++++++++++++++++++++++++++++
 python3.spec                 |   9 +-
 2 files changed, 449 insertions(+), 1 deletion(-)
 create mode 100644 backport-CVE-2025-4516.patch

diff --git a/backport-CVE-2025-4516.patch b/backport-CVE-2025-4516.patch
new file mode 100644
index 0000000..10bf37d
--- /dev/null
+++ b/backport-CVE-2025-4516.patch
@@ -0,0 +1,441 @@
+From 8d35fd1b34935221aff23a1ab69a429dd156be77 Mon Sep 17 00:00:00 2001
+From: Serhiy Storchaka <storchaka@gmail.com>
+Date: Mon, 2 Jun 2025 18:58:01 +0300
+Subject: [PATCH] [3.9] gh-133767: Fix use-after-free in the unicode-escape
+ decoder with an error handler (GH-129648) (GH-133944) (#134346)
+
+* [3.9] gh-133767: Fix use-after-free in the unicode-escape decoder with an error handler (GH-129648) (GH-133944)
+
+If the error handler is used, a new bytes object is created to set as
+the object attribute of UnicodeDecodeError, and that bytes object then
+replaces the original data. A pointer to the decoded data will become invalid
+after destroying that temporary bytes object. So we need another way to return
+the first invalid escape from _PyUnicode_DecodeUnicodeEscapeInternal().
+
+_PyBytes_DecodeEscape() does not have such an issue, because it does not
+use the error handlers registry, but it should be changed for compatibility
+with _PyUnicode_DecodeUnicodeEscapeInternal().
+(cherry picked from commit 9f69a58623bd01349a18ba0c7a9cb1dad6a51e8e)
+(cherry picked from commit 6279eb8c076d89d3739a6edb393e43c7929b429d)
+(cherry picked from commit a75953b347716fff694aa59a7c7c2489fa50d1f5)
+(cherry picked from commit 0c33e5baedf18ebcb04bc41dff7cfc614d5ea5fe)
+(cherry picked from commit 8b528cacbbde60504f6ac62784d04889d285f18b)
+
+Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
+---
+ Include/cpython/bytesobject.h                 |  4 ++
+ Include/cpython/unicodeobject.h               | 13 ++++++
+ Lib/test/test_codeccallbacks.py               | 36 ++++++++++++++-
+ Lib/test/test_codecs.py                       | 39 ++++++++++++----
+ ...-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst |  2 +
+ Objects/bytesobject.c                         | 40 ++++++++++++-----
+ Objects/unicodeobject.c                       | 45 ++++++++++++++-----
+ Parser/pegen/parse_string.c                   | 26 ++++++-----
+ 8 files changed, 164 insertions(+), 41 deletions(-)
+ create mode 100644 Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst
+
+diff --git a/Include/cpython/bytesobject.h b/Include/cpython/bytesobject.h
+index f284c5835df..a17a1af907b 100644
+--- a/Include/cpython/bytesobject.h
++++ b/Include/cpython/bytesobject.h
+@@ -25,6 +25,10 @@ PyAPI_FUNC(PyObject*) _PyBytes_FromHex(
+     int use_bytearray);
+ 
+ /* Helper for PyBytes_DecodeEscape that detects invalid escape chars. */
++PyAPI_FUNC(PyObject*) _PyBytes_DecodeEscape2(const char *, Py_ssize_t,
++                                             const char *,
++                                             int *, const char **);
++// Export for binary compatibility.
+ PyAPI_FUNC(PyObject *) _PyBytes_DecodeEscape(const char *, Py_ssize_t,
+                                              const char *, const char **);
+ 
+diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h
+index 1b460c9f189..7c0eaf73327 100644
+--- a/Include/cpython/unicodeobject.h
++++ b/Include/cpython/unicodeobject.h
+@@ -866,6 +866,19 @@ PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeStateful(
+ );
+ /* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
+    chars. */
++PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal2(
++    const char *string,     /* Unicode-Escape encoded string */
++    Py_ssize_t length,      /* size of string */
++    const char *errors,     /* error handling */
++    Py_ssize_t *consumed,   /* bytes consumed */
++    int *first_invalid_escape_char, /* on return, if not -1, contain the first
++                                       invalid escaped char (<= 0xff) or invalid
++                                       octal escape (> 0xff) in string. */
++    const char **first_invalid_escape_ptr); /* on return, if not NULL, may
++                                        point to the first invalid escaped
++                                        char in string.
++                                        May be NULL if errors is not NULL. */
++// Export for binary compatibility.
+ PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal(
+         const char *string,     /* Unicode-Escape encoded string */
+         Py_ssize_t length,      /* size of string */
+diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py
+index 4991330489d..73b63770716 100644
+--- a/Lib/test/test_codeccallbacks.py
++++ b/Lib/test/test_codeccallbacks.py
+@@ -1124,7 +1124,7 @@ class CodecCallbackTest(unittest.TestCase):
+             text = 'abc<def>ghi'*n
+             text.translate(charmap)
+ 
+-    def test_mutatingdecodehandler(self):
++    def test_mutating_decode_handler(self):
+         baddata = [
+             ("ascii", b"\xff"),
+             ("utf-7", b"++"),
+@@ -1159,6 +1159,40 @@ class CodecCallbackTest(unittest.TestCase):
+         for (encoding, data) in baddata:
+             self.assertEqual(data.decode(encoding, "test.mutating"), "\u4242")
+ 
++    def test_mutating_decode_handler_unicode_escape(self):
++        decode = codecs.unicode_escape_decode
++        def mutating(exc):
++            if isinstance(exc, UnicodeDecodeError):
++                r = data.get(exc.object[:exc.end])
++                if r is not None:
++                    exc.object = r[0] + exc.object[exc.end:]
++                    return ('\u0404', r[1])
++            raise AssertionError("don't know how to handle %r" % exc)
++
++        codecs.register_error('test.mutating2', mutating)
++        data = {
++            br'\x0': (b'\\', 0),
++            br'\x3': (b'xxx\\', 3),
++            br'\x5': (b'x\\', 1),
++        }
++        def check(input, expected, msg):
++            with self.assertWarns(DeprecationWarning) as cm:
++                self.assertEqual(decode(input, 'test.mutating2'), (expected, len(input)))
++            self.assertIn(msg, str(cm.warning))
++
++        check(br'\x0n\z', '\u0404\n\\z', r"invalid escape sequence '\z'")
++        check(br'\x0z', '\u0404\\z', r"invalid escape sequence '\z'")
++
++        check(br'\x3n\zr', '\u0404\n\\zr', r"invalid escape sequence '\z'")
++        check(br'\x3zr', '\u0404\\zr', r"invalid escape sequence '\z'")
++        check(br'\x3z5', '\u0404\\z5', r"invalid escape sequence '\z'")
++        check(memoryview(br'\x3z5x')[:-1], '\u0404\\z5', r"invalid escape sequence '\z'")
++        check(memoryview(br'\x3z5xy')[:-2], '\u0404\\z5', r"invalid escape sequence '\z'")
++
++        check(br'\x5n\z', '\u0404\n\\z', r"invalid escape sequence '\z'")
++        check(br'\x5z', '\u0404\\z', r"invalid escape sequence '\z'")
++        check(memoryview(br'\x5zy')[:-1], '\u0404\\z', r"invalid escape sequence '\z'")
++
+     # issue32583
+     def test_crashing_decode_handler(self):
+         # better generating one more character to fill the extra space slot
+diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
+index 3520cc00a1a..74250ac3444 100644
+--- a/Lib/test/test_codecs.py
++++ b/Lib/test/test_codecs.py
+@@ -1178,20 +1178,32 @@ class EscapeDecodeTest(unittest.TestCase):
+         check(br"[\501]", b"[A]")
+         check(br"[\x41]", b"[A]")
+         check(br"[\x410]", b"[A0]")
++
++    def test_warnings(self):
++        decode = codecs.escape_decode
++        check = coding_checker(self, decode)
+         for i in range(97, 123):
+             b = bytes([i])
+             if b not in b'abfnrtvx':
+-                with self.assertWarns(DeprecationWarning):
++                with self.assertWarnsRegex(DeprecationWarning,
++                        r"invalid escape sequence '\\%c'" % i):
+                     check(b"\\" + b, b"\\" + b)
+-            with self.assertWarns(DeprecationWarning):
++            with self.assertWarnsRegex(DeprecationWarning,
++                    r"invalid escape sequence '\\%c'" % (i-32)):
+                 check(b"\\" + b.upper(), b"\\" + b.upper())
+-        with self.assertWarns(DeprecationWarning):
++        with self.assertWarnsRegex(DeprecationWarning,
++                r"invalid escape sequence '\\8'"):
+             check(br"\8", b"\\8")
+         with self.assertWarns(DeprecationWarning):
+             check(br"\9", b"\\9")
+-        with self.assertWarns(DeprecationWarning):
++        with self.assertWarnsRegex(DeprecationWarning,
++                r"invalid escape sequence '\\\xfa'") as cm:
+             check(b"\\\xfa", b"\\\xfa")
+ 
++        with self.assertWarnsRegex(DeprecationWarning,
++                r"invalid escape sequence '\\z'"):
++            self.assertEqual(decode(br'\x\z', 'ignore'), (b'\\z', 4))
++
+     def test_errors(self):
+         decode = codecs.escape_decode
+         self.assertRaises(ValueError, decode, br"\x")
+@@ -2393,20 +2405,31 @@ class UnicodeEscapeTest(ReadTest, unittest.TestCase):
+         check(br"[\x410]", "[A0]")
+         check(br"\u20ac", "\u20ac")
+         check(br"\U0001d120", "\U0001d120")
++
++    def test_decode_warnings(self):
++        decode = codecs.unicode_escape_decode
++        check = coding_checker(self, decode)
+         for i in range(97, 123):
+             b = bytes([i])
+             if b not in b'abfnrtuvx':
+-                with self.assertWarns(DeprecationWarning):
++                with self.assertWarnsRegex(DeprecationWarning,
++                        r"invalid escape sequence '\\%c'" % i):
+                     check(b"\\" + b, "\\" + chr(i))
+             if b.upper() not in b'UN':
+-                with self.assertWarns(DeprecationWarning):
++                with self.assertWarnsRegex(DeprecationWarning,
++                        r"invalid escape sequence '\\%c'" % (i-32)):
+                     check(b"\\" + b.upper(), "\\" + chr(i-32))
+-        with self.assertWarns(DeprecationWarning):
++        with self.assertWarnsRegex(DeprecationWarning,
++                r"invalid escape sequence '\\8'"):
+             check(br"\8", "\\8")
+         with self.assertWarns(DeprecationWarning):
+             check(br"\9", "\\9")
+-        with self.assertWarns(DeprecationWarning):
++        with self.assertWarnsRegex(DeprecationWarning,
++                r"invalid escape sequence '\\\xfa'") as cm:
+             check(b"\\\xfa", "\\\xfa")
++        with self.assertWarnsRegex(DeprecationWarning,
++                r"invalid escape sequence '\\z'"):
++            self.assertEqual(decode(br'\x\z', 'ignore'), ('\\z', 4))
+ 
+     def test_decode_errors(self):
+         decode = codecs.unicode_escape_decode
+diff --git a/Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst b/Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst
+new file mode 100644
+index 00000000000..39d2f1e1a89
+--- /dev/null
++++ b/Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst
+@@ -0,0 +1,2 @@
++Fix use-after-free in the "unicode-escape" decoder with a non-"strict" error
++handler.
+diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
+index 25d9814dd6d..f684e2eb336 100644
+--- a/Objects/bytesobject.c
++++ b/Objects/bytesobject.c
+@@ -1060,10 +1060,11 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
+ }
+ 
+ /* Unescape a backslash-escaped string. */
+-PyObject *_PyBytes_DecodeEscape(const char *s,
++PyObject *_PyBytes_DecodeEscape2(const char *s,
+                                 Py_ssize_t len,
+                                 const char *errors,
+-                                const char **first_invalid_escape)
++                                int *first_invalid_escape_char,
++                                const char **first_invalid_escape_ptr)
+ {
+     int c;
+     char *p;
+@@ -1077,7 +1078,8 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
+         return NULL;
+     writer.overallocate = 1;
+ 
+-    *first_invalid_escape = NULL;
++    *first_invalid_escape_char = -1;
++    *first_invalid_escape_ptr = NULL;
+ 
+     end = s + len;
+     while (s < end) {
+@@ -1152,9 +1154,10 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
+             break;
+ 
+         default:
+-            if (*first_invalid_escape == NULL) {
+-                *first_invalid_escape = s-1; /* Back up one char, since we've
+-                                                already incremented s. */
++            if (*first_invalid_escape_char == -1) {
++                *first_invalid_escape_char = (unsigned char)s[-1];
++                /* Back up one char, since we've already incremented s. */
++                *first_invalid_escape_ptr = s - 1;
+             }
+             *p++ = '\\';
+             s--;
+@@ -1168,21 +1171,36 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
+     return NULL;
+ }
+ 
++// Export for binary compatibility.
++PyObject *_PyBytes_DecodeEscape(const char *s,
++                                Py_ssize_t len,
++                                const char *errors,
++                                const char **first_invalid_escape)
++{
++    int first_invalid_escape_char;
++    return _PyBytes_DecodeEscape2(
++            s, len, errors,
++            &first_invalid_escape_char,
++            first_invalid_escape);
++}
++
+ PyObject *PyBytes_DecodeEscape(const char *s,
+                                 Py_ssize_t len,
+                                 const char *errors,
+                                 Py_ssize_t Py_UNUSED(unicode),
+                                 const char *Py_UNUSED(recode_encoding))
+ {
+-    const char* first_invalid_escape;
+-    PyObject *result = _PyBytes_DecodeEscape(s, len, errors,
+-                                             &first_invalid_escape);
++    int first_invalid_escape_char;
++    const char *first_invalid_escape_ptr;
++    PyObject *result = _PyBytes_DecodeEscape2(s, len, errors,
++                                             &first_invalid_escape_char,
++                                             &first_invalid_escape_ptr);
+     if (result == NULL)
+         return NULL;
+-    if (first_invalid_escape != NULL) {
++    if (first_invalid_escape_char != -1) {
+         if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
+                              "invalid escape sequence '\\%c'",
+-                             (unsigned char)*first_invalid_escape) < 0) {
++                             first_invalid_escape_char) < 0) {
+             Py_DECREF(result);
+             return NULL;
+         }
+diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
+index bd08b198781..cd1a0130149 100644
+--- a/Objects/unicodeobject.c
++++ b/Objects/unicodeobject.c
+@@ -6278,20 +6278,23 @@ PyUnicode_AsUTF16String(PyObject *unicode)
+ static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL;
+ 
+ PyObject *
+-_PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
++_PyUnicode_DecodeUnicodeEscapeInternal2(const char *s,
+                                Py_ssize_t size,
+                                const char *errors,
+                                Py_ssize_t *consumed,
+-                               const char **first_invalid_escape)
++                               int *first_invalid_escape_char,
++                               const char **first_invalid_escape_ptr)
+ {
+     const char *starts = s;
++    const char *initial_starts = starts;
+     _PyUnicodeWriter writer;
+     const char *end;
+     PyObject *errorHandler = NULL;
+     PyObject *exc = NULL;
+ 
+     // so we can remember if we've seen an invalid escape char or not
+-    *first_invalid_escape = NULL;
++    *first_invalid_escape_char = -1;
++    *first_invalid_escape_ptr = NULL;
+ 
+     if (size == 0) {
+         if (consumed) {
+@@ -6474,9 +6477,12 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
+             goto error;
+ 
+         default:
+-            if (*first_invalid_escape == NULL) {
+-                *first_invalid_escape = s-1; /* Back up one char, since we've
+-                                                already incremented s. */
++            if (*first_invalid_escape_char == -1) {
++                *first_invalid_escape_char = c;
++                if (starts == initial_starts) {
++                    /* Back up one char, since we've already incremented s. */
++                    *first_invalid_escape_ptr = s - 1;
++                }
+             }
+             WRITE_ASCII_CHAR('\\');
+             WRITE_CHAR(c);
+@@ -6515,22 +6521,39 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
+     return NULL;
+ }
+ 
++// Export for binary compatibility.
++PyObject *
++_PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
++                               Py_ssize_t size,
++                               const char *errors,
++                               Py_ssize_t *consumed,
++                               const char **first_invalid_escape)
++{
++    int first_invalid_escape_char;
++    return _PyUnicode_DecodeUnicodeEscapeInternal2(
++            s, size, errors, consumed,
++            &first_invalid_escape_char,
++            first_invalid_escape);
++}
++
+ PyObject *
+ _PyUnicode_DecodeUnicodeEscapeStateful(const char *s,
+                               Py_ssize_t size,
+                               const char *errors,
+                               Py_ssize_t *consumed)
+ {
+-    const char *first_invalid_escape;
+-    PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal(s, size, errors,
++    int first_invalid_escape_char;
++    const char *first_invalid_escape_ptr;
++    PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal2(s, size, errors,
+                                                       consumed,
+-                                                      &first_invalid_escape);
++                                                      &first_invalid_escape_char,
++                                                      &first_invalid_escape_ptr);
+     if (result == NULL)
+         return NULL;
+-    if (first_invalid_escape != NULL) {
++    if (first_invalid_escape_char != -1) {
+         if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
+                              "invalid escape sequence '\\%c'",
+-                             (unsigned char)*first_invalid_escape) < 0) {
++                             first_invalid_escape_char) < 0) {
+             Py_DECREF(result);
+             return NULL;
+         }
+diff --git a/Parser/pegen/parse_string.c b/Parser/pegen/parse_string.c
+index 15a132b4e05..9df1313c103 100644
+--- a/Parser/pegen/parse_string.c
++++ b/Parser/pegen/parse_string.c
+@@ -119,12 +119,15 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t)
+     len = p - buf;
+     s = buf;
+ 
+-    const char *first_invalid_escape;
+-    v = _PyUnicode_DecodeUnicodeEscapeInternal(s, len, NULL, NULL, &first_invalid_escape);
+-
+-    if (v != NULL && first_invalid_escape != NULL) {
+-        if (warn_invalid_escape_sequence(parser, *first_invalid_escape, t) < 0) {
+-            /* We have not decref u before because first_invalid_escape points
++    int first_invalid_escape_char;
++    const char *first_invalid_escape_ptr;
++    v = _PyUnicode_DecodeUnicodeEscapeInternal2(s, (Py_ssize_t)len, NULL, NULL,
++                                                &first_invalid_escape_char,
++                                                &first_invalid_escape_ptr);
++
++    if (v != NULL && first_invalid_escape_ptr != NULL) {
++        if (warn_invalid_escape_sequence(parser, *first_invalid_escape_ptr, t) < 0) {
++            /* We have not decref u before because first_invalid_escape_ptr points
+                inside u. */
+             Py_XDECREF(u);
+             Py_DECREF(v);
+@@ -138,14 +141,17 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t)
+ static PyObject *
+ decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t)
+ {
+-    const char *first_invalid_escape;
+-    PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, &first_invalid_escape);
++    int first_invalid_escape_char;
++    const char *first_invalid_escape_ptr;
++    PyObject *result = _PyBytes_DecodeEscape2(s, len, NULL,
++                                              &first_invalid_escape_char,
++                                              &first_invalid_escape_ptr);
+     if (result == NULL) {
+         return NULL;
+     }
+ 
+-    if (first_invalid_escape != NULL) {
+-        if (warn_invalid_escape_sequence(p, *first_invalid_escape, t) < 0) {
++    if (first_invalid_escape_ptr != NULL) {
++        if (warn_invalid_escape_sequence(p, *first_invalid_escape_ptr, t) < 0) {
+             Py_DECREF(result);
+             return NULL;
+         }
+-- 
+2.47.0.windows.2
+
diff --git a/python3.spec b/python3.spec
index 7fcd354..16e141c 100644
--- a/python3.spec
+++ b/python3.spec
@@ -3,7 +3,7 @@ Summary: Interpreter of the Python3 programming language
 URL: https://www.python.org/
 
 Version: 3.9.9
-Release: 39
+Release: 40
 License: Python-2.0
 
 %global branchversion 3.9
@@ -128,6 +128,7 @@ Patch6034: backport-CVE-2024-11168-3.9-gh-103848-Adds-checks-to-ensure-that-brac
 Patch6035: backport-CVE-2025-0938.patch
 Patch6036: backport-CVE-2025-8194.patch
 Patch6037: backport-CVE-2025-1795.patch
+Patch6038: backport-CVE-2025-4516.patch
 
 Patch9000: add-the-sm3-method-for-obtaining-the-salt-value.patch
 Patch9001: python3-Add-sw64-architecture.patch
@@ -838,6 +839,12 @@ export BEP_GTDLIST="$BEP_GTDLIST_TMP"
 %{_mandir}/*/*
 
 %changelog
+* Thu Sep 04 2025 lipengyu <lipengyu@kylinos.cn> - 3.9.9-40
+- Type:CVE
+- CVE:CVE-2025-4516
+- SUG:NA
+- DESC:fix CVE-2025-4516
+
 * Mon Sep 01 2025 lipengyu <lipengyu@kylinos.cn> - 3.9.9-39
 - Type:CVE
 - CVE:CVE-2025-1795
-- 
Gitee