From 5a39b9e05d60ec51e1e1b9edc213b5aaf98dce84 Mon Sep 17 00:00:00 2001
From: PshySimon
Date: Fri, 4 Jul 2025 17:35:38 +0800
Subject: [PATCH] fix CVE-2025-52566

---
 backport-CVE-2025-52566.patch | 61 +++++++++++++++++++++++++++++++++++
 llama.cpp.spec                |  6 +++-
 2 files changed, 66 insertions(+), 1 deletion(-)
 create mode 100644 backport-CVE-2025-52566.patch

diff --git a/backport-CVE-2025-52566.patch b/backport-CVE-2025-52566.patch
new file mode 100644
index 0000000..6a11a3a
--- /dev/null
+++ b/backport-CVE-2025-52566.patch
@@ -0,0 +1,61 @@
+From 5084d9fc8b876172678ce3d3ba81223e7934be4b Mon Sep 17 00:00:00 2001
+From: Ruikai Peng
+Date: Fri, 20 Jun 2025 22:13:06 +0800
+Subject: [PATCH] vocab : prevent tokenizer overflow (#14301)
+
+* vocab : prevent stack overflow in tokenize
+
+* vocab : return error instead of aborting on oversized token count
+
+* vocab : INT32_MIN from llama_tokenize on overflow
+---
+ common/common.cpp   | 3 +++
+ include/llama.h     | 1 +
+ src/llama-vocab.cpp | 4 ++++
+ 3 files changed, 8 insertions(+)
+
+diff --git a/common/common.cpp b/common/common.cpp
+index 6143516..c139773 100644
+--- a/common/common.cpp
++++ b/common/common.cpp
+@@ -1584,6 +1584,9 @@ std::vector<llama_token> common_tokenize(
+     int n_tokens = text.length() + 2 * add_special;
+     std::vector<llama_token> result(n_tokens);
+     n_tokens = llama_tokenize(model, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
++    if (n_tokens == std::numeric_limits<int32_t>::min()) {
++        throw std::runtime_error("Tokenization failed: input text too large, tokenization result exceeds int32_t limit");
++    }
+     if (n_tokens < 0) {
+         result.resize(-n_tokens);
+         int check = llama_tokenize(model, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
+diff --git a/include/llama.h b/include/llama.h
+index 36945cd..50a1ca3 100644
+--- a/include/llama.h
++++ b/include/llama.h
+@@ -929,6 +929,7 @@ extern "C" {
+     /// @param tokens The tokens pointer must be large enough to hold the resulting tokens.
+     /// @return Returns the number of tokens on success, no more than n_tokens_max
+     /// @return Returns a negative number on failure - the number of tokens that would have been returned
++    /// @return Returns INT32_MIN on overflow (e.g., tokenization result size exceeds int32_t limit)
+     /// @param add_special Allow to add BOS and EOS tokens if model is configured to do so.
+     /// @param parse_special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated
+     ///                      as plaintext. Does not insert a leading space.
+diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
+index 6974a33..497d780 100644
+--- a/src/llama-vocab.cpp
++++ b/src/llama-vocab.cpp
+@@ -1744,6 +1744,10 @@ int32_t llama_tokenize_impl(
+         bool add_special,
+         bool parse_special) {
+     auto res = llama_tokenize_internal(vocab, std::string(text, text_len), add_special, parse_special);
++    if (res.size() >= static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
++        LLAMA_LOG_ERROR("%s: tokenization result size %zu exceeds int32_t limit\n", __func__, res.size());
++        return std::numeric_limits<int32_t>::min();
++    }
+     if (n_tokens_max < (int) res.size()) {
+         // LLAMA_LOG_ERROR("%s: too many tokens\n", __func__);
+         return -((int) res.size());
+-- 
+2.43.0
+
+
diff --git a/llama.cpp.spec b/llama.cpp.spec
index 5d3691b..b169596 100644
--- a/llama.cpp.spec
+++ b/llama.cpp.spec
@@ -3,7 +3,7 @@
 
 Name: llama.cpp
 Version: 20241210
-Release: 2
+Release: 3
 License: MIT
 Summary: Port of English lagre model LLaMA implemented based on C/C++
 
@@ -11,6 +11,7 @@ URL: https://github.com/ggerganov/llama.cpp
 Source0: https://github.com/ggerganov/llama.cpp/archive/refs/tags/%{llama_commitid}.tar.gz
 
 Patch001: backport-CVE-2025-49847.patch
+Patch002: backport-CVE-2025-52566.patch
 
 BuildRequires: gcc,gcc-c++,cmake
 
@@ -55,6 +56,9 @@ it can be used for model dialogue based on local laptops.
 %{_exec_prefix}/lib/pkgconfig/llama.pc
 
 %changelog
+* Fri Jul 4 2025 PshySimon - 20241210-3
+- fix CVE-2025-52566
+
 * Wed Jul 2 2025 PshySimon - 20241210-2
 - fix CVE-2025-49847
 
-- 
Gitee
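Note on the API change carried by this backport: llama_tokenize() now has three possible outcomes instead of two. It still returns a non-negative token count on success and a negative count (the required buffer size, negated) when the caller's buffer is too small, and it additionally returns INT32_MIN when the tokenization result itself would overflow int32_t. The following is a minimal caller-side sketch, not part of the patch, showing how an application built against the patched library might handle all three cases; the helper name tokenize_checked and the already-loaded model pointer are assumptions for illustration, and the retry logic mirrors the common_tokenize() pattern shown in the diff above.

#include <limits>
#include <stdexcept>
#include <string>
#include <vector>

#include "llama.h"

// Hypothetical helper mirroring common_tokenize(); `model` is an already-loaded llama_model *.
static std::vector<llama_token> tokenize_checked(const llama_model * model,
                                                 const std::string & text,
                                                 bool add_special,
                                                 bool parse_special) {
    // Initial guess: roughly one token per byte, plus room for BOS/EOS.
    std::vector<llama_token> result(text.length() + 2 * add_special);

    int32_t n = llama_tokenize(model, text.data(), (int32_t) text.length(),
                               result.data(), (int32_t) result.size(),
                               add_special, parse_special);

    if (n == std::numeric_limits<int32_t>::min()) {
        // New error case introduced by the CVE-2025-52566 fix:
        // the token count would overflow int32_t.
        throw std::runtime_error("tokenization result exceeds int32_t limit");
    }
    if (n < 0) {
        // Pre-existing convention: buffer too small, -n is the required size; retry once.
        result.resize(-n);
        n = llama_tokenize(model, text.data(), (int32_t) text.length(),
                           result.data(), (int32_t) result.size(),
                           add_special, parse_special);
    }
    result.resize(n);
    return result;
}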