From 5a39b9e05d60ec51e1e1b9edc213b5aaf98dce84 Mon Sep 17 00:00:00 2001
From: PshySimon
Date: Fri, 4 Jul 2025 17:35:38 +0800
Subject: [PATCH] fix CVE-2025-52566

---
 backport-CVE-2025-52566.patch | 61 +++++++++++++++++++++++++++++++++++
 llama.cpp.spec                |  6 +++-
 2 files changed, 66 insertions(+), 1 deletion(-)
 create mode 100644 backport-CVE-2025-52566.patch

diff --git a/backport-CVE-2025-52566.patch b/backport-CVE-2025-52566.patch
new file mode 100644
index 0000000..6a11a3a
--- /dev/null
+++ b/backport-CVE-2025-52566.patch
@@ -0,0 +1,61 @@
+From 5084d9fc8b876172678ce3d3ba81223e7934be4b Mon Sep 17 00:00:00 2001
+From: Ruikai Peng
+Date: Fri, 20 Jun 2025 22:13:06 +0800
+Subject: [PATCH] vocab : prevent tokenizer overflow (#14301)
+
+* vocab : prevent stack overflow in tokenize
+
+* vocab : return error instead of aborting on oversized token count
+
+* vocab : INT32_MIN from llama_tokenize on overflow
+---
+ common/common.cpp   | 3 +++
+ include/llama.h     | 1 +
+ src/llama-vocab.cpp | 4 ++++
+ 3 files changed, 8 insertions(+)
+
+diff --git a/common/common.cpp b/common/common.cpp
+index 6143516..c139773 100644
+--- a/common/common.cpp
++++ b/common/common.cpp
+@@ -1584,6 +1584,9 @@ std::vector<llama_token> common_tokenize(
+     int n_tokens = text.length() + 2 * add_special;
+     std::vector<llama_token> result(n_tokens);
+     n_tokens = llama_tokenize(model, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
++    if (n_tokens == std::numeric_limits<int32_t>::min()) {
++        throw std::runtime_error("Tokenization failed: input text too large, tokenization result exceeds int32_t limit");
++    }
+     if (n_tokens < 0) {
+         result.resize(-n_tokens);
+         int check = llama_tokenize(model, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
+diff --git a/include/llama.h b/include/llama.h
+index 36945cd..50a1ca3 100644
+--- a/include/llama.h
++++ b/include/llama.h
+@@ -929,6 +929,7 @@ extern "C" {
+     /// @param tokens The tokens pointer must be large enough to hold the resulting tokens.
+     /// @return Returns the number of tokens on success, no more than n_tokens_max
+     /// @return Returns a negative number on failure - the number of tokens that would have been returned
++    /// @return Returns INT32_MIN on overflow (e.g., tokenization result size exceeds int32_t limit)
+     /// @param add_special Allow to add BOS and EOS tokens if model is configured to do so.
+     /// @param parse_special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated
+     ///                      as plaintext. Does not insert a leading space.
+diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
+index 6974a33..497d780 100644
+--- a/src/llama-vocab.cpp
++++ b/src/llama-vocab.cpp
+@@ -1744,6 +1744,10 @@ int32_t llama_tokenize_impl(
+         bool add_special,
+         bool parse_special) {
+     auto res = llama_tokenize_internal(vocab, std::string(text, text_len), add_special, parse_special);
++    if (res.size() >= static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
++        LLAMA_LOG_ERROR("%s: tokenization result size %zu exceeds int32_t limit\n", __func__, res.size());
++        return std::numeric_limits<int32_t>::min();
++    }
+     if (n_tokens_max < (int) res.size()) {
+         // LLAMA_LOG_ERROR("%s: too many tokens\n", __func__);
+         return -((int) res.size());
+-- 
+2.43.0
+
+
diff --git a/llama.cpp.spec b/llama.cpp.spec
index 5d3691b..b169596 100644
--- a/llama.cpp.spec
+++ b/llama.cpp.spec
@@ -3,7 +3,7 @@
 
 Name: llama.cpp
 Version: 20241210
-Release: 2
+Release: 3
 License: MIT
 Summary: Port of English lagre model LLaMA implemented based on C/C++
 
@@ -11,6 +11,7 @@ URL: https://github.com/ggerganov/llama.cpp
 Source0: https://github.com/ggerganov/llama.cpp/archive/refs/tags/%{llama_commitid}.tar.gz
 
 Patch001: backport-CVE-2025-49847.patch
+Patch002: backport-CVE-2025-52566.patch
 
 BuildRequires: gcc,gcc-c++,cmake
 
@@ -55,6 +56,9 @@ it can be used for model dialogue based on local laptops.
 %{_exec_prefix}/lib/pkgconfig/llama.pc
 
 %changelog
+* Fri Jul 4 2025 PshySimon - 20241210-3
+- fix CVE-2025-52566
+
 * Wed Jul 2 2025 PshySimon - 20241210-2
 - fix CVE-2025-49847
 
-- 
Gitee
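Note on the API change carried by this backport: llama_tokenize() now has three possible outcomes instead of two. It still returns a non-negative token count on success and a negative count (the required buffer size, negated) when the caller's buffer is too small, and it additionally returns INT32_MIN when the tokenization result itself would overflow int32_t. The following is a minimal caller-side sketch, not part of the patch, showing how an application built against the patched library might handle all three cases; the helper name tokenize_checked and the already-loaded model pointer are assumptions for illustration, and the retry logic mirrors the common_tokenize() pattern shown in the diff above.

#include <limits>
#include <stdexcept>
#include <string>
#include <vector>

#include "llama.h"

// Hypothetical helper mirroring common_tokenize(); `model` is an already-loaded llama_model *.
static std::vector<llama_token> tokenize_checked(const llama_model * model,
                                                 const std::string & text,
                                                 bool add_special,
                                                 bool parse_special) {
    // Initial guess: roughly one token per byte, plus room for BOS/EOS.
    std::vector<llama_token> result(text.length() + 2 * add_special);

    int32_t n = llama_tokenize(model, text.data(), (int32_t) text.length(),
                               result.data(), (int32_t) result.size(),
                               add_special, parse_special);

    if (n == std::numeric_limits<int32_t>::min()) {
        // New error case introduced by the CVE-2025-52566 fix:
        // the token count would overflow int32_t.
        throw std::runtime_error("tokenization result exceeds int32_t limit");
    }
    if (n < 0) {
        // Pre-existing convention: buffer too small, -n is the required size; retry once.
        result.resize(-n);
        n = llama_tokenize(model, text.data(), (int32_t) text.length(),
                           result.data(), (int32_t) result.size(),
                           add_special, parse_special);
    }
    result.resize(n);
    return result;
}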