diff --git a/backport-CVE-2025-52566.patch b/backport-CVE-2025-52566.patch
new file mode 100644
index 0000000000000000000000000000000000000000..d3cc419ad6f167f309793dc9acf02bebb9319e06
--- /dev/null
+++ b/backport-CVE-2025-52566.patch
@@ -0,0 +1,51 @@
+From e4e28809e4b48d41e380ee326474f56819855021 Mon Sep 17 00:00:00 2001
+From: Ruikai Peng
+Date: Fri, 20 Jun 2025 22:13:06 +0800
+Subject: [PATCH] vocab : prevent tokenizer overflow (#14301)
+
+* vocab : prevent stack overflow in tokenize
+
+* vocab : return error instead of aborting on oversized token count
+
+* vocab : INT32_MIN from llama_tokenize on overflow
+---
+ examples/train-text-from-scratch/train-text-from-scratch.cpp | 5 +++++
+ llama.cpp                                                    | 5 +++++
+ 2 files changed, 10 insertions(+)
+
+diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp
+index 54dc2be..f1867ea 100644
+--- a/examples/train-text-from-scratch/train-text-from-scratch.cpp
++++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp
+@@ -2191,6 +2191,11 @@ int tokenize_file(struct llama_context * lctx, const char * filename, std::vecto
+     out.resize(buf.size());
+ 
+     int n_tokens = llama_tokenize(lctx, buf.data(), out.data(), buf.size(), false);
++
++    if (n_tokens == std::numeric_limits<int32_t>::min()) {
++        throw std::runtime_error("Tokenization failed: input text too large, tokenization result exceeds int32_t limit");
++    }
++
+     if (n_tokens >= 0) {
+         out.resize(n_tokens);
+     }
+diff --git a/llama.cpp b/llama.cpp
+index c8ab313..cfa908f 100644
+--- a/llama.cpp
++++ b/llama.cpp
+@@ -4134,6 +4134,11 @@ int llama_tokenize_with_model(
+                         bool   add_bos) {
+     auto res = llama_tokenize(model->vocab, text, add_bos);
+ 
++    if (res.size() >= static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
++        LLAMA_LOG_ERROR("%s: tokenization result size %zu exceeds int32_t limit\n", __func__, res.size());
++        return std::numeric_limits<int32_t>::min();
++    }
++
+     if (n_max_tokens < (int) res.size()) {
+         LLAMA_LOG_ERROR("%s: too many tokens\n", __func__);
+         return -((int) res.size());
+--
+2.33.0
+
+
diff --git a/llama.cpp.spec b/llama.cpp.spec
index 19552ecbbab08e47308ead4de885eb5238ea174c..c057fc2423e9e60e6659e782b775b31beb9bc8cc 100644
--- a/llama.cpp.spec
+++ b/llama.cpp.spec
@@ -3,13 +3,14 @@
 
 Name:           llama.cpp
 Version:        20230815
-Release:        4
+Release:        5
 License:        MIT
 Summary:        Port of English lagre model LLaMA implemented based on C/C++
 URL:            https://github.com/ggerganov/llama.cpp
 Source0:        https://github.com/ggerganov/llama.cpp/archive/refs/tags/%{llama_commitid}.tar.gz
 
-Patch0:         add-loongarch64-support.patch
+Patch0:         add-loongarch64-support.patch
+Patch1:         backport-CVE-2025-52566.patch
 
 BuildRequires:  gcc,gcc-c++,cmake
 
@@ -40,6 +41,9 @@ popd
 %{_libdir}/libembdinput.a
 
 %changelog
+* Thu Jul 10 2025 PshySimon - 20230815-5
+- fix CVE-2025-52566
+
 * Tue May 14 2024 wangshuo - 20230815-4
 - add loongarch64 support
 
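
Editor's note (not part of the patch): after this backport, llama_tokenize returns std::numeric_limits<int32_t>::min() (INT32_MIN) instead of aborting when the token count would overflow int32_t, while other negative returns keep their old "buffer too small" meaning. Below is a minimal caller-side sketch of that error contract against the 2023-era C API used in the hunks above (llama_tokenize(ctx, text, tokens, n_max_tokens, add_bos)); the helper name tokenize_or_throw is hypothetical.

    // Minimal sketch of caller-side handling for the patched error contract.
    // Assumes the 2023-era llama.cpp API seen in the hunks above; the helper
    // name tokenize_or_throw is hypothetical.
    #include <cstdint>
    #include <limits>
    #include <stdexcept>
    #include <string>
    #include <vector>

    #include "llama.h" // llama_context, llama_token, llama_tokenize

    std::vector<llama_token> tokenize_or_throw(llama_context * ctx, const std::string & text) {
        // One token per input byte is an upper bound (the patched
        // train-text-from-scratch example sizes its buffer the same way).
        std::vector<llama_token> out(text.size());

        int n = llama_tokenize(ctx, text.c_str(), out.data(), (int) out.size(), /*add_bos=*/false);

        // New sentinel from the backport: the token count itself overflowed int32_t.
        if (n == std::numeric_limits<int32_t>::min()) {
            throw std::runtime_error("tokenization overflow: input too large");
        }

        // Pre-existing convention: a negative return means the buffer was too
        // small, and its magnitude is the required capacity.
        if (n < 0) {
            out.resize((size_t) -n);
            n = llama_tokenize(ctx, text.c_str(), out.data(), (int) out.size(), false);
        }

        out.resize((size_t) n);
        return out;
    }

The INT32_MIN sentinel is distinguishable from the "buffer too small" convention because a real required size of INT32_MIN tokens is impossible, so existing callers that only check n < 0 fail safely rather than resizing to a bogus capacity.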