From e2d1bed014563be82aa1ecda77a14a589cc2c818 Mon Sep 17 00:00:00 2001 From: wangsl20 Date: Sat, 26 Jul 2025 18:03:10 +0800 Subject: [PATCH 1/3] feat: add query rewrite functionality --- src/prompts/query_rewrite.md | 37 +++++++++++++++ src/query_understanding/query_rewrite.py | 57 ++++++++++++++++++++++++ 2 files changed, 94 insertions(+) create mode 100644 src/prompts/query_rewrite.md create mode 100644 src/query_understanding/query_rewrite.py diff --git a/src/prompts/query_rewrite.md b/src/prompts/query_rewrite.md new file mode 100644 index 0000000..dffc5b5 --- /dev/null +++ b/src/prompts/query_rewrite.md @@ -0,0 +1,37 @@ +--- +CURRENT_TIME: {{CURRENT_TIME}} +--- + +You are a professional `query rewriting` agent. You need to forget your previous knowledge, carefully read the problem statement, identify the key information required, infer the intent of the query, and obtain the actual query/search/question. + +# Execution Steps + +**Understand the problem and perform intent detection and problem rewriting**: +- Your goal is to forget your previous knowledge, carefully read the problem statement, identify the required key information, deduce the intent of the query, obtain the actual query/question, and then rewrite it based on the problem statement and intent to generate diverse web search queries. +- Instructions: + 1. Each query should focus on one specific aspect of the original question. + 2. Produce at least 3 queries. + 3. Queries should be diverse; if the topic is broad, generate more than 1 query. + 4. Don't generate multiple similar queries; 1 is enough. + 5. Queries should ensure that the most current information is gathered. The current date is {{CURRENT_TIME}}. +- Format: + 1. Format your response as a JSON object with both of these exact keys. + 2. "rationale": Brief explanation of why these queries are relevant. + 3. "query": A list of search queries. + 4. 
Example: For example, the output structure is: + {% set data = { + "rationale": "To answer this comparative growth question accurately, we need specific data points on Apple's stock performance and iPhone sales metrics. These queries target the precise financial information needed: company revenue trends, product-specific unit sales figures, and stock price movement over the same fiscal period for direct comparison.", + "query": [ + "Apple total revenue growth fiscal year 2024", + "iPhone unit sales growth fiscal year 2024", + "Apple stock price growth fiscal year 2024" + ] } %} + - Topic: What revenue grew more 2024 Apple stock or the number of people buying an iPhone + - answer: {{data}} + +# Notes + +- Each query should focus on one specific aspect of the original question, and at least 3 queries need to be produced. +- The above is just an example to teach you the format for rewriting queries. **Do not** directly return the above content. +- Do not copy the content of the example, instead, understand its query rewriting format, and then rewrite the query from multiple perspectives based on your understanding. +- The final response must be in the same language as the query: **{{language}}** \ No newline at end of file diff --git a/src/query_understanding/query_rewrite.py b/src/query_understanding/query_rewrite.py new file mode 100644 index 0000000..5b5c149 --- /dev/null +++ b/src/query_understanding/query_rewrite.py @@ -0,0 +1,57 @@ +# ****************************************************************************** +# Copyright (c) 2025 Huawei Technologies Co., Ltd. +# jiuwen-deepsearch is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. 
+# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ****************************************************************************** + +import json +import logging + +from langchain_core.runnables import RunnableConfig +from langchain_core.messages import HumanMessage + +from src.llm.llm_wrapper import LLMWrapper +from src.prompts.template import apply_system_prompt +from src.utils.json_utils import normalize_json_output + +logger = logging.getLogger(__name__) + + +def query_rewrite(query: str) -> list: + """ + Rewrite a query by asking the "basic" LLM with the "query_rewrite" system prompt. + Args: + query: the original query text that needs to be rewritten + Returns: + list: the value under the "query" key of the parsed JSON response; [query] if that key is missing or any exception is raised + """ + logger.info(f"Before rewrite query: {query}") + + config = RunnableConfig() + search_context = {"message": HumanMessage(content=query)} + prompt = apply_system_prompt("query_rewrite", search_context, config) + try: + response = LLMWrapper("basic").invoke(prompt) + logger.info(f"query rewrite response: {response}") + + # Normalize the response content and parse it as JSON + normalized_response = normalize_json_output(response.content) + data_dict = json.loads(normalized_response) + query_list = data_dict.get("query", [query]) + logger.info(f"After rewrite query: {query_list}") + except (json.JSONDecodeError, KeyError, AttributeError) as e: + # Response parsing / data extraction failed: fall back to the original query + logger.error(f"JSON解析或数据提取失败: {e}") + query_list = [query] + except Exception as e: + # Any other failure (e.g. the LLM call itself): fall back to the original query + logger.error(f"Agent调用失败: {e}") + query_list = [query] + + return query_list -- Gitee From 32f958eac4cdcffde10579c695e447e5856c2a3a Mon Sep 17 00:00:00 2001 From: wangsl20 Date: Sat, 26 Jul 2025 18:17:55 +0800 Subject: [PATCH 2/3] feat: add query rewrite functionality --- src/query_understanding/query_rewrite.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/src/query_understanding/query_rewrite.py b/src/query_understanding/query_rewrite.py index 5b5c149..5bf446a 100644 --- a/src/query_understanding/query_rewrite.py +++ b/src/query_understanding/query_rewrite.py @@ -34,7 +34,7 @@ def query_rewrite(query: str) -> list: logger.info(f"Before rewrite query: {query}") config = RunnableConfig() - search_context = {"message": HumanMessage(content=query)} + search_context = {"messages": HumanMessage(content=query)} prompt = apply_system_prompt("query_rewrite", search_context, config) try: response = LLMWrapper("basic").invoke(prompt) -- Gitee From b2075a53c44f094cba7b239a4f15d56971e5a2d0 Mon Sep 17 00:00:00 2001 From: wangsl20 Date: Sat, 26 Jul 2025 18:23:19 +0800 Subject: [PATCH 3/3] feat: add query rewrite functionality --- src/query_understanding/query_rewrite.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/query_understanding/query_rewrite.py b/src/query_understanding/query_rewrite.py index 5bf446a..3eb8532 100644 --- a/src/query_understanding/query_rewrite.py +++ b/src/query_understanding/query_rewrite.py @@ -34,7 +34,7 @@ def query_rewrite(query: str) -> list: logger.info(f"Before rewrite query: {query}") config = RunnableConfig() - search_context = {"messages": HumanMessage(content=query)} + search_context = {"messages": [HumanMessage(content=query)]} prompt = apply_system_prompt("query_rewrite", search_context, config) try: response = LLMWrapper("basic").invoke(prompt) @@ -55,3 +55,8 @@ def query_rewrite(query: str) -> list: query_list = [query] return query_list + +if __name__ == "__main__": + query = "熊猫体重" + query_list= query_rewrite(query) + print(query_list) -- Gitee