class Section(BaseModel):
    '''
    Section model: one section in the outline structure of a research report.

    Both fields are declared with ``Field(...)`` and therefore have no default.
    '''
    # NOTE(review): pydantic typically publishes a model's docstring as the
    # "description" of its JSON schema — confirm no consumer depends on the
    # exact docstring wording.

    # Section title; summarizes the overall goal of this section.
    title: str = Field(..., description="章节标题,概括本章节整体目标")
    # Detailed description of the section's research task; states which data
    # must be collected or which programming task must be carried out.
    description: str = Field(..., description="章节研究任务详细说明,明确指定需要收集的数据或执行的编程任务")


class Outline(BaseModel):
    '''
    Outline model: the outline structure of a research report.

    ``thought`` and ``title`` have no default; ``language`` defaults to
    "zh-CN" and ``sections`` defaults to a fresh empty list.
    '''
    # User language tag, e.g. zh-CN or en-US.
    language: str = Field(default="zh-CN", description="用户语言:zh-CN、en-US等")
    # The reasoning behind the report outline.
    thought: str = Field(..., description="研究报告大纲背后的思考过程")
    # Report title; summarizes the overall research task.
    title: str = Field(..., description="研究报告标题,概括整体研究任务")
    # Sections of the final research report, in declaration order.
    sections: List[Section] = Field(default_factory=list, description="最终研究报告的章节")
+ +# Core Principles +- **Comprehensive Coverage**: All aspects + multi-perspective views (mainstream + alternative) +- **Depth Requirement**: Reject superficial data; require detailed data points + multi-source analysis + +## Analysis Framework (8 Dimensions) +1. **Historical Context**: Evolution timeline +2. **Current Status**: Data points + recent developments +3. **Future Indicators**: Predictive models + scenario planning +4. **Stakeholder Data**: Group impact + perspective mapping +5. **Quantitative Data**: Multi-source statistics +6. **Qualitative Data**: Case studies + testimonies +7. **Comparative Analysis**: Cross-case benchmarking +8. **Risk Assessment**: Challenges + contingency plans + +## Execution Constraints +- Max sections: {{ max_section_num }} (require high focus) +- Language consistency: **{{ language }}** + +## Output Rules + +- Output the raw JSON of the `Outline` object directly; do not wrap it in a "```json" code fence. +- The structure of the `Outline` is defined as follows, and each of the following fields is indispensable. + +```ts +interface Section { + title: string; + description: string; // Each research section +} + +interface Outline { + language: string; + thought: string; + title: string; + sections: Section[]; // Section list of the final report +} +``` diff --git a/src/query_understanding/outliner.py b/src/query_understanding/outliner.py new file mode 100644 index 0000000000000000000000000000000000000000..f7c89ef45f3434577973284cf08bffb2958c601d --- /dev/null +++ b/src/query_understanding/outliner.py @@ -0,0 +1,77 @@ +# ****************************************************************************** +# Copyright (c) 2025 Huawei Technologies Co., Ltd. +# jiuwen-deepsearch is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. 
# You may obtain a copy of Mulan PSL v2 at:
# http://license.coscl.org.cn/MulanPSL2
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
# See the Mulan PSL v2 for more details.
# ******************************************************************************

import json
import logging
from json import JSONDecodeError

from langchain_core.exceptions import OutputParserException
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.runnables import RunnableConfig

from src.llm import LLMWrapper
from src.manager.search_context import SearchContext, Outline
from src.prompts import apply_system_prompt
from src.utils.llm_utils import normalize_json_output

logger = logging.getLogger(__name__)


class Outliner:
    """Generate a research-report outline with an LLM.

    The model is obtained from ``LLMWrapper("basic")`` and bound to the
    ``Outline`` schema through ``with_structured_output`` in ``json_mode``.
    """

    def __init__(self) -> None:
        self.llm = LLMWrapper("basic").with_structured_output(schema=Outline, method="json_mode")

    def generate_outline(self, context: SearchContext, config: RunnableConfig) -> dict:
        """Generate an outline of the report.

        Args:
            context: Search context handed to the "outliner" system prompt.
            config: Runnable configuration handed to the same prompt.

        Returns:
            A dict with two keys:

            * ``"messages"``: a single ``AIMessage`` named ``"outliner"``
              whose content is the JSON dump of the LLM response, or ``""``
              when the failure happened before the response was serialized.
            * ``"current_outline"``: the validated ``Outline``, or ``{}``
              when generation, parsing or validation failed.

        Failures inside the LLM call / parse / validate sequence are logged
        and reported through the empty ``current_outline``; they are not
        raised. Errors from building the prompt are not caught here.
        """
        logger.info("Outliner starting")
        messages = apply_system_prompt("outliner", context, config)

        llm_result = ""
        outline = {}
        try:
            response = self.llm.invoke(messages)
            llm_result = response.model_dump_json(indent=4)
            # Lazy %-style args: the message is only built if INFO is enabled.
            logger.info("Outliner LLM result: %s", llm_result)

            generated_outline = json.loads(normalize_json_output(llm_result))
            # Re-validate the normalized payload against the Outline schema.
            outline = Outline.model_validate(generated_outline)
        except JSONDecodeError as e:
            # Include position/reason so a malformed payload can be diagnosed.
            logger.error("Outliner LLM response failed JSON deserialization: %s", e)
        except OutputParserException as e:
            logger.error("LLM output does not match the structure of the outline: %s", e)
        except Exception as e:
            # Unexpected failure: keep the best-effort contract but record the
            # traceback so the root cause is not lost.
            logger.exception("Error when Outliner generating a outline: %s", e)

        return {
            "messages": [AIMessage(name="outliner", content=llm_result)],
            "current_outline": outline,
        }


def _run_demo() -> None:
    """Run the outliner once on a sample query and print the result."""
    context: SearchContext = {
        "messages": [
            # Sample query, roughly "average elevation of China".
            HumanMessage("中国平均海拔"),
        ]
    }

    config: RunnableConfig = {
        "configurable": {
            "max_section_num": 2,
            "language": "zh-CN",
        }
    }

    outliner = Outliner()
    print(outliner.generate_outline(context, config))


if __name__ == "__main__":
    _run_demo()