diff --git a/service.yaml.example b/service.yaml.example index 9f73015e4c64ed4e448d0c12ab72de20d63def44..40ff1739f6497e69f29086eaa4b5c3a512ffaf6e 100644 --- a/service.yaml.example +++ b/service.yaml.example @@ -14,4 +14,7 @@ info_collector: max_crawl_length: 3000 report: - output_path: "" # Results storage directory path, defaults to empty string: no report generated. \ No newline at end of file + output_path: "" # Results storage directory path, defaults to empty string: no report generated. + +template: + save_path: "" # The storage directory path for template. Default value is empty: can't save or get any template. \ No newline at end of file diff --git a/src/manager/search_context.py b/src/manager/search_context.py index 88b017bd5df3c76e08de0124d95070b32ae709ac..45ad967958c3f0a0fb7ba522fdc42ae3cd370dae 100644 --- a/src/manager/search_context.py +++ b/src/manager/search_context.py @@ -52,3 +52,4 @@ class SearchContext(MessagesState): report: str = "" report_generated_num: int = 0 report_evaluation: str = "" + report_template: str = "" diff --git a/src/manager/workflow.py b/src/manager/workflow.py index 064aca3b5a8922fb685f2118eeaaa680b7e1fcdc..84209c2a72b6444cbf82e008d6493b412bd7136d 100644 --- a/src/manager/workflow.py +++ b/src/manager/workflow.py @@ -59,6 +59,7 @@ class Workflow: messages: str, session_id: str, local_datasets: List[str], + report_template: str = "", report_style: str = "", report_format: str = "", ): input = { @@ -67,6 +68,7 @@ class Workflow: "report": "", "current_plan": None, "collected_infos": [], + "report_template": report_template, } config = { "recursion_limit": Configuration.get_conf("workflow", "recursion_limit", expected_type=int), diff --git a/src/prompts/__init__.py b/src/prompts/__init__.py index fbba84b0c485c2ce8cfac30b0880f36af21c1a92..8f9c08c786b6f3ab367c49d8e591cd24e2821232 100644 --- a/src/prompts/__init__.py +++ b/src/prompts/__init__.py @@ -8,8 +8,9 @@ # MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 
# See the Mulan PSL v2 for more details. -from .template import apply_system_prompt +from .template import apply_system_prompt, apply_template_prompt __all__ = [ - "apply_system_prompt" + "apply_system_prompt", + "apply_template_prompt", ] \ No newline at end of file diff --git a/src/prompts/report_markdown.md b/src/prompts/report_markdown.md index a9f1408fae427f82b84453bdf4f7fc38cd81437a..f7deb7a781038bd7a91f39f1320fc4c9b017e708 100644 --- a/src/prompts/report_markdown.md +++ b/src/prompts/report_markdown.md @@ -31,7 +31,14 @@ You are a professional, objective, and insightful journalist # Report Structure Create your report in the following format - +{%if has_template%} +Generate a report based on the following outline. The selection of the outline content falls into three scenarios: +1.If the topic matches the search results exactly, use the topic. +2.If the topic is similar to the search content, use that topic but replace the content based on the actual search results. +3.If the topic does not match the search content, do not use that topic, and the generated report may exclude this mismatched topic. +The following content is the report outline. +{{ report_template }} +{% else %} 1. **Title** - The title is in the first-level title format - The title is concise and highlights the core of the report. The title contains no more than 10 words @@ -80,6 +87,7 @@ Create your report in the following format - Can include comparative analysis, tables and detailed functional breakdowns - For shorter reports, this section is optional {% endif %} +{% endif %} 5. 
**Key Reference Articles** - Hyperlinks with the titles of reference articles as content diff --git a/src/prompts/report_tmpl.md b/src/prompts/report_tmpl.md new file mode 100644 index 0000000000000000000000000000000000000000..602b128533fc8d8f046377c7be1a1aeddb9f0254 --- /dev/null +++ b/src/prompts/report_tmpl.md @@ -0,0 +1,86 @@ +### 任务描述 +你作为专业文档解析助手,需从用户提供的完整报告中提取结构化模板框架。忽略具体报告内容,仅分析文档的章节层级结构和功能描述,并以标准化的Markdown文档格式输出章节模板。 +尽量保持原报告章节顺序与层级关系,要注意抽取标题必须具有迁移性、可复用性,适配其他地区/实体/年份/政策背景。 + +### 执行步骤 +1. **结构识别**: + - 扫描全文识别所有标题(h1-h6层级) + + - 标注章节间的嵌套关系(如"3.1"为"3"的子章节) + + - 删除页码、页眉/页脚等非结构元素 + +2. **功能描述生成**: + + - 根据章节标题和首段内容,提炼该部分的核心功能或作用,不包含任何具体实体(如时间、组织名、政策编号、地理位置等) + + - 若章节标题含有特指元素,需统一抽象为通用术语,如“某组织”、“典型项目”、“目标区域”等,以实现模板的通用性 + + - 示例: + + - 原标题:“2024年X项目实施报告” → 模板化标题:“项目实施情况概述” + + - 原标题:“公司A违规处罚通报” → 模板化标题:“行为处理情况说明” + + - 原标题:“案例分析:A系统建设经验” → 模板化标题:“典型项目分析” + +3. **Markdown结构化输出**: + +- 使用 # ~ ###### 表示h1-h6级标题 + +- 一级标题 大章节的格式和分析问题的角度逻辑,结构 总分 概述每个章节的维度,不涉及具体 + +- 每个标题下以> 功能描述:...格式展示对应功能描述 + +- 所有子章节应嵌套在其上级章节之下,保持清晰层级结构 + +### Markdown输出示例 + +# 临床研究试验报告 + +## 摘要 +> 功能描述:概述研究目标、方法和关键结论 + +## 引言 +> 功能描述:说明研究背景、问题陈述及研究价值 + +### 研究背景 +> 功能描述:描述疾病现状和相关医学进展 + + +### 注意事项 +1. **层级处理**: + +- 层级必须连续(禁止出现1级→3级的跳级) + +- 同级标题保持数组顺序 + +2. **容错机制**: + +- 遇到非常规格式(如无编号章节)保留原标题文字 + +- 若章节无正文内容,描述字段写"功能描述缺失" + +3. **格式规范**: + +- 禁用HTML或非标准Markdown语法 + +- 标题用标准 # 级别标识,功能描述使用 > 功能描述: 前缀 + +4. **特殊场景**: + +- 附录、参考文献等视为独立的一级标题 + +- 合并连续的子标题(如“4.1a”和“4.1b”合并为“4.1”) + +### 设计要点说明 + +1. **结构化思维**:分步引导LLM识别报告层次结构并输出标准模板 + +2. **容错性设计**:制定异常结构处理规则确保鲁棒性 + +3. **可读性保障**:Markdown格式清晰、易编辑、可视化良好 + +4. **工具兼容性**:输出符合Markdown标准,可直接导入文档编辑器或转换为其他格式(如PDF、PPT等) + +5. 
**章节标题脱敏与通用化要求**:抽取标题时不得保留具有特指性的地区、时间年份、学校名、城市名、项目名,事件名称等具体词汇 ,可使用中性表达如“区域A”“区域B”“目标城市”“本报告”“当前周期”“典型案例”“相关主体”等 ,所有标题必须具有迁移性、可复用性,适配其他地区/实体/年份/政策背景 \ No newline at end of file diff --git a/src/prompts/template.py b/src/prompts/template.py index 0fa86c74a1ae241c87c44ef414780d6987f8576e..c3e8e6962c6fbe4b6890e1a3660a2e19a50368d4 100644 --- a/src/prompts/template.py +++ b/src/prompts/template.py @@ -49,3 +49,27 @@ def apply_system_prompt(prompt_template_file: str, context: SearchContext, confi raise ValueError(error_msg) from e except Exception as e: raise ValueError(f"Applying system prompt template with {prompt_template_file}.md failed: {e}") + + +def apply_template_prompt(prompt_template_file: str, report_path: str) -> list: + if not os.path.isfile(report_path): + raise FileNotFoundError(f"Report file not found: {report_path}") + + with open(report_path, "r", encoding="utf-8") as f: + report_content = f.read() + + context_vars = { + "CURRENT_TIME": datetime.now().strftime("%a %b %d %Y %H:%M:%S %z"), + "report_filename": os.path.basename(report_path), + } + + try: + prompt_template = jinja_env.get_template(f"{prompt_template_file}.md") + system_prompt = prompt_template.render(**context_vars) + return [ + {"role": "system", "content": system_prompt}, + {"role": "user", + "content": f"Below is the report provided by the user for extracting the template:\n\n{report_content}"} + ] + except Exception as e: + raise ValueError(f"Applying system prompt template with {prompt_template_file}.md failed: {e}") diff --git a/src/report/report.py b/src/report/report.py index 6654f75314a10b61d8a5db1994d03c7d97be8eaf..ea720ff64ae2ae5d3b5c8e857162e2ca05cfbd64 100644 --- a/src/report/report.py +++ b/src/report/report.py @@ -43,6 +43,13 @@ class Reporter: """Reporter node that write a final report.""" configurable = config.get("configurable", {}) report_format = configurable.get("report_format", ReportFormat.MARKDOWN) + file_outline = context.get("report_template", "") + + if 
file_outline != "": + configurable["has_template"] = True + else: + configurable["has_template"] = False + if not isinstance(report_format, ReportFormat): return False, f"Error: Report format is not instance of ReportFormat {report_format}" diff --git a/src/server/app.py b/src/server/app.py index d15aa95a3e6b80c274140ab6d0015d75c3c74a59..697335b93cb7c68d80f90bed19611e6cefd8f73d 100644 --- a/src/server/app.py +++ b/src/server/app.py @@ -14,6 +14,9 @@ from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware from .routes import router from src.adapter.df import adapter +from dotenv import load_dotenv + +load_dotenv() app = FastAPI( title="Jiuwen Deep Search", diff --git a/src/server/research_message.py b/src/server/research_message.py index c07dae026c27931bb448dad19cf31f81c716c16e..4946669dd2968731f20e0b8cc7bb280b6b858c83 100644 --- a/src/server/research_message.py +++ b/src/server/research_message.py @@ -22,7 +22,12 @@ class ResearchRequest(BaseModel): max_step_num: Optional[int] = Field(10, description="max step number, default 10") report_style: Optional[str] = Field(None, description="report style") report_type: Optional[str] = Field(None, description="report type") - + report_template: Optional[str] = Field(None, description="report template") class ResearchResponse(BaseModel): content: str = Field(None, description="research content, markdown format") + + +class ImportTemplateRequest(BaseModel): + report: str = Field(None, description="Path to the original report file") + name: str = Field(None, description="Name of the new template") diff --git a/src/server/routes.py b/src/server/routes.py index 499d6f69f129938add49b4267515b8e523625187..bc2b234e5ae7aa5c310aef0bedb8adb3f5e86e70 100644 --- a/src/server/routes.py +++ b/src/server/routes.py @@ -11,10 +11,12 @@ # See the Mulan PSL v2 for more details. 
# ******************************************************************************/ import logging -from fastapi import APIRouter +from fastapi import APIRouter, HTTPException +from pydantic import BaseModel from fastapi.responses import StreamingResponse -from .research_message import ResearchRequest, ResearchResponse +from .research_message import ResearchRequest, ResearchResponse, ImportTemplateRequest from src.manager.workflow import Workflow +from src.template_manager.report_template import ReportTemplate router = APIRouter( prefix="/api", @@ -28,11 +30,54 @@ workflow.build_graph() @router.post("/research", response_model=ResearchResponse) async def research(request: ResearchRequest): logging.info(f"research request {request.model_dump_json()}") + report_template = "" + if request.report_template: + report_template = ReportTemplate.get_template_content(request.report_template) return StreamingResponse( workflow.run( messages=request.messages, session_id=request.session_id, local_datasets=request.local_datasets, + report_template=report_template, ), media_type="text/event-stream", ) + + +@router.post("/report/template/import") +async def import_report_template(req: ImportTemplateRequest): + try: + path = ReportTemplate.generate_template_from_report( + report_path=req.report, + name=req.name + ) + return {"message": "Template imported", "path": path} + except FileNotFoundError as e: + raise HTTPException(status_code=500, detail=f"Report file not found: {e}") + except Exception as e: + raise HTTPException(status_code=500, detail=f"Failed to import template: {e}") + + +@router.get("/report/template/list") +async def list_template(): + logging.info(f"Start listing templates.") + return ReportTemplate.get_template_list() + + +@router.get("/report/template/{name}") +async def get_template(name: str): + logging.info(f"Start getting template_manager.") + if name is None: + logging.error(f"Required parameter missing.") + raise HTTPException(status_code=500, 
detail=f"Required parameter missing.") + template = ReportTemplate.get_template_content(name) + return {"report_template": template} + + +@router.delete("/report/template/{name}") +async def del_template(name: str): + logging.info(f"Start getting template_manager.") + if name is None or name == "": + return {"result": "success"} + result = ReportTemplate.del_template(name) + return {"result": result} diff --git a/src/template_manager/__init__.py b/src/template_manager/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..81365635441f61bfdb8bfffd96fc698cab58c8f8 --- /dev/null +++ b/src/template_manager/__init__.py @@ -0,0 +1,12 @@ +#!/usr/bin/python3 +# ****************************************************************************** +# Copyright (c) 2025 Huawei Technologies Co., Ltd. +# jiuwen-deepsearch is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ******************************************************************************/ diff --git a/src/template_manager/report_template.py b/src/template_manager/report_template.py new file mode 100644 index 0000000000000000000000000000000000000000..cfedebfa3c18595f3d0db5067393ae1a195fab44 --- /dev/null +++ b/src/template_manager/report_template.py @@ -0,0 +1,68 @@ +#!/usr/bin/python3 +# ****************************************************************************** +# Copyright (c) 2025 Huawei Technologies Co., Ltd. +# jiuwen-deepsearch is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. 
+# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ******************************************************************************/ + +import logging +import os +from pathlib import Path +from fastapi import HTTPException + +from src.config.configuration import Configuration +from src.llm.llm_wrapper import LLMWrapper +from src.prompts import apply_system_prompt, apply_template_prompt + + +class ReportTemplate: + @staticmethod + def generate_template_from_report(report_path: str, name: str, + prompt_template_file: str = "report_tmpl") -> str: + save_path = Configuration.get_conf("template", "save_path", expected_type=str) + if save_path == "": + raise HTTPException(status_code=500, detail=f"Output path is empty.") + chat_prompt = apply_template_prompt(prompt_template_file, report_path) + response = LLMWrapper("basic").invoke(chat_prompt) + output_path = os.path.join(save_path, f"{os.path.basename(name)}.md") + with open(output_path, "w", encoding="utf-8") as f: + f.write(response.content) + return output_path + + @staticmethod + def get_template_list(): + template_path = Configuration.get_conf("template", "save_path", expected_type=str) + folder_path = Path(template_path) + if template_path and folder_path.is_dir(): + template_files_name = [f.name for f in folder_path.iterdir() if f.is_file()] + return template_files_name + else: + logging.error(f"Template files path is invalid: {template_path}") + raise HTTPException(status_code=500, detail=f"{template_path} is invalid.") + + @staticmethod + def get_template_content(file_name: str): + file_path = Configuration.get_conf("template", "save_path", expected_type=str) + "/" + os.path.basename(file_name) + try: + rs = Path(file_path).read_bytes().decode("utf-8") + return rs + except 
FileNotFoundError: + logging.error(f"{file_name} is not exist.") + raise HTTPException(status_code=500, detail=f"{file_name} is not exist.") + + @staticmethod + def del_template(file_name: str): + file_path = Configuration.get_conf("template", "save_path", expected_type=str) + "/" + os.path.basename(file_name) + file = Path(file_path) + try: + file.unlink(missing_ok=True) + except OSError: + logging.error(f"Delete file {file_name} failed.") + raise HTTPException(status_code=500, detail=f"Failed to delete template: {file_name}") + return "success" \ No newline at end of file diff --git a/tests/templates/2024beijing.html b/tests/templates/2024beijing.html new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391