feat(api+app): 对话阶段化、回忆录流水线与客户端会话体验
- DB: segments 用户输入文本(Alembic 0002) - Chat: 阶段检测/阶段提示/回复限制,编排与访谈/画像 prompts 调整 - Memoir: 忠实度检查 agent,叙事与分类等链路更新 - Core: agent 日志、Alembic 启动、LangChain/日志/配置等 - Story: time_hints;Memory 检索与相关测试 - Expo: 助手头像、会话页与消息拆分、实时会话与文案/i18n - Docs/scripts/tests: 迁移脚本、LLM JSON/记忆检索文档、新增单测
This commit is contained in:
@@ -2,6 +2,7 @@
|
||||
|
||||
from app.agents.memoir.classification_agent import ClassificationAgent
|
||||
from app.agents.memoir.extraction_agent import ExtractionAgent, ExtractionResult
|
||||
from app.agents.memoir.fidelity_check_agent import FidelityCheckAgent
|
||||
from app.agents.memoir.narrative_agent import NarrativeAgent
|
||||
from app.agents.memoir.orchestrator import MemoirOrchestrator, PreparedMemoirBatches
|
||||
from app.agents.memoir.story_route_agent import (
|
||||
@@ -24,4 +25,5 @@ __all__ = [
|
||||
"ExtractionResult",
|
||||
"ClassificationAgent",
|
||||
"NarrativeAgent",
|
||||
"FidelityCheckAgent",
|
||||
]
|
||||
|
||||
@@ -8,14 +8,17 @@ ClassificationAgent:将内容分类到 8 个章节类别,或判定无价值
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from typing import Any, Optional
|
||||
|
||||
from app.agents.memoir.prompts import (
|
||||
CHAPTER_CATEGORIES,
|
||||
get_chapter_classification_prompt,
|
||||
get_chapter_classification_json_prompt,
|
||||
)
|
||||
from app.core.langchain_llm import invoke_json_object
|
||||
from app.core.logging import get_logger
|
||||
from app.features.memoir.memoir_images.json_payload import extract_json_payload
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
@@ -89,6 +92,20 @@ def _normalize_llm_category(raw: str) -> str:
|
||||
return s
|
||||
|
||||
|
||||
def _parse_category_from_llm_response(raw: str) -> str:
    """Extract a normalized chapter key from a raw LLM reply.

    A JSON object of the form ``{"category": "..."}`` is preferred. When the
    reply cannot be decoded that way (or has no ``category`` key), the whole
    reply is treated as a plain-text key instead.

    Args:
        raw: The text returned by the model; may be empty or ``None``.

    Returns:
        The value produced by ``_normalize_llm_category``, or ``""`` when the
        reply is empty or blank.
    """
    reply = (raw or "").strip()
    if reply == "":
        return ""

    try:
        decoded = json.loads(extract_json_payload(reply))
        has_category = isinstance(decoded, dict) and "category" in decoded
        if has_category:
            return _normalize_llm_category(str(decoded["category"]))
    except (TypeError, ValueError):
        # json.JSONDecodeError is a ValueError subclass, so this covers the
        # "not JSON" case as well; fall through to plain-text handling.
        pass

    return _normalize_llm_category(reply)
|
||||
|
||||
|
||||
class ClassificationAgent:
|
||||
"""将内容分类到 8 个章节类别之一,或判定无价值返回 None"""
|
||||
|
||||
@@ -105,7 +122,7 @@ class ClassificationAgent:
|
||||
"""
|
||||
if _looks_like_fragment_only(text):
|
||||
logger.debug(
|
||||
"零散档案/极短标签句,跳过回忆录 Story: text_len=%s text=%s",
|
||||
"零散档案/极短标签句,跳过回忆录 Story: text_len={} text={}",
|
||||
len(text or ""),
|
||||
text or "",
|
||||
)
|
||||
@@ -113,12 +130,17 @@ class ClassificationAgent:
|
||||
|
||||
if llm:
|
||||
try:
|
||||
prompt = get_chapter_classification_prompt(text)
|
||||
response = llm.invoke(prompt)
|
||||
category = _normalize_llm_category(response.content or "")
|
||||
prompt = get_chapter_classification_json_prompt(text)
|
||||
raw = invoke_json_object(
|
||||
llm,
|
||||
prompt,
|
||||
max_tokens=256,
|
||||
agent="ClassificationAgent.classify",
|
||||
)
|
||||
category = _parse_category_from_llm_response(raw)
|
||||
if category == "none":
|
||||
logger.debug(
|
||||
"LLM 判定内容不足以成篇,跳过: text_len=%s text=%s",
|
||||
"LLM 判定内容不足以成篇,跳过: text_len={} text={}",
|
||||
len(text or ""),
|
||||
text or "",
|
||||
)
|
||||
@@ -126,7 +148,7 @@ class ClassificationAgent:
|
||||
if category in CHAPTER_CATEGORIES:
|
||||
return category
|
||||
except Exception as e:
|
||||
logger.warning("ClassificationAgent LLM 章节分类失败: %s", e)
|
||||
logger.warning("ClassificationAgent LLM 章节分类失败: {}", e)
|
||||
|
||||
stage = _detect_stage(text, fallback_stage)
|
||||
return _STAGE_TO_DEFAULT_CATEGORY.get(
|
||||
|
||||
@@ -10,7 +10,7 @@ from dataclasses import dataclass
|
||||
from typing import Any, Dict
|
||||
|
||||
from app.agents.memoir.prompts import get_state_extraction_prompt
|
||||
from app.core.langchain_llm import bind_json_object_mode
|
||||
from app.core.langchain_llm import invoke_json_object
|
||||
from app.core.logging import get_logger
|
||||
from app.features.memoir.memoir_images.json_payload import extract_json_payload
|
||||
|
||||
@@ -56,15 +56,19 @@ class ExtractionAgent:
|
||||
for k, v in (stage_slots or {}).items()
|
||||
},
|
||||
)
|
||||
json_llm = bind_json_object_mode(llm, max_tokens=1024)
|
||||
response = json_llm.invoke(prompt)
|
||||
parsed = json.loads(extract_json_payload(response.content))
|
||||
raw = invoke_json_object(
|
||||
llm,
|
||||
prompt,
|
||||
max_tokens=1024,
|
||||
agent="ExtractionAgent.extract",
|
||||
)
|
||||
parsed = json.loads(extract_json_payload(raw))
|
||||
detected_stage = parsed.get("detected_stage", detected_stage)
|
||||
raw_slots = parsed.get("slots", {}) or {}
|
||||
extracted_slots = {
|
||||
k: v if isinstance(v, str) else str(v) for k, v in raw_slots.items()
|
||||
}
|
||||
except (json.JSONDecodeError, Exception) as e:
|
||||
logger.warning("ExtractionAgent LLM 解析失败: %s", e)
|
||||
logger.warning("ExtractionAgent LLM 解析失败: {}", e)
|
||||
|
||||
return ExtractionResult(detected_stage=detected_stage, slots=extracted_slots)
|
||||
|
||||
88
api/app/agents/memoir/fidelity_check_agent.py
Normal file
88
api/app/agents/memoir/fidelity_check_agent.py
Normal file
@@ -0,0 +1,88 @@
|
||||
"""
|
||||
FidelityCheckAgent:比较「用户口述」与叙事 JSON 输出,判定是否存在明显编造或越界。
|
||||
失败时由流水线回退为口述正文(见 story_pipeline_sync)。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
from app.core.config import settings
|
||||
from app.core.langchain_llm import invoke_json_object
|
||||
from app.core.logging import get_logger
|
||||
from app.features.memoir.memoir_images.json_payload import extract_json_payload
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
# 生成稿中出现的四位年份,若口述中未出现同串,仅打日志(不误杀)
|
||||
_YEAR_4_RE = re.compile(r"(?<!\d)(19|20)\d{2}(?!\d)")
|
||||
|
||||
|
||||
def _log_suspicious_years_not_in_oral(oral_text: str, narrative_json: str) -> None:
    """Debug-log years that appear in the generated narrative but not in the oral text.

    This is a log-only heuristic: it never rejects a narrative. Years are the
    substrings matched by ``_YEAR_4_RE`` and are compared to the oral text by
    plain substring membership.

    Args:
        oral_text: What the user actually said; ``None``/empty is treated as "".
        narrative_json: The generated narrative JSON text; ``None``/empty is
            treated as "".
    """
    oral = oral_text or ""
    gen = narrative_json or ""

    # dict.fromkeys de-duplicates while keeping first-seen order, so a year
    # repeated many times in the narrative is reported once instead of once
    # per occurrence.
    generated_years = dict.fromkeys(m.group(0) for m in _YEAR_4_RE.finditer(gen))

    for year in generated_years:
        if year in oral:
            continue
        logger.debug(
            "event=fidelity_heuristic_year_not_in_oral year={} oral_len={} gen_len={}",
            year,
            len(oral),
            len(gen),
        )
|
||||
|
||||
|
||||
class FidelityCheckAgent:
    """Narrative fidelity check (json_object mode).

    Asks the LLM whether the generated narrative JSON contains material that
    is clearly absent from the user's oral text. When the check fails, the
    caller is expected to fall back to the oral text.
    """

    # String spellings of the verdict that must count as "did not pass".
    # The empty string is included because bool("") is already False.
    _FALSE_STRINGS = frozenset({"", "false", "0", "no"})

    @staticmethod
    def _coerce_pass_flag(value: Any) -> bool:
        """Turn the model's ``pass`` value into a bool.

        A plain ``bool(value)`` treats the string ``"false"`` as truthy, which
        would silently approve a narrative the model rejected. Strings are
        therefore compared case-insensitively against known false spellings;
        every other type keeps ordinary truthiness.
        """
        if isinstance(value, str):
            return value.strip().lower() not in FidelityCheckAgent._FALSE_STRINGS
        return bool(value)

    def passes(
        self,
        *,
        oral_text: str,
        narrative_json: str,
        llm: Any,
    ) -> bool:
        """Return whether the generated narrative stays faithful to the oral text.

        Args:
            oral_text: The user's own words.
            narrative_json: The generated narrative, as JSON text.
            llm: The model handle passed to ``invoke_json_object``; a falsy
                value disables the check.

        Returns:
            ``True`` when the check is disabled, when either input is blank,
            when the model call or JSON parsing fails (fail-open), or when the
            model reports a pass. ``False`` only when the model explicitly
            reports a failure.
        """
        if not llm or not settings.memoir_fidelity_check_enabled:
            return True
        oral = (oral_text or "").strip()
        gen = (narrative_json or "").strip()
        if not oral or not gen:
            return True
        _log_suspicious_years_not_in_oral(oral, gen)
        # Both inputs are truncated to bound the prompt size.
        prompt = f"""你是事实核对员。比较下面两段文字。

【用户口述】(亲历内容)
{oral[:8000]}

【模型生成的 JSON 叙事】(应只含口述中已有事实的整理,不得添油加醋)
{gen[:16000]}

判断:生成稿是否出现**口述中明显没有**的具体人名、地名、时间、数字、事件经过、对话,或把摘录/档案里才有的信息写成了用户亲口经历?
若存在明显编造或越界,pass=false;若仅口语转书面、删赘词、合并指代,pass=true。

**JSON 输出**:只输出一个合法 JSON 对象。
{{"pass": true, "reason": null}}
或
{{"pass": false, "reason": "一句话说明"}}

只输出 JSON,不要其它文字。"""
        try:
            raw = invoke_json_object(
                llm,
                prompt,
                max_tokens=settings.memoir_fidelity_check_max_tokens,
                agent="FidelityCheckAgent.passes",
            )
            data = json.loads(extract_json_payload(raw))
        except Exception as e:
            # Deliberate fail-open: a broken check must not block the pipeline.
            logger.warning("FidelityCheckAgent 解析失败,放行: {}", e)
            return True

        if not isinstance(data, dict):
            # Valid JSON but not an object: same fail-open policy, stated
            # explicitly instead of relying on an AttributeError from .get().
            logger.warning(
                "FidelityCheckAgent 解析失败,放行: {}",
                f"expected a JSON object, got {type(data).__name__}",
            )
            return True

        ok = self._coerce_pass_flag(data.get("pass", True))
        if not ok:
            # str() guards against a non-string reason (e.g. a number): slicing
            # it used to raise inside the broad except above and flip a failed
            # check into a pass.
            logger.warning(
                "event=fidelity_check_fail reason={}",
                str(data.get("reason") or "")[:200],
            )
        return ok
|
||||
@@ -1,18 +1,21 @@
|
||||
"""
|
||||
NarrativeAgent:生成创意标题和叙事改写。
|
||||
对应现有逻辑:get_creative_title_prompt、get_narrative_prompt
|
||||
对应现有逻辑:get_creative_title_json_prompt、get_narrative_json_prompt
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from app.agents.memoir.prompts import (
|
||||
get_creative_title_prompt,
|
||||
get_creative_title_json_prompt,
|
||||
get_narrative_json_prompt,
|
||||
get_narrative_merge_json_prompt,
|
||||
)
|
||||
from app.core.langchain_llm import bind_json_object_mode
|
||||
from app.core.langchain_llm import invoke_json_object
|
||||
from app.core.logging import get_logger
|
||||
from app.features.memoir.memoir_images.json_payload import extract_json_payload
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
@@ -33,17 +36,26 @@ class NarrativeAgent:
|
||||
if not llm:
|
||||
return f"{stage} 回忆"
|
||||
try:
|
||||
prompt = get_creative_title_prompt(
|
||||
prompt = get_creative_title_json_prompt(
|
||||
stage=stage,
|
||||
emotion=emotion,
|
||||
slots=slots,
|
||||
user_profile=user_profile,
|
||||
birth_year=birth_year,
|
||||
)
|
||||
response = llm.invoke(prompt)
|
||||
return (response.content or "").strip().strip('"')
|
||||
raw = invoke_json_object(
|
||||
llm,
|
||||
prompt,
|
||||
max_tokens=256,
|
||||
agent="NarrativeAgent.generate_title",
|
||||
)
|
||||
data = json.loads(extract_json_payload(raw))
|
||||
title = (data.get("title") or "").strip() if isinstance(data, dict) else ""
|
||||
if title:
|
||||
return title.strip('"')
|
||||
return f"{stage} 回忆"
|
||||
except Exception as e:
|
||||
logger.warning("NarrativeAgent 生成标题失败: %s", e)
|
||||
logger.warning("NarrativeAgent 生成标题失败: {}", e)
|
||||
return f"{stage} 回忆"
|
||||
|
||||
def generate_narrative(
|
||||
@@ -56,25 +68,46 @@ class NarrativeAgent:
|
||||
birth_year: Optional[int] = None,
|
||||
llm: Any = None,
|
||||
) -> str:
|
||||
"""将新对话改写为叙述。若无 LLM 则直接拼接"""
|
||||
"""将新对话改写为叙述。若无 LLM 则直接拼接。
|
||||
|
||||
若 `existing_content` 非空(append 路径),使用整篇合并提示,输出覆盖全篇的有序段落。
|
||||
"""
|
||||
if not llm:
|
||||
if existing_content:
|
||||
return f"{existing_content}\n\n{new_content}"
|
||||
return new_content
|
||||
try:
|
||||
prompt = get_narrative_json_prompt(
|
||||
stage=stage,
|
||||
slots=slots,
|
||||
new_content=new_content,
|
||||
existing_content=existing_content,
|
||||
user_profile=user_profile,
|
||||
birth_year=birth_year,
|
||||
)
|
||||
json_llm = bind_json_object_mode(llm, max_tokens=4096)
|
||||
response = json_llm.invoke(prompt)
|
||||
return (response.content or "").strip()
|
||||
merge_mode = bool((existing_content or "").strip())
|
||||
if merge_mode:
|
||||
prompt = get_narrative_merge_json_prompt(
|
||||
stage=stage,
|
||||
slots=slots,
|
||||
new_content=new_content,
|
||||
existing_content=existing_content,
|
||||
user_profile=user_profile,
|
||||
birth_year=birth_year,
|
||||
)
|
||||
max_tokens = 8192
|
||||
agent_name = "NarrativeAgent.generate_narrative_merge"
|
||||
else:
|
||||
prompt = get_narrative_json_prompt(
|
||||
stage=stage,
|
||||
slots=slots,
|
||||
new_content=new_content,
|
||||
existing_content=existing_content,
|
||||
user_profile=user_profile,
|
||||
birth_year=birth_year,
|
||||
)
|
||||
max_tokens = 4096
|
||||
agent_name = "NarrativeAgent.generate_narrative"
|
||||
return invoke_json_object(
|
||||
llm,
|
||||
prompt,
|
||||
max_tokens=max_tokens,
|
||||
agent=agent_name,
|
||||
).strip()
|
||||
except Exception as e:
|
||||
logger.warning("NarrativeAgent 生成叙事失败: %s", e)
|
||||
logger.warning("NarrativeAgent 生成叙事失败: {}", e)
|
||||
if existing_content:
|
||||
return f"{existing_content}\n\n{new_content}"
|
||||
return new_content
|
||||
|
||||
@@ -6,6 +6,7 @@ MemoirOrchestrator:按 segment 编排流水线,调用各 Specialist Agent。
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Callable, Dict, List, Set, Tuple
|
||||
|
||||
@@ -17,6 +18,7 @@ from app.agents.memoir.classification_agent import (
|
||||
)
|
||||
from app.agents.memoir.extraction_agent import ExtractionAgent, ExtractionResult
|
||||
from app.agents.state_schema import MemoirStateSchema
|
||||
from app.core.agent_logging import agent_span, agent_summary_enabled, log_agent_detail
|
||||
from app.core.logging import get_logger
|
||||
from app.features.conversation.models import Segment
|
||||
|
||||
@@ -58,32 +60,59 @@ class MemoirOrchestrator:
|
||||
category_to_segments: Dict[str, List[Segment]] = {}
|
||||
|
||||
for segment in segments:
|
||||
text = segment.transcript_text or ""
|
||||
text = segment.user_input_text or ""
|
||||
seg_t0 = time.perf_counter()
|
||||
initial_stage = detect_stage_from_keywords(
|
||||
text, state.current_stage or "childhood"
|
||||
)
|
||||
stage_slots_raw = state.slots.get(initial_stage, {}) or {}
|
||||
|
||||
result: ExtractionResult = self.extraction_agent.extract(
|
||||
user_message=text,
|
||||
current_stage=state.current_stage or "childhood",
|
||||
stage_slots=stage_slots_raw,
|
||||
llm=llm,
|
||||
)
|
||||
with agent_span(
|
||||
logger,
|
||||
"MemoirOrchestrator.ExtractionAgent.extract",
|
||||
segment_id=segment.id,
|
||||
):
|
||||
result: ExtractionResult = self.extraction_agent.extract(
|
||||
user_message=text,
|
||||
current_stage=state.current_stage or "childhood",
|
||||
stage_slots=stage_slots_raw,
|
||||
llm=llm,
|
||||
)
|
||||
detected_stage = result.detected_stage
|
||||
for slot_name, snippet in result.slots.items():
|
||||
state = update_slot(detected_stage, slot_name, snippet, [segment.id])
|
||||
|
||||
chapter_category = self.classification_agent.classify(
|
||||
text=text,
|
||||
fallback_stage=detected_stage,
|
||||
llm=llm,
|
||||
with agent_span(
|
||||
logger,
|
||||
"MemoirOrchestrator.ClassificationAgent.classify",
|
||||
segment_id=segment.id,
|
||||
):
|
||||
chapter_category = self.classification_agent.classify(
|
||||
text=text,
|
||||
fallback_stage=detected_stage,
|
||||
llm=llm,
|
||||
)
|
||||
if agent_summary_enabled():
|
||||
logger.info(
|
||||
"MemoirOrchestrator.segment segment_id={} text_len={} "
|
||||
"detected_stage={} category={} segment_total_ms={:.2f}",
|
||||
segment.id,
|
||||
len(text),
|
||||
detected_stage,
|
||||
chapter_category,
|
||||
(time.perf_counter() - seg_t0) * 1000,
|
||||
)
|
||||
log_agent_detail(
|
||||
logger,
|
||||
"MemoirOrchestrator.segment_done segment_id={} slots={}",
|
||||
segment.id,
|
||||
list((result.slots or {}).keys()),
|
||||
)
|
||||
if chapter_category is None:
|
||||
logger.debug(
|
||||
"段落无回忆录价值,跳过: segment_id=%s transcript=%s",
|
||||
"段落无回忆录价值,跳过: segment_id={} transcript={}",
|
||||
segment.id,
|
||||
getattr(segment, "transcript_text", None) or "",
|
||||
getattr(segment, "user_input_text", None) or "",
|
||||
)
|
||||
continue
|
||||
category_to_segments.setdefault(chapter_category, []).append(segment)
|
||||
@@ -138,7 +167,7 @@ class MemoirOrchestrator:
|
||||
for chapter_category, category_segments in category_to_segments.items():
|
||||
if not acquire_lock(chapter_category):
|
||||
logger.warning(
|
||||
"章节锁竞争: category=%s, 延迟重试",
|
||||
"章节锁竞争: category={}, 延迟重试",
|
||||
chapter_category,
|
||||
)
|
||||
raise_retry()
|
||||
|
||||
@@ -81,8 +81,8 @@ def inject_image_placeholder_template(content: str) -> str:
|
||||
return content
|
||||
|
||||
|
||||
def get_system_prompt() -> str:
|
||||
"""获取整理 Agent 的系统提示词"""
|
||||
def get_memoir_editor_system_prompt() -> str:
|
||||
"""传记整理 Agent 的系统提示词(口语转书面、章节归类;与访谈对话用的 system prompt 不同)。"""
|
||||
return """你是一位专业的传记作家和文字编辑,擅长将口语化的对话内容整理成优雅的书面语回忆录章节。
|
||||
|
||||
你的任务:
|
||||
@@ -131,7 +131,7 @@ def get_system_prompt() -> str:
|
||||
|
||||
|
||||
def get_memoir_fidelity_system_prompt() -> str:
|
||||
"""叙事/标题生成专用:准确性优先,禁止编造事实(与 get_system_prompt 分离)。"""
|
||||
"""叙事/标题生成专用:准确性优先,禁止编造事实(与 get_memoir_editor_system_prompt 分离)。"""
|
||||
return """你是回忆录编辑助手,任务是把用户口述整理为第一人称书面叙述。
|
||||
|
||||
## 事实边界(必须遵守,优先于文采)
|
||||
@@ -139,30 +139,33 @@ def get_memoir_fidelity_system_prompt() -> str:
|
||||
2. **禁止编造**:不得新增用户未提及的具体人物姓名、对话原文、地点、时间、事件经过、因果、数字;不得推断性心理描写或「典型年代场景」填充。
|
||||
3. **禁止为凑字数扩写**:材料短则输出短;段落数量与长度随材料而定。
|
||||
4. 允许:去除口语赘词与寒暄、调整语序、合并重复指代、把口语改为书面语;**不得**用虚构细节「让文章更好看」。
|
||||
5. **叙述风格平实**:少用抒情、比喻与文学铺陈;像清楚记事,不要写成散文。
|
||||
|
||||
## 用户档案与阶段信息
|
||||
- 「用户基本信息」「时间参考」仅可使用其中**已写明**的条目;不得把档案中的出生地等写进正文,除非用户在本段口述里已提及或明确关联。"""
|
||||
|
||||
|
||||
def get_narrative_editor_system_prompt() -> str:
|
||||
"""叙事改写:准确性系统提示 + 可执行文体约束(不用 get_system_prompt 中的「过渡句/生动细节」泛化指令)。"""
|
||||
"""叙事改写:准确性系统提示 + 可执行文体约束(不用 get_memoir_editor_system_prompt 中的「过渡句/生动细节」泛化指令)。"""
|
||||
return f"""{get_memoir_fidelity_system_prompt()}
|
||||
|
||||
## 文体(在严守事实的前提下)
|
||||
- 使用第一人称、书面语;不要直接引用对话原话。
|
||||
- 使用第一人称、**平实书面语**(少修辞、少感叹);不要直接引用对话原话。
|
||||
- 不使用 Markdown 标题(#、##)、不使用表格。
|
||||
- 如有「衔接上下文」,仅保持语气与时间线连贯,不重复已有段落全文。"""
|
||||
|
||||
|
||||
def _short_classification_edit_prefix() -> str:
|
||||
"""章节分类专用短系统前缀(不重复整段 get_memoir_editor_system_prompt)。"""
|
||||
return """你是回忆录编辑。先忽略语气词与寒暄,只根据**与人生经历有关的实质内容**判断归类。
|
||||
保留:事件、人物关系、地点时间、情感与信念。过滤:纯寒暄、与 AI 的交互、无关闲聊。"""
|
||||
|
||||
|
||||
def get_chapter_classification_prompt(segments_text: str) -> str:
|
||||
"""获取章节分类的提示词。
|
||||
"""获取章节分类的提示词(短系统段 + 规则;供纯文本输出路径或兼容)。"""
|
||||
return f"""{_short_classification_edit_prefix()}
|
||||
|
||||
返回 none 的语义与 Story 路由(get_story_route_prompt / get_story_batch_plan_prompt)中
|
||||
「可独立讲述的一段人生经历」对齐:none 表示本段不足以单独成篇进入回忆录 Story 流水线。
|
||||
"""
|
||||
return f"""{get_system_prompt()}
|
||||
|
||||
请分析以下对话内容,**忽略其中的语气词、寒暄和无关对话**,判断应归类到哪个章节类别,或是否不足以写入回忆录正文。
|
||||
请分析以下对话内容,判断应归类到哪个章节类别,或是否不足以写入回忆录正文。
|
||||
|
||||
## 章节类别
|
||||
- childhood: 童年与成长背景
|
||||
@@ -192,33 +195,21 @@ def get_chapter_classification_prompt(segments_text: str) -> str:
|
||||
若内容不足以独立成篇、仅为零散信息,返回 none。"""
|
||||
|
||||
|
||||
def get_text_rewrite_prompt(
|
||||
segments_text: str, chapter_category: str, existing_content: str = ""
|
||||
) -> str:
|
||||
"""获取文本改写的提示词"""
|
||||
chapter_name = CHAPTER_CATEGORIES.get(chapter_category, chapter_category)
|
||||
existing_section = (
|
||||
f"\n\n已有章节内容:\n{existing_content}" if existing_content else ""
|
||||
)
|
||||
return f"""{get_system_prompt()}
|
||||
def get_chapter_classification_json_prompt(segments_text: str) -> str:
|
||||
"""章节分类:JSON 输出(与 invoke_json_object 配合)。"""
|
||||
return f"""{_short_classification_edit_prefix()}
|
||||
|
||||
请将以下口语化的对话内容改写为书面语,归类到"{chapter_name}"章节。
|
||||
## 章节 key(英文)
|
||||
childhood, education, career_early, career_achievement, career_challenge, family, beliefs, summary;不足以成篇则 **none**。
|
||||
|
||||
规则与「何时必须返回 none」同 `get_chapter_classification_prompt`(档案点、无叙事骨架 → none)。
|
||||
|
||||
对话内容:
|
||||
{segments_text}
|
||||
{existing_section}
|
||||
|
||||
请按照以下格式返回 JSON:
|
||||
{{
|
||||
"title": "章节标题",
|
||||
"content": "改写后的书面语内容",
|
||||
"summary": "章节摘要(50字以内)"
|
||||
}}
|
||||
|
||||
要求:
|
||||
1. 标题要简洁有力,能概括章节主题
|
||||
2. 内容要流畅自然,保持原意和情感
|
||||
3. 如果已有章节内容,请将新内容与已有内容自然融合"""
|
||||
**JSON 输出**:`response_format=json_object`,只输出:
|
||||
{{"category": "childhood|education|career_early|career_achievement|career_challenge|family|beliefs|summary|none"}}
|
||||
不要其它文字。"""
|
||||
|
||||
|
||||
def get_state_extraction_prompt(
|
||||
@@ -234,9 +225,11 @@ def get_state_extraction_prompt(
|
||||
"belief": ["value", "regret", "pride", "lesson"],
|
||||
}
|
||||
|
||||
return f"""{get_system_prompt()}
|
||||
return f"""{get_memoir_fidelity_system_prompt()}
|
||||
|
||||
你需要从用户话语中**先提炼与人生经历相关的核心内容**,然后抽取结构化信息,并判断用户实际在谈论哪个人生阶段。
|
||||
你需要从用户话语中**先提炼与人生经历相关的核心内容**,然后抽取结构化信息,并判断用户实际在谈论哪个人生阶段(slots 仅填口述中确有依据的片段)。
|
||||
|
||||
**JSON 输出**:接口已启用 `response_format=json_object`,你必须只输出一个合法 JSON 对象,不要 markdown 代码块或其它文字。
|
||||
|
||||
系统当前跟踪的阶段:{current_stage}
|
||||
该阶段可填 slots:{slot_keys}
|
||||
@@ -313,13 +306,36 @@ def get_creative_title_prompt(
|
||||
|
||||
要求:
|
||||
1. 格式:「时间标注 · 标题正文」(时间标注可用年龄、年代或阶段,须与上列信息一致;勿编造未出现的年份)。
|
||||
2. 标题正文 **12–18 字**,必须概括 **用户口述或 slots 中已出现的主题/事实**;**禁止**使用用户未提及的纯文学意象(如未提巷子/蝉鸣则不得写)。
|
||||
3. 可略带文采,但不得引入口述中不存在的人、事、地、物。
|
||||
2. 标题正文 **12–18 字**,必须概括 **用户口述或 slots 中已出现的主题/事实**;**禁止**文学意象与比喻(如未提巷子/蝉鸣则不得写)。
|
||||
3. **平实**概括,不得引入口述中不存在的人、事、地、物。
|
||||
|
||||
只输出标题这一行文字,不要加引号或书名号。
|
||||
"""
|
||||
|
||||
|
||||
def get_creative_title_json_prompt(
    stage: str,
    emotion: str,
    slots: dict,
    user_profile: str = "",
    birth_year: Optional[int] = None,
) -> str:
    """Build the story-title prompt that asks for a ``{"title": "..."}`` JSON reply.

    The text of ``get_creative_title_prompt`` (same arguments) is reused with
    its trailing whitespace removed, and a JSON-output instruction is appended.
    Intended to be used together with ``invoke_json_object``.
    """
    plain_prompt = get_creative_title_prompt(
        stage=stage,
        emotion=emotion,
        slots=slots,
        user_profile=user_profile,
        birth_year=birth_year,
    )
    json_instruction = "".join(
        (
            "\n\n**JSON 输出**:`response_format=json_object`,只输出:",
            '\n{"title":"完整标题一行(含时间标注 · 正文格式)"}\n',
            "不要其它文字。",
        )
    )
    return plain_prompt.rstrip() + json_instruction
|
||||
|
||||
|
||||
def get_narrative_prompt(
|
||||
stage: str,
|
||||
slots: dict,
|
||||
@@ -399,6 +415,7 @@ def get_narrative_json_prompt(
|
||||
return f"""{get_narrative_editor_system_prompt()}
|
||||
|
||||
请将「本段用户口述」改写为第一人称书面叙述,并输出 **纯 JSON**,不要包含任何其他文字或 markdown 代码块。
|
||||
**JSON 输出**:接口已启用 `response_format=json_object`(与 DeepSeek JSON 模式一致),只输出一个合法 JSON 对象。
|
||||
|
||||
阶段:{stage}
|
||||
可用信息(slots):{slots}{profile_section}{time_section}
|
||||
@@ -411,7 +428,7 @@ def get_narrative_json_prompt(
|
||||
1. **只展开「本段用户口述」**;若有参考摘录区,不得把摘录中的具体事实写成本轮亲历经历(见系统说明)。
|
||||
2. 过滤语气词、寒暄、与 AI 的交互;不重复已有故事全文;本批只写同一主题/事件链。
|
||||
3. 段落数量与每段长度**随材料而定**,禁止为凑字数编造。
|
||||
4. 使用第一人称;不要直接引用原话;不要用 `#`、`##`、表格。
|
||||
4. 使用第一人称、**平实书面语**,少修辞;不要直接引用原话;不要用 `#`、`##`、表格。
|
||||
|
||||
## 输出格式(严格 JSON)
|
||||
{{
|
||||
@@ -427,6 +444,80 @@ def get_narrative_json_prompt(
|
||||
"""
|
||||
|
||||
|
||||
# When merging into a whole story, avoid an overlong context: keep the head
# and the tail and omit the middle (character-level, not token-level).
NARRATIVE_MERGE_EXISTING_MAX_CHARS = 14000
NARRATIVE_MERGE_HEAD_CHARS = 7000
NARRATIVE_MERGE_TAIL_CHARS = 7000


def clip_existing_story_body_for_merge(
    existing_markdown: str,
    *,
    max_chars: int = NARRATIVE_MERGE_EXISTING_MAX_CHARS,
    head_chars: int = NARRATIVE_MERGE_HEAD_CHARS,
    tail_chars: int = NARRATIVE_MERGE_TAIL_CHARS,
) -> str:
    """Clip a very long existing story body to head + tail for the merge prompt.

    Args:
        existing_markdown: The existing story text; ``None``/blank yields "".
        max_chars: Texts no longer than this (after stripping) are returned
            unchanged.
        head_chars: Number of leading characters kept when clipping.
        tail_chars: Number of trailing characters kept when clipping.

    Returns:
        The stripped text when it fits, otherwise the head, an omission
        marker, the tail, and a trailing note telling the model that the text
        is an excerpt. With the default limits the output is identical to the
        previous hard-coded behaviour.
    """
    s = (existing_markdown or "").strip()
    if not s:
        return ""
    if len(s) <= max_chars:
        return s

    head_len = min(max(head_chars, 0), len(s))
    # Clamp the tail so it can never overlap the head and duplicate text.
    tail_len = min(max(tail_chars, 0), len(s) - head_len)

    head = s[:head_len]
    # Index from the front: s[-0:] would return the whole string, not "".
    tail = s[len(s) - tail_len:]
    return (
        f"{head}\n\n【…中间省略…】\n\n"
        f"{tail}\n\n(上文为已有故事正文节选,合并时须保留其中全部事实,不得因省略而删事实。)"
    )
|
||||
|
||||
|
||||
def get_narrative_merge_json_prompt(
|
||||
stage: str,
|
||||
slots: dict,
|
||||
new_content: str,
|
||||
existing_content: str,
|
||||
user_profile: str = "",
|
||||
birth_year: Optional[int] = None,
|
||||
) -> str:
|
||||
"""
|
||||
已有故事追加:将「已有全文(或节选)」与「本段口述」合并为**一篇**第一人称叙述,
|
||||
按事件发生顺序组织段落,输出覆盖全篇的 JSON paragraphs。
|
||||
"""
|
||||
clipped = clip_existing_story_body_for_merge(existing_content)
|
||||
existing_section = (
|
||||
f"\n\n【已有故事正文(须全部保留事实,仅调整顺序与衔接;不得编造)】:\n{clipped}"
|
||||
if clipped
|
||||
else ""
|
||||
)
|
||||
profile_section = f"\n\n用户基本信息:\n{user_profile}" if user_profile else ""
|
||||
age_hint = _build_age_hint(stage, birth_year)
|
||||
time_section = f"\n时间参考:{age_hint}" if age_hint else ""
|
||||
|
||||
return f"""{get_narrative_editor_system_prompt()}
|
||||
|
||||
你正在**扩写并重组**一则已有回忆录故事:必须把「已有故事」中的事实全部保留在输出中(可合并重复表述、调整语序),并融入「本段用户口述」中的新事实;按**事件发生的时间顺序**排列段落(早→晚);禁止丢弃未矛盾的旧内容。
|
||||
|
||||
**JSON 输出**:接口已启用 `response_format=json_object`,只输出一个合法 JSON 对象,不要 markdown 代码块。
|
||||
|
||||
阶段:{stage}
|
||||
可用信息(slots):{slots}{profile_section}{time_section}
|
||||
|
||||
【本段用户口述与参考(含证据摘录时遵守系统事实边界)】:
|
||||
{new_content}
|
||||
{existing_section}
|
||||
|
||||
## 要求
|
||||
1. 输出为**完整故事正文**(不是仅写本段):`paragraphs` 须包含重组后的**全文**。
|
||||
2. **禁止编造**:不得新增用户未在「已有」或「本段」中出现的人名、地点、时间、对话、数字。
|
||||
3. 若本段与旧文完全重复或无新信息,可仅输出与旧文等价重组后的正文(不得无故缩短到明显少于旧文)。
|
||||
4. 使用第一人称、平实书面语;不要用 `#`、`##`、表格。
|
||||
|
||||
## 输出格式(严格 JSON)
|
||||
{{
|
||||
"paragraphs": [
|
||||
{{"content": "段落正文"}},
|
||||
...
|
||||
]
|
||||
}}
|
||||
|
||||
若无任何可保留内容:{{"paragraphs": []}}
|
||||
"""
|
||||
|
||||
|
||||
def get_story_route_prompt(
|
||||
*,
|
||||
chapter_category: str,
|
||||
@@ -443,6 +534,8 @@ def get_story_route_prompt(
|
||||
- append_story:内容明显延续、补充某一已有故事的主题与时间线,且能对应到具体 candidate id
|
||||
- new_story:新话题、新人生阶段片段,或与所有候选故事都不够贴合
|
||||
|
||||
**JSON 输出**:接口已启用 `response_format=json_object`,只输出下面 schema 的一个合法 JSON 对象,不要 markdown。
|
||||
|
||||
「故事」在此指:**可独立讲述的一段人生经历**——单一主题或同一事件链;不要假设本批里包含多个互不相关的故事(多段由系统其它步骤处理)。
|
||||
|
||||
**new_story_title 与 reason 只能依据口述中已有信息概括,不得编造口述未出现的人、事、地、物。**
|
||||
@@ -481,6 +574,8 @@ def get_story_batch_plan_prompt(
|
||||
"""同一章节类别下多 segment:划分为若干写入单元(每单元 new 或 append)。输出严格 JSON。"""
|
||||
return f"""你是回忆录编辑助手。下面同一章节类别下有一批**按时间顺序**的用户口述片段(每段有 id 与文本)。
|
||||
|
||||
**JSON 输出**:接口已启用 `response_format=json_object`,只输出下面 schema 的一个合法 JSON 对象,不要 markdown。
|
||||
|
||||
## 「故事」定义(必须遵守)
|
||||
一段「故事」= **可独立讲述的一段人生经历**:单一主题或同一事件链,能单独成篇。若话题切换、时间线跳到另一件事、人物/主线明显变化,应作为**新的故事**(new_story),而不是塞进同一段 append。
|
||||
|
||||
@@ -539,9 +634,14 @@ def format_narrative_user_content(oral_text: str, evidence_text: str = "") -> st
|
||||
|
||||
|
||||
def format_evidence_chunks_for_prompt(evidence: dict) -> str:
|
||||
"""将 retrieve_evidence 结果格式化为简短文本,供叙事 prompt 使用。"""
|
||||
"""将 retrieve_evidence / retrieve_evidence_sync 结果格式化为简短文本,供叙事 prompt 使用。
|
||||
|
||||
仅包含实际返回的 chunks、confirmed facts、timeline;不包含 relevant_summaries / relevant_stories
|
||||
(当前管线多为空列表,避免模型误以为有摘要或故事全文可用)。
|
||||
"""
|
||||
chunks = evidence.get("relevant_chunks") or []
|
||||
facts = evidence.get("relevant_facts") or []
|
||||
timeline = evidence.get("timeline_hints") or []
|
||||
parts: list[str] = []
|
||||
for c in chunks[:10]:
|
||||
content = (
|
||||
@@ -558,4 +658,18 @@ def format_evidence_chunks_for_prompt(evidence: dict) -> str:
|
||||
parts.append(f"{subj} {pred} {obj}")
|
||||
else:
|
||||
parts.append(f"{getattr(f, 'subject', '')} {getattr(f, 'predicate', '')}")
|
||||
for t in timeline[:5]:
|
||||
if isinstance(t, dict):
|
||||
title = (t.get("title") or "").strip()
|
||||
year = t.get("event_year")
|
||||
desc = (t.get("description") or "").strip()
|
||||
line = " ".join(
|
||||
x for x in (str(year) if year is not None else "", title, desc) if x
|
||||
)
|
||||
if line:
|
||||
parts.append(line)
|
||||
return "\n\n".join(parts) if parts else ""
|
||||
|
||||
|
||||
# 向后兼容:旧代码中的 get_system_prompt 指「回忆录编辑」系统提示,勿与访谈模块的 get_system_prompt 混淆
|
||||
get_system_prompt = get_memoir_editor_system_prompt
|
||||
|
||||
@@ -13,7 +13,7 @@ from app.agents.memoir.prompts import (
|
||||
get_story_batch_plan_prompt,
|
||||
get_story_route_prompt,
|
||||
)
|
||||
from app.core.langchain_llm import bind_json_object_mode
|
||||
from app.core.langchain_llm import invoke_json_object
|
||||
from app.core.logging import get_logger
|
||||
from app.features.story.models import Story
|
||||
|
||||
@@ -90,7 +90,7 @@ def _build_candidate_json(stories: list[Story], *, preview_chars: int = 220) ->
|
||||
def _build_segments_json_for_plan(
|
||||
segments: list[tuple[str, str]], *, text_preview_chars: int = 4000
|
||||
) -> str:
|
||||
"""segments: (id, transcript_text) 按口述顺序。"""
|
||||
"""segments: (id, user_input_text) 按口述顺序。"""
|
||||
rows: list[dict[str, str]] = []
|
||||
for sid, text in segments:
|
||||
t = (text or "").strip()
|
||||
@@ -157,13 +157,16 @@ class StoryRouteAgent:
|
||||
candidate_stories_json=payload,
|
||||
)
|
||||
try:
|
||||
json_llm = bind_json_object_mode(llm, max_tokens=1024)
|
||||
response = json_llm.invoke(prompt)
|
||||
raw = (response.content or "").strip()
|
||||
raw = invoke_json_object(
|
||||
llm,
|
||||
prompt,
|
||||
max_tokens=1024,
|
||||
agent="StoryRouteAgent.decide",
|
||||
).strip()
|
||||
data = json.loads(raw)
|
||||
decision = StoryRouteDecision.model_validate(data)
|
||||
except Exception as e:
|
||||
logger.warning("StoryRouteAgent 解析失败: %s", e)
|
||||
logger.warning("StoryRouteAgent 解析失败: {}", e)
|
||||
return StoryRouteDecision(
|
||||
decision="new_story",
|
||||
new_story_title=None,
|
||||
@@ -174,7 +177,7 @@ class StoryRouteAgent:
|
||||
tid = decision.target_story_id
|
||||
if not tid or tid not in valid_story_ids:
|
||||
logger.warning(
|
||||
"StoryRoute append 无效 target_story_id=%s,回退 new_story",
|
||||
"StoryRoute append 无效 target_story_id={},回退 new_story",
|
||||
tid,
|
||||
)
|
||||
return StoryRouteDecision(
|
||||
@@ -212,18 +215,21 @@ class StoryRouteAgent:
|
||||
candidate_stories_json=payload,
|
||||
)
|
||||
try:
|
||||
json_llm = bind_json_object_mode(llm, max_tokens=4096)
|
||||
response = json_llm.invoke(prompt)
|
||||
raw = (response.content or "").strip()
|
||||
raw = invoke_json_object(
|
||||
llm,
|
||||
prompt,
|
||||
max_tokens=4096,
|
||||
agent="StoryRouteAgent.plan_batch",
|
||||
).strip()
|
||||
data = json.loads(raw)
|
||||
plan = StoryBatchPlan.model_validate(data)
|
||||
except Exception as e:
|
||||
logger.warning("StoryRouteAgent.plan_batch 解析失败: %s", e)
|
||||
logger.warning("StoryRouteAgent.plan_batch 解析失败: {}", e)
|
||||
return None
|
||||
|
||||
ordered = [s[0] for s in segments]
|
||||
ok, err = validate_story_batch_plan(ordered, plan, valid_story_ids)
|
||||
if not ok:
|
||||
logger.warning("StoryRouteAgent.plan_batch 校验失败: %s", err)
|
||||
logger.warning("StoryRouteAgent.plan_batch 校验失败: {}", err)
|
||||
return None
|
||||
return plan
|
||||
|
||||
Reference in New Issue
Block a user