feat(memoir): 回忆录分段两阶段管线(Phase1 分类 / Phase2 叙事)与配置、测试

This commit is contained in:
Kevin
2026-04-02 16:37:14 +08:00
parent 3ae39838c0
commit 6b930808a3
27 changed files with 1550 additions and 430 deletions

View File

@@ -208,6 +208,9 @@ def _build_era_context(current_stage: str, user_profile_context: str) -> str:
"career": (18, 50),
"family": (20, 50),
"belief": (30, 60),
# chapter / 防御性 key,与 belief 同档年龄参照
"beliefs": (30, 60),
"summary": (30, 60),
}
age_range = stage_era_map.get(current_stage, (0, 30))

View File

@@ -12,6 +12,11 @@ from app.agents.chat.stage_prompts import (
get_chat_stage_detection_prompt,
life_stage_display_zh,
)
from app.agents.stage_constants import (
CHAT_STAGES,
STAGE_KEYWORD_WEIGHTS,
normalize_chat_stage,
)
from app.core.config import settings
from app.core.langchain_llm import ainvoke_json_object
from app.core.logging import get_logger
@@ -19,117 +24,26 @@ from app.core.json_utils import extract_json_payload
logger = get_logger(__name__)
# 关键词按阶段打分;同一词不重复出现在多阶段,避免「父母」独占童年。
_STAGE_KEYWORD_WEIGHTS: dict[str, list[tuple[str, int]]] = {
"childhood": [
("童年", 3),
("小时候", 3),
("幼年", 2),
("出生", 2),
("家乡", 2),
("老家", 2),
("小镇", 1),
("幼儿园", 2),
("玩伴", 1),
],
"education": [
("上学", 2),
("学校", 2),
("老师", 2),
("同学", 2),
("教育", 1),
("大学", 3),
("高中", 2),
("初中", 2),
("小学", 2),
("考试", 1),
("毕业", 2),
("读书", 1),
("高考", 2),
("课堂", 1),
("宿舍", 1),
],
"career": [
("工作", 3),
("职业", 2),
("事业", 2),
("公司", 2),
("同事", 2),
("创业", 2),
("升职", 1),
("跳槽", 1),
("老板", 1),
("行业", 1),
("项目", 1),
("加班", 1),
("薪水", 1),
("面试", 1),
("职场", 2),
("离职", 1),
],
"family": [
("伴侣", 2),
("孩子", 2),
("家庭", 2),
("家人", 2),
("结婚", 2),
("爱人", 1),
("老婆", 1),
("老公", 1),
("丈夫", 1),
("妻子", 1),
("儿子", 1),
("女儿", 1),
("婚礼", 1),
("恋爱", 1),
("父母", 2),
("爸妈", 2),
("父亲", 2),
("母亲", 2),
("爷爷", 1),
("奶奶", 1),
("外公", 1),
("外婆", 1),
],
"belief": [
("信念", 2),
("价值观", 2),
("座右铭", 2),
("坚持", 1),
("原则", 1),
("信仰", 1),
("意义", 1),
("感悟", 1),
("遗憾", 1),
("骄傲", 1),
],
}
def normalize_life_stage(raw: Optional[str], fallback: str) -> str:
if not raw or not isinstance(raw, str):
return fallback
s = raw.strip().lower()
if s in VALID_CHAT_LIFE_STAGES:
return s
return fallback
"""兼容旧名:统一走 normalize_chat_stage。"""
return normalize_chat_stage(raw, fallback)
def keyword_fallback_primary_stage(user_message: str) -> str:
"""多阶段打分,取最高分;平局stage_order 靠后的优先(更具体场景常后验)。"""
"""多阶段打分,取最高分;平局时按 reversed(tie_order) 的顺序依次匹配。注意:tie_order 本身是 CHAT_STAGES 的逆序,再次 reversed 后等价于 CHAT_STAGES 正序,与历史 tie_order 可能有行为差异(待确认是否符合预期)。"""
if not (user_message or "").strip():
return ""
text = user_message
scores: dict[str, int] = {k: 0 for k in _STAGE_KEYWORD_WEIGHTS}
for stage, pairs in _STAGE_KEYWORD_WEIGHTS.items():
scores: dict[str, int] = {k: 0 for k in STAGE_KEYWORD_WEIGHTS}
for stage, pairs in STAGE_KEYWORD_WEIGHTS.items():
for word, w in pairs:
if word in text:
scores[stage] += w
best = max(scores.values())
if best <= 0:
return ""
# 平局:education > career > family > belief > childhood,避免童年默认胜出
tie_order = ["childhood", "belief", "family", "career", "education"]
tie_order = list(reversed(CHAT_STAGES))
candidates = [s for s, v in scores.items() if v == best]
for s in reversed(tie_order):
if s in candidates:
@@ -145,14 +59,14 @@ async def detect_primary_life_stage(
"""
返回合法的人生阶段 key;失败时回退为 current_stage。
"""
fb = normalize_life_stage(current_stage, "childhood")
fb = normalize_chat_stage(current_stage, "childhood")
if not settings.chat_stage_detection_enabled:
k = keyword_fallback_primary_stage(user_message)
return normalize_life_stage(k, fb) if k else fb
return normalize_chat_stage(k, fb) if k else fb
if not llm:
k = keyword_fallback_primary_stage(user_message)
return normalize_life_stage(k, fb) if k else fb
return normalize_chat_stage(k, fb) if k else fb
try:
prompt = get_chat_stage_detection_prompt(user_message, fb)
@@ -164,16 +78,26 @@ async def detect_primary_life_stage(
)
if not raw.strip():
k = keyword_fallback_primary_stage(user_message)
return normalize_life_stage(k, fb) if k else fb
return normalize_chat_stage(k, fb) if k else fb
parsed = json.loads(extract_json_payload(raw))
detected = parsed.get("detected_stage", fb)
return normalize_life_stage(str(detected) if detected is not None else "", fb)
return normalize_chat_stage(str(detected) if detected is not None else "", fb)
except (json.JSONDecodeError, Exception) as e:
logger.warning("detect_primary_life_stage 解析失败,使用关键词回退: {}", e)
k = keyword_fallback_primary_stage(user_message)
return normalize_life_stage(k, fb) if k else fb
return normalize_chat_stage(k, fb) if k else fb
def life_stage_display_name(stage: str) -> str:
    """Return the Chinese display name of a life stage, for use in prompts.

    Thin wrapper that delegates to ``life_stage_display_zh``.
    """
    display_name = life_stage_display_zh(stage)
    return display_name
# Public names of this module. VALID_CHAT_LIFE_STAGES is re-exported for
# modules that still import it from stage_detection.
# NOTE(review): the import that binds VALID_CHAT_LIFE_STAGES is not visible in
# this excerpt — confirm it is still imported in this module, otherwise
# `from ... import *` will fail on the missing name.
__all__ = [
"VALID_CHAT_LIFE_STAGES",
"detect_primary_life_stage",
"keyword_fallback_primary_stage",
"life_stage_display_name",
"normalize_life_stage",
]

View File

@@ -2,9 +2,9 @@
访谈「人生阶段」判定专用短提示词(与回忆录五阶段 slots 一致)。
"""
from app.agents.stage_constants import CHAT_STAGES, STAGE_DISPLAY_ZH
from app.agents.stage_constants import CHAT_STAGES, STAGE_DISPLAY_ZH, VALID_CHAT_STAGES
VALID_CHAT_LIFE_STAGES = frozenset(CHAT_STAGES)
VALID_CHAT_LIFE_STAGES = VALID_CHAT_STAGES
def life_stage_display_zh(stage: str) -> str: