feat(memoir): 回忆录分段两阶段管线(Phase1 分类 / Phase2 叙事)与配置、测试

This commit is contained in:
Kevin
2026-04-02 16:37:14 +08:00
parent 3ae39838c0
commit 6b930808a3
27 changed files with 1550 additions and 430 deletions

View File

@@ -14,8 +14,11 @@ from dataclasses import dataclass
from typing import Any
from app.agents.memoir.prompts import get_chapter_classification_json_prompt
from app.agents.stage_constants import CHAPTER_CATEGORIES
from app.agents.stage_constants import STAGE_TO_DEFAULT_CATEGORY
from app.agents.stage_constants import (
CHAPTER_CATEGORIES,
STAGE_KEYWORD_WEIGHTS,
STAGE_TO_DEFAULT_CATEGORY,
)
from app.core.json_utils import extract_json_payload
from app.core.langchain_llm import invoke_json_object
from app.core.logging import get_logger
@@ -40,21 +43,12 @@ _SHORT_HUKOU_STYLE = re.compile(
re.UNICODE,
)
# 5-stage 关键词(用于 LLM 失败时的兜底);注意勿含易与「仅年份句」共现的泛词,以免误推类别
STAGE_KEYWORDS = {
"childhood": ["童年", "小时候", "家乡", "小镇"],
"education": ["上学", "学校", "老师", "同学", "教育", "大学"],
"career": ["工作", "职业", "事业", "公司", "同事", "创业"],
"family": ["伴侣", "孩子", "家庭", "家人", "结婚", "父母"],
"belief": ["信念", "价值观", "座右铭", "坚持", "原则"],
}
def _detect_stage(text: str, fallback_stage: str) -> str:
"""根据关键词检测消息所属的 5-stage 阶段"""
"""根据关键词检测消息所属的 5-stage 阶段(与 stage_constants.STAGE_KEYWORD_WEIGHTS 同源;匹配方式为子串,非加权)。"""
message = (text or "").lower()
for stage, keywords in STAGE_KEYWORDS.items():
if any(word in message for word in keywords):
for stage, pairs in STAGE_KEYWORD_WEIGHTS.items():
if any(word in message for word, _w in pairs):
return stage
return fallback_stage