feat(memoir): 回忆录分段两阶段管线(Phase1 分类 / Phase2 叙事)与配置、测试
This commit is contained in:
@@ -14,8 +14,11 @@ from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
from app.agents.memoir.prompts import get_chapter_classification_json_prompt
|
||||
from app.agents.stage_constants import CHAPTER_CATEGORIES
|
||||
from app.agents.stage_constants import STAGE_TO_DEFAULT_CATEGORY
|
||||
from app.agents.stage_constants import (
|
||||
CHAPTER_CATEGORIES,
|
||||
STAGE_KEYWORD_WEIGHTS,
|
||||
STAGE_TO_DEFAULT_CATEGORY,
|
||||
)
|
||||
from app.core.json_utils import extract_json_payload
|
||||
from app.core.langchain_llm import invoke_json_object
|
||||
from app.core.logging import get_logger
|
||||
@@ -40,21 +43,12 @@ _SHORT_HUKOU_STYLE = re.compile(
|
||||
re.UNICODE,
|
||||
)
|
||||
|
||||
# Keywords for the 5 stages, used as a fallback when the LLM fails.
# Avoid generic words that tend to co-occur with "year-only" sentences,
# otherwise the category may be inferred wrongly.
# NOTE(review): _detect_stage appears to have switched to
# stage_constants.STAGE_KEYWORD_WEIGHTS; confirm whether this table is still
# referenced anywhere before relying on it.
STAGE_KEYWORDS = {
    "childhood": ["童年", "小时候", "家乡", "小镇"],
    "education": ["上学", "学校", "老师", "同学", "教育", "大学"],
    "career": ["工作", "职业", "事业", "公司", "同事", "创业"],
    "family": ["伴侣", "孩子", "家庭", "家人", "结婚", "父母"],
    "belief": ["信念", "价值观", "座右铭", "坚持", "原则"],
}
|
||||
|
||||
|
||||
def _detect_stage(text: str, fallback_stage: str) -> str:
    """Detect which of the 5 stages a message belongs to by keyword lookup.

    Keywords are taken from ``stage_constants.STAGE_KEYWORD_WEIGHTS``. Matching
    is plain substring containment against the lower-cased text; the weight
    paired with each keyword is ignored, so the first stage (in the mapping's
    iteration order) that has any keyword present in the text is returned.

    Args:
        text: Message text. A falsy value (``None`` or ``""``) is treated as
            an empty string and therefore matches nothing.
        fallback_stage: Stage returned when no keyword is found.

    Returns:
        The key of the first matching stage, otherwise ``fallback_stage``.
    """
    message = (text or "").lower()
    for stage, pairs in STAGE_KEYWORD_WEIGHTS.items():
        # Each entry is unpacked as (keyword, weight); only the keyword is used.
        if any(word in message for word, _w in pairs):
            return stage
    return fallback_stage
|
||||
|
||||
|
||||
Reference in New Issue
Block a user