feat: 修正章节排序和分类逻辑

- 新增 SQL 脚本以修正章节排序索引,确保与 8 个分类体系对齐。
- 更新 API 章节获取逻辑,始终返回所有 8 个预定义类别,未填充内容的类别使用占位符。
- 引入章节分类功能,支持从 5-stage 关键词映射到 8 个章节类别,提升内容分类准确性。
- 更新 Android 客户端以适应新的章节定义和占位逻辑,确保用户界面一致性。
This commit is contained in:
penghanyuan
2026-03-01 10:50:58 +01:00
parent c1e2fb31a0
commit 5125ee1564
4 changed files with 131 additions and 40 deletions

View File

@@ -21,7 +21,9 @@ from agents.prompts.memory_prompts import (
get_creative_title_prompt,
get_narrative_prompt,
get_state_extraction_prompt,
get_chapter_classification_prompt,
STAGE_TO_ORDER,
CHAPTER_CATEGORIES,
)
from agents.prompts.profile_prompts import format_user_profile_context
@@ -77,9 +79,18 @@ STAGE_KEYWORDS = {
"belief": ["信念", "价值观", "座右铭", "坚持", "原则"],
}
# 5-stage → 默认 8-category 映射LLM 分类失败时的兜底)
_STAGE_TO_DEFAULT_CATEGORY = {
"childhood": "childhood",
"education": "education",
"career": "career_early",
"family": "family",
"belief": "beliefs",
}
def _detect_stage(user_message: str, fallback_stage: str) -> str:
"""检测消息所属阶段"""
"""检测消息所属的 5-stage 阶段(用于状态跟踪)"""
message = user_message.lower()
for stage, keywords in STAGE_KEYWORDS.items():
if any(word in message for word in keywords):
@@ -87,6 +98,25 @@ def _detect_stage(user_message: str, fallback_stage: str) -> str:
return fallback_stage
def _classify_chapter_category(text: str, fallback_stage: str, llm=None) -> str:
    """Classify a piece of content into one of the chapter categories.

    The LLM is consulted first when one is supplied. If it is absent, raises,
    or answers with something that is not in ``CHAPTER_CATEGORIES``, the
    result falls back to the 5-stage keyword detection mapped through
    ``_STAGE_TO_DEFAULT_CATEGORY``.

    Args:
        text: Content to classify.
        fallback_stage: 5-stage name handed to ``_detect_stage`` as its
            fallback, and used again as a second lookup key if the detected
            stage has no default category.
        llm: Optional model object exposing ``invoke(prompt)`` whose result
            has a ``content`` attribute. Any falsy value skips the LLM step.

    Returns:
        The accepted LLM category, otherwise the default category for the
        detected stage, then for ``fallback_stage``, and finally
        ``"childhood"``.
    """
    if llm:
        try:
            prompt = get_chapter_classification_prompt(text)
            response = llm.invoke(prompt)
            answer = response.content.strip().lower()
            # Models often wrap a one-word answer in quotes/backticks or add
            # trailing punctuation. Try the raw answer first (original
            # behavior), then the answer with that surrounding noise removed.
            noise = " \t\r\n\"'`.,;:!。,;:!“”‘’「」"
            for candidate in (answer, answer.strip(noise)):
                if candidate in CHAPTER_CATEGORIES:
                    return candidate
            # Make the silent fallback visible so bad prompts can be diagnosed.
            logger.warning("LLM 章节分类返回未知类别: %r", answer)
        except Exception as e:
            # Deliberate best-effort: any LLM failure degrades to the
            # keyword-based mapping below instead of failing the caller.
            logger.warning(f"LLM 章节分类失败: {e}")

    stage = _detect_stage(text, fallback_stage)
    default_for_fallback = _STAGE_TO_DEFAULT_CATEGORY.get(fallback_stage, "childhood")
    return _STAGE_TO_DEFAULT_CATEGORY.get(stage, default_for_fallback)
def _coerce_state(model: MemoirState) -> MemoirStateSchema:
"""将数据库模型转换为 Schema"""
return MemoirStateSchema.model_validate(
@@ -196,14 +226,16 @@ def process_memoir_segments(self, user_id: str, segment_ids: List[str]):
occupation=user_obj.occupation,
)
# 按阶段分组处理
stage_to_segments: Dict[str, List[Segment]] = {}
# 分两步处理:
# 1) 5-stage 状态跟踪(slots)
# 2) 8-category 章节分类(chapter creation)
category_to_segments: Dict[str, List[Segment]] = {}
for segment in segments:
text = segment.transcript_text
detected_stage = _detect_stage(text, state.current_stage)
# 尝试使用 LLM 提取信息
# 提取 slots(5-stage 状态跟踪)
extracted_slots = {}
if llm:
try:
@@ -219,8 +251,7 @@ def process_memoir_segments(self, user_id: str, segment_ids: List[str]):
extracted_slots = parsed.get("slots", {}) or {}
except (json.JSONDecodeError, Exception) as e:
logger.warning(f"LLM 解析失败: {e}")
# 更新 slots
for slot_name, snippet in extracted_slots.items():
state = _update_slot_sync(
user_id=user_id,
@@ -230,11 +261,13 @@ def process_memoir_segments(self, user_id: str, segment_ids: List[str]):
segment_ids=[segment.id],
db=db,
)
stage_to_segments.setdefault(detected_stage, []).append(segment)
# 生成章节内容
for stage, stage_segments in stage_to_segments.items():
# 8-category 章节分类
chapter_category = _classify_chapter_category(text, detected_stage, llm)
category_to_segments.setdefault(chapter_category, []).append(segment)
# 按 8 分类生成章节内容
for stage, stage_segments in category_to_segments.items():
if not _acquire_chapter_lock(user_id, stage):
logger.warning(f"章节锁竞争: user={user_id}, stage={stage}, 延迟重试")
raise self.retry(countdown=10)