feat: 修正章节排序和分类逻辑

- 新增 SQL 脚本以修正章节排序索引，确保与 8 分类体系对齐。
- 更新 API 章节获取逻辑，始终返回全部 8 个预定义类别；尚无内容的类别以占位符返回。
- 引入章节分类功能：优先使用 LLM 将内容归入 8 个章节类别之一，分类失败时按 5-stage 关键词映射到默认类别，提升内容分类准确性。
- 更新 Android 客户端以适应新的章节定义和占位逻辑，确保用户界面一致性。
2026-03-01 10:50:58 +01:00
parent c1e2fb31a0
commit 5125ee1564
4 changed files with 131 additions and 40 deletions
--- a/api/tasks/memoir_tasks.py
+++ b/api/tasks/memoir_tasks.py
@@ -21,7 +21,9 @@ from agents.prompts.memory_prompts import (
    get_creative_title_prompt,
    get_narrative_prompt,
    get_state_extraction_prompt,
+    get_chapter_classification_prompt,
    STAGE_TO_ORDER,
+    CHAPTER_CATEGORIES,
 )
 from agents.prompts.profile_prompts import format_user_profile_context

@@ -77,9 +79,18 @@ STAGE_KEYWORDS = {
    "belief": ["信念", "价值观", "座右铭", "坚持", "原则"],
 }

+# 5-stage → 默认 8-category 映射（LLM 分类失败时的兜底）
+_STAGE_TO_DEFAULT_CATEGORY = {
+    "childhood": "childhood",
+    "education": "education",
+    "career": "career_early",
+    "family": "family",
+    "belief": "beliefs",
+}
+

 def _detect_stage(user_message: str, fallback_stage: str) -> str:
-    """检测消息所属阶段"""
+    """检测消息所属的 5-stage 阶段（用于状态跟踪）"""
    message = user_message.lower()
    for stage, keywords in STAGE_KEYWORDS.items():
        if any(word in message for word in keywords):
@@ -87,6 +98,25 @@ def _detect_stage(user_message: str, fallback_stage: str) -> str:
    return fallback_stage


+def _classify_chapter_category(text: str, fallback_stage: str, llm=None) -> str:
+    """
+    将内容分类到 8 个章节类别之一。
+    优先使用 LLM，失败则按 5-stage 关键词映射到默认类别。
+    """
+    if llm:
+        try:
+            prompt = get_chapter_classification_prompt(text)
+            response = llm.invoke(prompt)
+            category = response.content.strip().lower()
+            if category in CHAPTER_CATEGORIES:
+                return category
+        except Exception as e:
+            logger.warning(f"LLM 章节分类失败: {e}")
+
+    stage = _detect_stage(text, fallback_stage)
+    return _STAGE_TO_DEFAULT_CATEGORY.get(stage, _STAGE_TO_DEFAULT_CATEGORY.get(fallback_stage, "childhood"))
+
+
 def _coerce_state(model: MemoirState) -> MemoirStateSchema:
    """将数据库模型转换为 Schema"""
    return MemoirStateSchema.model_validate(
@@ -196,14 +226,16 @@ def process_memoir_segments(self, user_id: str, segment_ids: List[str]):
                    occupation=user_obj.occupation,
                )
            
-            # 按阶段分组处理
-            stage_to_segments: Dict[str, List[Segment]] = {}
-            
+            # 分两步处理：
+            # 1) 5-stage 状态跟踪（slots）
+            # 2) 8-category 章节分类（chapter creation）
+            category_to_segments: Dict[str, List[Segment]] = {}
+
            for segment in segments:
                text = segment.transcript_text
                detected_stage = _detect_stage(text, state.current_stage)
-                
-                # 尝试使用 LLM 提取信息
+
+                # 提取 slots（5-stage 状态跟踪）
                extracted_slots = {}
                if llm:
                    try:
@@ -219,8 +251,7 @@ def process_memoir_segments(self, user_id: str, segment_ids: List[str]):
                        extracted_slots = parsed.get("slots", {}) or {}
                    except (json.JSONDecodeError, Exception) as e:
                        logger.warning(f"LLM 解析失败: {e}")
-                
-                # 更新 slots
+
                for slot_name, snippet in extracted_slots.items():
                    state = _update_slot_sync(
                        user_id=user_id,
@@ -230,11 +261,13 @@ def process_memoir_segments(self, user_id: str, segment_ids: List[str]):
                        segment_ids=[segment.id],
                        db=db,
                    )
-                
-                stage_to_segments.setdefault(detected_stage, []).append(segment)
-            
-            # 生成章节内容
-            for stage, stage_segments in stage_to_segments.items():
+
+                # 8-category 章节分类
+                chapter_category = _classify_chapter_category(text, detected_stage, llm)
+                category_to_segments.setdefault(chapter_category, []).append(segment)
+
+            # 按 8 分类生成章节内容
+            for stage, stage_segments in category_to_segments.items():
                if not _acquire_chapter_lock(user_id, stage):
                    logger.warning(f"章节锁竞争: user={user_id}, stage={stage}, 延迟重试")
                    raise self.retry(countdown=10)