feat: 增强对话代理以检测用户阶段并更新章节排序

- 在 api/agents/conversation_agent.py 中添加 _detect_user_stage 方法，以通过关键词检测用户谈论的人生阶段。
- 在 api/agents/memory_agent.py 中更新章节排序逻辑，使用 STAGE_TO_ORDER 替代 CHAPTER_ORDER。
- 在 api/agents/state_schema.py 中添加方法以获取各阶段的填充情况。
- 在 api/agents/prompts/conversation_prompts.py 中更新对话提示，包含用户阶段检测和整体进度信息。
- 在 api/migrations/fix_chapter_order_index.sql 中添加 SQL 脚本以修复章节 order_index 的问题。
- 更新相关文档和提示以反映新功能。
2026-02-13 21:45:56 +01:00
parent 0ebeb05420
commit 7fe0b70d5c
9 changed files with 207 additions and 48 deletions
--- a/.github/workflows/docker-build-deploy.yml
+++ b/.github/workflows/docker-build-deploy.yml
@@ -203,6 +203,12 @@ jobs:
          ssh -p $SSH_PORT $SSH_USER@$SSH_HOST \
            "docker exec -i life-echo-postgres psql -U $DB_USER -d $DB_NAME" \
            < api/migrations/sync_schema_to_models.sql
+          
+          echo "修复章节 order_index..."
+          ssh -p $SSH_PORT $SSH_USER@$SSH_HOST \
+            "docker exec -i life-echo-postgres psql -U $DB_USER -d $DB_NAME" \
+            < api/migrations/fix_chapter_order_index.sql
+          
          echo "数据库迁移完成"

      - name: Verify deployment
--- a/api/agents/conversation_agent.py
+++ b/api/agents/conversation_agent.py
@@ -102,6 +102,26 @@ class ConversationAgent:
            logger.error(f"生成回应失败: {e}")
            return f"抱歉，生成回应时出现错误: {str(e)}"

+    def _detect_user_stage(self, user_message: str) -> str:
+        """
+        Detect which life stage the user is talking about via keyword (substring) matching.
+        Returns the first matching stage name, or an empty string when no keyword matches.
+        """
+        message = user_message.lower()
+
+        stage_keywords = {
+            "childhood": ["童年", "小时候", "出生", "家乡", "小镇", "爸妈", "父亲", "母亲", "爷爷", "奶奶", "外公", "外婆", "幼儿园"],
+            "education": ["上学", "学校", "老师", "同学", "教育", "大学", "高中", "初中", "小学", "考试", "毕业", "读书", "高考", "课堂"],
+            "career": ["工作", "职业", "事业", "公司", "同事", "创业", "升职", "跳槽", "老板", "行业", "项目", "加班", "薪水", "面试"],
+            "family": ["伴侣", "孩子", "家庭", "家人", "结婚", "爱人", "老婆", "老公", "丈夫", "妻子", "儿子", "女儿", "婚礼", "恋爱"],
+            "belief": ["信念", "价值观", "座右铭", "坚持", "原则", "信仰", "意义", "感悟", "遗憾", "骄傲"],
+        }
+
+        for stage, keywords in stage_keywords.items():  # first match wins; dict order sets stage priority
+            if any(word in message for word in keywords):
+                return stage
+        return ""
+
    async def generate_response_with_state(
        self,
        conversation_id: str,
@@ -130,6 +150,9 @@ class ConversationAgent:
                if value.snippet
            }

+            # 检测用户当前正在谈论的阶段
+            detected_user_stage = self._detect_user_stage(user_message)
+
            # 从 Redis 获取对话历史，用于计算对话轮数
            history_messages = await self._get_history_messages(conversation_id)
            conversation_turn = len(history_messages) // 2  # 每轮包括一个用户消息和一个AI回复
@@ -137,6 +160,9 @@ class ConversationAgent:
            # 计算同一话题的轮数（简单估算：基于已填充槽位的变化）
            # 如果槽位数量没有增加，说明还在同一话题深入
            same_topic_turns = self._estimate_same_topic_turns(history_messages, filled_slots)
+
+            # 获取所有阶段的覆盖情况
+            all_stages_coverage = memoir_state.all_stages_coverage()
            
            system_prompt = get_guided_conversation_prompt(
                current_stage=memoir_state.current_stage,
@@ -145,6 +171,8 @@ class ConversationAgent:
                user_message=user_message,
                conversation_turn=conversation_turn,
                same_topic_turns=same_topic_turns,
+                all_stages_coverage=all_stages_coverage,
+                detected_user_stage=detected_user_stage,
            )

            history_string = self._format_history_string(history_messages)
--- a/api/agents/memory_agent.py
+++ b/api/agents/memory_agent.py
@@ -13,7 +13,7 @@ from .prompts import (
    get_chapter_classification_prompt,
    get_text_rewrite_prompt,
    CHAPTER_CATEGORIES,
-    CHAPTER_ORDER
+    STAGE_TO_ORDER,
 )

 logger = logging.getLogger(__name__)
@@ -176,7 +176,7 @@ class MemoryAgent:
                "summary": result.get("summary", ""),
                "image_suggestions": result.get("image_suggestions", []),
                "category": category,
-                "order_index": CHAPTER_ORDER.index(category) if category in CHAPTER_ORDER else 999
+                "order_index": STAGE_TO_ORDER.get(category, 999)
            }
        
        return updated_chapters
--- a/api/agents/prompts/__init__.py
+++ b/api/agents/prompts/__init__.py
@@ -17,6 +17,7 @@ from .memory_prompts import (
    get_narrative_prompt,
    CHAPTER_CATEGORIES,
    CHAPTER_ORDER,
+    STAGE_TO_ORDER,
 )

 __all__ = [
@@ -33,5 +34,6 @@ __all__ = [
    "get_narrative_prompt",
    "CHAPTER_CATEGORIES",
    "CHAPTER_ORDER",
+    "STAGE_TO_ORDER",
 ]

--- a/api/agents/prompts/conversation_prompts.py
+++ b/api/agents/prompts/conversation_prompts.py
@@ -180,28 +180,22 @@ def get_guided_conversation_prompt(
    user_message: str,
    conversation_turn: int = 0,
    same_topic_turns: int = 0,
+    all_stages_coverage: Dict[str, Dict] = None,
+    detected_user_stage: str = "",
 ) -> str:
    """
    生成状态感知的对话提示词
-    
+
    Args:
-        current_stage: 当前阶段
-        empty_slots: 未填充的槽位
-        filled_slots: 已填充的槽位
+        current_stage: 系统当前跟踪的阶段
+        empty_slots: 当前阶段未填充的槽位
+        filled_slots: 当前阶段已填充的槽位
        user_message: 用户消息
        conversation_turn: 总对话轮数
        same_topic_turns: 同一话题的轮数
+        all_stages_coverage: 所有阶段的覆盖情况 {stage: {total, filled, empty, ratio}}
+        detected_user_stage: 检测到用户正在谈论的阶段（可能和 current_stage 不同）
    """
-    # 转换 slot 名称为中文
-    empty_slots_readable = [SLOT_NAME_MAP.get(s, s) for s in empty_slots]
-    empty_slots_str = "、".join(empty_slots_readable) if empty_slots_readable else "已聊得很充分"
-    
-    filled_info = []
-    for key, value in filled_slots.items():
-        readable_key = SLOT_NAME_MAP.get(key, key)
-        filled_info.append(f"{readable_key}: {value[:50]}..." if len(value) > 50 else f"{readable_key}: {value}")
-    filled_slots_str = "\n".join(filled_info) if filled_info else "刚开始聊"
-
    stage_name_map = {
        "childhood": "童年时光",
        "education": "求学经历",
@@ -209,21 +203,52 @@ def get_guided_conversation_prompt(
        "family": "家庭生活",
        "belief": "人生信念",
    }
-    stage_name = stage_name_map.get(current_stage, current_stage)
-    
-    # 计算已填充的槽位数量
+
+    current_stage_name = stage_name_map.get(current_stage, current_stage)
+    user_stage_name = stage_name_map.get(detected_user_stage, "") if detected_user_stage else ""
+
+    # 判断用户是否在聊一个不同于系统当前阶段的话题
+    user_jumped = detected_user_stage and detected_user_stage != current_stage
+
+    # --- 构建当前聊天上下文 ---
+    # 转换 slot 名称为中文
+    empty_slots_readable = [SLOT_NAME_MAP.get(s, s) for s in empty_slots]
+    empty_slots_str = "、".join(empty_slots_readable) if empty_slots_readable else "已聊得很充分"
+
+    filled_info = []
+    for key, value in filled_slots.items():
+        readable_key = SLOT_NAME_MAP.get(key, key)
+        filled_info.append(f"{readable_key}: {value[:50]}..." if len(value) > 50 else f"{readable_key}: {value}")
+    filled_slots_str = "\n".join(filled_info) if filled_info else "刚开始聊"
+
+    # --- 构建全局进度概览 ---
+    progress_lines = []
+    uncovered_stages = []
+    if all_stages_coverage:
+        for stage in ["childhood", "education", "career", "family", "belief"]:
+            cov = all_stages_coverage.get(stage, {})
+            filled_n = cov.get("filled", 0)
+            total_n = cov.get("total", 0)
+            sname = stage_name_map.get(stage, stage)
+            if filled_n == 0:
+                progress_lines.append(f"  {sname}：还没聊到")
+                uncovered_stages.append(sname)
+            elif filled_n < total_n:
+                progress_lines.append(f"  {sname}：聊了一些（{filled_n}/{total_n}）")
+            else:
+                progress_lines.append(f"  {sname}：已聊得很充分 ✓")
+    progress_str = "\n".join(progress_lines) if progress_lines else ""
+
+    # --- 动态策略 ---
    filled_count = len(filled_slots)
-    total_slots = filled_count + len(empty_slots)
-    
-    # 动态调整策略
    should_switch_topic = same_topic_turns >= 3 or (filled_count >= 2 and same_topic_turns >= 2)
    should_lighten_mood = conversation_turn > 0 and conversation_turn % 5 == 0
    should_try_new_stage = filled_count >= 3 and len(empty_slots) <= 2
-    
+
    # 获取相关阶段
    related_stages = STAGE_RELATED_TOPICS.get(current_stage, [])
    related_stages_str = "、".join([stage_name_map.get(s, s) for s in related_stages])
-    
+
    # 选择回应风格
    style = random.choice(RESPONSE_STYLES)
    style_guidance = {
@@ -233,24 +258,45 @@ def get_guided_conversation_prompt(
        "lighthearted": "这次回应可以轻松一点，适当加入幽默",
        "connection": "这次回应可以分享一个类似的经历或感受（可以虚构）",
    }.get(style, "")
-    
-    # 构建动态指导
+
+    # --- 构建动态指导 ---
    dynamic_guidance = ""
-    if should_lighten_mood:
-        dynamic_guidance += "\n- 聊了一会儿了，可以适当轻松一下，聊点有趣的"
-    if should_switch_topic and empty_slots_readable:
-        dynamic_guidance += f"\n- 这个话题聊得差不多了，可以自然转到：{empty_slots_str}"
-    if should_try_new_stage and related_stages:
-        dynamic_guidance += f"\n- 如果自然的话，可以尝试聊聊相关的话题，比如{related_stages_str}"
+    if user_jumped:
+        dynamic_guidance += f"""
+- **用户正在聊「{user_stage_name}」的话题，跟着他/她的节奏走，不要试图拉回「{current_stage_name}」**
+- 顺着用户的思路，帮他/她把这个话题聊深聊透
+- 这是很自然的事情，人回忆往事经常会跳跃，你要做的是陪伴和倾听"""
+    else:
+        if should_lighten_mood:
+            dynamic_guidance += "\n- 聊了一会儿了，可以适当轻松一下，聊点有趣的"
+        if should_switch_topic and empty_slots_readable:
+            dynamic_guidance += f"\n- 这个话题聊得差不多了，可以自然转到：{empty_slots_str}"
+        if should_try_new_stage and related_stages:
+            dynamic_guidance += f"\n- 如果自然的话，可以尝试聊聊相关的话题，比如{related_stages_str}"

-    prompt = f"""你是用户的老朋友，正在和他/她聊人生故事。你们聊到了「{stage_name}」这个话题。
+    # --- 缺失章节补充提示（仅在用户没有跳转、且当前话题聊得差不多时） ---
+    uncovered_hint = ""
+    if not user_jumped and uncovered_stages and should_try_new_stage:
+        uncovered_hint = f"\n- 还没聊到的人生阶段有：{'、'.join(uncovered_stages)}，如果聊天中有自然的契机，可以轻轻带一句，但不要刻意"

-## 已经聊到的内容
+    # --- 组合 prompt ---
+    # 根据是否跳转，调整主题描述
+    if user_jumped:
+        topic_desc = f"你们原本在聊「{current_stage_name}」，但用户自然地聊到了「{user_stage_name}」的内容"
+    else:
+        topic_desc = f"你们聊到了「{current_stage_name}」这个话题"
+
+    prompt = f"""你是用户的老朋友，正在和他/她聊人生故事。{topic_desc}。
+
+## 已经聊到的内容（{current_stage_name}）
 {filled_slots_str}

-## 还可以聊的方向
+## 还可以聊的方向（{current_stage_name}）
 {empty_slots_str}

+## 整体进度
+{progress_str}
+
 ## 用户刚才说
 "{user_message}"

@@ -259,10 +305,11 @@ def get_guided_conversation_prompt(

 ## 你的任务
 1. **回应用户**：先对用户说的内容做出真诚回应（不是总结，而是有温度的反馈）
-2. **保持自然**：不要每次都追问，有时候可以分享感受、表达好奇、或者轻松聊两句
-3. **适时换话题**：如果一个方向聊了几轮，自然地换到其他方向，保持新鲜感
-4. **追问要具体**：如果要追问，问具体的细节，比如"那时候是什么季节""身边有谁陪着你""当时心里什么感觉"
-{dynamic_guidance}
+2. **跟随用户**：如果用户聊到了其他人生阶段的内容（比如从童年跳到工作），完全没问题，顺着他/她的思路继续聊。回忆本来就是跳跃的，不要强行拉回某个固定话题
+3. **保持自然**：不要每次都追问，有时候可以分享感受、表达好奇、或者轻松聊两句
+4. **适时引导**：跟着用户的节奏聊了几轮后，如果有自然的时机，可以温和地引向还没聊到的人生阶段，但绝不要生硬
+5. **追问要具体**：如果要追问，问具体的细节，比如"那时候是什么季节""身边有谁陪着你""当时心里什么感觉"
+{dynamic_guidance}{uncovered_hint}

 ## 回复格式
 - 如果内容较多，可以分成 2-3 条消息，用 [SPLIT] 分隔
@@ -276,6 +323,7 @@ def get_guided_conversation_prompt(
 - 禁止生硬地问"还有什么想分享的吗"
 - 禁止反复追问同一件事
 - 禁止每次都以问题结尾
+- **禁止在用户聊别的话题时强行拉回之前的话题**

 ## 好的回应示例
 - "哈哈，你这说的让我想起..."（轻松）
--- a/api/agents/prompts/memory_prompts.py
+++ b/api/agents/prompts/memory_prompts.py
@@ -1,6 +1,7 @@
 """
 回忆录整理 Agent 提示词模板
 """
+import json

 # 章节分类映射
 CHAPTER_CATEGORIES = {
@@ -26,6 +27,21 @@ CHAPTER_ORDER = [
    "summary",
 ]

+# Unified mapping: stage/category name -> chapter sort index
+# Covers both the 5-stage short names (conversation/state model) and the 8-category detailed names (chapter model)
+STAGE_TO_ORDER = {
+    "childhood": 0,
+    "education": 1,
+    "career": 2,              # 5-stage short name
+    "career_early": 2,        # 8-category detailed name
+    "career_achievement": 3,
+    "career_challenge": 4,
+    "family": 5,
+    "belief": 6,              # 5-stage short name (singular)
+    "beliefs": 6,             # 8-category detailed name (plural)
+    "summary": 7,
+}
+

 def get_system_prompt() -> str:
    """获取整理 Agent 的系统提示词"""
@@ -119,12 +135,25 @@ def get_text_rewrite_prompt(segments_text: str, chapter_category: str, existing_
 def get_state_extraction_prompt(user_message: str, current_stage: str, stage_slots: dict) -> str:
    """抽取结构化信息并判断阶段"""
    slot_keys = list(stage_slots.keys())
+
+    # 提供所有阶段的 slot 参考，帮助 LLM 将内容归类到正确的阶段
+    all_stage_slots = {
+        "childhood": ["place", "people", "daily_life", "emotion", "turning_event"],
+        "education": ["school", "city", "motivation", "challenge", "change"],
+        "career": ["job", "environment", "decision", "pressure", "growth"],
+        "family": ["relationship", "conflict", "support", "responsibility", "change"],
+        "belief": ["value", "regret", "pride", "lesson"],
+    }
+
    return f"""{get_system_prompt()}

-你需要从用户话语中抽取结构化信息，并判断是否需要更新阶段。
+你需要从用户话语中抽取结构化信息，并判断用户实际在谈论哪个人生阶段。

-当前阶段：{current_stage}
-当前阶段可填 slots：{slot_keys}
+系统当前跟踪的阶段：{current_stage}
+该阶段可填 slots：{slot_keys}
+
+所有阶段及其 slots 参考：
+{json.dumps(all_stage_slots, ensure_ascii=False, indent=2)}

 用户话语：
 {user_message}
@@ -140,9 +169,11 @@ def get_state_extraction_prompt(user_message: str, current_stage: str, stage_slo
 }}

 要求：
-1. slots 只填写确实提到的内容
-2. snippet 保持用户原话风格，50 字以内
-3. 如果没有明确内容，slots 为空对象
+1. **detected_stage 必须根据用户话语的实际内容判断**，不要默认沿用系统当前阶段。用户可能在聊不同阶段的事情。
+2. slots 的 key 必须属于 detected_stage 对应的 slot 列表
+3. slots 只填写确实提到的内容
+4. snippet 保持用户原话风格，50 字以内
+5. 如果没有明确内容，slots 为空对象
 """


--- a/api/agents/state_schema.py
+++ b/api/agents/state_schema.py
@@ -29,6 +29,35 @@ class MemoirStateSchema(BaseModel):
                empty_keys.append(key)
        return empty_keys

+    def empty_slots_for_stage(self, stage: str) -> List[str]:
+        """Return the slot keys of the given stage whose snippet is empty; an unknown stage yields []."""
+        stage_slots = self.slots.get(stage, {})
+        return [key for key, value in stage_slots.items() if not value.snippet]
+
+    def filled_slots_for_stage(self, stage: str) -> Dict[str, str]:
+        """Return {slot key: snippet} for the given stage's slots with a non-empty snippet; unknown stage yields {}."""
+        stage_slots = self.slots.get(stage, {})
+        return {
+            key: value.snippet
+            for key, value in stage_slots.items()
+            if value.snippet
+        }
+
+    def all_stages_coverage(self) -> Dict[str, Dict]:
+        """Summarize slot coverage per stage in stage_order: total, filled, empty and ratio (0 if the stage has no slots)."""
+        coverage: Dict[str, Dict] = {}
+        for stage in self.stage_order:
+            stage_slots = self.slots.get(stage, {})
+            total = len(stage_slots)
+            filled = sum(1 for v in stage_slots.values() if v.snippet)
+            coverage[stage] = {
+                "total": total,
+                "filled": filled,
+                "empty": total - filled,
+                "ratio": filled / total if total > 0 else 0,
+            }
+        return coverage
+

 DEFAULT_STAGE_ORDER = ["childhood", "education", "career", "family", "belief"]

--- a/api/migrations/fix_chapter_order_index.sql
+++ b/api/migrations/fix_chapter_order_index.sql
@@ -0,0 +1,15 @@
+-- Fix chapters whose order_index is 999
+-- Cause: STAGE_KEYWORDS uses the short stage names (career, belief),
+--        while CHAPTER_ORDER uses the detailed category names (career_early, beliefs), so the lookup failed and fell back to 999
+
+-- Repair order_index based on the category column
+UPDATE chapters SET order_index = 0 WHERE order_index = 999 AND category = 'childhood';
+UPDATE chapters SET order_index = 1 WHERE order_index = 999 AND category = 'education';
+UPDATE chapters SET order_index = 2 WHERE order_index = 999 AND category = 'career';
+UPDATE chapters SET order_index = 2 WHERE order_index = 999 AND category = 'career_early';
+UPDATE chapters SET order_index = 3 WHERE order_index = 999 AND category = 'career_achievement';
+UPDATE chapters SET order_index = 4 WHERE order_index = 999 AND category = 'career_challenge';
+UPDATE chapters SET order_index = 5 WHERE order_index = 999 AND category = 'family';
+UPDATE chapters SET order_index = 6 WHERE order_index = 999 AND category = 'belief';
+UPDATE chapters SET order_index = 6 WHERE order_index = 999 AND category = 'beliefs';
+UPDATE chapters SET order_index = 7 WHERE order_index = 999 AND category = 'summary';
--- a/api/tasks/memoir_tasks.py
+++ b/api/tasks/memoir_tasks.py
@@ -21,7 +21,7 @@ from agents.prompts.memory_prompts import (
    get_creative_title_prompt,
    get_narrative_prompt,
    get_state_extraction_prompt,
-    CHAPTER_ORDER,
+    STAGE_TO_ORDER,
 )

 logger = logging.getLogger(__name__)
@@ -264,7 +264,7 @@ def process_memoir_segments(self, user_id: str, segment_ids: List[str]):
                    chapter.source_segments = list({*(chapter.source_segments or []), *source_ids})
                else:
                    # 根据 stage 计算正确的排序索引
-                    calculated_order_index = CHAPTER_ORDER.index(stage) if stage in CHAPTER_ORDER else 999
+                    calculated_order_index = STAGE_TO_ORDER.get(stage, 999)
                    chapter = Chapter(
                        id=str(uuid.uuid4()),
                        user_id=user_id,
@@ -367,7 +367,7 @@ def generate_chapter_content(self, user_id: str, stage: str, new_content: str):
                chapter.is_new = True
            else:
                # 根据 stage 计算正确的排序索引
-                calculated_order_index = CHAPTER_ORDER.index(stage) if stage in CHAPTER_ORDER else 999
+                calculated_order_index = STAGE_TO_ORDER.get(stage, 999)
                chapter = Chapter(
                    id=str(uuid.uuid4()),
                    user_id=user_id,