WIP: memory system improvements (in progress)

Adds interview/chat prompt layers, a reply planner, style profiles, memory injection, an interview meta store, and related tests. This work is not finished yet. Made-with: Cursor
2026-04-22 16:56:28 +08:00
parent e848f26354
commit 3121d1384d
28 changed files with 2790 additions and 452 deletions
--- a/api/app/agents/chat/interview_agent.py
+++ b/api/app/agents/chat/interview_agent.py
@@ -11,12 +11,14 @@ from langchain_core.messages import HumanMessage, SystemMessage
 from app.agents.chat.agent_turn import AgentChatTurn
 from app.agents.chat.helpers import format_history_string, get_history_with_window
 from app.agents.chat.interview_state_hints import (
+    apply_autobiographical_boundary_guard,
    apply_duplicate_question_guard,
    extract_recent_questions,
    segments_are_only_duplicate_guard_fallback,
    update_recent_questions,
 )
 from app.agents.chat.interview_turn_plan import plan_interview_turn
+from app.agents.chat.reply_planner import maybe_refine_turn_plan_with_llm
 from app.agents.chat.personas import normalize_interview_persona
 from app.agents.chat.prompt_context import ChatPromptContext
 from app.agents.chat.prompts_conversation import (
@@ -135,12 +137,15 @@ class InterviewAgent:
        user_profile_context: str = "",
        detected_user_stage: Optional[str] = None,
        memory_evidence_text: str = "",
+        memory_anchor_source: str = "",
+        memory_planner_text: str = "",
        background_voice: str = "default",
        normalized_user_message: Optional[str] = None,
        occupation: str = "",
        profile_birth_year: int | None = None,
        profile_era_place: str = "",
        stage_switched_this_turn: bool = False,
+        scene_cues_for_planner: Optional[list[str]] = None,
    ) -> AgentChatTurn:
        """生成状态感知的访谈回复，不持久化（由 Orchestrator 负责）"""
        if not self.llm:
@@ -179,7 +184,7 @@ class InterviewAgent:
                current_stage=memoir_state.current_stage,
                empty_slots=empty_slots,
                normalized_user_message=text_for_model,
-                memory_evidence_text=memory_evidence_text,
+                memory_evidence_text=(memory_anchor_source or "").strip(),
                stage_switched_this_turn=stage_switched_this_turn,
            )
            logger.info(
@@ -189,6 +194,37 @@ class InterviewAgent:
                len(turn_plan.anchor_snippet or ""),
            )

+            reply_planner_raw = ""
+            baseline_mode = turn_plan.mode
+            baseline_primary_focus = turn_plan.primary_focus
+            if settings.chat_reply_planner_llm_enabled:
+                rq_preview = (
+                    "\n".join(recent_questions[-4:])
+                    if recent_questions
+                    else ""
+                )
+                turn_plan, reply_planner_raw = await maybe_refine_turn_plan_with_llm(
+                    self.llm,
+                    plan=turn_plan,
+                    text_for_model=text_for_model,
+                    memory_evidence_text=(memory_planner_text or memory_evidence_text)
+                    or "",
+                    max_tokens=int(settings.chat_reply_planner_max_tokens),
+                    temperature=float(settings.chat_reply_planner_temperature),
+                    scene_cues_for_planner=scene_cues_for_planner or [],
+                    recent_questions_preview=rq_preview,
+                )
+                if reply_planner_raw:
+                    logger.info(
+                        "event=reply_planner_applied memory_usage={} reply_shape={} "
+                        "mode={} primary_focus={} focus_source={}",
+                        turn_plan.memory_usage,
+                        turn_plan.reply_shape,
+                        turn_plan.mode,
+                        turn_plan.primary_focus,
+                        turn_plan.focus_source,
+                    )
+
            ctx = ChatPromptContext(
                current_stage=memoir_state.current_stage,
                empty_slots=empty_slots,
@@ -322,6 +358,7 @@ class InterviewAgent:
                    recent_questions=rq_base,
                )
                retry_used = True
+            out, auto_bio = apply_autobiographical_boundary_guard(out)
            updated_recent_questions = update_recent_questions(rq_base, out)
            log_agent_summary(
                logger,
@@ -338,6 +375,18 @@ class InterviewAgent:
                    "recent_questions": updated_recent_questions,
                    "duplicate_question_guard_triggered": deduped,
                    "duplicate_question_guard_llm_retry": retry_used,
+                    "autobiographical_boundary_guard_triggered": auto_bio,
+                    "reply_planner_llm_used": bool(
+                        settings.chat_reply_planner_llm_enabled
+                        and (reply_planner_raw or "").strip()
+                    ),
+                    "reply_planner_raw_preview": (reply_planner_raw or "")[:800],
+                    "focus_planner_baseline_mode": baseline_mode,
+                    "focus_planner_baseline_primary_focus": baseline_primary_focus,
+                    "focus_planner_mode": turn_plan.mode,
+                    "focus_planner_primary_focus": turn_plan.primary_focus,
+                    "focus_planner_focus_source": turn_plan.focus_source,
+                    "focus_planner_focus_summary": (turn_plan.focus_summary or "")[:200],
                },
            )
        except Exception as e: