WIP: memory system improvements (in progress)

Changes span the interview/chat prompt layers, reply planner, style profiles, memory injection, interview meta store, and related tests. The work is not finished. Made-with: Cursor
2026-04-22 16:56:28 +08:00
parent e848f26354
commit 3121d1384d
28 changed files with 2790 additions and 452 deletions
--- a/api/app/agents/state_schema.py
+++ b/api/app/agents/state_schema.py
@@ -1,5 +1,13 @@
 """
-共享状态 Schema（对话 Agent 与后台 Agent 共用）
+共享状态 Schema（对话 Agent 与后台 Agent 共用）。
+
+Option B 的 schema 分层：
+
+- `NarrativeCoverageState`：叙述覆盖视图 —— 阶段推进、叙述槽、每阶段的完成情况；**不掺控制元信息**。
+- `InterviewControlState`：访谈控制视图 —— 已确认事实、人物主线、最近已问；用于访谈控场，**不参与成稿槽位**。
+- `MemoirStateSchema`：以 facade 形式聚合两个视图，保留旧字段以兼容已持久化数据。
+
+消费方**应**通过 `state.narrative()` / `state.control()` 视图取值；顶层字段会逐步按阶段迁出。
 """

 from __future__ import annotations
@@ -48,51 +56,39 @@ class PersonaThread(BaseModel):
        return self.trait


-class MemoirStateSchema(BaseModel):
-    """回忆录状态"""
+# =============================================================================
+# Narrative 视图：叙述覆盖、阶段推进、叙述槽
+# =============================================================================
+
+
+class NarrativeCoverageState(BaseModel):
+    """叙述覆盖视图。
+
+    只承载「人生叙事覆盖」相关信息：阶段顺序、当前阶段、覆盖过的阶段、每阶段的叙述槽。
+    **禁止**在此视图承载访谈控场数据（已确认事实、人物主线、最近已问），那些数据属于
+    `InterviewControlState`。
+    """

    stage_order: List[str]
    current_stage: str
    covered_stages: List[str]
    slots: Dict[str, Dict[str, SlotData]]
-    known_facts: List[KnownFact] = Field(default_factory=list)
-    persona_threads: List[PersonaThread] = Field(default_factory=list)
-    recent_questions: List[str] = Field(default_factory=list)

    def empty_slots_for_current_stage(self) -> List[str]:
        stage_slots = self.slots.get(self.current_stage, {})
-        empty_keys: List[str] = []
-        for key, value in stage_slots.items():
-            if not value.snippet:
-                empty_keys.append(key)
-        return empty_keys
-
-    def prompt_empty_slots_for_stage(self, stage: str) -> List[str]:
-        """生成 prompt 时可追问的槽位，排除已被 known_facts 覆盖的方向。"""
-        blocked = {
-            fact.slot_name
-            for fact in self.known_facts
-            if fact.slot_name and (not fact.stage or fact.stage == stage)
-        }
-        return [key for key in self.empty_slots_for_stage(stage) if key not in blocked]
-
-    def prompt_empty_slots_for_current_stage(self) -> List[str]:
-        return self.prompt_empty_slots_for_stage(self.current_stage)
+        return [key for key, value in stage_slots.items() if not value.snippet]

    def empty_slots_for_stage(self, stage: str) -> List[str]:
-        """获取指定阶段的空槽位"""
        stage_slots = self.slots.get(stage, {})
        return [key for key, value in stage_slots.items() if not value.snippet]

    def filled_slots_for_stage(self, stage: str) -> Dict[str, str]:
-        """获取指定阶段已填充的槽位及其内容"""
        stage_slots = self.slots.get(stage, {})
        return {
            key: value.snippet for key, value in stage_slots.items() if value.snippet
        }

    def all_stages_coverage(self) -> Dict[str, Dict]:
-        """获取所有阶段的覆盖情况摘要"""
        coverage: Dict[str, Dict] = {}
        for stage in self.stage_order:
            stage_slots = self.slots.get(stage, {})
@@ -106,6 +102,23 @@ class MemoirStateSchema(BaseModel):
            }
        return coverage

+
+# =============================================================================
+# Interview Control 视图：访谈控场（已知事实 / 人物主线 / 最近已问）
+# =============================================================================
+
+
+class InterviewControlState(BaseModel):
+    """访谈控制视图。
+
+    承载仅与「控场 / 去重问 / 人物呼应」相关的信息。这些字段**不应**出现在叙述覆盖计算里，也
+    **不应**写入 `slots`。
+    """
+
+    known_facts: List[KnownFact] = Field(default_factory=list)
+    persona_threads: List[PersonaThread] = Field(default_factory=list)
+    recent_questions: List[str] = Field(default_factory=list)
+
    def prompt_known_fact_lines(self, *, limit: int = 10) -> List[str]:
        xs: List[str] = []
        for fact in self.known_facts[-limit:]:
@@ -133,8 +146,89 @@ class MemoirStateSchema(BaseModel):
            out.append(s)
        return out

+    def blocked_slot_names_for_stage(self, stage: str) -> set[str]:
+        """已被 known_facts 覆盖的槽位名：追问时应避开。"""
+        return {
+            fact.slot_name
+            for fact in self.known_facts
+            if fact.slot_name and (not fact.stage or fact.stage == stage)
+        }
+
+
+# =============================================================================
+# Facade：MemoirStateSchema 兼容旧字段形态，方法委托到两个视图
+# =============================================================================
+
+
+class MemoirStateSchema(BaseModel):
+    """回忆录状态（Facade）。
+
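+    为兼容既有持久化与旧调用方，顶层字段保持不变；内部将其投影成 `NarrativeCoverageState` 与
+    `InterviewControlState` 两个视图，后续新代码应直接使用 `state.narrative()` / `state.control()` 表达意图。
+    NOTE(review): each call builds a new view object; in-place edits to a view may not reach this state — confirm.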
+    """
+
+    stage_order: List[str]
+    current_stage: str
+    covered_stages: List[str]
+    slots: Dict[str, Dict[str, SlotData]]
+    known_facts: List[KnownFact] = Field(default_factory=list)
+    persona_threads: List[PersonaThread] = Field(default_factory=list)
+    recent_questions: List[str] = Field(default_factory=list)
+
+    # ---- 视图投影 ----
+
+    def narrative(self) -> NarrativeCoverageState:
+        return NarrativeCoverageState(
+            stage_order=self.stage_order,
+            current_stage=self.current_stage,
+            covered_stages=self.covered_stages,
+            slots=self.slots,
+        )
+
+    def control(self) -> InterviewControlState:
+        return InterviewControlState(
+            known_facts=self.known_facts,
+            persona_threads=self.persona_threads,
+            recent_questions=self.recent_questions,
+        )
+
+    # ---- 兼容层：委托到 narrative / control 视图 ----
+
+    def empty_slots_for_current_stage(self) -> List[str]:
+        return self.narrative().empty_slots_for_current_stage()
+
+    def empty_slots_for_stage(self, stage: str) -> List[str]:
+        return self.narrative().empty_slots_for_stage(stage)
+
+    def filled_slots_for_stage(self, stage: str) -> Dict[str, str]:
+        return self.narrative().filled_slots_for_stage(stage)
+
+    def all_stages_coverage(self) -> Dict[str, Dict]:
+        return self.narrative().all_stages_coverage()
+
+    def prompt_empty_slots_for_stage(self, stage: str) -> List[str]:
+        """生成 prompt 时可追问的槽位，排除已被 known_facts 覆盖的方向。"""
+        blocked = self.control().blocked_slot_names_for_stage(stage)
+        return [
+            key
+            for key in self.narrative().empty_slots_for_stage(stage)
+            if key not in blocked
+        ]
+
+    def prompt_empty_slots_for_current_stage(self) -> List[str]:
+        return self.prompt_empty_slots_for_stage(self.current_stage)
+
+    def prompt_known_fact_lines(self, *, limit: int = 10) -> List[str]:
+        return self.control().prompt_known_fact_lines(limit=limit)
+
+    def prompt_persona_thread_lines(self, *, limit: int = 6) -> List[str]:
+        return self.control().prompt_persona_thread_lines(limit=limit)
+
+    def prompt_recent_question_lines(self, *, limit: int = 4) -> List[str]:
+        return self.control().prompt_recent_question_lines(limit=limit)
+

-# 与 stage_constants.CHAT_STAGES 同一顺序；list() 避免与元组共享可变别名
 DEFAULT_STAGE_ORDER: list[str] = list(CHAT_STAGES)


@@ -184,3 +278,16 @@ def default_state() -> MemoirStateSchema:
        covered_stages=[],
        slots=default_slots(),
    )
+
+
+__all__ = [
+    "DEFAULT_STAGE_ORDER",
+    "InterviewControlState",
+    "KnownFact",
+    "MemoirStateSchema",
+    "NarrativeCoverageState",
+    "PersonaThread",
+    "SlotData",
+    "default_slots",
+    "default_state",
+]