WIP: memory system improvements (in progress)

Adds interview/chat prompt layers, a reply planner, style profiles, memory injection, an interview meta store, and related tests. This work is not finished yet. Made-with: Cursor
2026-04-22 16:56:28 +08:00
parent e848f26354
commit 3121d1384d
28 changed files with 2790 additions and 452 deletions
--- a/api/app/agents/chat/reply_planner.py
+++ b/api/app/agents/chat/reply_planner.py
@@ -0,0 +1,220 @@
+"""访谈 focus planner：规则 TurnPlan 之后的可选 LLM 细化（JSON），判断本轮承接重点并微调记忆引用与回复形状。"""
+
+from __future__ import annotations
+
+import json
+from dataclasses import replace
+from typing import Any
+
+from app.agents.chat.interview_turn_plan import (
+    InterviewTurnPlan,
+    apply_safe_mode_override,
+)
+from app.core.langchain_llm import ainvoke_json_object
+from app.core.logging import get_logger
+
+logger = get_logger(__name__)
+
+_VALID_FOCUS_PRIMARIES: frozenset[str] = frozenset(
+    {"emotion", "relationship", "identity", "scene", "memoir_gap", "follow_user"}
+)  # labels accepted for primary_focus / secondary_focus in the planner JSON
+
+
+def merge_reply_planner_json_into_turn_plan(
+    plan: InterviewTurnPlan,
+    raw_json: str,
+) -> InterviewTurnPlan:
+    """Merge the planner's JSON reply into ``plan``; invalid fields are skipped.
+
+    Never widens memory_usage from "none" or sets forbid_first_person_experience.
+    """
+    if not (raw_json or "").strip():
+        return plan
+    try:
+        parsed = json.loads(raw_json)
+    except json.JSONDecodeError:
+        logger.warning("reply_planner json decode failed")
+        return plan
+    if not isinstance(parsed, dict):
+        return plan
+
+    updates: dict[str, Any] = {}
+    focus_changed = False
+
+    memory_usage = parsed.get("memory_usage")
+    if memory_usage in ("none", "allowed_with_attribution"):
+        if plan.memory_usage != "none" or memory_usage != "allowed_with_attribution":
+            updates["memory_usage"] = memory_usage
+
+    reply_shape = parsed.get("reply_shape")
+    if reply_shape in ("flexible", "ack_only", "ack_then_question"):
+        updates["reply_shape"] = reply_shape
+
+    reference_style = parsed.get("memory_reference_style")
+    if isinstance(reference_style, str):
+        trimmed_style = reference_style.strip()
+        if 2 <= len(trimmed_style) <= 24:
+            updates["memory_reference_style"] = trimmed_style
+
+    # A suggested forbid_first_person_experience=false is logged and dropped.
+    if parsed.get("forbid_first_person_experience") is False:
+        logger.debug("reply_planner ignored forbid_first_person_experience=false")
+
+    primary = parsed.get("primary_focus")
+    if isinstance(primary, str) and primary in _VALID_FOCUS_PRIMARIES:
+        updates["primary_focus"] = primary  # type: ignore[assignment]
+        focus_changed = True
+
+    if "secondary_focus" in parsed:
+        secondary = parsed["secondary_focus"]
+        if isinstance(secondary, str) and secondary.strip():
+            if secondary in _VALID_FOCUS_PRIMARIES:
+                updates["secondary_focus"] = secondary  # type: ignore[assignment]
+                focus_changed = True
+        elif secondary is None or isinstance(secondary, str):
+            # Explicit null or a blank string clears the secondary focus.
+            updates["secondary_focus"] = None
+            focus_changed = True
+
+    raw_summary = parsed.get("focus_summary")
+    summary = raw_summary.strip() if isinstance(raw_summary, str) else ""
+    if summary:
+        clipped = summary if len(summary) <= 200 else summary[:199].rstrip() + "…"
+        updates["focus_summary"] = clipped
+        focus_changed = True
+
+    mode_hint = parsed.get("mode_override")
+    if mode_hint not in (None, ""):
+        merged_mode = apply_safe_mode_override(
+            plan.mode,
+            mode_hint if isinstance(mode_hint, str) else str(mode_hint),
+            primary_focus=str(updates.get("primary_focus", plan.primary_focus)),
+        )
+        if merged_mode is not None and merged_mode != plan.mode:
+            updates["mode"] = merged_mode
+            focus_changed = True
+
+    if focus_changed:
+        updates["focus_source"] = "llm"
+    return replace(plan, **updates) if updates else plan
+
+
+def _build_reply_planner_prompt(
+    *,
+    plan: InterviewTurnPlan,
+    user_message_preview: str,
+    memory_evidence_preview: str,
+    scene_cues_preview: str,
+    recent_questions_preview: str,
+) -> str:
+    """Render the planner prompt from the rule baseline and truncated previews.
+
+    Each preview is stripped and cut to a fixed length; empty memory, scene and
+    recent-question previews are replaced by a fixed placeholder line.
+    """
+
+    def _clip(text: str, limit: int, placeholder: str = "") -> str:
+        # A non-empty stripped text keeps a non-empty prefix, so ``or`` only
+        # fires for blank input.
+        return (text or "").strip()[:limit] or placeholder
+
+    mem_note = _clip(memory_evidence_preview, 1200, "（本轮无检索记忆预览）")
+    um = _clip(user_message_preview, 800)
+    scene_block = _clip(scene_cues_preview, 600, "（本轮无场景关键词触发的氛围线索）")
+    rq_block = _clip(recent_questions_preview, 400, "（无）")
+    # Rule-derived focus label; the secondary focus is appended when set.
+    focus_hint = f"{plan.primary_focus}"
+    if plan.secondary_focus:
+        focus_hint = f"{focus_hint} / 次:{plan.secondary_focus}"
+    return f"""你是回忆录访谈的「本轮重点计划器」。只输出**一个 JSON 对象**，不要 markdown，不要解释。
+
+## 任务
+先判断：用户本轮**最该被接住、最不该被忽略**的是什么（情绪、关系与他人、身份与面子、现场感官、或叙述槽缺口）。再决定如何微调基线。
+
+## 当前规则基线（服务端已算好，须尊重安全边界）
+- mode: {plan.mode}
+- primary_focus（规则先验）: {focus_hint}
+- memory_usage: {plan.memory_usage}
+- reply_shape: {plan.reply_shape}
+- memory_reference_style: {plan.memory_reference_style}
+- forbid_first_person_experience: {plan.forbid_first_person_experience}
+
+## 用户本轮话（截断）
+{um}
+
+## 近期你已问过的问题（截断；避免重复角度）
+{rq_block}
+
+## 检索记忆预览（供规划追问角度；**非**正文提纲，勿复述成长摘要）
+{mem_note}
+
+## 场景氛围线索（仅关键词映射，**不是用户原话**；可作辅助意象，不得压过用户明确提到的人名、关系与面子）
+{scene_block}
+
+## 输出 JSON 字段（仅限下列键；未提及的键不要输出）
+- primary_focus: \"emotion\" | \"relationship\" | \"identity\" | \"scene\" | \"memoir_gap\" | \"follow_user\"
+- secondary_focus: 同上或 null
+- focus_summary: 字符串，≤80 字，用**中文**写清**追问角度 / 承接方向**（问什么、先接住哪条张力），**不要**写成回复正文提纲或旧记忆复述
+- mode_override: \"emotion_first\" | \"clarify_first\" | \"memoir_push\" | \"follow_user_only\" | null
+- memory_usage: \"none\" | \"allowed_with_attribution\"
+- reply_shape: \"flexible\" | \"ack_only\" | \"ack_then_question\"
+- memory_reference_style: 2–24 字，用于「你之前提过…」类归因起句
+- forbid_first_person_experience: 必须为 true
+
+## 约束
+1. **不要编造**用户未说的人、事、时地。
+2. 若基线 memory_usage 为 none，则输出 memory_usage 必须为 none。
+3. 若用户话里同时有**明确他人/称谓/观众/面子/身份自称**与**身体感受或环境**，通常应把 primary_focus 设为 relationship 或 identity，而不是 scene。
+4. mode_override 仅在确实需要时给出；与基线相同时填 null。不要为了改而改。
+5. 若用户在追问「讲讲她的故事/说说他」等，倾向 reply_shape=\"ack_then_question\"（仍最多一个问句）。
+6. focus_summary **不得**支配主回复措辞或诱导复述检索细节；若基线 memory_usage 为 none，**不得**输出 allowed_with_attribution。
+7. focus_summary 用于：**先接住本轮核心张力**、再决定追问槽位；若用户话里含说不清/不确定/暧昧羞涩，倾向 mode_override=\"clarify_first\"（勿强推问卷）。"""
+
+
+async def maybe_refine_turn_plan_with_llm(
+    llm: Any,
+    *,
+    plan: InterviewTurnPlan,
+    text_for_model: str,
+    memory_evidence_text: str,
+    max_tokens: int,
+    temperature: float,
+    scene_cues_for_planner: list[str] | None = None,
+    recent_questions_preview: str = "",
+) -> tuple[InterviewTurnPlan, str]:
+    """Optionally refine ``plan`` with one JSON planner call.
+
+    Returns ``(plan, "")`` when ``llm`` is None, the reply is empty, or the
+    call or merge raises an Exception; otherwise ``(merged_plan, raw_reply)``.
+    """
+    if llm is None:
+        return plan, ""
+    # At most eight scene cues are shown, one bullet per line.
+    cue_lines = [f"- {cue}" for cue in (scene_cues_for_planner or [])[:8]]
+    prompt = _build_reply_planner_prompt(
+        plan=plan,
+        user_message_preview=text_for_model,
+        memory_evidence_preview=memory_evidence_text,
+        scene_cues_preview="\n".join(cue_lines),
+        recent_questions_preview=recent_questions_preview,
+    )
+    try:
+        tuned_llm = llm.bind(temperature=float(temperature))
+        raw = await ainvoke_json_object(
+            tuned_llm,
+            prompt,
+            max_tokens=max_tokens,
+            agent="ReplyPlanner.interview",
+        )
+        if raw:
+            return merge_reply_planner_json_into_turn_plan(plan, raw), raw
+    except Exception as exc:
+        # Best-effort: any failure falls back to the rule-based plan.
+        logger.warning("reply_planner llm failed: {}", exc)
+    return plan, ""
+
+
+__all__ = [  # public entry points of this module
+    "maybe_refine_turn_plan_with_llm",
+    "merge_reply_planner_json_into_turn_plan",
+]