WIP: memory system improvements (in progress)

Adds interview/chat prompt layers, a reply planner, style profiles, memory injection, an interview meta store, and related tests. The work is not finished yet. Made-with: Cursor
2026-04-22 16:56:28 +08:00
parent e848f26354
commit 3121d1384d
28 changed files with 2790 additions and 452 deletions
--- a/api/app/features/memory/evidence_format.py
+++ b/api/app/features/memory/evidence_format.py
@@ -66,6 +66,114 @@ def _flatten_object_json(obj_raw: object) -> str:
    return str(obj_raw) if obj_raw else ""


+def format_user_memory_for_chat_display(
+    text: str,
+    *,
+    verbatim: bool = False,
+) -> str:
+    """Prefix chat-mode memory text with a clear user attribution; the text is only stripped, never rewritten."""
+    t = (text or "").strip()  # None, empty and whitespace-only input all collapse to ""
+    if not t:
+        return ""
+    if verbatim:
+        return f"用户曾说：「{t}」"  # quoted form, roughly "The user once said: 「…」"
+    return f"关于用户：{t}"  # unquoted form, roughly "About the user: …"
+
+
+def format_evidence_chunks_for_chat_prompt(evidence: dict) -> str:
+    """Chat-interview only: format a retrieval bundle as short text with numbered references and a safety preamble.
+
+    Exists alongside `format_evidence_chunks_for_prompt`; the memoir/narrative pipeline keeps using the latter so drafts are unaffected. Returns "" when no entry is produced.
+    """
+    chunks = evidence.get("relevant_chunks") or []
+    chunks = dedupe_evidence_chunk_rows(chunks[:10])  # only the first 10 rows are handed to the de-dup helper
+    summaries = evidence.get("relevant_summaries") or []
+    facts = evidence.get("relevant_facts") or []
+    timeline = evidence.get("timeline_hints") or []
+    stories = evidence.get("relevant_stories") or []
+
+    header = (  # preamble: entries are the user's own past words/summaries; cite with attribution, never as the assistant's first-person experience
+        "【相关记忆摘录·聊天专用】\n"
+        "以下编号条目均来自**用户过往口述或系统摘要**，**不是**助手本人经历。\n"
+        "承接时**必须**用「你之前提过…」「你说过…」「你刚讲到…」等**归因式**引用；\n"
+        "**禁止**改写成「我当时…」「我小时候…」「我演过…」等助手第一人称亲历口吻；"
+        "**禁止**把条目当作你与用户的共同回忆或无归因复述。\n"
+    )
+
+    lines: list[str] = []
+    n = 0  # running entry number shared by all five sections, rendered as [M1], [M2], ...
+    for c in chunks:
+        content = (  # rows may be dicts or objects exposing a .content attribute
+            c.get("content", "") if isinstance(c, dict) else getattr(c, "content", "")
+        )
+        raw = (content or "").strip()
+        if not raw:
+            continue
+        n += 1
+        cid = ""
+        if isinstance(c, dict) and c.get("id"):
+            cid = str(c.get("id", ""))[:12]  # id shortened to its first 12 characters; only dict rows get one
+        label = f"[M{n}]" + (f"(id…{cid})" if cid else "")
+        safe = format_user_memory_for_chat_display(raw, verbatim=True)  # chunks are rendered in the quoted form
+        lines.append(f"{label} {safe}")
+
+    for s in summaries[:3]:  # at most 3 summaries; non-dict items are ignored
+        if isinstance(s, dict):
+            st = (s.get("content") or "").strip()
+            stype = (s.get("summary_type") or "").strip()
+            if not st:
+                continue
+            n += 1
+            prefix = f"[摘要:{stype}]" if stype else "[摘要]"  # "摘要" = "summary"; the type is appended when present
+            safe = format_user_memory_for_chat_display(f"{prefix} {st}")
+            lines.append(f"[M{n}] {safe}")
+
+    for f in facts[:5]:  # at most 5 facts; non-dict items are ignored
+        if isinstance(f, dict):
+            subj = f.get("subject", "")
+            pred = f.get("predicate", "")
+            obj_raw = f.get("object_json", "")
+            obj = _flatten_object_json(obj_raw)
+            if not (subj or pred):  # skip facts that have neither a subject nor a predicate
+                continue
+            n += 1
+            fact_line = (
+                f"{subj}：{pred}（{obj}）" if obj else f"{subj}：{pred}"
+            )
+            safe = format_user_memory_for_chat_display(fact_line)
+            lines.append(f"[M{n}] {safe}")
+
+    for t in timeline[:5]:  # at most 5 timeline hints; non-dict items are ignored
+        if isinstance(t, dict):
+            title = (t.get("title") or "").strip()
+            year = t.get("event_year")
+            desc = (t.get("description") or "").strip()
+            line = " ".join(  # joins only the non-empty parts of year, title, description
+                x for x in (str(year) if year is not None else "", title, desc) if x
+            )
+            if not line:
+                continue
+            n += 1
+            safe = format_user_memory_for_chat_display(line)
+            lines.append(f"[M{n}] {safe}")
+
+    for st in stories[:3]:  # at most 3 stories; non-dict items are ignored
+        if isinstance(st, dict):
+            title = (st.get("title") or "").strip()
+            summ = (st.get("summary") or "").strip()
+            if not (title or summ):
+                continue
+            n += 1
+            safe = format_user_memory_for_chat_display(
+                " ".join(x for x in (title, summ) if x)
+            )
+            lines.append(f"[M{n}] {safe}")
+
+    if not lines:  # no entries: return "" rather than a bare preamble
+        return ""
+    return header + "\n".join(lines)
+
+
 def format_evidence_chunks_for_prompt(evidence: dict) -> str:
    """将 retrieve_evidence / retrieve_evidence_sync 结果格式化为简短文本，供叙事与访谈 prompt 使用。