WIP: memory system improvements (in progress)

Interview/chat prompt layers, reply planner, style profiles, memory
injection, interview meta store, and related tests. Work not finished.

Made-with: Cursor
This commit is contained in:
Kevin
2026-04-22 16:56:28 +08:00
parent e848f26354
commit 3121d1384d
28 changed files with 2790 additions and 452 deletions

View File

@@ -66,6 +66,114 @@ def _flatten_object_json(obj_raw: object) -> str:
return str(obj_raw) if obj_raw else ""
def format_user_memory_for_chat_display(
    text: str,
    *,
    verbatim: bool = False,
) -> str:
    """Prefix a memory snippet with explicit attribution to the user.

    The snippet itself is only stripped of surrounding whitespace; its
    wording is never rewritten.

    Args:
        text: The memory text. ``None`` and empty strings are tolerated.
        verbatim: When true, render the snippet as a direct quotation of the
            user, wrapped in 「」 corner brackets. Otherwise render it as a
            statement about the user.

    Returns:
        The attributed string, or ``""`` when ``text`` has no
        non-whitespace content.
    """
    snippet = (text or "").strip()
    if not snippet:
        return ""
    if verbatim:
        # The quotation must be closed: without the trailing 」 the reader
        # cannot tell where the user's own words end.
        return f"用户曾说:「{snippet}」"
    return f"关于用户:{snippet}"
def format_evidence_chunks_for_chat_prompt(evidence: dict) -> str:
    """Format a retrieval bundle as a short, numbered memory excerpt for chat.

    Chat/interview-specific counterpart of ``format_evidence_chunks_for_prompt``;
    it is kept as a separate function so that the memoir/narrative pipeline,
    which uses the other formatter, is not affected by changes here.

    Entries are numbered ``[M1]``, ``[M2]``, ... in this order: chunks,
    summaries, facts, timeline hints, stories. Every entry is wrapped by
    ``format_user_memory_for_chat_display`` so that it is attributed to the
    user, and the block is preceded by a header instructing the assistant to
    quote the entries with attribution instead of retelling them in the
    first person.

    Args:
        evidence: Mapping that may contain ``relevant_chunks``,
            ``relevant_summaries``, ``relevant_facts``, ``timeline_hints``
            and ``relevant_stories``. Missing or empty keys are treated as
            empty lists.

    Returns:
        The header followed by one line per entry, or ``""`` when no entry
        has any usable content.
    """
    chunks = evidence.get("relevant_chunks") or []
    # Only the first 10 chunks are considered, then de-duplicated.
    chunks = dedupe_evidence_chunk_rows(chunks[:10])
    summaries = evidence.get("relevant_summaries") or []
    facts = evidence.get("relevant_facts") or []
    timeline = evidence.get("timeline_hints") or []
    stories = evidence.get("relevant_stories") or []

    header = (
        "【相关记忆摘录·聊天专用】\n"
        # The two bold spans need a separator; "****" is not valid emphasis.
        "以下编号条目均来自**用户过往口述或系统摘要**,**不是**助手本人经历。\n"
        "承接时**必须**用「你之前提过…」「你说过…」「你刚讲到…」等**归因式**引用;\n"
        "**禁止**改写成「我当时…」「我小时候…」「我演过…」等助手第一人称亲历口吻;"
        "**禁止**把条目当作你与用户的共同回忆或无归因复述。\n"
    )

    lines: list[str] = []
    n = 0  # running entry number, shared by all five sections

    # Raw chunks: quoted verbatim. Chunks may be dicts or attribute objects.
    for chunk in chunks:
        if isinstance(chunk, dict):
            content = chunk.get("content", "")
        else:
            content = getattr(chunk, "content", "")
        raw = (content or "").strip()
        if not raw:
            continue
        n += 1
        cid = ""
        if isinstance(chunk, dict) and chunk.get("id"):
            # A short id prefix is enough to trace the entry back to its row.
            cid = str(chunk.get("id", ""))[:12]
        label = f"[M{n}]" + (f"(id…{cid})" if cid else "")
        safe = format_user_memory_for_chat_display(raw, verbatim=True)
        lines.append(f"{label} {safe}")

    # Summaries: at most 3, tagged with their summary type when present.
    for summary in summaries[:3]:
        if not isinstance(summary, dict):
            continue
        body = (summary.get("content") or "").strip()
        stype = (summary.get("summary_type") or "").strip()
        if not body:
            continue
        n += 1
        prefix = f"[摘要:{stype}]" if stype else "[摘要]"
        safe = format_user_memory_for_chat_display(f"{prefix} {body}")
        lines.append(f"[M{n}] {safe}")

    # Facts: at most 5, rendered as subject + predicate (+ flattened object).
    for fact in facts[:5]:
        if not isinstance(fact, dict):
            continue
        subj = fact.get("subject")
        pred = fact.get("predicate")
        obj = _flatten_object_json(fact.get("object_json", ""))
        if not (subj or pred):
            continue
        # dict.get's default only covers *missing* keys; a key stored as
        # None would otherwise be interpolated as the literal text "None".
        subj = "" if subj is None else subj
        pred = "" if pred is None else pred
        n += 1
        fact_line = f"{subj}{pred}{obj}" if obj else f"{subj}{pred}"
        safe = format_user_memory_for_chat_display(fact_line)
        lines.append(f"[M{n}] {safe}")

    # Timeline hints: at most 5, "year title description" with blanks dropped.
    for hint in timeline[:5]:
        if not isinstance(hint, dict):
            continue
        title = (hint.get("title") or "").strip()
        year = hint.get("event_year")
        desc = (hint.get("description") or "").strip()
        year_text = str(year) if year is not None else ""
        line = " ".join(part for part in (year_text, title, desc) if part)
        if not line:
            continue
        n += 1
        safe = format_user_memory_for_chat_display(line)
        lines.append(f"[M{n}] {safe}")

    # Stories: at most 3, "title summary" with blanks dropped.
    for story in stories[:3]:
        if not isinstance(story, dict):
            continue
        title = (story.get("title") or "").strip()
        summ = (story.get("summary") or "").strip()
        if not (title or summ):
            continue
        n += 1
        safe = format_user_memory_for_chat_display(
            " ".join(part for part in (title, summ) if part)
        )
        lines.append(f"[M{n}] {safe}")

    if not lines:
        # No usable entries: emit nothing rather than a bare header.
        return ""
    return header + "\n".join(lines)
def format_evidence_chunks_for_prompt(evidence: dict) -> str:
"""将 retrieve_evidence / retrieve_evidence_sync 结果格式化为简短文本,供叙事与访谈 prompt 使用。