聊天和回忆录证据检索都走 pgvector，去掉 Postgres FTS/content_tsv，新迁移删掉 content_tsv 列（部署要先 alembic upgrade）。

Embedding 端口增加 is_available()，聊天和回忆录日志以统一方式记录向量（Embedding）是否真正可调用。记忆整理（compaction）支持由 Beat 定期扫描用户；调整事实抽取提示并对 subject 做归一化，减少同一人出现多种称呼。
2026-04-03 11:43:16 +08:00
parent b853b986dd
commit 41518bda11
26 changed files with 543 additions and 222 deletions
--- a/api/app/features/memory/enrichment_pipeline.py
+++ b/api/app/features/memory/enrichment_pipeline.py
@@ -5,10 +5,34 @@ from __future__ import annotations
 import json
 from typing import Any

+# 叙述者常见别名 — 归一化到 narrator_name 或「叙述者」
+_NARRATOR_ALIASES: frozenset[str] = frozenset(
+    {
+        "我",
+        "本人",
+        "人物",
+        "叙述者",
+        "讲述者",
+        "老人",
+        "自己",
+        "咱们",
+    }
+)

-def dedupe_key(f: dict) -> tuple:
-    s = f.get("subject") or ""
-    p = f.get("predicate") or ""
+
+def normalize_subject(subject: str | None, narrator_name: str | None = None) -> str:
+    """将代词/泛称映射为统一 subject，便于去重与检索。"""
+    s = (subject or "").strip()
+    if not s:
+        return narrator_name or "叙述者"
+    if s in _NARRATOR_ALIASES:
+        return narrator_name or "叙述者"
+    return s
+
+
+def dedupe_key(f: dict, *, narrator_name: str | None = None) -> tuple:
+    s = normalize_subject(f.get("subject"), narrator_name)
+    p = (f.get("predicate") or "").strip()
    o = f.get("object_json")
    try:
        oj = json.dumps(o, sort_keys=True, ensure_ascii=False) if o is not None else ""