Files
life-echo/api/app/features/memory/enrichment_pipeline.py
Kevin 41518bda11 聊天和回忆录证据检索都走 pgvector,去掉 Postgres FTS/content_tsv,新迁移删掉 content_tsv 列(部署要先 alembic upgrade)。
Embedding 端口增加 is_available(),聊天和回忆录日志用统一方式表示向量是否真能调用。

记忆整理(compaction)支持 Beat 定期扫用户;

事实抽取提示与 subject 归一化,减少同一人多种称呼;
2026-04-03 11:43:16 +08:00

50 lines
1.3 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Enrichment 共享:去重键与 object_json 规范化sync/async 共用)。"""
from __future__ import annotations
import json
from typing import Any
# Generic labels and pronouns under which the narrator may show up as a fact
# subject. normalize_subject() replaces any of them with narrator_name, or
# with the literal "叙述者" when no narrator name is supplied.
_NARRATOR_ALIASES: frozenset[str] = frozenset(
    (
        "",
        "本人",
        "人物",
        "叙述者",
        "讲述者",
        "老人",
        "自己",
        "咱们",
    )
)
def normalize_subject(subject: str | None, narrator_name: str | None = None) -> str:
    """Map pronouns and generic labels onto one canonical subject.

    A unified subject makes deduplication and retrieval easier.

    Args:
        subject: Raw subject text; surrounding whitespace is stripped.
        narrator_name: Name substituted when the subject refers to the
            narrator. When it is falsy, the literal "叙述者" is used instead.

    Returns:
        The stripped subject when it is non-empty and not listed in
        ``_NARRATOR_ALIASES``; otherwise the narrator fallback.
    """
    cleaned = subject.strip() if subject else ""
    # Anything that is a real, non-alias name passes through unchanged.
    if cleaned and cleaned not in _NARRATOR_ALIASES:
        return cleaned
    # Empty input and narrator aliases share the same fallback.
    return narrator_name or "叙述者"
def dedupe_key(f: dict, *, narrator_name: str | None = None) -> tuple:
    """Build the (subject, predicate, object) key used to detect duplicate facts.

    Args:
        f: Fact mapping; the "subject", "predicate" and "object_json" entries
            are read with ``dict.get``, so each of them may be absent.
        narrator_name: Forwarded to ``normalize_subject`` as the replacement
            for narrator aliases.

    Returns:
        A 3-tuple of strings: the normalized subject, the stripped predicate,
        and a textual form of ``object_json``. That last element is ``""``
        when the object is ``None``/missing, otherwise its JSON dump with
        sorted keys and non-ASCII characters kept as-is, or ``str(obj)`` when
        the value cannot be dumped as JSON.
    """
    subject = normalize_subject(f.get("subject"), narrator_name)
    predicate = (f.get("predicate") or "").strip()
    payload = f.get("object_json")

    if payload is None:
        serialized = ""
    else:
        try:
            # sort_keys gives dicts with the same content the same text
            # regardless of insertion order.
            serialized = json.dumps(payload, sort_keys=True, ensure_ascii=False)
        except (TypeError, ValueError):
            # Not JSON-serializable: fall back to the plain string form.
            serialized = str(payload)

    return (str(subject), str(predicate), serialized)
def normalize_object_json(obj: Any) -> dict | list | None:
if obj is None:
return None
if isinstance(obj, (dict, list)):
return obj
return {"value": obj}