Files
life-echo/api/app/features/memory/enrichment_pipeline.py

50 lines
1.3 KiB
Python
Raw Normal View History

"""Enrichment 共享:去重键与 object_json 规范化sync/async 共用)。"""
from __future__ import annotations
import json
from typing import Any
# Common aliases for the narrator. normalize_subject() maps any of these to
# narrator_name, or to "叙述者" when no narrator name is given.
# The empty string is included as well.
_NARRATOR_ALIASES: frozenset[str] = frozenset(
    {
        "",
        "本人",  # "myself"
        "人物",  # "person" / "character"
        "叙述者",  # "narrator"
        "讲述者",  # "storyteller"
        "老人",  # "the elder"
        "自己",  # "oneself"
        "咱们",  # "we" / "us"
    }
)
def normalize_subject(subject: str | None, narrator_name: str | None = None) -> str:
    """Map pronouns and generic references onto one canonical subject.

    Having a single canonical subject makes de-duplication and retrieval
    easier.

    Args:
        subject: Raw subject text; ``None`` is treated like an empty string.
        narrator_name: Name to substitute when the subject refers to the
            narrator. When falsy, the literal ``"叙述者"`` is used instead.

    Returns:
        The narrator name (or ``"叙述者"``) when the stripped subject is empty
        or is one of ``_NARRATOR_ALIASES``; otherwise the stripped subject.
    """
    cleaned = subject.strip() if subject else ""
    refers_to_narrator = not cleaned or cleaned in _NARRATOR_ALIASES
    if refers_to_narrator:
        return narrator_name if narrator_name else "叙述者"
    return cleaned
def dedupe_key(f: dict, *, narrator_name: str | None = None) -> tuple:
    """Build a hashable key that identifies a fact for de-duplication.

    The key is ``(subject, predicate, object)``:

    * subject -- ``f["subject"]`` passed through ``normalize_subject``;
    * predicate -- ``f["predicate"]`` with surrounding whitespace removed;
    * object -- ``f["object_json"]`` serialized as JSON with sorted keys, so
      dict key order does not affect the result. ``None`` (or a missing key)
      becomes ``""``; values ``json.dumps`` rejects fall back to ``str()``.

    Args:
        f: Fact mapping. The ``subject``, ``predicate`` and ``object_json``
            keys are read with ``.get`` and may all be absent.
        narrator_name: Forwarded to ``normalize_subject``.

    Returns:
        A 3-tuple of strings.
    """
    raw_subject = f.get("subject")
    if raw_subject and not isinstance(raw_subject, str):
        # normalize_subject() calls .strip(); coerce first so a non-string
        # subject (e.g. a number) does not raise AttributeError.
        raw_subject = str(raw_subject)
    subject = normalize_subject(raw_subject, narrator_name)

    # Coerce before stripping for the same reason; falsy values still map to "".
    predicate = str(f.get("predicate") or "").strip()

    obj = f.get("object_json")
    if obj is None:
        serialized = ""
    else:
        try:
            serialized = json.dumps(obj, sort_keys=True, ensure_ascii=False)
        except (TypeError, ValueError):
            # Not JSON-serializable (or has unsortable mixed keys): fall back
            # to the plain string form rather than failing.
            serialized = str(obj)
    return (str(subject), predicate, serialized)
def normalize_object_json(obj: Any) -> dict | list | None:
    """Coerce a value into the container shape used for ``object_json``.

    Args:
        obj: Any value.

    Returns:
        ``None``, dicts and lists are returned unchanged (the same object, not
        a copy). Any other value is wrapped as ``{"value": obj}``.
    """
    already_normalized = obj is None or isinstance(obj, (dict, list))
    if already_normalized:
        return obj
    return {"value": obj}