Embedding 端口增加 is_available(),聊天和回忆录日志用统一方式表示向量是否真能调用;记忆整理(compaction)支持 Beat 定期扫用户;事实抽取提示与 subject 归一化,减少同一人多种称呼。
50 lines
1.3 KiB
Python
50 lines
1.3 KiB
Python
"""Enrichment 共享:去重键与 object_json 规范化(sync/async 共用)。"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import json
|
||
from typing import Any
|
||
|
||
# Common ways a fact's subject refers to the narrator. Any of these is
# normalized to narrator_name, or to "叙述者" when no name is supplied.
_NARRATOR_ALIASES: frozenset[str] = frozenset(
    ("我", "本人", "人物", "叙述者", "讲述者", "老人", "自己", "咱们")
)
|
||
|
||
|
||
def normalize_subject(subject: str | None, narrator_name: str | None = None) -> str:
    """Map pronouns and generic references onto one canonical subject.

    Collapsing the different ways of naming the narrator makes facts easier
    to de-duplicate and retrieve.

    Args:
        subject: Raw subject text; ``None`` or blank counts as the narrator.
        narrator_name: Name to use for the narrator. When falsy, the
            placeholder "叙述者" is used instead.

    Returns:
        The whitespace-stripped subject, or the narrator's name/placeholder
        when the subject is empty or is one of ``_NARRATOR_ALIASES``.
    """
    cleaned = subject.strip() if subject else ""
    # Anything non-empty that is not a narrator alias is kept as written.
    if cleaned and cleaned not in _NARRATOR_ALIASES:
        return cleaned
    return narrator_name or "叙述者"
|
||
|
||
|
||
def dedupe_key(f: dict, *, narrator_name: str | None = None) -> tuple:
    """Build a hashable de-duplication key for one fact mapping.

    The key is ``(subject, predicate, object)``:

    * subject   -- ``f["subject"]`` passed through ``normalize_subject``;
    * predicate -- ``f["predicate"]`` with surrounding whitespace removed;
    * object    -- ``f["object_json"]`` serialized as JSON with sorted keys,
      ``""`` when it is ``None``, or ``str(...)`` of the value when it cannot
      be serialized.

    Non-string subject/predicate values are coerced with ``str`` before being
    stripped, so such a fact yields a key instead of raising
    ``AttributeError``.

    Args:
        f: Fact mapping; the "subject", "predicate" and "object_json" keys
            are read and each may be missing.
        narrator_name: Forwarded to ``normalize_subject``.

    Returns:
        A 3-tuple of strings.
    """
    raw_subject = f.get("subject")
    if raw_subject and not isinstance(raw_subject, str):
        # Coerce first: normalize_subject calls .strip() on truthy values.
        raw_subject = str(raw_subject)
    subject_key = normalize_subject(raw_subject, narrator_name)

    # Falsy predicates (None, "", 0, ...) keep mapping to "" as before;
    # truthy non-strings are stringified instead of failing on .strip().
    predicate_key = str(f.get("predicate") or "").strip()

    obj = f.get("object_json")
    if obj is None:
        object_key = ""
    else:
        try:
            # sort_keys makes the key independent of dict insertion order.
            object_key = json.dumps(obj, sort_keys=True, ensure_ascii=False)
        except (TypeError, ValueError):
            # Not JSON-serializable (or circular): fall back to str().
            object_key = str(obj)

    return (str(subject_key), predicate_key, object_key)
|
||
|
||
|
||
def normalize_object_json(obj: Any) -> dict | list | None:
    """Coerce a fact's object value into a dict, a list, or ``None``.

    Args:
        obj: The raw object value.

    Returns:
        ``obj`` itself when it is ``None``, a ``dict`` or a ``list``;
        otherwise ``{"value": obj}``.
    """
    already_normalized = obj is None or isinstance(obj, (dict, list))
    return obj if already_normalized else {"value": obj}
|