聊天和回忆录证据检索都走 pgvector,去掉 Postgres FTS/content_tsv,新迁移删掉 content_tsv 列(部署要先 alembic upgrade)。
Embedding 端口增加 is_available(),聊天和回忆录日志用统一方式表示向量是否真能调用。记忆整理(compaction)支持 Beat 定期扫用户;事实抽取提示与 subject 归一化,减少同一人多种称呼。
This commit is contained in:
@@ -5,10 +5,34 @@ from __future__ import annotations
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
# Common aliases for the narrator. A subject matching one of these is
# normalized to narrator_name, or to "叙述者" when no name is given.
_NARRATOR_ALIASES: frozenset[str] = frozenset(
    {
        "我",
        "本人",
        "人物",
        "叙述者",
        "讲述者",
        "老人",
        "自己",
        "咱们",
    }
)
|
||||
|
||||
def dedupe_key(f: dict) -> tuple:
|
||||
s = f.get("subject") or ""
|
||||
p = f.get("predicate") or ""
|
||||
|
||||
def normalize_subject(subject: str | None, narrator_name: str | None = None) -> str:
    """Map pronouns and generic self-references to one canonical subject.

    Collapsing the many ways of referring to the narrator into a single
    value makes facts easier to deduplicate and retrieve.

    Args:
        subject: Raw subject string; ``None`` is treated as empty.
            Surrounding whitespace is ignored.
        narrator_name: Canonical name to use for the narrator. When it is
            ``None``, empty, or whitespace-only, ``"叙述者"`` is used.

    Returns:
        The canonical narrator subject when ``subject`` is blank or is one
        of ``_NARRATOR_ALIASES``; otherwise the stripped ``subject``.
    """
    # Strip the narrator name as well: a blank or padded name would
    # otherwise produce a blank or inconsistent subject.
    narrator = (narrator_name or "").strip() or "叙述者"
    s = (subject or "").strip()
    if not s or s in _NARRATOR_ALIASES:
        return narrator
    return s
|
||||
|
||||
|
||||
def dedupe_key(f: dict, *, narrator_name: str | None = None) -> tuple:
|
||||
s = normalize_subject(f.get("subject"), narrator_name)
|
||||
p = (f.get("predicate") or "").strip()
|
||||
o = f.get("object_json")
|
||||
try:
|
||||
oj = json.dumps(o, sort_keys=True, ensure_ascii=False) if o is not None else ""
|
||||
|
||||
Reference in New Issue
Block a user