"""Enrichment 共享:去重键与 object_json 规范化(sync/async 共用)。""" from __future__ import annotations import json from typing import Any # 叙述者常见别名 — 归一化到 narrator_name 或「叙述者」 _NARRATOR_ALIASES: frozenset[str] = frozenset( { "我", "本人", "人物", "叙述者", "讲述者", "老人", "自己", "咱们", } ) def normalize_subject(subject: str | None, narrator_name: str | None = None) -> str: """将代词/泛称映射为统一 subject,便于去重与检索。""" s = (subject or "").strip() if not s: return narrator_name or "叙述者" if s in _NARRATOR_ALIASES: return narrator_name or "叙述者" return s def dedupe_key(f: dict, *, narrator_name: str | None = None) -> tuple: s = normalize_subject(f.get("subject"), narrator_name) p = (f.get("predicate") or "").strip() o = f.get("object_json") try: oj = json.dumps(o, sort_keys=True, ensure_ascii=False) if o is not None else "" except (TypeError, ValueError): oj = str(o) return (str(s), str(p), oj) def normalize_object_json(obj: Any) -> dict | list | None: if obj is None: return None if isinstance(obj, (dict, list)): return obj return {"value": obj}