feat(api)!: memory single chain — async MemoryService, strict eval closure

Route all memory ingest/retrieve/enrichment/compaction through async MemoryService. Remove legacy sync memory implementations (ingest/retrieve/compaction); Celery and memoir Phase2 call MemoryService-backed helpers via asyncio.run. Memoir Phase1 batch ingest uses MemoryService.ingest_transcripts_batch; drop the chapters.evidence_bundle_json mirror (Alembic 0015). Evaluation uses snapshot/link-only bundles; raise EvidenceClosureMissing instead of partial/fallback lineage tiers. Split memoir state into NarrativeCoverageState and InterviewControlState; delete the _interview_meta_store adapter layer. Remove rolling-query and recent-fact fallback settings from config and evidence assembly. Update judges, docs, tests, and PlaygroundPage alignment. Made-with: Cursor
2026-04-30 14:11:46 +08:00
parent ac436b87a2
commit 71fbd39e32
53 changed files with 953 additions and 2448 deletions
--- a/api/app/features/memoir/chapter_evidence_snapshot.py
+++ b/api/app/features/memoir/chapter_evidence_snapshot.py
@@ -1,4 +1,4 @@
-"""章节证据闭包：统一计算（评测与生产共用）+ Phase C 表持久化（快照行 + chapter_evidence_links）。"""
+"""章节证据闭包：快照行 + chapter_evidence_links 是评测唯一证据来源。"""

 from __future__ import annotations

@@ -16,7 +16,7 @@ from app.features.memoir.models import (
    ChapterEvidenceLink,
    ChapterEvidenceSnapshot,
 )
-from app.features.memory.repo import fetch_memory_closure_for_conversations_sync
+from app.features.story.models import StoryEvidenceLink

 EVIDENCE_SNAPSHOT_SCHEMA_VERSION = 1

@@ -53,21 +53,69 @@ def _story_ids_ordered(chapter: Chapter) -> list[str]:
    return out


+def _dedupe_ids(raw: list[str]) -> list[str]:
+    seen: set[str] = set()
+    out: list[str] = []
+    for item in raw:
+        sid = str(item).strip()
+        if sid and sid not in seen:
+            seen.add(sid)
+            out.append(sid)
+    return out
+
+
+def _story_evidence_ids_for_chapter(
+    session: Session, story_ids: list[str]
+) -> tuple[list[str], list[str], list[str], list[str]]:
+    """Collect strict story-bound evidence ids for the chapter snapshot."""
+    if not story_ids:
+        return [], [], [], []
+    stmt = select(
+        StoryEvidenceLink.evidence_type,
+        StoryEvidenceLink.evidence_id,
+    ).where(StoryEvidenceLink.story_id.in_(story_ids))
+    chunk_ids: list[str] = []
+    fact_ids: list[str] = []
+    timeline_ids: list[str] = []
+    summary_ids: list[str] = []
+    for evidence_type, evidence_id in session.execute(stmt).all():
+        et = str(evidence_type or "").strip()
+        eid = str(evidence_id or "").strip()
+        if not eid:
+            continue
+        if et == "chunk":
+            chunk_ids.append(eid)
+        elif et == "fact":
+            fact_ids.append(eid)
+        elif et == "timeline_event":
+            timeline_ids.append(eid)
+        elif et == "summary":
+            summary_ids.append(eid)
+    return (
+        _dedupe_ids(chunk_ids),
+        _dedupe_ids(fact_ids),
+        _dedupe_ids(timeline_ids),
+        _dedupe_ids(summary_ids),
+    )
+
+
 def build_chapter_evidence_closure_payload_sync(
    session: Session, chapter: Chapter
 ) -> dict:
    """
-    唯一闭包计算入口：由 `refresh_chapter_evidence_snapshot_sync` 与评测侧（经 JSON 镜像）
-    共用同一套 segment / conversation / memory 推导逻辑。
+    唯一闭包计算入口：transcript 证据来自 chapter.segment 绑定；
+    memory 证据只来自 StoryEvidenceLink，不再做 live memory closure fallback。
    """
    uid = str(chapter.user_id)
    segment_ids = _normalize_segment_ids(chapter.source_segments)
    story_ids = _story_ids_ordered(chapter)
+    chunk_ids, fact_ids, tl_ids, sum_ids = _story_evidence_ids_for_chapter(
+        session, story_ids
+    )
    segs: list = []

    if not segment_ids:
        conv_ids: list[str] = []
-        chunk_ids, fact_ids, tl_ids, sum_ids = [], [], [], []
        notes = [
            "no_source_segments",
            "snapshot_materialized",
@@ -84,11 +132,6 @@ def build_chapter_evidence_closure_payload_sync(
        )
        segs = list(session.execute(stmt).scalars().all())
        conv_ids = sorted({str(s.conversation_id) for s in segs if s.conversation_id})
-        chunk_ids, fact_ids, tl_ids, sum_ids = (
-            fetch_memory_closure_for_conversations_sync(session, uid, conv_ids)
-            if conv_ids
-            else ([], [], [], [])
-        )
        notes = ["snapshot_materialized"]
        if len(segs) < len(segment_ids):
            notes.append("some_segment_ids_unresolved_or_foreign_user")
@@ -172,7 +215,7 @@ def _replace_chapter_evidence_links_sync(


 def refresh_chapter_evidence_snapshot_sync(session: Session, chapter_id: str) -> bool:
-    """写入新版本快照行、替换 evidence_links、更新 Chapter 当前指针；镜像 evidence_bundle_json。"""
+    """写入新版本快照行、替换 evidence_links、更新 Chapter 当前指针。"""
    stmt = (
        select(Chapter)
        .where(Chapter.id == chapter_id)
@@ -215,7 +258,6 @@ def refresh_chapter_evidence_snapshot_sync(session: Session, chapter_id: str) ->
        session, chapter_id=str(ch.id), payload=payload
    )
    ch.current_evidence_snapshot_id = snap.id
-    ch.evidence_bundle_json = payload
    if payload.get("message_lineage_json") is not None:
        ch.source_lineage_json = payload.get("message_lineage_json")
    session.flush()