feat(api)!: memory single chain — async MemoryService, strict eval closure

Route all memory ingest/retrieve/enrichment/compaction through async MemoryService. Remove legacy sync memory implementations (ingest/retrieve/compaction); Celery and memoir Phase2 call asyncio.run into MemoryService-backed helpers. Memoir Phase1 batch ingest uses MemoryService.ingest_transcripts_batch; drop chapters. evidence_bundle_json mirror (Alembic 0015). Evaluation uses snapshot/link-only bundles; raise EvidenceClosureMissing instead of partial/fallback lineage tiers. Split memoir state into NarrativeCoverageState and InterviewControlState; delete the _interview_meta_store adapter layer. Remove rolling-query and recent-fact fallback settings from config and evidence assembly. Update judges, docs, tests, and PlaygroundPage alignment. Made-with: Cursor
2026-04-30 14:11:46 +08:00
parent ac436b87a2
commit 71fbd39e32
53 changed files with 953 additions and 2448 deletions
--- a/api/app/features/memory/ingest_service.py
+++ b/api/app/features/memory/ingest_service.py
@@ -106,5 +106,82 @@ class MemoryIngestService:
        )
        return source.id

+    async def ingest_transcripts_batch(
+        self,
+        user_id: str,
+        items: list[tuple[str, str, dict | None]],
+        *,
+        memoir_correlation_id: str | None = None,
+    ) -> list[str]:
+        """
+        Ingest a batch of transcripts via the async memory path; return the new source ids.
+
+        Each item is ``(conversation_id, transcript, lineage_json)``. Items whose
+        transcript is empty after stripping are skipped and create nothing.
+        """
+        created_ids: list[str] = []
+        pending_chunks: list[tuple[str, str]] = []
+
+        for conversation_id, transcript, lineage_json in items:
+            body = (transcript or "").strip()
+            if not body:
+                continue
+            primary_mid = None
+            if lineage_json:
+                primary_mid = primary_user_message_id_from_lineage(lineage_json)
+            source = await create_source(
+                self._db,
+                user_id=user_id,
+                source_type="transcript",
+                raw_text=body,
+                conversation_id=conversation_id or None,
+                lineage_json=lineage_json,
+                primary_user_message_id=primary_mid,
+            )
+            created_ids.append(source.id)
+
+            for position, piece in enumerate(chunk_transcript(body)):
+                chunk = await create_chunk(
+                    self._db,
+                    source_id=source.id,
+                    user_id=user_id,
+                    content=piece,
+                    chunk_index=position,
+                )
+                pending_chunks.append((chunk.id, piece))
+
+        await self._db.flush()
+
+        vectors_written = 0
+        if self._embedding and pending_chunks:
+            vectors = await self._embedding.embed_texts([piece for _, piece in pending_chunks])
+            # strict=False: zip stops at the shorter input, so a length mismatch is tolerated.
+            for (chunk_id, _), vector in zip(pending_chunks, vectors, strict=False):
+                if not vector:
+                    continue
+                vectors_written += 1
+                await update_chunk_embedding(self._db, chunk_id, vector)
+
+        await self._db.commit()
+        emb_ok = False if not self._embedding else self._embedding.is_available()
+        task_ids = self._enrichment_scheduler.schedule_many(
+            user_id,
+            created_ids,
+            memoir_correlation_id=memoir_correlation_id,
+        )
+
+        logger.info(
+            "event=memory_ingest_batch_done user_id={} sources={} chunks={} "
+            "vectors_written={} embedding_available={} enrichment_enabled={} enrichment_tasks={}",
+            user_id,
+            len(created_ids),
+            len(pending_chunks),
+            vectors_written,
+            emb_ok,
+            settings.memory_enrichment_enabled,
+            len(task_ids),
+        )
+        return created_ids
+

 __all__ = ["MemoryIngestService"]