feat(api)!: memory single chain — async MemoryService, strict eval closure
Route all memory ingest/retrieve/enrichment/compaction through async MemoryService. Remove legacy sync memory implementations (ingest/retrieve/compaction); Celery and memoir Phase2 call asyncio.run into MemoryService-backed helpers. Memoir Phase1 batch ingest uses MemoryService.ingest_transcripts_batch; drop chapters. evidence_bundle_json mirror (Alembic 0015). Evaluation uses snapshot/link-only bundles; raise EvidenceClosureMissing instead of partial/fallback lineage tiers. Split memoir state into NarrativeCoverageState and InterviewControlState; delete the _interview_meta_store adapter layer. Remove rolling-query and recent-fact fallback settings from config and evidence assembly. Update judges, docs, tests, and PlaygroundPage alignment. Made-with: Cursor
This commit is contained in:
@@ -106,5 +106,82 @@ class MemoryIngestService:
|
||||
)
|
||||
return source.id
|
||||
|
||||
async def ingest_transcripts_batch(
    self,
    user_id: str,
    items: list[tuple[str, str, dict | None]],
    *,
    memoir_correlation_id: str | None = None,
) -> list[str]:
    """
    Ingest several transcripts in one pass through the async memory path.

    Each entry of ``items`` is ``(conversation_id, transcript, lineage_json)``.
    Entries whose transcript is empty or whitespace-only are skipped.

    Steps, in order:
      1. create a source plus its chunks for every non-empty transcript;
      2. flush the session;
      3. if an embedding client is set and chunks exist, embed all chunk
         texts in one call and store each non-empty vector;
      4. commit the session;
      5. schedule enrichment for all created sources and log a summary.

    Returns the ids of the created sources, in input order.
    """
    created_source_ids: list[str] = []
    # (chunk id, chunk text) pairs collected across all transcripts.
    pending_chunks: list[tuple[str, str]] = []

    for conversation_id, transcript, lineage_json in items:
        body = (transcript or "").strip()
        if not body:
            continue

        # Only consult the lineage helper when a lineage payload is present.
        primary_mid = None
        if lineage_json:
            primary_mid = primary_user_message_id_from_lineage(lineage_json)

        source = await create_source(
            self._db,
            user_id=user_id,
            source_type="transcript",
            raw_text=body,
            conversation_id=conversation_id or None,
            lineage_json=lineage_json,
            primary_user_message_id=primary_mid,
        )
        created_source_ids.append(source.id)

        for position, piece in enumerate(chunk_transcript(body)):
            chunk = await create_chunk(
                self._db,
                source_id=source.id,
                user_id=user_id,
                content=piece,
                chunk_index=position,
            )
            pending_chunks.append((chunk.id, piece))

    await self._db.flush()

    vectors_written = 0
    if self._embedding and pending_chunks:
        chunk_texts = [piece for _, piece in pending_chunks]
        vectors = await self._embedding.embed_texts(chunk_texts)
        # strict=False: a shorter embeddings list simply leaves the
        # trailing chunks without a vector instead of raising.
        for (chunk_id, _), vector in zip(pending_chunks, vectors, strict=False):
            if not vector:
                continue
            vectors_written += 1
            await update_chunk_embedding(self._db, chunk_id, vector)

    await self._db.commit()

    if self._embedding:
        embedding_available = self._embedding.is_available()
    else:
        embedding_available = False

    task_ids = self._enrichment_scheduler.schedule_many(
        user_id,
        created_source_ids,
        memoir_correlation_id=memoir_correlation_id,
    )

    logger.info(
        "event=memory_ingest_batch_done user_id={} sources={} chunks={} "
        "vectors_written={} embedding_available={} enrichment_enabled={} enrichment_tasks={}",
        user_id,
        len(created_source_ids),
        len(pending_chunks),
        vectors_written,
        embedding_available,
        settings.memory_enrichment_enabled,
        len(task_ids),
    )
    return created_source_ids
|
||||
|
||||
|
||||
# Names exported by a star-import of this module.
__all__ = ["MemoryIngestService"]
|
||||
|
||||
Reference in New Issue
Block a user