"""Memory ingest service boundary."""

from __future__ import annotations

from sqlalchemy.ext.asyncio import AsyncSession

from app.core.config import settings
from app.core.logging import get_logger
from app.features.conversation.lineage_schemas import (
    primary_user_message_id_from_lineage,
)
from app.features.memory.chunker import chunk_transcript
from app.features.memory.enrichment_scheduler import (
    MemoryEnrichmentRequest,
    MemoryEnrichmentScheduler,
)
from app.features.memory.repo import (
    create_chunk,
    create_source,
    update_chunk_embedding,
)
from app.ports.embedding import EmbeddingProvider

logger = get_logger(__name__)


class MemoryIngestService:
    """Creates memory sources/chunks and schedules post-commit enrichment."""

    def __init__(
        self,
        db: AsyncSession,
        *,
        embedding_provider: EmbeddingProvider | None = None,
        enrichment_scheduler: MemoryEnrichmentScheduler | None = None,
    ) -> None:
        """Store the collaborators used by the ingest flow.

        Args:
            db: Async session used for every repo call, the flush and the commit.
            embedding_provider: Optional provider; when falsy, no embeddings
                are computed or written.
            enrichment_scheduler: Optional scheduler; a default
                ``MemoryEnrichmentScheduler()`` is created when omitted.
        """
        self._db = db
        self._embedding = embedding_provider
        self._enrichment_scheduler = enrichment_scheduler or MemoryEnrichmentScheduler()

    async def ingest_transcript(
        self,
        user_id: str,
        conversation_id: str,
        transcript: str,
        *,
        lineage_json: dict | None = None,
    ) -> str:
        """Persist a transcript as a source plus chunks, then schedule enrichment.

        The source and its chunks are created and flushed, embeddings are
        written when a provider is configured, the session is committed, and
        only then is enrichment scheduled for the new source.

        Args:
            user_id: Owner passed to the source and every chunk.
            conversation_id: Conversation the source is attached to.
            transcript: Raw transcript text; surrounding whitespace is stripped
                before it is stored and chunked.
            lineage_json: Optional lineage payload stored on the source; when
                truthy it is also used to derive the primary user message id.

        Returns:
            The id of the created source.

        Raises:
            ValueError: If ``transcript`` is empty or whitespace-only.
        """
        # Normalise once; the same stripped text is validated, stored and chunked.
        text = transcript.strip() if transcript else ""
        if not text:
            raise ValueError("transcript cannot be empty")

        primary_mid = (
            primary_user_message_id_from_lineage(lineage_json)
            if lineage_json
            else None
        )
        source = await create_source(
            self._db,
            user_id=user_id,
            source_type="transcript",
            raw_text=text,
            conversation_id=conversation_id,
            lineage_json=lineage_json,
            primary_user_message_id=primary_mid,
        )

        chunk_records: list[tuple[str, str]] = []
        for i, content in enumerate(chunk_transcript(text)):
            chunk = await create_chunk(
                self._db,
                source_id=source.id,
                user_id=user_id,
                content=content,
                chunk_index=i,
            )
            chunk_records.append((chunk.id, content))
        await self._db.flush()

        vectors_written = 0
        if self._embedding and chunk_records:
            vectors_written = await self._write_embeddings(source.id, chunk_records)
        await self._db.commit()

        emb_ok = self._embedding.is_available() if self._embedding else False
        # Scheduled only after the commit above has completed.
        enrichment_task_id = self._enrichment_scheduler.schedule(
            MemoryEnrichmentRequest(user_id=user_id, source_id=source.id)
        )
        logger.info(
            "event=memory_ingest_done user_id={} conversation_id={} source_id={} "
            "chunks={} vectors_written={} embedding_available={} enrichment_enabled={} enrichment_task_id={}",
            user_id,
            conversation_id,
            source.id,
            len(chunk_records),
            vectors_written,
            emb_ok,
            settings.memory_enrichment_enabled,
            enrichment_task_id,
        )
        return source.id

    async def _write_embeddings(
        self,
        source_id: str,
        chunk_records: list[tuple[str, str]],
    ) -> int:
        """Embed chunk contents and store each non-empty vector on its chunk.

        Args:
            source_id: Id of the owning source; used only in the warning log.
            chunk_records: ``(chunk_id, content)`` pairs in chunk order.

        Returns:
            The number of chunks whose embedding was written.
        """
        texts = [content for _, content in chunk_records]
        embeddings = await self._embedding.embed_texts(texts)

        paired = 0
        written = 0
        # strict=False keeps ingest best-effort: a short result must not abort it.
        for (chunk_id, _), emb in zip(chunk_records, embeddings, strict=False):
            paired += 1
            if emb:  # falsy embeddings are skipped, not stored
                written += 1
                await update_chunk_embedding(self._db, chunk_id, emb)

        if paired < len(chunk_records):
            # Surface the truncation instead of silently leaving chunks unembedded.
            logger.warning(
                "event=memory_ingest_embedding_mismatch source_id={} chunks={} embeddings={}",
                source_id,
                len(chunk_records),
                paired,
            )
        return written


__all__ = ["MemoryIngestService"]