Files
life-echo/api/app/features/memory/ingest_service.py

111 lines
3.5 KiB
Python
Raw Normal View History

"""Memory ingest service boundary."""
from __future__ import annotations
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.config import settings
from app.core.logging import get_logger
from app.features.conversation.lineage_schemas import (
primary_user_message_id_from_lineage,
)
from app.features.memory.chunker import chunk_transcript
from app.features.memory.enrichment_scheduler import (
MemoryEnrichmentRequest,
MemoryEnrichmentScheduler,
)
from app.features.memory.repo import (
create_chunk,
create_source,
update_chunk_embedding,
)
from app.ports.embedding import EmbeddingProvider
# Module-level logger, requested from the project logging helper under this module's name.
logger = get_logger(__name__)
class MemoryIngestService:
    """Store transcripts as memory sources/chunks, then queue enrichment.

    One ingest call writes a source row, one chunk row per piece produced by
    ``chunk_transcript``, optionally attaches embeddings to those chunks,
    commits the session, and finally hands the source to the enrichment
    scheduler.
    """

    def __init__(
        self,
        db: AsyncSession,
        *,
        embedding_provider: EmbeddingProvider | None = None,
        enrichment_scheduler: MemoryEnrichmentScheduler | None = None,
    ) -> None:
        """Bind the service to a session and its optional collaborators.

        Args:
            db: Session used for every repository call, flush and commit.
            embedding_provider: Optional provider; when absent (or falsy) no
                embeddings are computed.
            enrichment_scheduler: Optional scheduler; a falsy value is
                replaced by a freshly constructed ``MemoryEnrichmentScheduler``.
        """
        self._db = db
        self._embedding = embedding_provider
        self._enrichment_scheduler = (
            enrichment_scheduler or MemoryEnrichmentScheduler()
        )

    async def ingest_transcript(
        self,
        user_id: str,
        conversation_id: str,
        transcript: str,
        *,
        lineage_json: dict | None = None,
    ) -> str:
        """Persist a transcript, embed its chunks, commit, and schedule enrichment.

        Args:
            user_id: Owner recorded on the source and on every chunk.
            conversation_id: Conversation recorded on the source.
            transcript: Raw transcript text; it is stripped before storage
                and chunking.
            lineage_json: Optional lineage payload stored on the source. When
                truthy it is also used to derive the primary user message id.

        Returns:
            The ``id`` of the created source.

        Raises:
            ValueError: If ``transcript`` is empty or whitespace-only.
        """
        # Strip once and reuse the result for validation, storage and chunking.
        cleaned = transcript.strip() if transcript else ""
        if not cleaned:
            raise ValueError("transcript cannot be empty")

        primary_mid = None
        if lineage_json:
            primary_mid = primary_user_message_id_from_lineage(lineage_json)

        source = await create_source(
            self._db,
            user_id=user_id,
            source_type="transcript",
            raw_text=cleaned,
            conversation_id=conversation_id,
            lineage_json=lineage_json,
            primary_user_message_id=primary_mid,
        )

        stored_chunks = await self._store_chunks(source, user_id, cleaned)
        await self._db.flush()

        vectors_written = await self._write_embeddings(stored_chunks)
        await self._db.commit()

        if self._embedding:
            emb_ok = self._embedding.is_available()
        else:
            emb_ok = False

        # Enrichment is requested only after the commit above has completed.
        request = MemoryEnrichmentRequest(user_id=user_id, source_id=source.id)
        enrichment_task_id = self._enrichment_scheduler.schedule(request)

        logger.info(
            "event=memory_ingest_done user_id={} conversation_id={} source_id={} "
            "chunks={} vectors_written={} embedding_available={} enrichment_enabled={} enrichment_task_id={}",
            user_id,
            conversation_id,
            source.id,
            len(stored_chunks),
            vectors_written,
            emb_ok,
            settings.memory_enrichment_enabled,
            enrichment_task_id,
        )
        return source.id

    async def _store_chunks(
        self, source, user_id: str, text: str
    ) -> list[tuple[str, str]]:
        """Create one chunk row per transcript piece; return (chunk id, content) pairs."""
        stored: list[tuple[str, str]] = []
        for position, piece in enumerate(chunk_transcript(text)):
            row = await create_chunk(
                self._db,
                source_id=source.id,
                user_id=user_id,
                content=piece,
                chunk_index=position,
            )
            stored.append((row.id, piece))
        return stored

    async def _write_embeddings(self, stored: list[tuple[str, str]]) -> int:
        """Embed the stored chunk texts and save every truthy vector; return the count saved."""
        if not (self._embedding and stored):
            return 0

        texts = [piece for _, piece in stored]
        vectors = await self._embedding.embed_texts(texts)

        written = 0
        # Non-strict zip: a shorter embedding result leaves trailing chunks without a vector.
        for (chunk_id, _), vector in zip(stored, vectors, strict=False):
            if not vector:
                continue
            written += 1
            await update_chunk_embedding(self._db, chunk_id, vector)
        return written
# Explicit public API: a star-import of this module exposes only the service class.
__all__ = ["MemoryIngestService"]