2026-03-18 17:18:23 +08:00
|
|
|
|
"""
|
2026-04-30 14:11:46 +08:00
|
|
|
|
MemoryService — the single memory facade for conversation / memoir / eval.

All runtime paths enter ingest, retrieve, enrichment and compaction through
the async service; Celery tasks may only act as synchronous entry points that
wrap the async service, and a separate sync memory track is no longer maintained.
|
2026-03-18 17:18:23 +08:00
|
|
|
|
"""
|
2026-03-19 14:36:14 +08:00
|
|
|
|
|
2026-03-18 17:18:23 +08:00
|
|
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
|
|
|
2026-05-22 13:44:50 +08:00
|
|
|
|
from app.core.db import transactional
|
2026-03-27 16:01:28 +08:00
|
|
|
|
from app.core.logging import get_logger
|
2026-04-30 16:22:55 +08:00
|
|
|
|
from app.features.memory.embedding_service import MemoryEmbeddingService
|
2026-04-30 09:17:01 +08:00
|
|
|
|
from app.features.memory.ingest_service import MemoryIngestService
|
2026-03-20 10:30:07 +08:00
|
|
|
|
from app.features.memory.repo import (
|
2026-03-27 16:01:28 +08:00
|
|
|
|
create_curation_action,
|
2026-04-30 16:22:55 +08:00
|
|
|
|
mark_facts_stale_for_excluded_chunk,
|
2026-03-27 16:01:28 +08:00
|
|
|
|
set_chunk_excluded,
|
|
|
|
|
|
set_memory_fact_status,
|
2026-04-09 15:32:35 +08:00
|
|
|
|
)
|
2026-04-30 09:17:01 +08:00
|
|
|
|
from app.features.memory.retrieval_service import MemoryRetrievalService
|
2026-04-08 15:37:09 +08:00
|
|
|
|
from app.features.memory.schemas import EvidenceBundle
|
2026-03-20 10:30:07 +08:00
|
|
|
|
from app.ports.embedding import EmbeddingProvider
|
|
|
|
|
|
|
2026-03-27 16:01:28 +08:00
|
|
|
|
# Module-level logger obtained from the project logging helper, keyed by this module's name.
logger = get_logger(__name__)
|
|
|
|
|
|
|
2026-03-18 17:18:23 +08:00
|
|
|
|
|
|
|
|
|
|
class MemoryService:
    """Async facade over the memory feature.

    Every public method forwards to a dedicated service object or repo
    function, handing over the database session (and, where relevant, the
    embedding provider) supplied at construction time. The curation methods
    (`exclude_chunk`, `restore_chunk`, `confirm_fact`, `reject_fact`) run
    their writes inside `transactional(self._db)`.
    """

    def __init__(
        self,
        db: AsyncSession,
        *,
        embedding_provider: EmbeddingProvider | None = None,
    ):
        """Store the session and optional embedding provider for later calls.

        Args:
            db: Async database session passed to every delegated operation.
            embedding_provider: Optional provider forwarded to the ingest,
                retrieval and embedding services; `None` is passed through
                unchanged.
        """
        self._db = db
        self._embedding = embedding_provider

    async def ingest_transcript(
        self,
        user_id: str,
        conversation_id: str,
        transcript: str,
        *,
        lineage_json: dict | None = None,
    ) -> str:
        """Ingest a conversation transcript into memory.

        Creates the MemorySource, chunks it and populates embeddings (as
        described by the original docstring; the work itself happens in
        `MemoryIngestService.ingest_transcript`).

        Args:
            user_id: Owner of the transcript.
            conversation_id: Conversation the transcript belongs to.
            transcript: Raw transcript text.
            lineage_json: Optional lineage metadata forwarded as-is.

        Returns:
            The source id returned by the ingest service.
        """
        service = MemoryIngestService(self._db, embedding_provider=self._embedding)
        return await service.ingest_transcript(
            user_id,
            conversation_id,
            transcript,
            lineage_json=lineage_json,
        )

    async def ingest_transcripts_batch(
        self,
        user_id: str,
        items: list[tuple[str, str, dict | None, str | None]],
        *,
        memoir_correlation_id: str | None = None,
    ) -> list[str]:
        """Batch ingest transcripts; returns created MemorySource ids.

        Args:
            user_id: Owner of all transcripts in the batch.
            items: Per-transcript tuples forwarded untouched to the ingest
                service. NOTE(review): the meaning of each tuple position is
                defined by `MemoryIngestService.ingest_transcripts_batch` and
                is not visible here.
            memoir_correlation_id: Optional correlation id forwarded as-is.

        Returns:
            The list of ids returned by the ingest service.
        """
        service = MemoryIngestService(self._db, embedding_provider=self._embedding)
        return await service.ingest_transcripts_batch(
            user_id,
            items,
            memoir_correlation_id=memoir_correlation_id,
        )

    async def retrieve(
        self, user_id: str, query: str, *, top_k: int = 10
    ) -> EvidenceBundle:
        """Retrieve relevant evidence for a query.

        Delegates to `MemoryRetrievalService.retrieve`; the original
        docstring notes that retrieval is handled by HybridRetriever.

        Args:
            user_id: User whose memory is searched.
            query: Query text.
            top_k: Result limit forwarded to the retrieval service.

        Returns:
            The `EvidenceBundle` produced by the retrieval service.
        """
        service = MemoryRetrievalService(self._db, embedding_provider=self._embedding)
        return await service.retrieve(user_id, query, top_k=top_k)

    async def enrich_source(self, user_id: str, source_id: str, *, llm=None) -> None:
        """Run post-ingest enrichment through the async memory path.

        Always passes `raise_on_failure=True` to the enrichment function, so
        this method does not ask for failures to be suppressed.

        Args:
            user_id: Owner of the source.
            source_id: Source to enrich.
            llm: Optional LLM handle forwarded as-is.
        """
        # Function-scope import kept from the original; presumably avoids an
        # import cycle with the enrichment module — TODO confirm.
        from app.features.memory.enrichment import enrich_memory_after_ingest_async

        await enrich_memory_after_ingest_async(
            self._db,
            user_id,
            source_id,
            llm=llm,
            raise_on_failure=True,
        )

    async def embed_source(
        self,
        user_id: str,
        source_id: str,
        *,
        raise_on_failure: bool = False,
    ) -> dict:
        """Embed persisted memory chunks and update embedding status.

        Args:
            user_id: Owner of the source.
            source_id: Source whose chunks are embedded.
            raise_on_failure: Forwarded to the embedding service unchanged.

        Returns:
            The dict returned by `MemoryEmbeddingService.embed_source`.
        """
        service = MemoryEmbeddingService(self._db, embedding_provider=self._embedding)
        return await service.embed_source(
            user_id,
            source_id,
            raise_on_failure=raise_on_failure,
        )

    async def compact_user(self, user_id: str, context: dict | None = None) -> dict:
        """Run near-duplicate compaction through the async memory path.

        Args:
            user_id: User whose memory is compacted.
            context: Optional context dict forwarded as-is.

        Returns:
            The dict returned by `run_memory_compaction`.
        """
        # Function-scope import kept from the original; presumably avoids an
        # import cycle with the compaction module — TODO confirm.
        from app.features.memory.compaction_service import run_memory_compaction

        return await run_memory_compaction(self._db, user_id, context)

    async def exclude_chunk(
        self, user_id: str, chunk_id: str, *, reason: str = ""
    ) -> bool:
        """Exclude a chunk, stale its dependent facts and log the curation.

        Args:
            user_id: Owner of the chunk.
            chunk_id: Chunk to exclude.
            reason: Optional free-text reason; recorded only when non-empty.

        Returns:
            `False` when `set_chunk_excluded` reports failure (nothing else
            is written), otherwise `True`.
        """
        # NOTE(review): `transactional` is assumed to commit on success and
        # roll back on error — confirm against app.core.db.
        async with transactional(self._db):
            ok = await set_chunk_excluded(self._db, chunk_id, user_id, True)
            if not ok:
                return False
            stale_count = await mark_facts_stale_for_excluded_chunk(
                self._db,
                user_id=user_id,
                chunk_id=chunk_id,
            )
            await create_curation_action(
                self._db,
                user_id=user_id,
                action_type="exclude",
                target_type="chunk",
                target_id=chunk_id,
                details={
                    # "reason" is included only when a non-empty reason was given.
                    **({"reason": reason} if reason else {}),
                    "staled_fact_count": stale_count,
                },
            )
        return True

    async def restore_chunk(self, user_id: str, chunk_id: str) -> bool:
        """Clear a chunk's excluded flag and log the curation action.

        The recorded details carry `fact_restore_policy` set to
        `requires_reenrichment`; no facts are modified by this method itself.

        Args:
            user_id: Owner of the chunk.
            chunk_id: Chunk to restore.

        Returns:
            `False` when `set_chunk_excluded` reports failure, otherwise `True`.
        """
        async with transactional(self._db):
            ok = await set_chunk_excluded(self._db, chunk_id, user_id, False)
            if not ok:
                return False
            await create_curation_action(
                self._db,
                user_id=user_id,
                action_type="restore",
                target_type="chunk",
                target_id=chunk_id,
                details={"fact_restore_policy": "requires_reenrichment"},
            )
        return True

    async def confirm_fact(self, user_id: str, fact_id: str) -> bool:
        """Set a memory fact's status to "confirmed" and log the curation action.

        Args:
            user_id: Owner of the fact.
            fact_id: Fact to confirm.

        Returns:
            `False` when `set_memory_fact_status` reports failure, otherwise
            `True`.
        """
        async with transactional(self._db):
            ok = await set_memory_fact_status(self._db, fact_id, user_id, "confirmed")
            if not ok:
                return False
            await create_curation_action(
                self._db,
                user_id=user_id,
                action_type="confirm",
                target_type="fact",
                target_id=fact_id,
                details=None,
            )
        return True

    async def reject_fact(
        self, user_id: str, fact_id: str, *, reason: str = ""
    ) -> bool:
        """Set a memory fact's status to "rejected" and log the curation action.

        Args:
            user_id: Owner of the fact.
            fact_id: Fact to reject.
            reason: Optional free-text reason; when empty, `details` is `None`.

        Returns:
            `False` when `set_memory_fact_status` reports failure, otherwise
            `True`.
        """
        async with transactional(self._db):
            ok = await set_memory_fact_status(self._db, fact_id, user_id, "rejected")
            if not ok:
                return False
            await create_curation_action(
                self._db,
                user_id=user_id,
                action_type="reject",
                target_type="fact",
                target_id=fact_id,
                details={"reason": reason} if reason else None,
            )
        return True
|