""" 证据包组装:跨 memory + story 的检索结果合并(业务层,非纯 repo)。 权威层级(可靠性 hardening): - **Chunk 原文**(未 excluded)为首要证据;rolling 摘要/故事摘录为便利视图,不得压过冲突的 chunk。 - **MemoryFact**:`confirmed` 为检索默认集;`candidate` 可被上游提升;`stale` 由 compaction 等标出,检索时应排除。 - 事实 FTS 无命中时是否退回「最近事实」由 `memory_fact_search_use_recent_fallback` 控制(默认可避免串台)。 Celery 使用 sync;`HybridRetriever` 使用 async + RRF chunk 合并。 """ from __future__ import annotations from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.orm import Session from app.core.config import settings from app.features.memory.repo import ( list_summaries_for_evidence_async, list_summaries_for_evidence_sync, search_chunks_fts, search_chunks_fts_sync, search_facts_for_user_async, search_facts_for_user_sync, search_timeline_events_for_user_async, search_timeline_events_for_user_sync, ) from app.features.story.repo import ( list_recent_stories_for_evidence, list_recent_stories_for_evidence_sync, ) EMPTY_EVIDENCE_BUNDLE: dict = { "relevant_chunks": [], "relevant_summaries": [], "relevant_facts": [], "timeline_hints": [], "relevant_stories": [], } def _facts_to_dicts(facts) -> list[dict]: return [ { "id": f.id, "fact_type": f.fact_type, "subject": f.subject, "predicate": f.predicate, "object_json": f.object_json, } for f in facts ] def _timeline_to_dicts(events) -> list[dict]: return [ { "id": e.id, "event_year": e.event_year, "event_date": e.event_date, "title": e.title, "description": e.description, } for e in events ] def _stories_to_dicts(story_rows) -> list[dict]: return [ { "id": s.id, "title": s.title, "summary": s.summary, "stage": s.stage, "story_type": s.story_type, } for s in story_rows ] def fetch_evidence_metadata_sync( session: Session, user_id: str, q: str, top_k: int ) -> dict: """非 chunk 证据:摘要、事实、时间线、故事(sync)。""" facts = search_facts_for_user_sync(session, user_id, q, top_k) events = search_timeline_events_for_user_sync(session, user_id, q, top_k) relevant_summaries = list_summaries_for_evidence_sync( session, user_id=user_id, q=q, limit=top_k ) story_rows = list_recent_stories_for_evidence_sync( session, user_id, query=q, limit=top_k ) return { "relevant_facts": _facts_to_dicts(facts), "timeline_hints": _timeline_to_dicts(events), "relevant_summaries": relevant_summaries, "relevant_stories": _stories_to_dicts(story_rows), } async def fetch_evidence_metadata_async( db: AsyncSession, user_id: str, q: str, top_k: int ) -> dict: """非 chunk 证据(async)。""" facts = await search_facts_for_user_async(db, user_id, q, top_k) events = await search_timeline_events_for_user_async(db, user_id, q, top_k) relevant_summaries = await list_summaries_for_evidence_async( db, user_id=user_id, q=q, limit=top_k ) story_rows = await list_recent_stories_for_evidence( db, user_id=user_id, query=q, limit=top_k ) return { "relevant_facts": _facts_to_dicts(facts), "timeline_hints": _timeline_to_dicts(events), "relevant_summaries": relevant_summaries, "relevant_stories": _stories_to_dicts(story_rows), } def _empty_query_bundle_sync(session: Session, user_id: str, top_k: int) -> dict: """无 FTS query 时的「浏览」降级:rolling 摘要 + 事实/时间线 fallback。""" from app.features.memory.models import MemorySummary from sqlalchemy import select from app.features.memory.repo import ( get_facts_for_user_sync, get_timeline_events_for_user_sync, ) rolling = ( session.execute( select(MemorySummary) .where( MemorySummary.user_id == user_id, MemorySummary.summary_type == "rolling", ) .order_by(MemorySummary.updated_at.desc()) .limit(1) ) .unique() .scalar_one_or_none() ) summaries = [] if rolling: summaries = [ { "id": rolling.id, "summary_type": rolling.summary_type, "content": rolling.content, "source_chunk_ids": rolling.source_chunk_ids, } ] facts = get_facts_for_user_sync(session, user_id, top_k) events = get_timeline_events_for_user_sync(session, user_id, top_k) return { "relevant_chunks": [], "relevant_summaries": summaries, "relevant_facts": _facts_to_dicts(facts), "timeline_hints": _timeline_to_dicts(events), "relevant_stories": [], } async def _empty_query_bundle_async(db: AsyncSession, user_id: str, top_k: int) -> dict: from sqlalchemy import select from app.features.memory.models import MemorySummary from app.features.memory.repo import ( get_facts_for_user, get_timeline_events_for_user, ) roll_stmt = ( select(MemorySummary) .where( MemorySummary.user_id == user_id, MemorySummary.summary_type == "rolling", ) .order_by(MemorySummary.updated_at.desc()) .limit(1) ) r_result = await db.execute(roll_stmt) rolling = r_result.unique().scalar_one_or_none() summaries = [] if rolling: summaries = [ { "id": rolling.id, "summary_type": rolling.summary_type, "content": rolling.content, "source_chunk_ids": rolling.source_chunk_ids, } ] facts = await get_facts_for_user(db, user_id=user_id, limit=top_k) events = await get_timeline_events_for_user(db, user_id=user_id, limit=top_k) return { "relevant_chunks": [], "relevant_summaries": summaries, "relevant_facts": _facts_to_dicts(facts), "timeline_hints": _timeline_to_dicts(events), "relevant_stories": [], } def retrieve_evidence_bundle_sync( session: Session, user_id: str, query: str, *, top_k: int = 10 ) -> dict: """Celery / 叙事流水线:FTS-only chunks + 元数据。""" if not query or not query.strip(): if settings.memory_evidence_empty_query_include_rolling: return _empty_query_bundle_sync(session, user_id, top_k) return dict(EMPTY_EVIDENCE_BUNDLE) q = query.strip() chunk_rows = search_chunks_fts_sync(session, user_id, q, top_k) relevant_chunks = [ {"id": r["id"], "content": r["content"], "chunk_index": r["chunk_index"]} for r in chunk_rows ] meta = fetch_evidence_metadata_sync(session, user_id, q, top_k) return { "relevant_chunks": relevant_chunks, **meta, } async def retrieve_evidence_bundle_async( db: AsyncSession, user_id: str, query: str, *, top_k: int = 10, merged_chunk_dicts: list[dict], ) -> dict: """ 异步路径:chunk 已由调用方 RRF 合并;此处只拼元数据。 merged_chunk_dicts: [{"id","content","chunk_index"}, ...] """ if not query or not query.strip(): if settings.memory_evidence_empty_query_include_rolling: return await _empty_query_bundle_async(db, user_id, top_k) return dict(EMPTY_EVIDENCE_BUNDLE) q = query.strip() meta = await fetch_evidence_metadata_async(db, user_id, q, top_k) return { "relevant_chunks": merged_chunk_dicts, **meta, }