Files
life-echo/api/app/features/memory/evidence.py

245 lines
7.4 KiB
Python
Raw Normal View History

"""
Evidence-bundle assembly: merges memory and story retrieval results (business layer, not a pure repo).

Celery uses the sync path; `HybridRetriever` uses the async path with RRF-merged chunks.
"""
from __future__ import annotations
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import Session
from app.core.config import settings
from app.features.memory.repo import (
list_summaries_for_evidence_async,
list_summaries_for_evidence_sync,
search_chunks_fts,
search_chunks_fts_sync,
search_facts_for_user_async,
search_facts_for_user_sync,
search_timeline_events_for_user_async,
search_timeline_events_for_user_sync,
)
from app.features.story.repo import (
list_recent_stories_for_evidence,
list_recent_stories_for_evidence_sync,
)
# Template for a bundle that carries no evidence: every section is present and
# maps to its own empty list.
# NOTE: a shallow copy such as ``dict(EMPTY_EVIDENCE_BUNDLE)`` still shares
# these list objects, so treat the lists as read-only.
EMPTY_EVIDENCE_BUNDLE: dict = {
    section: []
    for section in (
        "relevant_chunks",
        "relevant_summaries",
        "relevant_facts",
        "timeline_hints",
        "relevant_stories",
    )
}
def _facts_to_dicts(facts) -> list[dict]:
    """Convert fact rows into plain dicts for the evidence bundle.

    Each item of ``facts`` must expose ``id``, ``fact_type``, ``subject``,
    ``predicate`` and ``object_json`` attributes. Input order is preserved.
    """

    def as_dict(fact) -> dict:
        # Only the fields exposed in the bundle are copied.
        return {
            "id": fact.id,
            "fact_type": fact.fact_type,
            "subject": fact.subject,
            "predicate": fact.predicate,
            "object_json": fact.object_json,
        }

    return [as_dict(fact) for fact in facts]
def _timeline_to_dicts(events) -> list[dict]:
    """Convert timeline-event rows into plain dicts for the evidence bundle.

    Each item of ``events`` must expose ``id``, ``event_year``, ``event_date``,
    ``title`` and ``description`` attributes. Input order is preserved.
    """

    def as_dict(event) -> dict:
        # Values are passed through untouched (no date formatting here).
        return {
            "id": event.id,
            "event_year": event.event_year,
            "event_date": event.event_date,
            "title": event.title,
            "description": event.description,
        }

    return [as_dict(event) for event in events]
def _stories_to_dicts(story_rows) -> list[dict]:
    """Convert story rows into plain dicts for the evidence bundle.

    Each item of ``story_rows`` must expose ``id``, ``title``, ``summary``,
    ``stage`` and ``story_type`` attributes. Input order is preserved.
    """

    def as_dict(story) -> dict:
        # Only the fields exposed in the bundle are copied.
        return {
            "id": story.id,
            "title": story.title,
            "summary": story.summary,
            "stage": story.stage,
            "story_type": story.story_type,
        }

    return [as_dict(story) for story in story_rows]
def fetch_evidence_metadata_sync(
    session: Session, user_id: str, q: str, top_k: int
) -> dict:
    """Collect the non-chunk evidence (facts, timeline, summaries, stories), sync.

    Args:
        session: Synchronous SQLAlchemy session passed to every repo call.
        user_id: Owner of the evidence being searched.
        q: Search query, forwarded unchanged to each repo function.
        top_k: Forwarded to each repo call as its limit argument.

    Returns:
        Dict with keys ``relevant_facts``, ``timeline_hints``,
        ``relevant_summaries`` and ``relevant_stories``. Summaries are returned
        exactly as the repo layer produced them; the other three are converted
        to plain dicts.
    """
    fact_rows = search_facts_for_user_sync(session, user_id, q, top_k)
    event_rows = search_timeline_events_for_user_sync(session, user_id, q, top_k)
    summaries = list_summaries_for_evidence_sync(
        session, user_id=user_id, q=q, limit=top_k
    )
    stories = list_recent_stories_for_evidence_sync(
        session, user_id, query=q, limit=top_k
    )

    # Rows are converted only after all four queries have run.
    metadata: dict = {}
    metadata["relevant_facts"] = _facts_to_dicts(fact_rows)
    metadata["timeline_hints"] = _timeline_to_dicts(event_rows)
    metadata["relevant_summaries"] = summaries
    metadata["relevant_stories"] = _stories_to_dicts(stories)
    return metadata
async def fetch_evidence_metadata_async(
    db: AsyncSession, user_id: str, q: str, top_k: int
) -> dict:
    """Collect the non-chunk evidence (facts, timeline, summaries, stories), async.

    The four queries are awaited one after another on the same ``db`` session.

    Args:
        db: Async SQLAlchemy session passed to every repo call.
        user_id: Owner of the evidence being searched.
        q: Search query, forwarded unchanged to each repo function.
        top_k: Forwarded to each repo call as its limit argument.

    Returns:
        Dict with keys ``relevant_facts``, ``timeline_hints``,
        ``relevant_summaries`` and ``relevant_stories``. Summaries are returned
        exactly as the repo layer produced them; the other three are converted
        to plain dicts.
    """
    fact_rows = await search_facts_for_user_async(db, user_id, q, top_k)
    event_rows = await search_timeline_events_for_user_async(db, user_id, q, top_k)
    summaries = await list_summaries_for_evidence_async(
        db, user_id=user_id, q=q, limit=top_k
    )
    stories = await list_recent_stories_for_evidence(
        db, user_id=user_id, query=q, limit=top_k
    )

    # Rows are converted only after all four queries have completed.
    metadata: dict = {}
    metadata["relevant_facts"] = _facts_to_dicts(fact_rows)
    metadata["timeline_hints"] = _timeline_to_dicts(event_rows)
    metadata["relevant_summaries"] = summaries
    metadata["relevant_stories"] = _stories_to_dicts(stories)
    return metadata
def _empty_query_bundle_sync(session: Session, user_id: str, top_k: int) -> dict:
    """Build the "browse" fallback bundle used when there is no FTS query (sync).

    The bundle holds the user's most recently updated ``rolling`` summary (if
    any) plus facts and timeline events fetched without a search query. Chunks
    and stories are always empty on this path.

    Args:
        session: Synchronous SQLAlchemy session.
        user_id: Owner of the evidence.
        top_k: Passed to the facts/timeline repo calls (presumably their row
            limit; confirm against the repo signatures).

    Returns:
        A full evidence bundle dict with all five sections present.
    """
    from app.features.memory.models import MemorySummary
    from sqlalchemy import select
    from app.features.memory.repo import (
        get_facts_for_user_sync,
        get_timeline_events_for_user_sync,
    )

    latest_rolling_stmt = (
        select(MemorySummary)
        .where(
            MemorySummary.user_id == user_id,
            MemorySummary.summary_type == "rolling",
        )
        .order_by(MemorySummary.updated_at.desc())
        .limit(1)
    )
    # NOTE(review): ``.unique()`` is presumably here because the model uses
    # joined eager loading; confirm against the model definition.
    latest = session.execute(latest_rolling_stmt).unique().scalar_one_or_none()

    rolling_summaries = (
        [
            {
                "id": latest.id,
                "summary_type": latest.summary_type,
                "content": latest.content,
                "source_chunk_ids": latest.source_chunk_ids,
            }
        ]
        if latest
        else []
    )

    fact_rows = get_facts_for_user_sync(session, user_id, top_k)
    event_rows = get_timeline_events_for_user_sync(session, user_id, top_k)

    return {
        "relevant_chunks": [],
        "relevant_summaries": rolling_summaries,
        "relevant_facts": _facts_to_dicts(fact_rows),
        "timeline_hints": _timeline_to_dicts(event_rows),
        "relevant_stories": [],
    }
async def _empty_query_bundle_async(db: AsyncSession, user_id: str, top_k: int) -> dict:
    """Build the "browse" fallback bundle used when there is no FTS query (async).

    The bundle holds the user's most recently updated ``rolling`` summary (if
    any) plus facts and timeline events fetched without a search query. Chunks
    and stories are always empty on this path.

    Args:
        db: Async SQLAlchemy session.
        user_id: Owner of the evidence.
        top_k: Passed as ``limit`` to the facts/timeline repo calls.

    Returns:
        A full evidence bundle dict with all five sections present.
    """
    from sqlalchemy import select
    from app.features.memory.models import MemorySummary
    from app.features.memory.repo import (
        get_facts_for_user,
        get_timeline_events_for_user,
    )

    latest_rolling_stmt = (
        select(MemorySummary)
        .where(
            MemorySummary.user_id == user_id,
            MemorySummary.summary_type == "rolling",
        )
        .order_by(MemorySummary.updated_at.desc())
        .limit(1)
    )
    # NOTE(review): ``.unique()`` is presumably here because the model uses
    # joined eager loading; confirm against the model definition.
    latest = (await db.execute(latest_rolling_stmt)).unique().scalar_one_or_none()

    rolling_summaries = (
        [
            {
                "id": latest.id,
                "summary_type": latest.summary_type,
                "content": latest.content,
                "source_chunk_ids": latest.source_chunk_ids,
            }
        ]
        if latest
        else []
    )

    fact_rows = await get_facts_for_user(db, user_id=user_id, limit=top_k)
    event_rows = await get_timeline_events_for_user(db, user_id=user_id, limit=top_k)

    return {
        "relevant_chunks": [],
        "relevant_summaries": rolling_summaries,
        "relevant_facts": _facts_to_dicts(fact_rows),
        "timeline_hints": _timeline_to_dicts(event_rows),
        "relevant_stories": [],
    }
def retrieve_evidence_bundle_sync(
    session: Session, user_id: str, query: str, *, top_k: int = 10
) -> dict:
    """Celery / narrative pipeline: FTS-only chunks plus metadata.

    Args:
        session: Synchronous SQLAlchemy session.
        user_id: Owner of the evidence.
        query: Raw search text; surrounding whitespace is stripped before use.
        top_k: Limit forwarded to the chunk search and to each metadata query.

    Returns:
        Evidence bundle dict with keys ``relevant_chunks``,
        ``relevant_summaries``, ``relevant_facts``, ``timeline_hints`` and
        ``relevant_stories``. For a blank query the result is either the
        rolling-summary fallback bundle (when
        ``settings.memory_evidence_empty_query_include_rolling`` is set) or a
        fresh empty bundle.
    """
    q = query.strip() if query else ""
    if not q:
        if settings.memory_evidence_empty_query_include_rolling:
            return _empty_query_bundle_sync(session, user_id, top_k)
        # Copy every list as well as the dict: a shallow ``dict(...)`` copy
        # would hand out the module-level lists, so a caller appending to the
        # result would corrupt EMPTY_EVIDENCE_BUNDLE for all later calls.
        return {key: list(value) for key, value in EMPTY_EVIDENCE_BUNDLE.items()}

    chunk_rows = search_chunks_fts_sync(session, user_id, q, top_k)
    # Keep only the fields exposed in the bundle.
    relevant_chunks = [
        {"id": r["id"], "content": r["content"], "chunk_index": r["chunk_index"]}
        for r in chunk_rows
    ]
    meta = fetch_evidence_metadata_sync(session, user_id, q, top_k)
    return {
        "relevant_chunks": relevant_chunks,
        **meta,
    }
async def retrieve_evidence_bundle_async(
    db: AsyncSession,
    user_id: str,
    query: str,
    *,
    top_k: int = 10,
    merged_chunk_dicts: list[dict],
) -> dict:
    """Async path: chunks are already RRF-merged by the caller; only metadata is added here.

    Args:
        db: Async SQLAlchemy session.
        user_id: Owner of the evidence.
        query: Raw search text; surrounding whitespace is stripped before use.
        top_k: Limit forwarded to each metadata query.
        merged_chunk_dicts: Pre-merged chunks, ``[{"id", "content",
            "chunk_index"}, ...]``. Returned as-is under ``relevant_chunks``;
            ignored when ``query`` is blank.

    Returns:
        Evidence bundle dict with keys ``relevant_chunks``,
        ``relevant_summaries``, ``relevant_facts``, ``timeline_hints`` and
        ``relevant_stories``. For a blank query the result is either the
        rolling-summary fallback bundle (when
        ``settings.memory_evidence_empty_query_include_rolling`` is set) or a
        fresh empty bundle.
    """
    q = query.strip() if query else ""
    if not q:
        if settings.memory_evidence_empty_query_include_rolling:
            return await _empty_query_bundle_async(db, user_id, top_k)
        # Copy every list as well as the dict: a shallow ``dict(...)`` copy
        # would hand out the module-level lists, so a caller appending to the
        # result would corrupt EMPTY_EVIDENCE_BUNDLE for all later calls.
        return {key: list(value) for key, value in EMPTY_EVIDENCE_BUNDLE.items()}

    meta = await fetch_evidence_metadata_async(db, user_id, q, top_k)
    return {
        "relevant_chunks": merged_chunk_dicts,
        **meta,
    }