91 lines
2.5 KiB
Python
91 lines
2.5 KiB
Python
"""
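Evidence bundle assembly: merges retrieval results across memory and story
(business layer, not a pure repo).

Memory evidence keeps only the single async path: the raw chunk text is the
primary evidence, while structured facts, summaries and stories all enter the
evidence based on hits for the current query; there is no longer any
rolling/recent history fallback.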
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
from sqlalchemy.ext.asyncio import AsyncSession
|
||
|
||
from app.features.memory.repo import (
|
||
list_summaries_for_evidence_async,
|
||
search_facts_for_user_async,
|
||
)
|
||
from app.features.story.repo import list_recent_stories_for_evidence
|
||
|
||
# Template for an evidence bundle with no hits: the four evidence sections,
# each mapped to its own (distinct) empty list.
EMPTY_EVIDENCE_BUNDLE: dict = {
    section: []
    for section in (
        "relevant_chunks",
        "relevant_summaries",
        "relevant_facts",
        "relevant_stories",
    )
}
|
||
|
||
|
||
def _facts_to_dicts(facts) -> list[dict]:
    """Convert fact records into plain dictionaries.

    Each item of ``facts`` is read through its ``id``, ``fact_type``,
    ``subject``, ``predicate`` and ``object_json`` attributes. The result
    keeps the input order and every entry carries exactly those five keys.
    """
    converted: list[dict] = []
    for fact in facts:
        entry = {
            "id": fact.id,
            "fact_type": fact.fact_type,
            "subject": fact.subject,
            "predicate": fact.predicate,
            "object_json": fact.object_json,
        }
        converted.append(entry)
    return converted
|
||
|
||
|
||
def _stories_to_dicts(story_rows) -> list[dict]:
    """Convert story rows into plain dictionaries.

    Each row is read through its ``id``, ``title``, ``summary``, ``stage``
    and ``story_type`` attributes. The result keeps the input order and
    every entry carries exactly those five keys.
    """
    exported = ("id", "title", "summary", "stage", "story_type")
    return [
        {name: getattr(row, name) for name in exported}
        for row in story_rows
    ]
|
||
|
||
|
||
async def fetch_evidence_metadata_async(
    db: AsyncSession, user_id: str, q: str, top_k: int
) -> dict:
    """Fetch the non-chunk evidence (async).

    Awaits three lookups one after another on ``db``: facts, then
    summaries, then stories. Each receives ``user_id``, the query text
    ``q`` and ``top_k``.

    Returns:
        A dict with ``relevant_facts`` and ``relevant_stories`` converted
        to plain dicts, and ``relevant_summaries`` passed through exactly
        as the repo call returned it.
    """
    fact_rows = await search_facts_for_user_async(db, user_id, q, top_k)
    summaries = await list_summaries_for_evidence_async(
        db,
        user_id=user_id,
        q=q,
        limit=top_k,
    )
    stories = await list_recent_stories_for_evidence(
        db,
        user_id=user_id,
        query=q,
        limit=top_k,
    )

    # Conversion happens only after all three lookups have completed.
    metadata: dict = {}
    metadata["relevant_facts"] = _facts_to_dicts(fact_rows)
    metadata["relevant_summaries"] = summaries
    metadata["relevant_stories"] = _stories_to_dicts(stories)
    return metadata
|
||
|
||
|
||
async def retrieve_evidence_bundle_async(
    db: AsyncSession,
    user_id: str,
    query: str,
    *,
    top_k: int = 10,
    merged_chunk_dicts: list[dict],
) -> dict:
    """Assemble the evidence bundle on the async path.

    The chunks have already been filled in by the caller (e.g. a
    HybridRetriever doing vector retrieval); this function only attaches
    the non-chunk metadata fetched for the query.

    Args:
        db: Async session handed to the metadata lookups.
        user_id: Passed through to the metadata lookups.
        query: Raw query text; surrounding whitespace is stripped before use.
        top_k: Passed through to the metadata lookups.
        merged_chunk_dicts: Chunk dicts supplied by the caller, in the form
            ``[{"id", "content", "chunk_index"}, ...]``. Returned as-is
            under ``relevant_chunks``.

    Returns:
        A dict with ``relevant_chunks`` plus the metadata keys produced by
        ``fetch_evidence_metadata_async``. For an empty or whitespace-only
        query, an all-empty bundle is returned and ``merged_chunk_dicts``
        is not included.
    """
    q = query.strip() if query else ""
    if not q:
        # dict(EMPTY_EVIDENCE_BUNDLE) would be a shallow copy that still
        # shares the constant's list objects; copy each list so a caller
        # mutating the result cannot corrupt the module-level template.
        return {key: list(value) for key, value in EMPTY_EVIDENCE_BUNDLE.items()}

    meta = await fetch_evidence_metadata_async(db, user_id, q, top_k)
    return {
        "relevant_chunks": merged_chunk_dicts,
        **meta,
    }
|