2026-03-27 16:01:28 +08:00
|
|
|
|
"""
Evidence bundle assembly: merges retrieval results across memory + story
(business layer, not a pure repo).

Authority hierarchy (reliability hardening):

- **Chunk source text** (not excluded) is the primary evidence; rolling
  summaries / story excerpts are convenience views and must not override a
  conflicting chunk.
- **MemoryFact**: `confirmed` is the default retrieval set; `candidate` may be
  promoted upstream; `stale` is flagged by compaction (among others) and should
  be excluded at retrieval time.
- Whether a fact FTS miss falls back to "recent facts" is controlled by
  `memory_fact_search_use_recent_fallback` (the default can avoid cross-talk
  between unrelated topics).

Celery uses sync; `HybridRetriever` uses async + RRF chunk merging.
"""
|
|
|
|
|
|
|
|
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
|
|
from sqlalchemy.orm import Session
|
|
|
|
|
|
|
|
|
|
|
|
from app.core.config import settings
|
|
|
|
|
|
from app.features.memory.repo import (
|
|
|
|
|
|
list_summaries_for_evidence_async,
|
|
|
|
|
|
list_summaries_for_evidence_sync,
|
|
|
|
|
|
search_chunks_fts,
|
|
|
|
|
|
search_chunks_fts_sync,
|
|
|
|
|
|
search_facts_for_user_async,
|
|
|
|
|
|
search_facts_for_user_sync,
|
|
|
|
|
|
search_timeline_events_for_user_async,
|
|
|
|
|
|
search_timeline_events_for_user_sync,
|
|
|
|
|
|
)
|
|
|
|
|
|
from app.features.story.repo import (
|
|
|
|
|
|
list_recent_stories_for_evidence,
|
|
|
|
|
|
list_recent_stories_for_evidence_sync,
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
# Canonical "nothing found" bundle: every evidence category is present and
# empty, so consumers can index any key without a membership check.
# NOTE: the value lists are shared module-level objects. `dict(...)` copies
# only the outer mapping, so copy the lists as well before mutating a bundle
# derived from this constant.
EMPTY_EVIDENCE_BUNDLE: dict = {
    "relevant_chunks": [],
    "relevant_summaries": [],
    "relevant_facts": [],
    "timeline_hints": [],
    "relevant_stories": [],
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _facts_to_dicts(facts) -> list[dict]:
    """Project fact rows onto plain dicts for the evidence bundle.

    Args:
        facts: Iterable of objects exposing ``id``, ``fact_type``, ``subject``,
            ``predicate`` and ``object_json`` attributes.

    Returns:
        One dict per input row, in input order, holding exactly those five
        attributes. Any other attribute on the row is ignored.
    """
    return [
        {
            "id": f.id,
            "fact_type": f.fact_type,
            "subject": f.subject,
            "predicate": f.predicate,
            "object_json": f.object_json,
        }
        for f in facts
    ]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _timeline_to_dicts(events) -> list[dict]:
    """Project timeline-event rows onto plain dicts for the evidence bundle.

    Args:
        events: Iterable of objects exposing ``id``, ``event_year``,
            ``event_date``, ``title`` and ``description`` attributes.

    Returns:
        One dict per input row, in input order, holding exactly those five
        attributes. Values are passed through unchanged (no date formatting
        is applied here).
    """
    return [
        {
            "id": e.id,
            "event_year": e.event_year,
            "event_date": e.event_date,
            "title": e.title,
            "description": e.description,
        }
        for e in events
    ]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _stories_to_dicts(story_rows) -> list[dict]:
    """Project story rows onto plain dicts for the evidence bundle.

    Args:
        story_rows: Iterable of objects exposing ``id``, ``title``,
            ``summary``, ``stage`` and ``story_type`` attributes.

    Returns:
        One dict per input row, in input order, holding exactly those five
        attributes. Any other attribute on the row is ignored.
    """
    return [
        {
            "id": s.id,
            "title": s.title,
            "summary": s.summary,
            "stage": s.stage,
            "story_type": s.story_type,
        }
        for s in story_rows
    ]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def fetch_evidence_metadata_sync(
    session: Session, user_id: str, q: str, top_k: int
) -> dict:
    """Collect the non-chunk evidence (summaries, facts, timeline, stories), sync.

    Args:
        session: Synchronous SQLAlchemy session used for every lookup.
        user_id: Owner whose data is searched.
        q: Search query, forwarded unchanged to each repo function.
        top_k: Per-category limit; the same value is passed to all four lookups.

    Returns:
        Dict with keys ``relevant_facts``, ``timeline_hints``,
        ``relevant_summaries`` and ``relevant_stories``. Facts, timeline
        events and stories are converted to plain dicts here; summaries are
        returned exactly as ``list_summaries_for_evidence_sync`` produced them.
    """
    facts = search_facts_for_user_sync(session, user_id, q, top_k)
    events = search_timeline_events_for_user_sync(session, user_id, q, top_k)
    relevant_summaries = list_summaries_for_evidence_sync(
        session, user_id=user_id, q=q, limit=top_k
    )
    story_rows = list_recent_stories_for_evidence_sync(
        session, user_id, query=q, limit=top_k
    )
    return {
        "relevant_facts": _facts_to_dicts(facts),
        "timeline_hints": _timeline_to_dicts(events),
        "relevant_summaries": relevant_summaries,
        "relevant_stories": _stories_to_dicts(story_rows),
    }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def fetch_evidence_metadata_async(
    db: AsyncSession, user_id: str, q: str, top_k: int
) -> dict:
    """Collect the non-chunk evidence (summaries, facts, timeline, stories), async.

    The four lookups are awaited one after another on the same session.

    Args:
        db: Async SQLAlchemy session used for every lookup.
        user_id: Owner whose data is searched.
        q: Search query, forwarded unchanged to each repo function.
        top_k: Per-category limit; the same value is passed to all four lookups.

    Returns:
        Dict with keys ``relevant_facts``, ``timeline_hints``,
        ``relevant_summaries`` and ``relevant_stories``. Facts, timeline
        events and stories are converted to plain dicts here; summaries are
        returned exactly as ``list_summaries_for_evidence_async`` produced them.
    """
    facts = await search_facts_for_user_async(db, user_id, q, top_k)
    events = await search_timeline_events_for_user_async(db, user_id, q, top_k)
    relevant_summaries = await list_summaries_for_evidence_async(
        db, user_id=user_id, q=q, limit=top_k
    )
    story_rows = await list_recent_stories_for_evidence(
        db, user_id=user_id, query=q, limit=top_k
    )
    return {
        "relevant_facts": _facts_to_dicts(facts),
        "timeline_hints": _timeline_to_dicts(events),
        "relevant_summaries": relevant_summaries,
        "relevant_stories": _stories_to_dicts(story_rows),
    }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _empty_query_bundle_sync(session: Session, user_id: str, top_k: int) -> dict:
    """Build the "browse" fallback bundle used when there is no FTS query (sync).

    The bundle carries the user's most recently updated rolling summary (if
    one exists) plus non-search fact and timeline listings. No chunk or story
    lookup is performed, so ``relevant_chunks`` and ``relevant_stories`` are
    always empty in this mode.

    Args:
        session: Synchronous SQLAlchemy session.
        user_id: Owner whose data is listed.
        top_k: Limit forwarded to the fact and timeline listings.

    Returns:
        Dict with the same five keys as a regular evidence bundle.
    """
    # Imports are kept function-local as in the original layout
    # (presumably to avoid an import cycle — TODO confirm).
    from app.features.memory.models import MemorySummary
    from sqlalchemy import select

    from app.features.memory.repo import (
        get_facts_for_user_sync,
        get_timeline_events_for_user_sync,
    )

    # Newest rolling summary only: order by updated_at descending, take one row.
    stmt = (
        select(MemorySummary)
        .where(
            MemorySummary.user_id == user_id,
            MemorySummary.summary_type == "rolling",
        )
        .order_by(MemorySummary.updated_at.desc())
        .limit(1)
    )
    rolling = session.execute(stmt).unique().scalar_one_or_none()

    summaries = []
    # Explicit None check: presence of the row is what matters, not whatever
    # truthiness the mapped class might define.
    if rolling is not None:
        summaries = [
            {
                "id": rolling.id,
                "summary_type": rolling.summary_type,
                "content": rolling.content,
                "source_chunk_ids": rolling.source_chunk_ids,
            }
        ]

    facts = get_facts_for_user_sync(session, user_id, top_k)
    events = get_timeline_events_for_user_sync(session, user_id, top_k)
    return {
        "relevant_chunks": [],
        "relevant_summaries": summaries,
        "relevant_facts": _facts_to_dicts(facts),
        "timeline_hints": _timeline_to_dicts(events),
        "relevant_stories": [],
    }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def _empty_query_bundle_async(db: AsyncSession, user_id: str, top_k: int) -> dict:
    """Build the "browse" fallback bundle used when there is no FTS query (async).

    The bundle carries the user's most recently updated rolling summary (if
    one exists) plus non-search fact and timeline listings. No chunk or story
    lookup is performed, so ``relevant_chunks`` and ``relevant_stories`` are
    always empty in this mode.

    Args:
        db: Async SQLAlchemy session.
        user_id: Owner whose data is listed.
        top_k: Limit forwarded to the fact and timeline listings.

    Returns:
        Dict with the same five keys as a regular evidence bundle.
    """
    # Imports are kept function-local as in the original layout
    # (presumably to avoid an import cycle — TODO confirm).
    from sqlalchemy import select

    from app.features.memory.models import MemorySummary
    from app.features.memory.repo import (
        get_facts_for_user,
        get_timeline_events_for_user,
    )

    # Newest rolling summary only: order by updated_at descending, take one row.
    roll_stmt = (
        select(MemorySummary)
        .where(
            MemorySummary.user_id == user_id,
            MemorySummary.summary_type == "rolling",
        )
        .order_by(MemorySummary.updated_at.desc())
        .limit(1)
    )
    r_result = await db.execute(roll_stmt)
    rolling = r_result.unique().scalar_one_or_none()

    summaries = []
    # Explicit None check: presence of the row is what matters, not whatever
    # truthiness the mapped class might define.
    if rolling is not None:
        summaries = [
            {
                "id": rolling.id,
                "summary_type": rolling.summary_type,
                "content": rolling.content,
                "source_chunk_ids": rolling.source_chunk_ids,
            }
        ]

    facts = await get_facts_for_user(db, user_id=user_id, limit=top_k)
    events = await get_timeline_events_for_user(db, user_id=user_id, limit=top_k)
    return {
        "relevant_chunks": [],
        "relevant_summaries": summaries,
        "relevant_facts": _facts_to_dicts(facts),
        "timeline_hints": _timeline_to_dicts(events),
        "relevant_stories": [],
    }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def retrieve_evidence_bundle_sync(
    session: Session, user_id: str, query: str, *, top_k: int = 10
) -> dict:
    """Assemble an evidence bundle on the sync path (Celery / narrative pipeline).

    Chunks come from full-text search only; the remaining categories come from
    ``fetch_evidence_metadata_sync``.

    Args:
        session: Synchronous SQLAlchemy session.
        user_id: Owner whose data is searched.
        query: Raw query text. Surrounding whitespace is stripped before use.
        top_k: Limit applied to the chunk search and to each metadata lookup.

    Returns:
        Dict with keys ``relevant_chunks``, ``relevant_facts``,
        ``timeline_hints``, ``relevant_summaries`` and ``relevant_stories``.
        For a blank (or falsy) query the result is either the "browse"
        fallback bundle, when
        ``settings.memory_evidence_empty_query_include_rolling`` is set, or a
        fresh all-empty bundle.
    """
    q = query.strip() if query else ""
    if not q:
        if settings.memory_evidence_empty_query_include_rolling:
            return _empty_query_bundle_sync(session, user_id, top_k)
        # Copy the inner lists too: a plain dict(...) copy would hand the
        # caller the module-level lists, and an append on the returned bundle
        # would leak into every later "empty" result.
        return {key: list(value) for key, value in EMPTY_EVIDENCE_BUNDLE.items()}

    chunk_rows = search_chunks_fts_sync(session, user_id, q, top_k)
    # Keep only the three fields the bundle exposes for each chunk hit.
    relevant_chunks = [
        {"id": r["id"], "content": r["content"], "chunk_index": r["chunk_index"]}
        for r in chunk_rows
    ]
    meta = fetch_evidence_metadata_sync(session, user_id, q, top_k)
    return {
        "relevant_chunks": relevant_chunks,
        **meta,
    }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def retrieve_evidence_bundle_async(
    db: AsyncSession,
    user_id: str,
    query: str,
    *,
    top_k: int = 10,
    merged_chunk_dicts: list[dict],
) -> dict:
    """Assemble an evidence bundle on the async path.

    Chunks have already been merged (RRF) by the caller; this function only
    attaches the metadata categories from ``fetch_evidence_metadata_async``.

    Args:
        db: Async SQLAlchemy session.
        user_id: Owner whose data is searched.
        query: Raw query text. Surrounding whitespace is stripped before use.
        top_k: Limit applied to each metadata lookup.
        merged_chunk_dicts: Pre-merged chunk hits, shaped like
            ``[{"id", "content", "chunk_index"}, ...]``. Returned as-is under
            ``relevant_chunks``; ignored when the query is blank.

    Returns:
        Dict with keys ``relevant_chunks``, ``relevant_facts``,
        ``timeline_hints``, ``relevant_summaries`` and ``relevant_stories``.
        For a blank (or falsy) query the result is either the "browse"
        fallback bundle, when
        ``settings.memory_evidence_empty_query_include_rolling`` is set, or a
        fresh all-empty bundle.
    """
    q = query.strip() if query else ""
    if not q:
        if settings.memory_evidence_empty_query_include_rolling:
            return await _empty_query_bundle_async(db, user_id, top_k)
        # Copy the inner lists too: a plain dict(...) copy would hand the
        # caller the module-level lists, and an append on the returned bundle
        # would leak into every later "empty" result.
        return {key: list(value) for key, value in EMPTY_EVIDENCE_BUNDLE.items()}

    meta = await fetch_evidence_metadata_async(db, user_id, q, top_k)
    return {
        "relevant_chunks": merged_chunk_dicts,
        **meta,
    }
|