数据库 - 新增迁移 0003:timeline_events.memory_source_id 外键 → memory_sources,便于按 ingest 源做时间线幂等 后端 - 记忆 - 新增 ingest 后 LLM 富化(摘要/事实/时间线),可配置开关与最大字符数 - 新增证据包组装:合并 chunk、摘要、事实、时间线、故事等检索结果;支持空 query 时是否仍带 rolling 等开关 - repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展;文档 memory-retrieval.md 更新 后端 - 对话 WS - 增加 PING/PONG;分段 ASR 日志与空音频处理;转写失败与「无助手回复」错误提示更明确 - 助手多段回复持久化使用统一分隔符,与分段逻辑一致 后端 - Agent - reply_limits:按 [SPLIT] 与段落拆段,并保证非空 fallback,供 WS 与 TTS 多段下发 后端 - 回忆录任务 - transcript ingest 记录 source_id;任务成功结?
245 lines
7.4 KiB
Python
245 lines
7.4 KiB
Python
"""
Evidence bundle assembly: merges retrieval results across memory + story (business layer, not a pure repo).

Celery uses the sync path; `HybridRetriever` uses the async path + RRF chunk merging.
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
from sqlalchemy.ext.asyncio import AsyncSession
|
||
from sqlalchemy.orm import Session
|
||
|
||
from app.core.config import settings
|
||
from app.features.memory.repo import (
|
||
list_summaries_for_evidence_async,
|
||
list_summaries_for_evidence_sync,
|
||
search_chunks_fts,
|
||
search_chunks_fts_sync,
|
||
search_facts_for_user_async,
|
||
search_facts_for_user_sync,
|
||
search_timeline_events_for_user_async,
|
||
search_timeline_events_for_user_sync,
|
||
)
|
||
from app.features.story.repo import (
|
||
list_recent_stories_for_evidence,
|
||
list_recent_stories_for_evidence_sync,
|
||
)
|
||
|
||
# Template of an evidence bundle with no results: every section key maps to
# its own (distinct) empty list, in the canonical key order.
EMPTY_EVIDENCE_BUNDLE: dict = {
    section: []
    for section in (
        "relevant_chunks",
        "relevant_summaries",
        "relevant_facts",
        "timeline_hints",
        "relevant_stories",
    )
}
|
||
|
||
|
||
def _facts_to_dicts(facts) -> list[dict]:
    """Convert fact records into plain dicts for an evidence bundle.

    Args:
        facts: Iterable of objects exposing ``id``, ``fact_type``, ``subject``,
            ``predicate`` and ``object_json`` attributes.

    Returns:
        One dict per input record, in input order, holding exactly those five
        attributes under keys of the same name.
    """
    exported = ("id", "fact_type", "subject", "predicate", "object_json")
    return [{name: getattr(fact, name) for name in exported} for fact in facts]
|
||
|
||
|
||
def _timeline_to_dicts(events) -> list[dict]:
    """Convert timeline-event records into plain dicts for an evidence bundle.

    Args:
        events: Iterable of objects exposing ``id``, ``event_year``,
            ``event_date``, ``title`` and ``description`` attributes.

    Returns:
        One dict per input record, in input order, holding exactly those five
        attributes under keys of the same name.
    """
    exported = ("id", "event_year", "event_date", "title", "description")
    return [{name: getattr(event, name) for name in exported} for event in events]
|
||
|
||
|
||
def _stories_to_dicts(story_rows) -> list[dict]:
    """Convert story records into plain dicts for an evidence bundle.

    Args:
        story_rows: Iterable of objects exposing ``id``, ``title``,
            ``summary``, ``stage`` and ``story_type`` attributes.

    Returns:
        One dict per input record, in input order, holding exactly those five
        attributes under keys of the same name.
    """
    exported = ("id", "title", "summary", "stage", "story_type")
    return [{name: getattr(story, name) for name in exported} for story in story_rows]
|
||
|
||
|
||
def fetch_evidence_metadata_sync(
    session: Session, user_id: str, q: str, top_k: int
) -> dict:
    """Fetch the non-chunk evidence (summaries, facts, timeline, stories) synchronously.

    Runs four repo lookups for ``user_id`` with query ``q``, each capped at
    ``top_k``: facts, timeline events, summaries, then stories.

    Returns:
        A dict with keys ``relevant_facts``, ``timeline_hints``,
        ``relevant_summaries`` and ``relevant_stories``. Facts, timeline
        events and stories are converted to plain dicts; summaries are passed
        through exactly as the repo returned them.
    """
    fact_rows = search_facts_for_user_sync(session, user_id, q, top_k)
    event_rows = search_timeline_events_for_user_sync(session, user_id, q, top_k)
    summaries = list_summaries_for_evidence_sync(
        session, user_id=user_id, q=q, limit=top_k
    )
    stories = list_recent_stories_for_evidence_sync(
        session, user_id, query=q, limit=top_k
    )
    return dict(
        relevant_facts=_facts_to_dicts(fact_rows),
        timeline_hints=_timeline_to_dicts(event_rows),
        relevant_summaries=summaries,
        relevant_stories=_stories_to_dicts(stories),
    )
|
||
|
||
|
||
async def fetch_evidence_metadata_async(
    db: AsyncSession, user_id: str, q: str, top_k: int
) -> dict:
    """Fetch the non-chunk evidence (summaries, facts, timeline, stories) asynchronously.

    Awaits four repo lookups one after another for ``user_id`` with query
    ``q``, each capped at ``top_k``: facts, timeline events, summaries, then
    stories.

    Returns:
        A dict with keys ``relevant_facts``, ``timeline_hints``,
        ``relevant_summaries`` and ``relevant_stories``. Facts, timeline
        events and stories are converted to plain dicts; summaries are passed
        through exactly as the repo returned them.
    """
    fact_rows = await search_facts_for_user_async(db, user_id, q, top_k)
    event_rows = await search_timeline_events_for_user_async(db, user_id, q, top_k)
    summaries = await list_summaries_for_evidence_async(
        db, user_id=user_id, q=q, limit=top_k
    )
    stories = await list_recent_stories_for_evidence(
        db, user_id=user_id, query=q, limit=top_k
    )
    return dict(
        relevant_facts=_facts_to_dicts(fact_rows),
        timeline_hints=_timeline_to_dicts(event_rows),
        relevant_summaries=summaries,
        relevant_stories=_stories_to_dicts(stories),
    )
|
||
|
||
|
||
def _empty_query_bundle_sync(session: Session, user_id: str, top_k: int) -> dict:
    """Build the "browse" fallback bundle used when there is no FTS query (sync).

    The bundle contains the user's most recently updated ``rolling`` summary
    (if one exists) plus up to ``top_k`` facts and timeline events from the
    non-search repo getters. ``relevant_chunks`` and ``relevant_stories`` are
    always empty on this path.
    """
    # Function-scope imports, kept local as in the original
    # (presumably to avoid an import cycle — TODO confirm).
    from app.features.memory.models import MemorySummary
    from sqlalchemy import select

    from app.features.memory.repo import (
        get_facts_for_user_sync,
        get_timeline_events_for_user_sync,
    )

    # Newest rolling summary for this user, at most one row.
    latest_rolling_stmt = (
        select(MemorySummary)
        .where(
            MemorySummary.user_id == user_id,
            MemorySummary.summary_type == "rolling",
        )
        .order_by(MemorySummary.updated_at.desc())
        .limit(1)
    )
    latest = session.execute(latest_rolling_stmt).unique().scalar_one_or_none()

    rolling_summaries = (
        [
            {
                "id": latest.id,
                "summary_type": latest.summary_type,
                "content": latest.content,
                "source_chunk_ids": latest.source_chunk_ids,
            }
        ]
        if latest
        else []
    )

    fact_rows = get_facts_for_user_sync(session, user_id, top_k)
    event_rows = get_timeline_events_for_user_sync(session, user_id, top_k)
    return dict(
        relevant_chunks=[],
        relevant_summaries=rolling_summaries,
        relevant_facts=_facts_to_dicts(fact_rows),
        timeline_hints=_timeline_to_dicts(event_rows),
        relevant_stories=[],
    )
|
||
|
||
|
||
async def _empty_query_bundle_async(db: AsyncSession, user_id: str, top_k: int) -> dict:
    """Build the fallback bundle used when the query is empty (async).

    The bundle contains the user's most recently updated ``rolling`` summary
    (if one exists) plus up to ``top_k`` facts and timeline events from the
    non-search repo getters. ``relevant_chunks`` and ``relevant_stories`` are
    always empty on this path.
    """
    # Function-scope imports, kept local as in the original
    # (presumably to avoid an import cycle — TODO confirm).
    from sqlalchemy import select

    from app.features.memory.models import MemorySummary
    from app.features.memory.repo import (
        get_facts_for_user,
        get_timeline_events_for_user,
    )

    # Newest rolling summary for this user, at most one row.
    latest_rolling_stmt = (
        select(MemorySummary)
        .where(
            MemorySummary.user_id == user_id,
            MemorySummary.summary_type == "rolling",
        )
        .order_by(MemorySummary.updated_at.desc())
        .limit(1)
    )
    latest = (await db.execute(latest_rolling_stmt)).unique().scalar_one_or_none()

    rolling_summaries = (
        [
            {
                "id": latest.id,
                "summary_type": latest.summary_type,
                "content": latest.content,
                "source_chunk_ids": latest.source_chunk_ids,
            }
        ]
        if latest
        else []
    )

    fact_rows = await get_facts_for_user(db, user_id=user_id, limit=top_k)
    event_rows = await get_timeline_events_for_user(db, user_id=user_id, limit=top_k)
    return dict(
        relevant_chunks=[],
        relevant_summaries=rolling_summaries,
        relevant_facts=_facts_to_dicts(fact_rows),
        timeline_hints=_timeline_to_dicts(event_rows),
        relevant_stories=[],
    )
|
||
|
||
|
||
def retrieve_evidence_bundle_sync(
    session: Session, user_id: str, query: str, *, top_k: int = 10
) -> dict:
    """Assemble an evidence bundle for Celery / the narrative pipeline (FTS-only chunks + metadata).

    Args:
        session: Synchronous SQLAlchemy session used for every lookup.
        user_id: Owner of the memories being searched.
        query: Free-text query. ``None``, empty, or whitespace-only counts as
            "no query".
        top_k: Per-section result cap passed to each repo lookup.

    Returns:
        A dict with keys ``relevant_chunks``, ``relevant_facts``,
        ``timeline_hints``, ``relevant_summaries`` and ``relevant_stories``.
        With no query, the result is either the rolling-summary fallback
        bundle (when ``settings.memory_evidence_empty_query_include_rolling``
        is truthy) or a bundle of fresh empty lists.
    """
    q = query.strip() if query else ""
    if not q:
        if settings.memory_evidence_empty_query_include_rolling:
            return _empty_query_bundle_sync(session, user_id, top_k)
        # Fresh list per key: a shallow ``dict(EMPTY_EVIDENCE_BUNDLE)`` would
        # hand out the constant's own list objects, so a caller appending to
        # the result would pollute every later "empty" bundle.
        return {key: list(value) for key, value in EMPTY_EVIDENCE_BUNDLE.items()}

    chunk_rows = search_chunks_fts_sync(session, user_id, q, top_k)
    # Keep only the fields the bundle exposes for each chunk row.
    relevant_chunks = [
        {"id": r["id"], "content": r["content"], "chunk_index": r["chunk_index"]}
        for r in chunk_rows
    ]
    meta = fetch_evidence_metadata_sync(session, user_id, q, top_k)
    return {
        "relevant_chunks": relevant_chunks,
        **meta,
    }
|
||
|
||
|
||
async def retrieve_evidence_bundle_async(
    db: AsyncSession,
    user_id: str,
    query: str,
    *,
    top_k: int = 10,
    merged_chunk_dicts: list[dict],
) -> dict:
    """Assemble an evidence bundle on the async path.

    Chunks are already RRF-merged by the caller; this function only attaches
    the metadata sections (facts, timeline, summaries, stories).

    Args:
        db: Async SQLAlchemy session used for every lookup.
        user_id: Owner of the memories being searched.
        query: Free-text query. ``None``, empty, or whitespace-only counts as
            "no query".
        top_k: Per-section result cap passed to each repo lookup.
        merged_chunk_dicts: Pre-merged chunks, ``[{"id", "content",
            "chunk_index"}, ...]``. Returned as-is under ``relevant_chunks``;
            ignored when there is no query.

    Returns:
        A dict with keys ``relevant_chunks``, ``relevant_facts``,
        ``timeline_hints``, ``relevant_summaries`` and ``relevant_stories``.
        With no query, the result is either the rolling-summary fallback
        bundle (when ``settings.memory_evidence_empty_query_include_rolling``
        is truthy) or a bundle of fresh empty lists.
    """
    q = query.strip() if query else ""
    if not q:
        if settings.memory_evidence_empty_query_include_rolling:
            return await _empty_query_bundle_async(db, user_id, top_k)
        # Fresh list per key: a shallow ``dict(EMPTY_EVIDENCE_BUNDLE)`` would
        # hand out the constant's own list objects, so a caller appending to
        # the result would pollute every later "empty" bundle.
        return {key: list(value) for key, value in EMPTY_EVIDENCE_BUNDLE.items()}

    meta = await fetch_evidence_metadata_async(db, user_id, q, top_k)
    return {
        "relevant_chunks": merged_chunk_dicts,
        **meta,
    }
|