Files
life-echo/api/app/features/memory/evidence.py
Kevin e4bf0710c7 feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路
数据库
- 新增迁移 0003:timeline_events.memory_source_id 外键 → memory_sources,便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化(摘要/事实/时间线),可配置开关与最大字符数
- 新增证据包组装:合并 chunk、摘要、事实、时间线、故事等检索结果;支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展;文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG;分段 ASR 日志与空音频处理;转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符,与分段逻辑一致

后端 - Agent
- reply_limits:按 [SPLIT] 与段落拆段,并保证非空 fallback,供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id;任务成功结?
2026-03-27 16:24:43 +08:00

245 lines
7.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Evidence bundle assembly: merges retrieval results across memory and story (business layer, not a pure repo).
Celery uses the sync path; `HybridRetriever` uses the async path with RRF-merged chunks.
"""
from __future__ import annotations
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import Session
from app.core.config import settings
from app.features.memory.repo import (
list_summaries_for_evidence_async,
list_summaries_for_evidence_sync,
search_chunks_fts,
search_chunks_fts_sync,
search_facts_for_user_async,
search_facts_for_user_sync,
search_timeline_events_for_user_async,
search_timeline_events_for_user_sync,
)
from app.features.story.repo import (
list_recent_stories_for_evidence,
list_recent_stories_for_evidence_sync,
)
# Template for a zero-result evidence bundle: one empty list per evidence
# category. NOTE: the list values are shared module state, so copy them before
# handing a bundle to code that may mutate it.
EMPTY_EVIDENCE_BUNDLE: dict = {
    category: []
    for category in (
        "relevant_chunks",
        "relevant_summaries",
        "relevant_facts",
        "timeline_hints",
        "relevant_stories",
    )
}
def _facts_to_dicts(facts) -> list[dict]:
    """Project fact records onto plain dictionaries.

    Args:
        facts: Iterable of objects exposing ``id``, ``fact_type``, ``subject``,
            ``predicate`` and ``object_json`` attributes.

    Returns:
        One dict per record, in input order, holding exactly those five
        attributes under same-named keys.
    """
    exported = ("id", "fact_type", "subject", "predicate", "object_json")
    return [{name: getattr(fact, name) for name in exported} for fact in facts]
def _timeline_to_dicts(events) -> list[dict]:
    """Project timeline event records onto plain dictionaries.

    Args:
        events: Iterable of objects exposing ``id``, ``event_year``,
            ``event_date``, ``title`` and ``description`` attributes.

    Returns:
        One dict per event, in input order, holding exactly those five
        attributes under same-named keys.
    """

    def as_hint(event) -> dict:
        # Single-record projection; attributes are read in the key order below.
        return {
            "id": event.id,
            "event_year": event.event_year,
            "event_date": event.event_date,
            "title": event.title,
            "description": event.description,
        }

    return [as_hint(event) for event in events]
def _stories_to_dicts(story_rows) -> list[dict]:
    """Project story records onto plain dictionaries.

    Args:
        story_rows: Iterable of objects exposing ``id``, ``title``,
            ``summary``, ``stage`` and ``story_type`` attributes.

    Returns:
        One dict per story, in input order, holding exactly those five
        attributes under same-named keys.
    """
    projected: list[dict] = []
    for story in story_rows:
        entry = {
            "id": story.id,
            "title": story.title,
            "summary": story.summary,
            "stage": story.stage,
            "story_type": story.story_type,
        }
        projected.append(entry)
    return projected
def fetch_evidence_metadata_sync(
    session: Session, user_id: str, q: str, top_k: int
) -> dict:
    """Collect the non-chunk evidence (summaries, facts, timeline, stories), sync.

    Args:
        session: Synchronous SQLAlchemy session passed to every repo call.
        user_id: Owner whose evidence is searched.
        q: Search query forwarded unchanged to each repo helper.
        top_k: Per-category result limit forwarded to each repo helper.

    Returns:
        Dict with keys ``relevant_facts``, ``timeline_hints``,
        ``relevant_summaries`` and ``relevant_stories``. Summaries are passed
        through exactly as the repo helper returns them; the other three are
        converted to plain dicts.
    """
    # All four lookups run before any conversion, in this fixed order.
    fact_rows = search_facts_for_user_sync(session, user_id, q, top_k)
    event_rows = search_timeline_events_for_user_sync(session, user_id, q, top_k)
    summary_items = list_summaries_for_evidence_sync(
        session, user_id=user_id, q=q, limit=top_k
    )
    stories = list_recent_stories_for_evidence_sync(
        session, user_id, query=q, limit=top_k
    )

    metadata: dict = {}
    metadata["relevant_facts"] = _facts_to_dicts(fact_rows)
    metadata["timeline_hints"] = _timeline_to_dicts(event_rows)
    metadata["relevant_summaries"] = summary_items
    metadata["relevant_stories"] = _stories_to_dicts(stories)
    return metadata
async def fetch_evidence_metadata_async(
    db: AsyncSession, user_id: str, q: str, top_k: int
) -> dict:
    """Collect the non-chunk evidence (summaries, facts, timeline, stories), async.

    Args:
        db: Async SQLAlchemy session passed to every repo call.
        user_id: Owner whose evidence is searched.
        q: Search query forwarded unchanged to each repo helper.
        top_k: Per-category result limit forwarded to each repo helper.

    Returns:
        Dict with keys ``relevant_facts``, ``timeline_hints``,
        ``relevant_summaries`` and ``relevant_stories``. Summaries are passed
        through exactly as the repo helper returns them; the other three are
        converted to plain dicts.
    """
    # The lookups are awaited one after another on the same session.
    fact_rows = await search_facts_for_user_async(db, user_id, q, top_k)
    event_rows = await search_timeline_events_for_user_async(db, user_id, q, top_k)
    summary_items = await list_summaries_for_evidence_async(
        db, user_id=user_id, q=q, limit=top_k
    )
    stories = await list_recent_stories_for_evidence(
        db, user_id=user_id, query=q, limit=top_k
    )

    metadata: dict = {}
    metadata["relevant_facts"] = _facts_to_dicts(fact_rows)
    metadata["timeline_hints"] = _timeline_to_dicts(event_rows)
    metadata["relevant_summaries"] = summary_items
    metadata["relevant_stories"] = _stories_to_dicts(stories)
    return metadata
def _empty_query_bundle_sync(session: Session, user_id: str, top_k: int) -> dict:
    """Build the "browse" fallback bundle used when there is no FTS query (sync).

    Args:
        session: Synchronous SQLAlchemy session.
        user_id: Owner whose memory is browsed.
        top_k: Limit passed to the fact and timeline fallbacks.

    Returns:
        A full five-key bundle: the most recently updated ``rolling`` summary
        (zero or one entry), facts and timeline events fetched without a
        query, and empty ``relevant_chunks`` / ``relevant_stories`` lists.
    """
    from app.features.memory.models import MemorySummary
    from sqlalchemy import select
    from app.features.memory.repo import (
        get_facts_for_user_sync,
        get_timeline_events_for_user_sync,
    )

    # Newest rolling summary for this user, if one exists.
    latest_rolling_stmt = (
        select(MemorySummary)
        .where(
            MemorySummary.user_id == user_id,
            MemorySummary.summary_type == "rolling",
        )
        .order_by(MemorySummary.updated_at.desc())
        .limit(1)
    )
    latest = session.execute(latest_rolling_stmt).unique().scalar_one_or_none()

    summary_dicts = (
        [
            {
                "id": latest.id,
                "summary_type": latest.summary_type,
                "content": latest.content,
                "source_chunk_ids": latest.source_chunk_ids,
            }
        ]
        if latest
        else []
    )

    fact_rows = get_facts_for_user_sync(session, user_id, top_k)
    event_rows = get_timeline_events_for_user_sync(session, user_id, top_k)

    bundle: dict = {"relevant_chunks": [], "relevant_summaries": summary_dicts}
    bundle["relevant_facts"] = _facts_to_dicts(fact_rows)
    bundle["timeline_hints"] = _timeline_to_dicts(event_rows)
    bundle["relevant_stories"] = []
    return bundle
async def _empty_query_bundle_async(db: AsyncSession, user_id: str, top_k: int) -> dict:
    """Build the "browse" fallback bundle used when there is no FTS query (async).

    Args:
        db: Async SQLAlchemy session.
        user_id: Owner whose memory is browsed.
        top_k: Limit passed to the fact and timeline fallbacks.

    Returns:
        A full five-key bundle: the most recently updated ``rolling`` summary
        (zero or one entry), facts and timeline events fetched without a
        query, and empty ``relevant_chunks`` / ``relevant_stories`` lists.
    """
    from sqlalchemy import select
    from app.features.memory.models import MemorySummary
    from app.features.memory.repo import (
        get_facts_for_user,
        get_timeline_events_for_user,
    )

    # Newest rolling summary for this user, if one exists.
    latest_rolling_stmt = (
        select(MemorySummary)
        .where(
            MemorySummary.user_id == user_id,
            MemorySummary.summary_type == "rolling",
        )
        .order_by(MemorySummary.updated_at.desc())
        .limit(1)
    )
    latest = (await db.execute(latest_rolling_stmt)).unique().scalar_one_or_none()

    summary_dicts = (
        [
            {
                "id": latest.id,
                "summary_type": latest.summary_type,
                "content": latest.content,
                "source_chunk_ids": latest.source_chunk_ids,
            }
        ]
        if latest
        else []
    )

    fact_rows = await get_facts_for_user(db, user_id=user_id, limit=top_k)
    event_rows = await get_timeline_events_for_user(db, user_id=user_id, limit=top_k)

    bundle: dict = {"relevant_chunks": [], "relevant_summaries": summary_dicts}
    bundle["relevant_facts"] = _facts_to_dicts(fact_rows)
    bundle["timeline_hints"] = _timeline_to_dicts(event_rows)
    bundle["relevant_stories"] = []
    return bundle
def retrieve_evidence_bundle_sync(
    session: Session, user_id: str, query: str, *, top_k: int = 10
) -> dict:
    """Assemble an evidence bundle for Celery / the narrative pipeline (sync).

    Chunks come from FTS only; facts, timeline hints, summaries and stories
    are added by ``fetch_evidence_metadata_sync``.

    Args:
        session: Synchronous SQLAlchemy session.
        user_id: Owner whose memory is searched.
        query: Free-text query. ``None``, empty, or whitespace-only selects
            the empty-query path.
        top_k: Per-category result limit.

    Returns:
        Dict with keys ``relevant_chunks``, ``relevant_summaries``,
        ``relevant_facts``, ``timeline_hints`` and ``relevant_stories``. On
        the empty-query path this is either the rolling-summary fallback (when
        ``settings.memory_evidence_empty_query_include_rolling`` is truthy) or
        a fresh all-empty bundle.
    """
    q = query.strip() if query else ""
    if not q:
        if settings.memory_evidence_empty_query_include_rolling:
            return _empty_query_bundle_sync(session, user_id, top_k)
        # Copy every list, not just the outer dict: dict(EMPTY_EVIDENCE_BUNDLE)
        # would give each caller the very same list objects held by the module
        # constant, so one caller appending to its result would leak into all
        # later "empty" bundles.
        return {key: list(items) for key, items in EMPTY_EVIDENCE_BUNDLE.items()}

    chunk_rows = search_chunks_fts_sync(session, user_id, q, top_k)
    # Keep only the three fields that make up a chunk entry in the bundle.
    relevant_chunks = [
        {"id": r["id"], "content": r["content"], "chunk_index": r["chunk_index"]}
        for r in chunk_rows
    ]
    meta = fetch_evidence_metadata_sync(session, user_id, q, top_k)
    return {
        "relevant_chunks": relevant_chunks,
        **meta,
    }
async def retrieve_evidence_bundle_async(
    db: AsyncSession,
    user_id: str,
    query: str,
    *,
    top_k: int = 10,
    merged_chunk_dicts: list[dict],
) -> dict:
    """Assemble an evidence bundle on the async path.

    The caller has already merged the chunks (RRF); this function only adds
    the metadata from ``fetch_evidence_metadata_async``.

    Args:
        db: Async SQLAlchemy session.
        user_id: Owner whose memory is searched.
        query: Free-text query. ``None``, empty, or whitespace-only selects
            the empty-query path, in which ``merged_chunk_dicts`` is not used.
        top_k: Per-category result limit for the metadata lookups.
        merged_chunk_dicts: Pre-merged chunks shaped like
            ``[{"id", "content", "chunk_index"}, ...]``; returned as-is under
            ``relevant_chunks``.

    Returns:
        Dict with keys ``relevant_chunks``, ``relevant_summaries``,
        ``relevant_facts``, ``timeline_hints`` and ``relevant_stories``. On
        the empty-query path this is either the rolling-summary fallback (when
        ``settings.memory_evidence_empty_query_include_rolling`` is truthy) or
        a fresh all-empty bundle.
    """
    q = query.strip() if query else ""
    if not q:
        if settings.memory_evidence_empty_query_include_rolling:
            return await _empty_query_bundle_async(db, user_id, top_k)
        # Copy every list, not just the outer dict: dict(EMPTY_EVIDENCE_BUNDLE)
        # would give each caller the very same list objects held by the module
        # constant, so one caller appending to its result would leak into all
        # later "empty" bundles.
        return {key: list(items) for key, items in EMPTY_EVIDENCE_BUNDLE.items()}

    meta = await fetch_evidence_metadata_async(db, user_id, q, top_k)
    return {
        "relevant_chunks": merged_chunk_dicts,
        **meta,
    }