feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路
数据库
- 新增迁移 0003:timeline_events.memory_source_id 外键 → memory_sources,便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化(摘要/事实/时间线),可配置开关与最大字符数
- 新增证据包组装:合并 chunk、摘要、事实、时间线、故事等检索结果;支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展;文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG;分段 ASR 日志与空音频处理;转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符,与分段逻辑一致

后端 - Agent
- reply_limits:按 [SPLIT] 与段落拆段,并保证非空 fallback,供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id;任务成功结…
This commit is contained in:
@@ -2,12 +2,8 @@
|
||||
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.features.memory.repo import (
|
||||
get_facts_for_user,
|
||||
get_timeline_events_for_user,
|
||||
search_chunks_fts,
|
||||
search_chunks_vector,
|
||||
)
|
||||
from app.features.memory.evidence import retrieve_evidence_bundle_async
|
||||
from app.features.memory.repo import search_chunks_fts, search_chunks_vector
|
||||
from app.ports.embedding import EmbeddingProvider
|
||||
|
||||
|
||||
@@ -44,24 +40,31 @@ class HybridRetriever:
|
||||
"""
|
||||
Return evidence bundle:
|
||||
{relevant_chunks, relevant_summaries, relevant_facts, timeline_hints, relevant_stories}
|
||||
|
||||
`relevant_summaries` / `relevant_stories` 当前多为占位空列表;叙事 prompt 仅应依赖
|
||||
已实现填充的字段(见 `format_evidence_chunks_for_prompt`)。
|
||||
"""
|
||||
if not query.strip():
|
||||
return await retrieve_evidence_bundle_async(
|
||||
self._db,
|
||||
user_id,
|
||||
query,
|
||||
top_k=top_k,
|
||||
merged_chunk_dicts=[],
|
||||
)
|
||||
|
||||
q = query.strip()
|
||||
fts_chunks = await search_chunks_fts(
|
||||
self._db, user_id=user_id, query=query, limit=top_k * 2
|
||||
)
|
||||
|
||||
vector_chunks: list[dict] = []
|
||||
if self._embedding and query.strip():
|
||||
q_emb = await self._embedding.embed_text(query.strip())
|
||||
if self._embedding and q:
|
||||
q_emb = await self._embedding.embed_text(q)
|
||||
if q_emb:
|
||||
vector_chunks = await search_chunks_vector(
|
||||
self._db, user_id=user_id, query_embedding=q_emb, limit=top_k * 2
|
||||
)
|
||||
|
||||
merged = _rrf_merge(fts_chunks, vector_chunks)[:top_k]
|
||||
relevant_chunks = [
|
||||
merged_chunk_dicts = [
|
||||
{
|
||||
"id": c["id"],
|
||||
"content": c["content"],
|
||||
@@ -70,36 +73,10 @@ class HybridRetriever:
|
||||
for c in merged
|
||||
]
|
||||
|
||||
facts = await get_facts_for_user(self._db, user_id=user_id, limit=top_k)
|
||||
relevant_facts = [
|
||||
{
|
||||
"id": f.id,
|
||||
"fact_type": f.fact_type,
|
||||
"subject": f.subject,
|
||||
"predicate": f.predicate,
|
||||
"object_json": f.object_json,
|
||||
}
|
||||
for f in facts
|
||||
]
|
||||
|
||||
events = await get_timeline_events_for_user(
|
||||
self._db, user_id=user_id, limit=top_k
|
||||
return await retrieve_evidence_bundle_async(
|
||||
self._db,
|
||||
user_id,
|
||||
query,
|
||||
top_k=top_k,
|
||||
merged_chunk_dicts=merged_chunk_dicts,
|
||||
)
|
||||
timeline_hints = [
|
||||
{
|
||||
"id": e.id,
|
||||
"event_year": e.event_year,
|
||||
"event_date": e.event_date,
|
||||
"title": e.title,
|
||||
"description": e.description,
|
||||
}
|
||||
for e in events
|
||||
]
|
||||
|
||||
return {
|
||||
"relevant_chunks": relevant_chunks,
|
||||
"relevant_summaries": [],
|
||||
"relevant_facts": relevant_facts,
|
||||
"timeline_hints": timeline_hints,
|
||||
"relevant_stories": [],
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user