feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路

数据库
- 新增迁移 0003:timeline_events.memory_source_id 外键 → memory_sources,便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化(摘要/事实/时间线),可配置开关与最大字符数
- 新增证据包组装:合并 chunk、摘要、事实、时间线、故事等检索结果;支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展;文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG;分段 ASR 日志与空音频处理;转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符,与分段逻辑一致

后端 - Agent
- reply_limits:按 [SPLIT] 与段落拆段,并保证非空 fallback,供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id;任务成功结?
This commit is contained in:
Kevin
2026-03-27 16:01:28 +08:00
parent 1374f6e8f5
commit e4bf0710c7
70 changed files with 3404 additions and 557 deletions

View File

@@ -2,6 +2,7 @@
MemoryService — conversation / memoir 的统一门面。
- ingest_transcript: transcript -> memory_sources, chunks, embedding, FTS
- ingest 后可选LLM 富化session/rolling 摘要、事实、时间线)
- retrieve: 委托 HybridRetriever 返回 evidence bundleFTS + 可选向量 RRF
Celery 侧使用 `ingest_transcript_sync` + `retrieve_evidence_sync`,与异步路径差异见
@@ -10,15 +11,22 @@ Celery 侧使用 `ingest_transcript_sync` + `retrieve_evidence_sync`,与异步
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.logging import get_logger
from app.features.memory.chunker import chunk_transcript
from app.features.memory.schemas import EvidenceBundle
from app.features.memory.repo import (
create_chunk,
create_curation_action,
create_source,
set_chunk_excluded,
set_memory_fact_status,
update_chunk_embedding,
update_chunk_fts,
)
from app.ports.embedding import EmbeddingProvider
logger = get_logger(__name__)
class MemoryService:
def __init__(
@@ -75,15 +83,97 @@ class MemoryService:
if emb:
await update_chunk_embedding(self._db, chunk_id, emb)
try:
from app.core.config import settings
from app.core.dependencies import get_llm_provider
from app.features.memory.enrichment import enrich_memory_after_ingest_async
if settings.memory_enrichment_enabled:
llm = get_llm_provider().langchain_llm
await enrich_memory_after_ingest_async(
self._db, user_id, source.id, llm
)
except Exception as e:
logger.warning(
"memory enrichment 跳过: {} exc_type={}", e, type(e).__name__
)
await self._db.commit()
return source.id
async def retrieve(self, user_id: str, query: str, *, top_k: int = 10) -> dict:
async def retrieve(
self, user_id: str, query: str, *, top_k: int = 10
) -> EvidenceBundle:
"""Retrieve relevant evidence. 委托 HybridRetriever。"""
from app.features.memory.retriever import HybridRetriever
retriever = HybridRetriever(self._db, embedding_provider=self._embedding)
return await retriever.retrieve(user_id=user_id, query=query, top_k=top_k)
raw = await retriever.retrieve(user_id=user_id, query=query, top_k=top_k)
return EvidenceBundle.model_validate(raw)
async def exclude_chunk(
self, user_id: str, chunk_id: str, *, reason: str = ""
) -> bool:
ok = await set_chunk_excluded(self._db, chunk_id, user_id, True)
if not ok:
return False
await create_curation_action(
self._db,
user_id=user_id,
action_type="exclude",
target_type="chunk",
target_id=chunk_id,
details={"reason": reason} if reason else None,
)
await self._db.commit()
return True
async def restore_chunk(self, user_id: str, chunk_id: str) -> bool:
ok = await set_chunk_excluded(self._db, chunk_id, user_id, False)
if not ok:
return False
await create_curation_action(
self._db,
user_id=user_id,
action_type="restore",
target_type="chunk",
target_id=chunk_id,
details=None,
)
await self._db.commit()
return True
async def confirm_fact(self, user_id: str, fact_id: str) -> bool:
ok = await set_memory_fact_status(self._db, fact_id, user_id, "confirmed")
if not ok:
return False
await create_curation_action(
self._db,
user_id=user_id,
action_type="confirm",
target_type="fact",
target_id=fact_id,
details=None,
)
await self._db.commit()
return True
async def reject_fact(
self, user_id: str, fact_id: str, *, reason: str = ""
) -> bool:
ok = await set_memory_fact_status(self._db, fact_id, user_id, "rejected")
if not ok:
return False
await create_curation_action(
self._db,
user_id=user_id,
action_type="reject",
target_type="fact",
target_id=fact_id,
details={"reason": reason} if reason else None,
)
await self._db.commit()
return ok
def ingest_transcript_sync(
@@ -128,5 +218,16 @@ def ingest_transcript_sync(
session.flush()
update_chunk_fts_sync(session, chunk.id)
try:
from app.core.config import settings
from app.features.memory.enrichment import enrich_memory_after_ingest_sync
if settings.memory_enrichment_enabled:
enrich_memory_after_ingest_sync(session, user_id, source.id, llm=None)
except Exception as e:
logger.warning(
"memory enrichment 跳过(sync): {} exc_type={}", e, type(e).__name__
)
session.commit()
return source.id