feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路
数据库 - 新增迁移 0003:timeline_events.memory_source_id 外键 → memory_sources,便于按 ingest 源做时间线幂等 后端 - 记忆 - 新增 ingest 后 LLM 富化(摘要/事实/时间线),可配置开关与最大字符数 - 新增证据包组装:合并 chunk、摘要、事实、时间线、故事等检索结果;支持空 query 时是否仍带 rolling 等开关 - repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展;文档 memory-retrieval.md 更新 后端 - 对话 WS - 增加 PING/PONG;分段 ASR 日志与空音频处理;转写失败与「无助手回复」错误提示更明确 - 助手多段回复持久化使用统一分隔符,与分段逻辑一致 后端 - Agent - reply_limits:按 [SPLIT] 与段落拆段,并保证非空 fallback,供 WS 与 TTS 多段下发 后端 - 回忆录任务 - transcript ingest 记录 source_id;任务成功结?
This commit is contained in:
@@ -1,6 +1,76 @@
|
||||
"""Chronology organization — build and update timeline events (skeleton)."""
|
||||
"""由已抽取事实生成时间线事件(LLM + JSON)。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
from app.core.langchain_llm import ainvoke_json_object, invoke_json_object
|
||||
from app.core.logging import get_logger
|
||||
from app.features.memory.llm_schemas import (
|
||||
TimelineEventsPayload,
|
||||
parse_json_payload,
|
||||
timeline_payload_to_dicts,
|
||||
)
|
||||
|
||||
# Module-level logger; the builders below use it to report failed LLM calls.
logger = get_logger(__name__)
|
||||
|
||||
# Upper bound, in characters, on the serialized facts JSON embedded in the
# LLM prompt.
MAX_FACTS_JSON = 20000
|
||||
|
||||
|
||||
def build_timeline_events_from_facts_sync(llm: Any, facts: list[dict]) -> list[dict]:
    """Generate timeline events from already-extracted facts (blocking call).

    Args:
        llm: Model handle forwarded to ``invoke_json_object``. A falsy value
            short-circuits to an empty result.
        facts: Fact dicts. Each must contain an ``id`` field (i.e. the fact
            has already been persisted), because the prompt asks the model
            to cite those ids in ``source_fact_ids``.

    Returns:
        The list produced by ``timeline_payload_to_dicts``. An empty list is
        returned when ``llm`` or ``facts`` is falsy, when
        ``parse_json_payload`` returns ``None``, or when the model call or
        parsing raises ``TypeError`` / ``ValueError`` (logged as a warning).
    """
    if not llm or not facts:
        return []

    # Fit the facts into the MAX_FACTS_JSON character budget by dropping
    # whole trailing facts. Slicing the serialized string instead would cut
    # a fact in half and hand the model malformed JSON (possibly with a
    # truncated id it is then asked to cite).
    separator = ", "  # json.dumps' default item separator for lists
    pieces: list[str] = []
    used = len("[]")
    for fact in facts:
        piece = json.dumps(fact, ensure_ascii=False)
        cost = len(piece) + (len(separator) if pieces else 0)
        if used + cost > MAX_FACTS_JSON:
            break
        pieces.append(piece)
        used += cost

    if pieces:
        # Identical to json.dumps(facts, ensure_ascii=False) when all fit.
        payload = "[" + separator.join(pieces) + "]"
    else:
        # Even the first fact alone is over budget: fall back to a hard
        # character cut so the model still receives some context.
        payload = json.dumps(facts[:1], ensure_ascii=False)[:MAX_FACTS_JSON]

    prompt = (
        "根据下列事实(含 id)生成时间线事件,用于回忆录展示。\n"
        "每条含 event_year(整数或 null)、event_date(可选)、title、description、"
        "source_fact_ids(必须来自输入中的 id 列表)。\n"
        '只输出 JSON:{"events":[...]},无事件则 {"events":[]}。最多 15 条。\n\n'
        f"{payload}"
    )
    try:
        raw = invoke_json_object(
            llm, prompt, max_tokens=4096, agent="memory.timeline_events_sync"
        )
        parsed = parse_json_payload(raw, TimelineEventsPayload)
        if parsed is None:
            return []
        return timeline_payload_to_dicts(parsed)
    except (TypeError, ValueError) as e:
        logger.warning("build_timeline_events_from_facts_sync 失败: {}", e)
        return []
|
||||
|
||||
|
||||
async def build_timeline_events_from_facts_async(
    llm: Any, facts: list[dict]
) -> list[dict]:
    """Generate timeline events from already-extracted facts (awaitable call).

    Args:
        llm: Model handle forwarded to ``ainvoke_json_object``. A falsy value
            short-circuits to an empty result.
        facts: Fact dicts. The prompt asks the model to take
            ``source_fact_ids`` from the ids present in this input.

    Returns:
        The list produced by ``timeline_payload_to_dicts``. An empty list is
        returned when ``llm`` or ``facts`` is falsy, when
        ``parse_json_payload`` returns ``None``, or when the model call or
        parsing raises ``TypeError`` / ``ValueError`` (logged as a warning).
    """
    if not llm or not facts:
        return []

    # Fit the facts into the MAX_FACTS_JSON character budget by dropping
    # whole trailing facts. Slicing the serialized string instead would cut
    # a fact in half and hand the model malformed JSON (possibly with a
    # truncated id it is then asked to cite).
    separator = ", "  # json.dumps' default item separator for lists
    pieces: list[str] = []
    used = len("[]")
    for fact in facts:
        piece = json.dumps(fact, ensure_ascii=False)
        cost = len(piece) + (len(separator) if pieces else 0)
        if used + cost > MAX_FACTS_JSON:
            break
        pieces.append(piece)
        used += cost

    if pieces:
        # Identical to json.dumps(facts, ensure_ascii=False) when all fit.
        payload = "[" + separator.join(pieces) + "]"
    else:
        # Even the first fact alone is over budget: fall back to a hard
        # character cut so the model still receives some context.
        payload = json.dumps(facts[:1], ensure_ascii=False)[:MAX_FACTS_JSON]

    prompt = (
        "根据下列事实(含 id)生成时间线事件。\n"
        "每条含 event_year、event_date、title、description、source_fact_ids(来自输入 id)。\n"
        '只输出 JSON:{"events":[...]}。\n\n'
        f"{payload}"
    )
    try:
        raw = await ainvoke_json_object(
            llm, prompt, max_tokens=4096, agent="memory.timeline_events_async"
        )
        parsed = parse_json_payload(raw, TimelineEventsPayload)
        if parsed is None:
            return []
        return timeline_payload_to_dicts(parsed)
    except (TypeError, ValueError) as e:
        logger.warning("build_timeline_events_from_facts_async 失败: {}", e)
        return []
|
||||
|
||||
|
||||
async def build_timeline_events(facts: list[dict]) -> list[dict]:
    """Legacy-compatible entry point for building timeline events from facts.

    Looks up the LLM via ``get_llm_provider().langchain_llm`` and delegates
    to ``build_timeline_events_from_facts_async``.

    Args:
        facts: Fact dicts, passed through unchanged to the async builder.

    Returns:
        Whatever ``build_timeline_events_from_facts_async`` returns for the
        resolved LLM and ``facts``.
    """
    # The old skeleton body (``raise NotImplementedError``) is dropped: left
    # in place it would make the delegation below unreachable.
    # Function-scope import kept as in the original; presumably it avoids an
    # import cycle with app.core.dependencies — TODO confirm.
    from app.core.dependencies import get_llm_provider

    llm = get_llm_provider().langchain_llm
    return await build_timeline_events_from_facts_async(llm, facts)
|
||||
|
||||
Reference in New Issue
Block a user