数据库 - 新增迁移 0003:timeline_events.memory_source_id 外键 → memory_sources,便于按 ingest 源做时间线幂等 后端 - 记忆 - 新增 ingest 后 LLM 富化(摘要/事实/时间线),可配置开关与最大字符数 - 新增证据包组装:合并 chunk、摘要、事实、时间线、故事等检索结果;支持空 query 时是否仍带 rolling 等开关 - repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展;文档 memory-retrieval.md 更新 后端 - 对话 WS - 增加 PING/PONG;分段 ASR 日志与空音频处理;转写失败与「无助手回复」错误提示更明确 - 助手多段回复持久化使用统一分隔符,与分段逻辑一致 后端 - Agent - reply_limits:按 [SPLIT] 与段落拆段,并保证非空 fallback,供 WS 与 TTS 多段下发 后端 - 回忆录任务 - transcript ingest 记录 source_id;任务成功结?
77 lines
2.7 KiB
Python
77 lines
2.7 KiB
Python
"""由已抽取事实生成时间线事件(LLM + JSON)。"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import json
|
||
from typing import Any
|
||
|
||
from app.core.langchain_llm import ainvoke_json_object, invoke_json_object
|
||
from app.core.logging import get_logger
|
||
from app.features.memory.llm_schemas import (
|
||
TimelineEventsPayload,
|
||
parse_json_payload,
|
||
timeline_payload_to_dicts,
|
||
)
|
||
|
||
# Module-level logger, named after this module.
logger = get_logger(__name__)
|
||
|
||
# Upper bound, in characters, on the serialized facts JSON that is embedded
# in the LLM prompt by the timeline builders below.
MAX_FACTS_JSON = 20000
|
||
|
||
|
||
def build_timeline_events_from_facts_sync(llm: Any, facts: list[dict]) -> list[dict]:
    """Generate timeline events from extracted facts with a blocking LLM call.

    Each fact must carry an ``id`` field (i.e. it has already been persisted);
    the prompt instructs the model to cite those ids in ``source_fact_ids``.

    Args:
        llm: Model handle forwarded to ``invoke_json_object``. A falsy value
            short-circuits to an empty result.
        facts: Fact dicts that are embedded in the prompt as JSON.

    Returns:
        The dicts produced by ``timeline_payload_to_dicts``, or ``[]`` when
        there is nothing to do, the reply cannot be parsed into a
        ``TimelineEventsPayload``, or a ``TypeError``/``ValueError`` is raised
        while invoking the model or parsing its reply.
    """
    if not llm or not facts:
        return []

    # Budget the prompt by whole facts. Slicing the serialized list at an
    # arbitrary character would hand the model malformed JSON, possibly with
    # a half-written fact or a truncated id.
    pieces: list[str] = []
    size = 2  # the enclosing "[" and "]"
    for fact in facts:
        piece = json.dumps(fact, ensure_ascii=False)
        grown = size + len(piece) + (2 if pieces else 0)  # ", " separator
        if pieces and grown > MAX_FACTS_JSON:
            break
        pieces.append(piece)
        size = grown
    # Joining with ", " reproduces json.dumps(list) exactly when everything
    # fits. The slice only takes effect when the first fact alone is over
    # budget, in which case the old character cut is the best we can do.
    payload = ("[" + ", ".join(pieces) + "]")[:MAX_FACTS_JSON]

    prompt = (
        "根据下列事实(含 id)生成时间线事件,用于回忆录展示。\n"
        "每条含 event_year(整数或 null)、event_date(可选)、title、description、"
        "source_fact_ids(必须来自输入中的 id 列表)。\n"
        '只输出 JSON:{"events":[...]},无事件则 {"events":[]}。最多 15 条。\n\n'
        f"{payload}"
    )
    try:
        raw = invoke_json_object(
            llm, prompt, max_tokens=4096, agent="memory.timeline_events_sync"
        )
        parsed = parse_json_payload(raw, TimelineEventsPayload)
        if parsed is None:
            return []
        return timeline_payload_to_dicts(parsed)
    except (TypeError, ValueError) as e:
        # Best-effort: a malformed reply must not break the caller.
        logger.warning("build_timeline_events_from_facts_sync 失败: {}", e)
        return []
|
||
|
||
|
||
async def build_timeline_events_from_facts_async(
    llm: Any, facts: list[dict]
) -> list[dict]:
    """Generate timeline events from extracted facts with an awaited LLM call.

    The prompt asks the model to cite the input fact ids in
    ``source_fact_ids``, so each fact is expected to carry an ``id``.

    Args:
        llm: Model handle forwarded to ``ainvoke_json_object``. A falsy value
            short-circuits to an empty result.
        facts: Fact dicts that are embedded in the prompt as JSON.

    Returns:
        The dicts produced by ``timeline_payload_to_dicts``, or ``[]`` when
        there is nothing to do, the reply cannot be parsed into a
        ``TimelineEventsPayload``, or a ``TypeError``/``ValueError`` is raised
        while invoking the model or parsing its reply.
    """
    if not llm or not facts:
        return []

    # Budget the prompt by whole facts. Slicing the serialized list at an
    # arbitrary character would hand the model malformed JSON, possibly with
    # a half-written fact or a truncated id.
    pieces: list[str] = []
    size = 2  # the enclosing "[" and "]"
    for fact in facts:
        piece = json.dumps(fact, ensure_ascii=False)
        grown = size + len(piece) + (2 if pieces else 0)  # ", " separator
        if pieces and grown > MAX_FACTS_JSON:
            break
        pieces.append(piece)
        size = grown
    # Joining with ", " reproduces json.dumps(list) exactly when everything
    # fits. The slice only takes effect when the first fact alone is over
    # budget, in which case the old character cut is the best we can do.
    payload = ("[" + ", ".join(pieces) + "]")[:MAX_FACTS_JSON]

    # NOTE(review): this prompt is terser than the sync variant's (no
    # 15-event cap, no explicit empty-result shape) -- confirm that the
    # difference is intentional.
    prompt = (
        "根据下列事实(含 id)生成时间线事件。\n"
        "每条含 event_year、event_date、title、description、source_fact_ids(来自输入 id)。\n"
        '只输出 JSON:{"events":[...]}。\n\n'
        f"{payload}"
    )
    try:
        raw = await ainvoke_json_object(
            llm, prompt, max_tokens=4096, agent="memory.timeline_events_async"
        )
        parsed = parse_json_payload(raw, TimelineEventsPayload)
        if parsed is None:
            return []
        return timeline_payload_to_dicts(parsed)
    except (TypeError, ValueError) as e:
        # Best-effort: a malformed reply must not break the caller.
        logger.warning("build_timeline_events_from_facts_async 失败: {}", e)
        return []
|
||
|
||
|
||
async def build_timeline_events(facts: list[dict]) -> list[dict]:
    """Legacy-compatible entry point for building timeline events.

    Obtains the LangChain LLM from the application's LLM provider and
    delegates to ``build_timeline_events_from_facts_async``.

    Args:
        facts: Fact dicts to turn into timeline events.

    Returns:
        Whatever ``build_timeline_events_from_facts_async`` returns.
    """
    # Imported at call time rather than at module import; presumably to
    # avoid a circular import -- TODO confirm.
    from app.core.dependencies import get_llm_provider

    provider = get_llm_provider()
    return await build_timeline_events_from_facts_async(
        provider.langchain_llm, facts
    )
|