Files
life-echo/api/app/features/memory/summarizer.py
Kevin e4bf0710c7 feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路
数据库
- 新增迁移 0003:timeline_events.memory_source_id 外键 → memory_sources,便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化(摘要/事实/时间线),可配置开关与最大字符数
- 新增证据包组装:合并 chunk、摘要、事实、时间线、故事等检索结果;支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展;文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG;分段 ASR 日志与空音频处理;转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符,与分段逻辑一致

后端 - Agent
- reply_limits:按 [SPLIT] 与段落拆段,并保证非空 fallback,供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id;任务成功结…(提交说明在此处被截断)
2026-03-27 16:24:43 +08:00

132 lines
4.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""会话摘要与滚动摘要LLM + JSON"""
from __future__ import annotations
from typing import Any
from app.core.langchain_llm import ainvoke_json_object, invoke_json_object
from app.core.logging import get_logger
from app.features.memory.llm_schemas import (
RollingSummaryPayload,
SessionSummaryPayload,
parse_json_payload,
)
# Module-level logger, named after this module via ``__name__``.
logger = get_logger(__name__)
def _max_input_chars() -> int:
    """Return the ``memory_enrichment_max_chars`` value from app settings.

    The summary generators in this module use it as the character cap for
    text placed into a prompt.
    """
    # NOTE(review): the import is deferred to call time; presumably to avoid
    # an import cycle or to pick up settings lazily -- confirm.
    from app.core.config import settings as app_settings

    char_limit = app_settings.memory_enrichment_max_chars
    return char_limit
def generate_session_summary_sync(llm: Any, chunk_texts: list[str]) -> str:
    """Generate a short session-level summary for one batch of chunks.

    Args:
        llm: Model handle forwarded to ``invoke_json_object``. Any falsy
            value disables summarisation.
        chunk_texts: Chunk texts. Falsy entries are skipped; the rest are
            joined with blank lines, stripped, and truncated to
            ``_max_input_chars()`` characters.

    Returns:
        The stripped ``summary`` field of the parsed reply, or ``""`` when
        there is no model, no usable text, ``parse_json_payload`` returns
        ``None``, or the call raises ``TypeError``/``ValueError``.
    """
    if not llm:
        return ""
    lim = _max_input_chars()
    combined = "\n\n".join(t for t in chunk_texts if t).strip()[:lim]
    if not combined:
        return ""
    # The prompt requests a 2~8 sentence range. It previously read "28 句",
    # which contradicts the "short summary" contract of this function.
    prompt = (
        "用 2~8 句中文概括下列口述/对话要点,不编造、不评价。只输出 JSON"
        '{"summary":"..."}\n\n文本:\n'
        f"{combined}"
    )
    try:
        raw = invoke_json_object(
            llm, prompt, max_tokens=2048, agent="memory.session_summary_sync"
        )
        parsed = parse_json_payload(raw, SessionSummaryPayload)
        if parsed is None:
            return ""
        return str(parsed.summary or "").strip()
    except (TypeError, ValueError) as e:
        # Best-effort: a failed summary is logged and reported as empty.
        logger.warning("generate_session_summary_sync 失败: {}", e)
        return ""
async def generate_session_summary_async(llm: Any, chunk_texts: list[str]) -> str:
    """Asynchronously generate a short session-level summary for a batch.

    Args:
        llm: Model handle forwarded to ``ainvoke_json_object``. Any falsy
            value disables summarisation.
        chunk_texts: Chunk texts. Falsy entries are skipped; the rest are
            joined with blank lines, stripped, and truncated to
            ``_max_input_chars()`` characters.

    Returns:
        The stripped ``summary`` field of the parsed reply, or ``""`` when
        there is no model, no usable text, ``parse_json_payload`` returns
        ``None``, or the call raises ``TypeError``/``ValueError``.
    """
    if not llm:
        return ""
    lim = _max_input_chars()
    combined = "\n\n".join(t for t in chunk_texts if t).strip()[:lim]
    if not combined:
        return ""
    # The prompt requests a 2~8 sentence range. It previously read "28 句",
    # which contradicts the "short summary" purpose of this function.
    prompt = (
        "用 2~8 句中文概括下列口述/对话要点,不编造、不评价。只输出 JSON"
        '{"summary":"..."}\n\n文本:\n'
        f"{combined}"
    )
    try:
        raw = await ainvoke_json_object(
            llm, prompt, max_tokens=2048, agent="memory.session_summary_async"
        )
        parsed = parse_json_payload(raw, SessionSummaryPayload)
        if parsed is None:
            return ""
        return str(parsed.summary or "").strip()
    except (TypeError, ValueError) as e:
        # Best-effort: a failed summary is logged and reported as empty.
        logger.warning("generate_session_summary_async 失败: {}", e)
        return ""
def generate_rolling_summary_sync(
    llm: Any, existing_summary: str | None, new_chunk_texts: list[str]
) -> str:
    """Merge the existing rolling summary with newly ingested material.

    Args:
        llm: Model handle forwarded to ``invoke_json_object``. Any falsy
            value skips the merge.
        existing_summary: Current rolling summary, or ``None``/empty.
        new_chunk_texts: New chunk texts. Falsy entries are skipped; the
            rest are joined with blank lines, stripped, and truncated to
            ``_max_input_chars()`` characters.

    Returns:
        The stripped ``rolling_summary`` field of the parsed reply. The
        stripped existing summary is returned instead when there is no
        model, ``parse_json_payload`` returns ``None``, the call raises
        ``TypeError``/``ValueError``, or the reply's summary is empty.
        ``""`` when there is neither new material nor an existing summary.
    """
    previous = (existing_summary or "").strip()
    if not llm:
        return previous
    lim = _max_input_chars()
    new_t = "\n\n".join(t for t in new_chunk_texts if t).strip()[:lim]
    if not new_t and not previous:
        return ""
    # Only the prompt copy is truncated; fallbacks return the full summary.
    ex = previous[:lim]
    prompt = (
        "将「已有滚动摘要」与「新材料」合并为更新后的滚动摘要(中文,段落)。"
        "保留人物与时间线索;不编造;可省略无关细节。\n"
        '只输出 JSON{"rolling_summary":"..."}\n\n'
        f"【已有摘要】\n{ex}\n\n【新材料】\n{new_t}"
    )
    try:
        raw = invoke_json_object(
            llm, prompt, max_tokens=3072, agent="memory.rolling_summary_sync"
        )
        parsed = parse_json_payload(raw, RollingSummaryPayload)
        if parsed is None:
            return previous
        merged = str(parsed.rolling_summary or "").strip()
    except (TypeError, ValueError) as e:
        logger.warning("generate_rolling_summary_sync 失败: {}", e)
        return previous
    # An empty reply is treated like the other failure paths, so it cannot
    # wipe a non-empty existing summary.
    return merged or previous
async def generate_rolling_summary_async(
    llm: Any, existing_summary: str | None, new_chunk_texts: list[str]
) -> str:
    """Asynchronously merge the rolling summary with new material.

    Args:
        llm: Model handle forwarded to ``ainvoke_json_object``. Any falsy
            value skips the merge.
        existing_summary: Current rolling summary, or ``None``/empty.
        new_chunk_texts: New chunk texts. Falsy entries are skipped; the
            rest are joined with blank lines, stripped, and truncated to
            ``_max_input_chars()`` characters.

    Returns:
        The stripped ``rolling_summary`` field of the parsed reply. The
        stripped existing summary is returned instead when there is no
        model, ``parse_json_payload`` returns ``None``, the call raises
        ``TypeError``/``ValueError``, or the reply's summary is empty.
        ``""`` when there is neither new material nor an existing summary.
    """
    previous = (existing_summary or "").strip()
    if not llm:
        return previous
    lim = _max_input_chars()
    new_t = "\n\n".join(t for t in new_chunk_texts if t).strip()[:lim]
    if not new_t and not previous:
        return ""
    # Only the prompt copy is truncated; fallbacks return the full summary.
    ex = previous[:lim]
    # Same instruction text as generate_rolling_summary_sync, so both entry
    # points ask the model for the same thing.
    prompt = (
        "将「已有滚动摘要」与「新材料」合并为更新后的滚动摘要(中文,段落)。"
        "保留人物与时间线索;不编造;可省略无关细节。\n"
        '只输出 JSON{"rolling_summary":"..."}\n\n'
        f"【已有摘要】\n{ex}\n\n【新材料】\n{new_t}"
    )
    try:
        raw = await ainvoke_json_object(
            llm, prompt, max_tokens=3072, agent="memory.rolling_summary_async"
        )
        parsed = parse_json_payload(raw, RollingSummaryPayload)
        if parsed is None:
            return previous
        merged = str(parsed.rolling_summary or "").strip()
    except (TypeError, ValueError) as e:
        logger.warning("generate_rolling_summary_async 失败: {}", e)
        return previous
    # An empty reply is treated like the other failure paths, so it cannot
    # wipe a non-empty existing summary.
    return merged or previous