feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路

数据库
- 新增迁移 0003:timeline_events.memory_source_id 外键 → memory_sources,便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化(摘要/事实/时间线),可配置开关与最大字符数
- 新增证据包组装:合并 chunk、摘要、事实、时间线、故事等检索结果;支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展;文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG;分段 ASR 日志与空音频处理;转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符,与分段逻辑一致

后端 - Agent
- reply_limits:按 [SPLIT] 与段落拆段,并保证非空 fallback,供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id;任务成功结…(提交说明在此处被截断)
This commit is contained in:
Kevin
2026-03-27 16:01:28 +08:00
parent 1374f6e8f5
commit e4bf0710c7
70 changed files with 3404 additions and 557 deletions

View File

@@ -26,7 +26,6 @@ from app.agents.memoir.story_route_agent import (
from app.agents.state_schema import MemoirStateSchema
from app.core.logging import get_logger
from app.features.memoir.cover_eligibility import chapter_needs_cover_enqueue
from app.features.memoir.helpers import _chapter_markdown
from app.features.memoir.memoir_images.settings import MemoirImageSettings
from app.features.memoir.models import Chapter
from app.features.memoir.narrative_to_markdown import narrative_to_markdown
@@ -46,26 +45,56 @@ from app.features.story.sync_write import (
logger = get_logger(__name__)
def _gate_narrative_fidelity(oral_text: str, narrative_raw: str, llm: Any) -> str:
"""叙事 JSON 忠实度检查;不通过则回退为单段口述正文"""
def _fidelity_fallback_json(oral: str, existing_canonical: str | None) -> str:
"""忠实度未通过时的安全回退:续写场景保留旧文 + 本段口述,避免只剩一句"""
o = (oral or "").strip()[:15000]
ex = (existing_canonical or "").strip()[:15000]
if ex and o:
return json.dumps(
{"paragraphs": [{"content": ex}, {"content": o}]},
ensure_ascii=False,
)
if ex:
return json.dumps(
{"paragraphs": [{"content": ex}]},
ensure_ascii=False,
)
return json.dumps(
{"paragraphs": [{"content": o}]},
ensure_ascii=False,
)
def _gate_narrative_fidelity(
    oral_text: str,
    narrative_raw: str,
    llm: Any,
    *,
    existing_canonical: str | None = None,
) -> str:
    """Run the narrative-JSON fidelity check and fall back when it fails.

    Args:
        oral_text: Oral transcript the narrative was generated from.
        narrative_raw: Narrative output to validate.
        llm: LLM handle passed to the fidelity agent; a falsy value disables
            the check.
        existing_canonical: Existing story text for continuation writes; it
            is forwarded to the check and preserved in the fallback.

    Returns:
        ``narrative_raw`` unchanged when the check is disabled, when it
        passes, or when there is neither oral nor existing text to fall back
        to; otherwise the JSON produced by ``_fidelity_fallback_json``.
    """
    # Imported inside the function, as in the original block.
    from app.agents.memoir.fidelity_check_agent import FidelityCheckAgent

    if not settings.memoir_fidelity_check_enabled or not llm:
        return narrative_raw

    agent = FidelityCheckAgent()
    # Normalize blank existing text to None.
    ex = (existing_canonical or "").strip() or None
    if agent.passes(
        oral_text=oral_text,
        narrative_json=narrative_raw,
        llm=llm,
        existing_canonical_markdown=ex,
    ):
        return narrative_raw

    o = (oral_text or "").strip()
    logger.warning(
        "event=fidelity_gate_fallback oral_len={} merge={}",
        len(o),
        bool(ex),
    )
    if not o and not ex:
        # Nothing safer to substitute; keep the model output.
        return narrative_raw
    return _fidelity_fallback_json(o, ex)
def _should_fallback_to_transcript(md: str, oral: str) -> bool:
@@ -84,6 +113,28 @@ def _should_fallback_to_transcript(md: str, oral: str) -> bool:
return len(m) < threshold
def _coalesce_story_markdown(
    md: str,
    oral: str,
    existing_for_narrative: str,
) -> str:
    """Pick the story body to persist, falling back to transcript text.

    When the rendered markdown is empty, or ``_should_fallback_to_transcript``
    judges it against the oral text and asks for a fallback, the result is
    the existing story text followed by the current oral passage (whichever
    of the two is non-empty), joined by a blank line. Otherwise the stripped
    markdown is returned.

    Args:
        md: Markdown rendered from the narrative output.
        oral: Oral transcript for the current unit.
        existing_for_narrative: Existing story text in a continuation write;
            blank when there is none.

    Returns:
        The stripped text to store; may be empty when all inputs are blank.
    """
    oral_text = (oral or "").strip()
    existing = (existing_for_narrative or "").strip()
    body = (md or "").strip()

    # "existing + oral", "oral only" and "existing only" are all the join of
    # the non-empty parts.
    transcript_body = "\n\n".join(part for part in (existing, oral_text) if part)

    if not body:
        return transcript_body
    if oral_text and _should_fallback_to_transcript(body, oral_text):
        return transcript_body
    return body
def _is_json_narrative(text: str) -> bool:
if not text or not text.strip():
return False
@@ -102,7 +153,6 @@ def _apply_narrative_fallbacks(
narrative_raw: str,
combined_unit_text: str,
existing_for_narrative: str,
existing_chapter_md: str,
*,
chapter_category: str,
) -> str:
@@ -130,22 +180,22 @@ def _apply_narrative_fallbacks(
)
return f"{existing_for_narrative}\n\n{combined_unit_text}"
if (
not existing_for_narrative
and existing_chapter_md
and not _is_json_narrative(narrative_raw)
and len(narrative_raw) < len(existing_chapter_md) * 0.8
):
logger.warning(
"event=narrative_fallback reason=chapter_length_anomaly action=append_transcript "
"chapter_category={}",
chapter_category,
)
return f"{existing_chapter_md}\n\n{combined_unit_text}"
# 禁止把「章节级 canonical」多故事拼接写进单条 Story:会把全章正文塞进一个故事,
# 且该 story 若挂多章会导致各章阅读视图串台。新建故事时宁可短,也不拼接 existing_chapter_md。
md_check = narrative_to_markdown(narrative_raw).strip()
oral = (combined_unit_text or "").strip()
ex_fb = (existing_for_narrative or "").strip()
if oral and _should_fallback_to_transcript(md_check, oral):
if ex_fb:
logger.warning(
"event=narrative_fallback reason=body_too_short_vs_oral_merge "
"chapter_category={} oral_len={} md_len={}",
chapter_category,
len(oral),
len(md_check),
)
return f"{ex_fb}\n\n{oral}"
logger.warning(
"event=narrative_fallback reason=body_too_short_vs_oral "
"chapter_category={} oral_len={} md_len={}",
@@ -210,7 +260,6 @@ def _run_batch_plan_writes(
chapter: Chapter,
chapter_category: str,
evidence_text: str,
existing_chapter_md: str,
slot_snippets: dict[str, str],
user_id: str,
user_profile: str,
@@ -240,20 +289,24 @@ def _run_batch_plan_writes(
birth_year=user_birth_year,
llm=llm,
)
narrative_raw = _gate_narrative_fidelity(unit_text, narrative_raw, llm)
narrative_raw = _gate_narrative_fidelity(
unit_text,
narrative_raw,
llm,
existing_canonical=existing_for_narrative or None,
)
narrative_raw = _apply_narrative_fallbacks(
narrative_raw,
unit_text,
existing_for_narrative,
existing_chapter_md,
chapter_category=chapter_category,
)
md = narrative_to_markdown(narrative_raw).strip()
if not md:
md = unit_text.strip()
elif _should_fallback_to_transcript(md, unit_text.strip()):
md = unit_text.strip()
md = _coalesce_story_markdown(
narrative_to_markdown(narrative_raw).strip(),
unit_text.strip(),
existing_for_narrative or "",
)
if target_story_id:
append_story_version_sync(session, target_story_id, md)
@@ -347,7 +400,6 @@ def run_story_pipeline_for_category_batch(
slot_snippets[key] = snip
title = chapter.title if chapter else f"{chapter_category} 回忆"
existing_chapter_md = _chapter_markdown(chapter) if chapter else ""
if not chapter:
title = narrative_agent.generate_title(
@@ -404,7 +456,6 @@ def run_story_pipeline_for_category_batch(
chapter=chapter,
chapter_category=chapter_category,
evidence_text=evidence_text,
existing_chapter_md=existing_chapter_md,
slot_snippets=slot_snippets,
user_id=user_id,
user_profile=user_profile,
@@ -439,21 +490,25 @@ def run_story_pipeline_for_category_batch(
birth_year=user_birth_year,
llm=llm,
)
narrative_raw = _gate_narrative_fidelity(combined_text, narrative_raw, llm)
narrative_raw = _gate_narrative_fidelity(
combined_text,
narrative_raw,
llm,
existing_canonical=existing_for_narrative or None,
)
narrative_raw = _apply_narrative_fallbacks(
narrative_raw,
combined_text,
existing_for_narrative,
existing_chapter_md,
chapter_category=chapter_category,
)
md = narrative_to_markdown(narrative_raw).strip()
if not md:
md = combined_text.strip()
elif _should_fallback_to_transcript(md, combined_text.strip()):
md = combined_text.strip()
md = _coalesce_story_markdown(
narrative_to_markdown(narrative_raw).strip(),
combined_text.strip(),
existing_for_narrative or "",
)
do_append = target_story_id is not None