数据库 - 新增迁移 0003:timeline_events.memory_source_id 外键 → memory_sources,便于按 ingest 源做时间线幂等 后端 - 记忆 - 新增 ingest 后 LLM 富化(摘要/事实/时间线),可配置开关与最大字符数 - 新增证据包组装:合并 chunk、摘要、事实、时间线、故事等检索结果;支持空 query 时是否仍带 rolling 等开关 - repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展;文档 memory-retrieval.md 更新 后端 - 对话 WS - 增加 PING/PONG;分段 ASR 日志与空音频处理;转写失败与「无助手回复」错误提示更明确 - 助手多段回复持久化使用统一分隔符,与分段逻辑一致 后端 - Agent - reply_limits:按 [SPLIT] 与段落拆段,并保证非空 fallback,供 WS 与 TTS 多段下发 后端 - 回忆录任务 - transcript ingest 记录 source_id;任务成功结?
552 lines
18 KiB
Python
552 lines
18 KiB
Python
"""
For Celery workers: write transcripts into Story records batch by batch,
and materialize each Chapter's canonical_markdown.
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import json
|
||
import uuid
|
||
from typing import Any
|
||
|
||
from sqlalchemy import select
|
||
from sqlalchemy.orm import Session, joinedload
|
||
|
||
from app.agents.memoir.narrative_agent import NarrativeAgent
|
||
from app.agents.memoir.prompts import (
|
||
STAGE_TO_ORDER,
|
||
format_evidence_chunks_for_prompt,
|
||
format_narrative_user_content,
|
||
)
|
||
from app.core.config import settings
|
||
from app.agents.memoir.story_route_agent import (
|
||
PLAN_BATCH_MAX_SEGMENTS,
|
||
StoryBatchPlan,
|
||
StoryRouteAgent,
|
||
)
|
||
from app.agents.state_schema import MemoirStateSchema
|
||
from app.core.logging import get_logger
|
||
from app.features.memoir.cover_eligibility import chapter_needs_cover_enqueue
|
||
from app.features.memoir.memoir_images.settings import MemoirImageSettings
|
||
from app.features.memoir.models import Chapter
|
||
from app.features.memoir.narrative_to_markdown import narrative_to_markdown
|
||
from app.features.memoir.repo import (
|
||
compose_chapter_from_story_links_sync,
|
||
reorder_chapter_story_links_by_life_order_sync,
|
||
)
|
||
from app.features.memory.repo import retrieve_evidence_sync
|
||
from app.features.story.models import Story
|
||
from app.features.story.sync_write import (
|
||
append_story_version_sync,
|
||
create_story_with_version_sync,
|
||
ensure_chapter_story_link_sync,
|
||
list_active_stories_for_user_sync,
|
||
)
|
||
|
||
logger = get_logger(__name__)
|
||
|
||
|
||
def _fidelity_fallback_json(oral: str, existing_canonical: str | None) -> str:
|
||
"""忠实度未通过时的安全回退:续写场景保留旧文 + 本段口述,避免只剩一句。"""
|
||
o = (oral or "").strip()[:15000]
|
||
ex = (existing_canonical or "").strip()[:15000]
|
||
if ex and o:
|
||
return json.dumps(
|
||
{"paragraphs": [{"content": ex}, {"content": o}]},
|
||
ensure_ascii=False,
|
||
)
|
||
if ex:
|
||
return json.dumps(
|
||
{"paragraphs": [{"content": ex}]},
|
||
ensure_ascii=False,
|
||
)
|
||
return json.dumps(
|
||
{"paragraphs": [{"content": o}]},
|
||
ensure_ascii=False,
|
||
)
|
||
|
||
|
||
def _gate_narrative_fidelity(
    oral_text: str,
    narrative_raw: str,
    llm: Any,
    *,
    existing_canonical: str | None = None,
) -> str:
    """Run the fidelity check on a narrative and fall back to the oral text on failure.

    The narrative is returned unchanged when the check is disabled in
    settings, when no ``llm`` is supplied, or when the check passes. On a
    failed check the result is the fallback JSON built from the existing
    story (if any) plus the oral text; if both of those are empty the
    narrative is returned as-is.
    """
    from app.agents.memoir.fidelity_check_agent import FidelityCheckAgent

    gate_active = settings.memoir_fidelity_check_enabled and llm
    if not gate_active:
        return narrative_raw

    checker = FidelityCheckAgent()
    # Blank existing text is passed on as None.
    prior_story = (existing_canonical or "").strip() or None
    passed = checker.passes(
        oral_text=oral_text,
        narrative_json=narrative_raw,
        llm=llm,
        existing_canonical_markdown=prior_story,
    )
    if passed:
        return narrative_raw

    spoken = (oral_text or "").strip()
    logger.warning(
        "event=fidelity_gate_fallback oral_len={} merge={}",
        len(spoken),
        bool(prior_story),
    )
    if spoken or prior_story:
        return _fidelity_fallback_json(spoken, prior_story)
    # Nothing to fall back to: keep whatever the model produced.
    return narrative_raw
|
||
|
||
|
||
def _should_fallback_to_transcript(md: str, oral: str) -> bool:
|
||
"""模型输出相对口述明显过短时回退为口述原文(防「1999」类压缩)。"""
|
||
o = (oral or "").strip()
|
||
if not o:
|
||
return False
|
||
m = (md or "").strip()
|
||
if not m:
|
||
return True
|
||
if len(o) < 12:
|
||
return len(m) < len(o)
|
||
ratio = float(settings.memoir_narrative_fallback_body_ratio)
|
||
min_abs = int(settings.memoir_narrative_fallback_min_chars)
|
||
threshold = max(min_abs, int(len(o) * ratio))
|
||
return len(m) < threshold
|
||
|
||
|
||
def _coalesce_story_markdown(
|
||
md: str,
|
||
oral: str,
|
||
existing_for_narrative: str,
|
||
) -> str:
|
||
"""落库前对齐正文:空输出或过短回退时,续写场景保留「已有故事 + 本段口述」。"""
|
||
o = (oral or "").strip()
|
||
ex = (existing_for_narrative or "").strip()
|
||
m = (md or "").strip()
|
||
if not m:
|
||
if ex and o:
|
||
return f"{ex}\n\n{o}"
|
||
if o:
|
||
return o
|
||
return ex
|
||
if o and _should_fallback_to_transcript(m, o):
|
||
if ex:
|
||
return f"{ex}\n\n{o}"
|
||
return o
|
||
return m
|
||
|
||
|
||
def _is_json_narrative(text: str) -> bool:
|
||
if not text or not text.strip():
|
||
return False
|
||
s = text.strip()
|
||
return s.startswith("{") and "paragraphs" in s
|
||
|
||
|
||
def _ordered_text_for_segment_ids(
|
||
category_segments: list, segment_ids: list[str]
|
||
) -> str:
|
||
id_to_text = {seg.id: (seg.user_input_text or "") for seg in category_segments}
|
||
return "\n\n".join(id_to_text.get(sid, "") for sid in segment_ids)
|
||
|
||
|
||
def _apply_narrative_fallbacks(
    narrative_raw: str,
    combined_unit_text: str,
    existing_for_narrative: str,
    *,
    chapter_category: str,
) -> str:
    """Replace a suspiciously short narrative with a conservative fallback.

    Three checks run in order; the first that triggers decides the result:

    1. JSON narrative over an existing story longer than 400 characters whose
       rendered markdown is under 35% of the existing length: return the
       existing story followed by the oral text.
    2. Non-JSON narrative shorter than 80% of the existing story: return the
       existing story followed by the oral text (both unstripped).
    3. Rendered markdown too short relative to the oral text: return the
       existing story plus the oral text, or the oral text alone when there
       is no existing story.

    If none triggers, ``narrative_raw`` is returned unchanged.
    """
    if existing_for_narrative:
        if _is_json_narrative(narrative_raw):
            # Whole-story merge (JSON) shrank abnormally: fall back to the old
            # text plus this unit's oral text so the rewrite cannot lose content.
            merged_md = narrative_to_markdown(narrative_raw).strip()
            prior = (existing_for_narrative or "").strip()
            shrank = bool(prior) and len(prior) > 400 and len(merged_md) < len(prior) * 0.35
            if shrank:
                logger.warning(
                    "event=narrative_fallback reason=merge_shrink action=append_oral "
                    "chapter_category={}",
                    chapter_category,
                )
                return f"{prior}\n\n{combined_unit_text.strip()}"
        elif len(narrative_raw) < len(existing_for_narrative) * 0.8:
            logger.warning(
                "event=narrative_fallback reason=length_anomaly action=append_raw "
                "chapter_category={}",
                chapter_category,
            )
            return f"{existing_for_narrative}\n\n{combined_unit_text}"

    # Never write the chapter-level canonical text (a concatenation of several
    # stories) into a single Story: it would stuff the whole chapter body into
    # one story, and if that story is linked to several chapters their reading
    # views would bleed into each other. For a new story, prefer a short body
    # over concatenating existing_chapter_md.

    rendered = narrative_to_markdown(narrative_raw).strip()
    spoken = (combined_unit_text or "").strip()
    prior_story = (existing_for_narrative or "").strip()

    too_short = spoken and _should_fallback_to_transcript(rendered, spoken)
    if not too_short:
        return narrative_raw

    if prior_story:
        logger.warning(
            "event=narrative_fallback reason=body_too_short_vs_oral_merge "
            "chapter_category={} oral_len={} md_len={}",
            chapter_category,
            len(spoken),
            len(rendered),
        )
        return f"{prior_story}\n\n{spoken}"

    logger.warning(
        "event=narrative_fallback reason=body_too_short_vs_oral "
        "chapter_category={} oral_len={} md_len={}",
        chapter_category,
        len(spoken),
        len(rendered),
    )
    return spoken
|
||
|
||
|
||
def _ensure_chapter_record(
    session: Session,
    *,
    user_id: str,
    chapter_category: str,
    title: str,
    source_ids: list[str],
    calculated_order_index: int,
) -> Chapter:
    """Fetch the user's active chapter for a category, creating it if missing.

    Args:
        session: Synchronous SQLAlchemy session; it is flushed before returning.
        user_id: Owner of the chapter.
        chapter_category: Category used to look the chapter up.
        title: Title used only when a new chapter is created.
        source_ids: Segment ids of the current batch to record on the chapter.
        calculated_order_index: ``order_index`` used only for a new chapter.

    Returns:
        The existing or newly created chapter, with ``is_new`` set to True and
        ``source_segments`` containing the batch ids.

    Raises:
        sqlalchemy.exc.MultipleResultsFound: if more than one active chapter
            matches (raised by ``scalar_one_or_none``).
    """
    stmt_chapter = (
        select(Chapter)
        .where(
            Chapter.user_id == user_id,
            Chapter.category == chapter_category,
            Chapter.is_active == True,  # noqa: E712
        )
        .options(
            joinedload(Chapter.images),
            joinedload(Chapter.story_links),
        )
    )
    # unique() is required because the joined collection loads duplicate rows.
    chapter = session.execute(stmt_chapter).unique().scalar_one_or_none()

    if chapter is None:
        chapter = Chapter(
            id=str(uuid.uuid4()),
            user_id=user_id,
            title=title,
            order_index=calculated_order_index,
            status="completed",
            category=chapter_category,
            is_new=True,
            # Copy so the model does not alias the caller's list.
            source_segments=list(dict.fromkeys(source_ids)),
        )
        session.add(chapter)
    else:
        # De-duplicate while keeping first-seen order. list(set(...)) would
        # reorder the stored ids arbitrarily on every run.
        chapter.source_segments = list(
            dict.fromkeys([*(chapter.source_segments or []), *source_ids])
        )
        chapter.is_new = True

    session.flush()
    return chapter
|
||
|
||
|
||
def _run_batch_plan_writes(
    session: Session,
    *,
    plan: StoryBatchPlan,
    category_segments: list,
    chapter: Chapter,
    chapter_category: str,
    evidence_text: str,
    slot_snippets: dict[str, str],
    user_id: str,
    user_profile: str,
    user_birth_year: int | None,
    llm: Any,
    narrative_agent: NarrativeAgent,
) -> set[str]:
    """Write one story (new or appended version) per unit of a batch plan.

    For each unit: gather its segment text, generate a narrative, pass it
    through the fidelity gate and the length fallbacks, then either append a
    version to the target story or create a new story. Every written story is
    linked to ``chapter``.

    Returns:
        Ids of all stories written by this call.
    """
    touched_story_ids: set[str] = set()

    for unit in plan.units:
        unit_text = _ordered_text_for_segment_ids(category_segments, unit.segment_ids)
        prompt_input = format_narrative_user_content(unit_text, evidence_text)

        # Appending is honoured only if the target exists and belongs to this
        # user; otherwise the unit is written as a new story.
        append_target_id: str | None = None
        prior_markdown = ""
        wants_append = unit.decision == "append_story" and unit.target_story_id
        if wants_append:
            candidate = session.get(Story, unit.target_story_id)
            if candidate and candidate.user_id == user_id:
                append_target_id = candidate.id
                prior_markdown = (candidate.canonical_markdown or "").strip()

        draft = narrative_agent.generate_narrative(
            stage=chapter_category,
            slots=slot_snippets,
            new_content=prompt_input,
            existing_content=prior_markdown,
            user_profile=user_profile,
            birth_year=user_birth_year,
            llm=llm,
        )
        draft = _gate_narrative_fidelity(
            unit_text,
            draft,
            llm,
            existing_canonical=prior_markdown or None,
        )
        draft = _apply_narrative_fallbacks(
            draft,
            unit_text,
            prior_markdown,
            chapter_category=chapter_category,
        )

        story_md = _coalesce_story_markdown(
            narrative_to_markdown(draft).strip(),
            unit_text.strip(),
            prior_markdown or "",
        )

        if append_target_id:
            append_story_version_sync(session, append_target_id, story_md)
            written_story_id = append_target_id
        else:
            story_title = (unit.new_story_title or "").strip()
            if not story_title:
                story_title = narrative_agent.generate_title(
                    stage=chapter_category,
                    emotion="neutral",
                    slots=slot_snippets,
                    user_profile=user_profile,
                    birth_year=user_birth_year,
                    llm=llm,
                )
            created = create_story_with_version_sync(
                session,
                user_id=user_id,
                title=story_title,
                canonical_markdown=story_md,
                stage=chapter_category,
            )
            written_story_id = created.id

        touched_story_ids.add(written_story_id)
        ensure_chapter_story_link_sync(
            session, chapter_id=chapter.id, story_id=written_story_id
        )

    return touched_story_ids
|
||
|
||
|
||
def run_story_pipeline_for_category_batch(
    session: Session,
    *,
    user_id: str,
    chapter_category: str,
    category_segments: list,
    state: MemoirStateSchema,
    user_profile: str,
    user_birth_year: int | None,
    llm: Any,
) -> tuple[Chapter | None, bool, set[str]]:
    """Turn one category's batch of transcript segments into stories and refresh its chapter.

    Steps: retrieve evidence (best effort), resolve the chapter title, plan or
    route the batch, ensure the chapter row exists, write the stories, then
    reorder the chapter's story links and recompose the chapter.

    Args:
        session: Synchronous SQLAlchemy session. It is flushed, not committed.
        user_id: Owner of the chapter and stories.
        chapter_category: Category (stage) the segments belong to.
        category_segments: Objects exposing ``id`` and ``user_input_text``.
        state: Memoir state; ``state.slots[chapter_category]`` supplies snippets.
        user_profile: Profile text forwarded to the narrative agent.
        user_birth_year: Birth year forwarded to the narrative agent.
        llm: LLM handle; when falsy the batch planner is not used.

    Returns:
        ``(chapter, needs_cover_enqueue, story_ids_to_dispatch_after_commit)``.
        For an empty batch nothing is written and ``(None, False, set())`` is
        returned.
    """
    if not category_segments:
        # An empty batch would otherwise create a chapter and an empty story.
        logger.warning(
            "event=story_pipeline_skip reason=empty_batch chapter_category={}",
            chapter_category,
        )
        return None, False, set()

    narrative_agent = NarrativeAgent()
    route_agent = StoryRouteAgent()
    dispatch_ids: set[str] = set()

    segment_texts = [seg.user_input_text or "" for seg in category_segments]
    combined_text = "\n\n".join(segment_texts)
    source_ids = [seg.id for seg in category_segments]

    # Evidence is optional context: a retrieval failure must not stop the write.
    try:
        evidence = retrieve_evidence_sync(session, user_id, combined_text, top_k=10)
    except Exception as e:
        logger.warning("Evidence 检索跳过: {}", e)
        evidence = {
            "relevant_chunks": [],
            "relevant_summaries": [],
            "relevant_facts": [],
            "timeline_hints": [],
            "relevant_stories": [],
        }

    evidence_text = format_evidence_chunks_for_prompt(evidence)

    stmt_chapter = (
        select(Chapter)
        .where(
            Chapter.user_id == user_id,
            Chapter.category == chapter_category,
            Chapter.is_active == True,  # noqa: E712
        )
        .options(
            joinedload(Chapter.images),
            joinedload(Chapter.story_links),
        )
    )
    chapter = session.execute(stmt_chapter).unique().scalar_one_or_none()

    # Slot values may be objects with a ``snippet`` attribute or plain dicts.
    slot_snippets: dict[str, str] = {}
    stage_slots = state.slots.get(chapter_category, {}) or {}
    for key, value in stage_slots.items():
        snip = getattr(value, "snippet", None) or (
            value.get("snippet") if isinstance(value, dict) else None
        )
        if snip:
            slot_snippets[key] = snip

    if chapter:
        title = chapter.title
    else:
        # Use the default title if the agent returns nothing usable.
        title = narrative_agent.generate_title(
            stage=chapter_category,
            emotion="neutral",
            slots=slot_snippets,
            user_profile=user_profile,
            birth_year=user_birth_year,
            llm=llm,
        ) or f"{chapter_category} 回忆"

    candidates = list_active_stories_for_user_sync(session, user_id)
    valid_ids = {s.id for s in candidates}

    calculated_order_index = STAGE_TO_ORDER.get(chapter_category, 999)

    # Planning needs an LLM and between 2 and PLAN_BATCH_MAX_SEGMENTS segments.
    use_batch_plan = bool(llm) and (
        2 <= len(category_segments) <= PLAN_BATCH_MAX_SEGMENTS
    )
    plan: StoryBatchPlan | None = None
    if use_batch_plan:
        segs = [(seg.id, seg.user_input_text or "") for seg in category_segments]
        plan = route_agent.plan_batch(
            chapter_category=chapter_category,
            chapter_title=title,
            segments=segs,
            candidate_stories=candidates,
            llm=llm,
            valid_story_ids=valid_ids,
        )

    chapter = _ensure_chapter_record(
        session,
        user_id=user_id,
        chapter_category=chapter_category,
        title=title,
        source_ids=source_ids,
        calculated_order_index=calculated_order_index,
    )

    if plan is not None:
        dispatch_ids = _run_batch_plan_writes(
            session,
            plan=plan,
            category_segments=category_segments,
            chapter=chapter,
            chapter_category=chapter_category,
            evidence_text=evidence_text,
            slot_snippets=slot_snippets,
            user_id=user_id,
            user_profile=user_profile,
            user_birth_year=user_birth_year,
            llm=llm,
            narrative_agent=narrative_agent,
        )
    else:
        # Single-route path: the whole batch becomes one story write.
        new_content_input = format_narrative_user_content(combined_text, evidence_text)
        batch_for_route = (
            f"{combined_text}\n\n{evidence_text}"
            if evidence_text.strip()
            else combined_text
        )
        route = route_agent.decide(
            chapter_category=chapter_category,
            chapter_title=title,
            batch_transcript=batch_for_route,
            candidate_stories=candidates,
            llm=llm,
            valid_story_ids=valid_ids,
        )

        # Append only to a story that exists and belongs to this user.
        target_story_id: str | None = None
        existing_for_narrative = ""
        if route.decision == "append_story" and route.target_story_id:
            st = session.get(Story, route.target_story_id)
            if st and st.user_id == user_id:
                target_story_id = st.id
                existing_for_narrative = (st.canonical_markdown or "").strip()

        narrative_raw = narrative_agent.generate_narrative(
            stage=chapter_category,
            slots=slot_snippets,
            new_content=new_content_input,
            existing_content=existing_for_narrative,
            user_profile=user_profile,
            birth_year=user_birth_year,
            llm=llm,
        )
        narrative_raw = _gate_narrative_fidelity(
            combined_text,
            narrative_raw,
            llm,
            existing_canonical=existing_for_narrative or None,
        )
        narrative_raw = _apply_narrative_fallbacks(
            narrative_raw,
            combined_text,
            existing_for_narrative,
            chapter_category=chapter_category,
        )

        md = _coalesce_story_markdown(
            narrative_to_markdown(narrative_raw).strip(),
            combined_text.strip(),
            existing_for_narrative or "",
        )

        if target_story_id is not None:
            append_story_version_sync(session, target_story_id, md)
            written_story_id = target_story_id
        else:
            story_title = (route.new_story_title or "").strip()
            if not story_title:
                story_title = narrative_agent.generate_title(
                    stage=chapter_category,
                    emotion="neutral",
                    slots=slot_snippets,
                    user_profile=user_profile,
                    birth_year=user_birth_year,
                    llm=llm,
                )
            st = create_story_with_version_sync(
                session,
                user_id=user_id,
                title=story_title,
                canonical_markdown=md,
                stage=chapter_category,
            )
            written_story_id = st.id

        dispatch_ids.add(written_story_id)
        ensure_chapter_story_link_sync(
            session, chapter_id=chapter.id, story_id=written_story_id
        )

    reorder_chapter_story_links_by_life_order_sync(session, chapter.id)
    compose_chapter_from_story_links_sync(session, chapter.id)
    session.flush()

    image_settings = MemoirImageSettings.from_env()
    needs_cover = image_settings.enabled and chapter_needs_cover_enqueue(chapter)

    return chapter, needs_cover, dispatch_ids
|