- 新增 utterance_substance:短时/应答/元话语可跳过记忆检索、阶段 LLM 与资料抽取 LLM;可配置 - 输入归一化:LLM 模式默认仅语音/ASR;配置项写入 .env.example - Memoir Phase1:可选 batch LLM 一次性抽取+分类(失败回退逐段);Extraction 空槽位时阶段与 current_stage 对齐,prompt 约束收紧 - 叙事与忠实度:narrative_safety、证据重叠/场合锚点、标题 slots 与履历短语 grounded;fidelity 解析失败 fail-open 可配置 - 章节管线:锁 TTL 上调、锁竞争 Celery 重试、Phase2 immediate singleflight 等;story_pipeline_sync / chapter_compose / memoir_tasks 联动 - Memory:compaction / repo / summarizer / evidence 小修;事实 FTS 未命中是否回退最近事实可配置 - 新增 memoir_pipeline_trace;补充 memoir_reliability 文档与多项回归/门控测试
985 lines
33 KiB
Python
985 lines
33 KiB
Python
"""
|
||
Celery 用:按批次将 transcript 写入 Story,并标记 Chapter 需物化(markdown_compose_dirty)。
|
||
|
||
同步路径不执行 compose;物化由 commit 后 `recompose_chapter` 异步完成。
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import json
|
||
import re
|
||
import time
|
||
import uuid
|
||
from typing import Any
|
||
|
||
from sqlalchemy import select
|
||
from sqlalchemy.orm import Session, joinedload
|
||
|
||
from app.agents.memoir.narrative_agent import NarrativeAgent
|
||
from app.agents.memoir.prompts import (
|
||
format_evidence_chunks_for_prompt,
|
||
format_narrative_user_content,
|
||
)
|
||
from app.agents.stage_constants import (
|
||
CATEGORY_TO_CHAT_STAGE,
|
||
CHAPTER_CATEGORIES,
|
||
CHAT_STAGES,
|
||
STAGE_TO_ORDER,
|
||
)
|
||
from app.agents.memoir.story_route_agent import (
|
||
PLAN_BATCH_MAX_SEGMENTS,
|
||
StoryBatchPlan,
|
||
StoryRouteAgent,
|
||
)
|
||
from app.agents.state_schema import MemoirStateSchema
|
||
from app.core.config import settings
|
||
from app.core.logging import get_logger
|
||
from app.features.memoir.cover_eligibility import chapter_needs_cover_enqueue
|
||
from app.features.memoir.memoir_images.settings import MemoirImageSettings
|
||
from app.features.memoir.models import Chapter
|
||
from app.features.memoir.narrative_to_markdown import narrative_to_markdown
|
||
from app.features.memoir.narrative_safety import (
|
||
body_contains_prompt_artifact,
|
||
evidence_leakage_heuristic,
|
||
evidence_scene_anchor_leak,
|
||
strip_evidence_for_overlap_check,
|
||
)
|
||
from app.features.memoir.oral_normalize import (
|
||
apply_oral_rules,
|
||
normalize_oral_for_memoir,
|
||
)
|
||
from app.features.memoir.repo import (
|
||
mark_chapter_dirty_sync,
|
||
reorder_chapter_story_links_by_life_order_sync,
|
||
)
|
||
from app.features.memory.repo import retrieve_evidence_sync
|
||
from app.features.story.models import Story
|
||
from app.features.story.sync_write import (
|
||
append_story_version_sync,
|
||
count_story_versions_sync,
|
||
create_story_with_version_sync,
|
||
ensure_chapter_story_link_sync,
|
||
list_active_stories_for_user_sync,
|
||
)
|
||
|
||
logger = get_logger(__name__)
|
||
|
||
# 标题中若出现下列多字履历表述,则必须在 hay(正文+口述+传入标题的 slots)中逐字出现,否则剔除无果片段或降级占位
|
||
_MEMOIR_TITLE_HAY_GROUNDING_PHRASES: tuple[str, ...] = (
|
||
"晋升旅长",
|
||
"晋升为旅长",
|
||
"晋升师长",
|
||
"晋升军长",
|
||
"旅长职务",
|
||
"师长职务",
|
||
)
|
||
|
||
# summary 章节跨阶段汇总 slots 时的上限(防叙事 prompt 膨胀)
|
||
MAX_SUMMARY_SLOT_KEYS = 80
|
||
MAX_SUMMARY_SLOT_CHARS = 12_000
|
||
|
||
|
||
def _slot_snippets_for_narrative(
|
||
*,
|
||
state: MemoirStateSchema,
|
||
chapter_category: str,
|
||
user_id: str,
|
||
) -> dict[str, str]:
|
||
"""按章节类目收集 slot 片段;summary 时跨 CHAT_STAGES 汇总并做 key/字符上限。"""
|
||
slot_snippets: dict[str, str] = {}
|
||
if chapter_category == "summary":
|
||
total_chars = 0
|
||
keys_added = 0
|
||
capped = False
|
||
for chat_stage_key in CHAT_STAGES:
|
||
if keys_added >= MAX_SUMMARY_SLOT_KEYS:
|
||
capped = True
|
||
break
|
||
stage_slots = state.slots.get(chat_stage_key, {}) or {}
|
||
for key in sorted(stage_slots.keys()):
|
||
if keys_added >= MAX_SUMMARY_SLOT_KEYS:
|
||
capped = True
|
||
break
|
||
value = stage_slots[key]
|
||
snip = getattr(value, "snippet", None) or (
|
||
value.get("snippet") if isinstance(value, dict) else None
|
||
)
|
||
if not snip:
|
||
continue
|
||
composite = f"{chat_stage_key}_{key}"
|
||
s = str(snip).strip()
|
||
if total_chars + len(s) > MAX_SUMMARY_SLOT_CHARS:
|
||
remain = MAX_SUMMARY_SLOT_CHARS - total_chars
|
||
if remain > 32:
|
||
slot_snippets[composite] = s[:remain] + "…"
|
||
capped = True
|
||
break
|
||
slot_snippets[composite] = s
|
||
total_chars += len(s)
|
||
keys_added += 1
|
||
if capped:
|
||
break
|
||
if capped:
|
||
logger.info(
|
||
"event=summary_slot_snippets_capped user_id={} keys={} chars={}",
|
||
user_id,
|
||
len(slot_snippets),
|
||
total_chars,
|
||
)
|
||
return slot_snippets
|
||
|
||
chat_stage = CATEGORY_TO_CHAT_STAGE.get(chapter_category, chapter_category)
|
||
stage_slots = state.slots.get(chat_stage, {}) or {}
|
||
for key in sorted(stage_slots.keys()):
|
||
value = stage_slots[key]
|
||
snip = getattr(value, "snippet", None) or (
|
||
value.get("snippet") if isinstance(value, dict) else None
|
||
)
|
||
if snip:
|
||
slot_snippets[key] = str(snip).strip()
|
||
return slot_snippets
|
||
|
||
|
||
def _placeholder_title(chapter_category: str) -> str:
|
||
return CHAPTER_CATEGORIES.get(chapter_category, chapter_category)
|
||
|
||
|
||
def _title_slots_filtered_for_generation(
|
||
slot_snippets: dict[str, str], *, md: str, oral_scope: str
|
||
) -> dict[str, str]:
|
||
"""仅保留与正文或本批口述有文本重叠的 slot,降低档案/历史 slot 串台到标题。"""
|
||
if not settings.memoir_title_slots_require_body_or_oral_match:
|
||
return dict(slot_snippets)
|
||
hay = f"{(md or '').strip()}\n{(oral_scope or '').strip()}"
|
||
if not hay.strip():
|
||
return {}
|
||
out: dict[str, str] = {}
|
||
for k, v in (slot_snippets or {}).items():
|
||
if k == "content_excerpt":
|
||
continue
|
||
s = (v or "").strip()
|
||
if len(s) < 2:
|
||
continue
|
||
if s in hay:
|
||
out[k] = s
|
||
continue
|
||
prefix = s[: min(12, len(s))]
|
||
if len(prefix) >= 4 and prefix in hay:
|
||
out[k] = s
|
||
return out
|
||
|
||
|
||
def _title_hay_for_grounding(
|
||
merged_slots: dict[str, str], md: str, oral_scope: str
|
||
) -> str:
|
||
"""与标题模型可见材料一致的依据串(用于事后逐字 grounding)。"""
|
||
parts: list[str] = [(md or "").strip(), (oral_scope or "").strip()]
|
||
for k, v in (merged_slots or {}).items():
|
||
if k == "content_excerpt":
|
||
continue
|
||
if (v or "").strip():
|
||
parts.append(str(v).strip())
|
||
return "\n".join(p for p in parts if p)
|
||
|
||
|
||
def _strip_ungrounded_title_segments(
|
||
title: str,
|
||
hay: str,
|
||
*,
|
||
chapter_category: str,
|
||
) -> str:
|
||
"""
|
||
按 · / • 分节丢弃含未落地履历短语的小节;全部丢弃则占位。
|
||
"""
|
||
if not settings.memoir_title_hay_grounding_strict_phrases_enabled:
|
||
return (title or "").strip() or _placeholder_title(chapter_category)
|
||
t = (title or "").strip()
|
||
h = (hay or "").strip()
|
||
if not t:
|
||
return _placeholder_title(chapter_category)
|
||
segments = [s.strip() for s in re.split(r"\s*[·•]\s*", t) if s.strip()]
|
||
if not segments:
|
||
segments = [t]
|
||
kept: list[str] = []
|
||
for seg in segments:
|
||
bad = any(
|
||
phrase in seg and phrase not in h
|
||
for phrase in _MEMOIR_TITLE_HAY_GROUNDING_PHRASES
|
||
)
|
||
if bad:
|
||
logger.info(
|
||
"event=memoir_title_segment_ungrounded segment_preview={} chapter_category={}",
|
||
seg[:40],
|
||
chapter_category,
|
||
)
|
||
continue
|
||
kept.append(seg)
|
||
if not kept:
|
||
return _placeholder_title(chapter_category)
|
||
if len(kept) == 1:
|
||
return kept[0]
|
||
return " · ".join(kept)
|
||
|
||
|
||
def _maybe_generate_title(
|
||
narrative_agent: "NarrativeAgent",
|
||
*,
|
||
chapter_category: str,
|
||
md: str,
|
||
slot_snippets: dict[str, str],
|
||
user_profile: str,
|
||
user_birth_year: int | None,
|
||
llm: Any,
|
||
oral_scope: str = "",
|
||
narrow_profile_for_title: bool = True,
|
||
) -> str:
|
||
"""Generate a title only when body is long enough; otherwise return placeholder."""
|
||
body_len = len((md or "").strip())
|
||
if body_len < settings.story_title_min_body_chars:
|
||
return _placeholder_title(chapter_category)
|
||
content_excerpt = (md or "").strip()[:300]
|
||
merged_slots = _title_slots_filtered_for_generation(
|
||
slot_snippets, md=md, oral_scope=oral_scope
|
||
)
|
||
if content_excerpt and "content_excerpt" not in merged_slots:
|
||
merged_slots["content_excerpt"] = content_excerpt
|
||
# 标题默认不注入完整档案,仅年龄提示仍可用(来自 birth_year)
|
||
profile_for_title = "" if narrow_profile_for_title else user_profile
|
||
raw_title = narrative_agent.generate_title(
|
||
stage=chapter_category,
|
||
emotion="neutral",
|
||
slots=merged_slots,
|
||
user_profile=profile_for_title,
|
||
birth_year=user_birth_year,
|
||
llm=llm,
|
||
)
|
||
hay = _title_hay_for_grounding(merged_slots, md, oral_scope)
|
||
return _strip_ungrounded_title_segments(
|
||
raw_title, hay, chapter_category=chapter_category
|
||
)
|
||
|
||
|
||
def _route_segment_texts(category_segments: list) -> list[tuple[str, str]]:
|
||
"""批量路由 plan_batch:每段仅做规则归一,避免 N 次 LLM。"""
|
||
out: list[tuple[str, str]] = []
|
||
for seg in category_segments:
|
||
raw = seg.user_input_text or ""
|
||
if (
|
||
settings.memoir_oral_normalize_enabled
|
||
and (settings.memoir_oral_normalize_mode or "rules").strip().lower()
|
||
!= "off"
|
||
):
|
||
t = apply_oral_rules(raw)
|
||
else:
|
||
t = raw
|
||
out.append((str(seg.id), t))
|
||
return out
|
||
|
||
|
||
def _fidelity_fallback_json(oral: str, existing_canonical: str | None) -> str:
|
||
"""忠实度未通过时的安全回退:续写场景保留旧文 + 本段口述,避免只剩一句。"""
|
||
o = (oral or "").strip()[:15000]
|
||
ex = (existing_canonical or "").strip()[:15000]
|
||
if ex and o:
|
||
return json.dumps(
|
||
{"paragraphs": [{"content": ex}, {"content": o}]},
|
||
ensure_ascii=False,
|
||
)
|
||
if ex:
|
||
return json.dumps(
|
||
{"paragraphs": [{"content": ex}]},
|
||
ensure_ascii=False,
|
||
)
|
||
return json.dumps(
|
||
{"paragraphs": [{"content": o}]},
|
||
ensure_ascii=False,
|
||
)
|
||
|
||
|
||
def _gate_narrative_fidelity(
|
||
oral_text: str,
|
||
narrative_raw: str,
|
||
llm: Any,
|
||
*,
|
||
existing_canonical: str | None = None,
|
||
) -> tuple[str, str]:
|
||
"""返回 (文本, fallback 原因);忠实度不通过时第二项为 fidelity_failed。"""
|
||
from app.agents.memoir.fidelity_check_agent import FidelityCheckAgent
|
||
|
||
if not settings.memoir_fidelity_check_enabled or not llm:
|
||
return narrative_raw, "none"
|
||
agent = FidelityCheckAgent()
|
||
ex = (existing_canonical or "").strip() or None
|
||
is_append = bool(ex)
|
||
if agent.passes(
|
||
oral_text=oral_text,
|
||
narrative_json=narrative_raw,
|
||
llm=llm,
|
||
existing_canonical_markdown=ex,
|
||
is_append=is_append,
|
||
):
|
||
return narrative_raw, "none"
|
||
logger.warning(
|
||
"event=fidelity_gate_fallback oral_len={} merge={}",
|
||
len((oral_text or "").strip()),
|
||
bool(ex),
|
||
)
|
||
o = (oral_text or "").strip()
|
||
if not o and not ex:
|
||
return narrative_raw, "none"
|
||
return _fidelity_fallback_json(o, ex), "fidelity_failed"
|
||
|
||
|
||
def _apply_narrative_body_safety(
|
||
md: str,
|
||
*,
|
||
oral: str,
|
||
existing_for_narrative: str,
|
||
evidence_text: str,
|
||
chapter_category: str,
|
||
) -> tuple[str, str]:
|
||
"""prompt 标记或摘录子串疑似渗入正文时,回退为口述/旧文拼接。"""
|
||
m = (md or "").strip()
|
||
ex = (existing_for_narrative or "").strip()
|
||
o = (oral or "").strip()
|
||
min_len = int(settings.memoir_narrative_evidence_overlap_min_chars)
|
||
ev_plain = strip_evidence_for_overlap_check(evidence_text)
|
||
if m and body_contains_prompt_artifact(m):
|
||
logger.warning(
|
||
"event=narrative_invariant_failed reason=prompt_artifact chapter_category={}",
|
||
chapter_category,
|
||
)
|
||
return _coalesce_story_markdown("", oral, existing_for_narrative), (
|
||
"prompt_artifact_in_body"
|
||
)
|
||
if (
|
||
m
|
||
and evidence_text.strip()
|
||
and evidence_leakage_heuristic(m, ev_plain, o, ex, min_len)
|
||
):
|
||
logger.warning(
|
||
"event=narrative_invariant_failed reason=evidence_leak chapter_category={}",
|
||
chapter_category,
|
||
)
|
||
return _coalesce_story_markdown("", oral, existing_for_narrative), (
|
||
"evidence_leak_heuristic"
|
||
)
|
||
if (
|
||
settings.memoir_evidence_scene_anchor_check_enabled
|
||
and m
|
||
and evidence_text.strip()
|
||
and evidence_scene_anchor_leak(m, ev_plain, o, ex)
|
||
):
|
||
logger.warning(
|
||
"event=narrative_invariant_failed reason=evidence_scene_anchor chapter_category={}",
|
||
chapter_category,
|
||
)
|
||
return _coalesce_story_markdown("", oral, existing_for_narrative), (
|
||
"evidence_scene_anchor"
|
||
)
|
||
return m, "none"
|
||
|
||
|
||
def _coalesce_story_markdown(
|
||
md: str,
|
||
oral: str,
|
||
existing_for_narrative: str,
|
||
) -> str:
|
||
"""落库前对齐正文:空输出时续写场景保留「已有故事 + 本段口述」。"""
|
||
o = (oral or "").strip()
|
||
ex = (existing_for_narrative or "").strip()
|
||
m = (md or "").strip()
|
||
if not m:
|
||
if ex and o:
|
||
return f"{ex}\n\n{o}"
|
||
if o:
|
||
return o
|
||
return ex
|
||
return m
|
||
|
||
|
||
def _is_json_narrative(text: str) -> bool:
|
||
if not text or not text.strip():
|
||
return False
|
||
s = text.strip()
|
||
return s.startswith("{") and "paragraphs" in s
|
||
|
||
|
||
def _ordered_text_for_segment_ids(
|
||
category_segments: list, segment_ids: list[str]
|
||
) -> str:
|
||
id_to_text = {seg.id: (seg.user_input_text or "") for seg in category_segments}
|
||
return "\n\n".join(id_to_text.get(sid, "") for sid in segment_ids)
|
||
|
||
|
||
def _apply_narrative_fallbacks(
|
||
narrative_raw: str,
|
||
combined_unit_text: str,
|
||
existing_for_narrative: str,
|
||
*,
|
||
chapter_category: str,
|
||
) -> tuple[str, str]:
|
||
"""返回 (文本, fallback_type);无改写时为 none。
|
||
|
||
仅防 merge/append 场景下模型输出极端缩水(丢旧内容),不再按口述字数比例回退。
|
||
"""
|
||
if existing_for_narrative and _is_json_narrative(narrative_raw):
|
||
merged_md = narrative_to_markdown(narrative_raw).strip()
|
||
ex = (existing_for_narrative or "").strip()
|
||
if ex and len(ex) > 400 and len(merged_md) < len(ex) * 0.25:
|
||
logger.warning(
|
||
"event=narrative_fallback reason=merge_shrink action=append_oral "
|
||
"chapter_category={}",
|
||
chapter_category,
|
||
)
|
||
return f"{ex}\n\n{combined_unit_text.strip()}", "merge_shrink"
|
||
|
||
if (
|
||
existing_for_narrative
|
||
and not _is_json_narrative(narrative_raw)
|
||
and len(narrative_raw) < len(existing_for_narrative) * 0.5
|
||
):
|
||
logger.warning(
|
||
"event=narrative_fallback reason=length_anomaly action=append_raw "
|
||
"chapter_category={}",
|
||
chapter_category,
|
||
)
|
||
return (
|
||
f"{existing_for_narrative}\n\n{combined_unit_text}",
|
||
"coalesce_to_old_plus_oral",
|
||
)
|
||
|
||
return narrative_raw, "none"
|
||
|
||
|
||
def _merge_fallback_type(gate_ft: str, apply_ft: str) -> str:
|
||
if apply_ft != "none":
|
||
return apply_ft
|
||
return gate_ft
|
||
|
||
|
||
def _story_meta_for_route(
|
||
session: Session, candidates: list
|
||
) -> dict[str, dict[str, int]]:
|
||
meta: dict[str, dict[str, int]] = {}
|
||
for s in candidates:
|
||
sid = str(s.id)
|
||
meta[sid] = {
|
||
"char_count": len((s.canonical_markdown or "").strip()),
|
||
"version_count": count_story_versions_sync(session, sid),
|
||
}
|
||
return meta
|
||
|
||
|
||
def _ensure_chapter_record(
|
||
session: Session,
|
||
*,
|
||
user_id: str,
|
||
chapter_category: str,
|
||
title: str,
|
||
source_ids: list[str],
|
||
calculated_order_index: int,
|
||
) -> Chapter:
|
||
stmt_chapter = (
|
||
select(Chapter)
|
||
.where(
|
||
Chapter.user_id == user_id,
|
||
Chapter.category == chapter_category,
|
||
Chapter.is_active == True, # noqa: E712
|
||
)
|
||
.options(
|
||
joinedload(Chapter.images),
|
||
joinedload(Chapter.story_links),
|
||
)
|
||
)
|
||
chapter = session.execute(stmt_chapter).unique().scalar_one_or_none()
|
||
if not chapter:
|
||
chapter = Chapter(
|
||
id=str(uuid.uuid4()),
|
||
user_id=user_id,
|
||
title=title,
|
||
order_index=calculated_order_index,
|
||
status="completed",
|
||
category=chapter_category,
|
||
is_new=True,
|
||
source_segments=source_ids,
|
||
)
|
||
session.add(chapter)
|
||
session.flush()
|
||
else:
|
||
chapter.source_segments = list(
|
||
set((chapter.source_segments or []) + source_ids)
|
||
)
|
||
chapter.is_new = True
|
||
session.flush()
|
||
return chapter
|
||
|
||
|
||
def _run_batch_plan_writes(
|
||
session: Session,
|
||
*,
|
||
plan: StoryBatchPlan,
|
||
category_segments: list,
|
||
chapter: Chapter,
|
||
chapter_category: str,
|
||
evidence_text: str,
|
||
slot_snippets: dict[str, str],
|
||
user_id: str,
|
||
user_profile: str,
|
||
user_birth_year: int | None,
|
||
llm: Any,
|
||
narrative_agent: NarrativeAgent,
|
||
background_voice: str = "default",
|
||
occupation: str = "",
|
||
memoir_correlation_id: str | None = None,
|
||
) -> set[str]:
|
||
dispatch_ids: set[str] = set()
|
||
max_chars = int(settings.story_append_max_canonical_chars)
|
||
max_ver = int(settings.story_append_max_versions)
|
||
for unit in plan.units:
|
||
t0 = time.perf_counter()
|
||
unit_text = _ordered_text_for_segment_ids(category_segments, unit.segment_ids)
|
||
oral_unit = normalize_oral_for_memoir(unit_text, llm=llm)
|
||
ut_raw = (unit_text or "").strip()
|
||
ut_norm = (oral_unit or "").strip()
|
||
if ut_raw != ut_norm:
|
||
logger.info(
|
||
"event=oral_normalized context=batch_unit raw_len={} norm_len={}",
|
||
len(ut_raw),
|
||
len(ut_norm),
|
||
)
|
||
new_content_input = format_narrative_user_content(oral_unit, evidence_text)
|
||
|
||
target_story_id: str | None = None
|
||
existing_for_narrative = ""
|
||
decision_source = "batch_plan"
|
||
if unit.decision == "append_story" and unit.target_story_id:
|
||
st = session.get(Story, unit.target_story_id)
|
||
if st and st.user_id == user_id:
|
||
canon = (st.canonical_markdown or "").strip()
|
||
vc = count_story_versions_sync(session, str(st.id))
|
||
if len(canon) > max_chars or vc >= max_ver:
|
||
logger.info(
|
||
"event=append_overflow_to_new story_id={} canonical_chars={} "
|
||
"versions={} decision_source=batch_plan",
|
||
str(st.id),
|
||
len(canon),
|
||
vc,
|
||
)
|
||
target_story_id = None
|
||
existing_for_narrative = ""
|
||
decision_source = "forced_new_due_to_append_limit"
|
||
else:
|
||
target_story_id = st.id
|
||
existing_for_narrative = canon
|
||
|
||
raw_gen = narrative_agent.generate_narrative(
|
||
stage=chapter_category,
|
||
slots=slot_snippets,
|
||
new_content=new_content_input,
|
||
existing_content=existing_for_narrative,
|
||
user_profile=user_profile,
|
||
birth_year=user_birth_year,
|
||
llm=llm,
|
||
background_voice=background_voice,
|
||
occupation=occupation,
|
||
fallback_plain_oral=ut_norm,
|
||
)
|
||
json_invalid = False
|
||
s0 = (raw_gen or "").strip()
|
||
if s0.startswith("{") and "paragraphs" in s0:
|
||
try:
|
||
json.loads(s0)
|
||
except json.JSONDecodeError:
|
||
json_invalid = True
|
||
|
||
narrative_raw, fb_gate = _gate_narrative_fidelity(
|
||
oral_unit,
|
||
raw_gen,
|
||
llm,
|
||
existing_canonical=existing_for_narrative or None,
|
||
)
|
||
narrative_raw, fb_apply = _apply_narrative_fallbacks(
|
||
narrative_raw,
|
||
oral_unit,
|
||
existing_for_narrative,
|
||
chapter_category=chapter_category,
|
||
)
|
||
fallback_type = _merge_fallback_type(fb_gate, fb_apply)
|
||
if json_invalid and fallback_type == "none":
|
||
fallback_type = "json_invalid"
|
||
|
||
md = _coalesce_story_markdown(
|
||
narrative_to_markdown(narrative_raw).strip(),
|
||
oral_unit.strip(),
|
||
existing_for_narrative or "",
|
||
)
|
||
md, inv_fb = _apply_narrative_body_safety(
|
||
md,
|
||
oral=oral_unit,
|
||
existing_for_narrative=existing_for_narrative or "",
|
||
evidence_text=evidence_text,
|
||
chapter_category=chapter_category,
|
||
)
|
||
if inv_fb != "none":
|
||
fallback_type = (
|
||
inv_fb if fallback_type == "none" else f"{fallback_type}+{inv_fb}"
|
||
)
|
||
|
||
if target_story_id:
|
||
append_story_version_sync(session, str(target_story_id), md)
|
||
dispatch_ids.add(str(target_story_id))
|
||
ensure_chapter_story_link_sync(
|
||
session, chapter_id=str(chapter.id), story_id=str(target_story_id)
|
||
)
|
||
sid_log = target_story_id
|
||
is_append = True
|
||
else:
|
||
story_title = _maybe_generate_title(
|
||
narrative_agent,
|
||
chapter_category=chapter_category,
|
||
md=md,
|
||
slot_snippets=slot_snippets,
|
||
user_profile=user_profile,
|
||
user_birth_year=user_birth_year,
|
||
llm=llm,
|
||
oral_scope=ut_norm,
|
||
)
|
||
st = create_story_with_version_sync(
|
||
session,
|
||
user_id=user_id,
|
||
title=story_title,
|
||
canonical_markdown=md,
|
||
stage=chapter_category,
|
||
)
|
||
dispatch_ids.add(str(st.id))
|
||
ensure_chapter_story_link_sync(
|
||
session, chapter_id=str(chapter.id), story_id=str(st.id)
|
||
)
|
||
sid_log = st.id
|
||
is_append = False
|
||
|
||
elapsed = time.perf_counter() - t0
|
||
logger.info(
|
||
"event=story_generated memoir_correlation_id={} route_type=batch "
|
||
"decision_source={} route_decision={} "
|
||
"unit_segments={} used_evidence={} narrative_json_valid={} fidelity_passed={} "
|
||
"fallback_type={} oral_len={} md_len={} chapter_category={} is_append={} "
|
||
"story_id={} seconds={:.3f} oral_normalize_changed={}",
|
||
memoir_correlation_id or "",
|
||
decision_source,
|
||
unit.decision,
|
||
len(unit.segment_ids),
|
||
bool(evidence_text.strip()),
|
||
_is_json_narrative(raw_gen),
|
||
fb_gate == "none",
|
||
fallback_type,
|
||
len(ut_norm),
|
||
len(md.strip()),
|
||
chapter_category,
|
||
is_append,
|
||
sid_log,
|
||
elapsed,
|
||
ut_raw != ut_norm,
|
||
)
|
||
return dispatch_ids
|
||
|
||
|
||
def run_story_pipeline_for_category_batch(
|
||
session: Session,
|
||
*,
|
||
user_id: str,
|
||
chapter_category: str,
|
||
category_segments: list,
|
||
state: MemoirStateSchema,
|
||
user_profile: str,
|
||
user_birth_year: int | None,
|
||
llm: Any,
|
||
background_voice: str = "default",
|
||
occupation: str = "",
|
||
memoir_correlation_id: str | None = None,
|
||
) -> tuple[Chapter | None, bool, set[str]]:
|
||
"""
|
||
返回 (chapter, needs_cover_enqueue, story_ids_to_dispatch_after_commit)。
|
||
"""
|
||
narrative_agent = NarrativeAgent()
|
||
route_agent = StoryRouteAgent()
|
||
dispatch_ids: set[str] = set()
|
||
|
||
segment_texts = [seg.user_input_text or "" for seg in category_segments]
|
||
combined_text = "\n\n".join(segment_texts)
|
||
source_ids = [seg.id for seg in category_segments]
|
||
|
||
n_units = len(category_segments)
|
||
top_k = int(settings.evidence_top_k_default)
|
||
if n_units > int(settings.evidence_large_batch_threshold):
|
||
top_k = int(settings.evidence_top_k_large_batch)
|
||
try:
|
||
evidence = retrieve_evidence_sync(session, user_id, combined_text, top_k=top_k)
|
||
except Exception as e:
|
||
logger.warning("Evidence 检索跳过: {}", e)
|
||
evidence = {
|
||
"relevant_chunks": [],
|
||
"relevant_summaries": [],
|
||
"relevant_facts": [],
|
||
"timeline_hints": [],
|
||
"relevant_stories": [],
|
||
}
|
||
|
||
evidence_text = format_evidence_chunks_for_prompt(evidence)
|
||
oral_for_memoir = normalize_oral_for_memoir(combined_text, llm=llm)
|
||
ct_raw = (combined_text or "").strip()
|
||
om_norm = (oral_for_memoir or "").strip()
|
||
if ct_raw != om_norm:
|
||
logger.info(
|
||
"event=oral_normalized context=category_batch raw_len={} norm_len={}",
|
||
len(ct_raw),
|
||
len(om_norm),
|
||
)
|
||
new_content_input = format_narrative_user_content(oral_for_memoir, evidence_text)
|
||
logger.info(
|
||
"event=memoir_story_pipeline_start memoir_correlation_id={} user_id={} "
|
||
"chapter_category={} segment_count={}",
|
||
memoir_correlation_id or "",
|
||
user_id,
|
||
chapter_category,
|
||
len(category_segments),
|
||
)
|
||
|
||
stmt_chapter = (
|
||
select(Chapter)
|
||
.where(
|
||
Chapter.user_id == user_id,
|
||
Chapter.category == chapter_category,
|
||
Chapter.is_active == True, # noqa: E712
|
||
)
|
||
.options(
|
||
joinedload(Chapter.images),
|
||
joinedload(Chapter.story_links),
|
||
)
|
||
)
|
||
chapter = session.execute(stmt_chapter).unique().scalar_one_or_none()
|
||
|
||
slot_snippets = _slot_snippets_for_narrative(
|
||
state=state,
|
||
chapter_category=chapter_category,
|
||
user_id=user_id,
|
||
)
|
||
|
||
title = chapter.title if chapter else _placeholder_title(chapter_category)
|
||
|
||
# 仅同 chapter_category(story.stage)的 Story 可作为 append 候选,避免跨章节链接导致多章内容相同
|
||
all_stories = list_active_stories_for_user_sync(session, user_id)
|
||
candidates = [s for s in all_stories if s.stage == chapter_category]
|
||
valid_ids = {str(s.id) for s in candidates}
|
||
story_meta = _story_meta_for_route(session, candidates)
|
||
|
||
# Story route 仅依据本批用户口述;evidence 只进入叙事/合并,不参与 new/append 判定。
|
||
route_transcript = oral_for_memoir
|
||
|
||
calculated_order_index = STAGE_TO_ORDER.get(chapter_category, 999)
|
||
|
||
use_batch_plan = (
|
||
llm
|
||
and len(category_segments) >= 2
|
||
and len(category_segments) <= PLAN_BATCH_MAX_SEGMENTS
|
||
)
|
||
plan: StoryBatchPlan | None = None
|
||
if use_batch_plan:
|
||
segs = _route_segment_texts(category_segments)
|
||
plan = route_agent.plan_batch(
|
||
chapter_category=chapter_category,
|
||
chapter_title=title,
|
||
segments=segs,
|
||
candidate_stories=candidates,
|
||
llm=llm,
|
||
valid_story_ids=valid_ids,
|
||
story_meta=story_meta,
|
||
)
|
||
|
||
chapter = _ensure_chapter_record(
|
||
session,
|
||
user_id=user_id,
|
||
chapter_category=chapter_category,
|
||
title=title,
|
||
source_ids=source_ids,
|
||
calculated_order_index=calculated_order_index,
|
||
)
|
||
|
||
if plan is not None:
|
||
dispatch_ids = _run_batch_plan_writes(
|
||
session,
|
||
plan=plan,
|
||
category_segments=category_segments,
|
||
chapter=chapter,
|
||
chapter_category=chapter_category,
|
||
evidence_text=evidence_text,
|
||
slot_snippets=slot_snippets,
|
||
user_id=user_id,
|
||
user_profile=user_profile,
|
||
user_birth_year=user_birth_year,
|
||
llm=llm,
|
||
narrative_agent=narrative_agent,
|
||
background_voice=background_voice,
|
||
occupation=occupation,
|
||
memoir_correlation_id=memoir_correlation_id,
|
||
)
|
||
else:
|
||
route = route_agent.decide(
|
||
chapter_category=chapter_category,
|
||
chapter_title=title,
|
||
batch_transcript=route_transcript,
|
||
candidate_stories=candidates,
|
||
llm=llm,
|
||
valid_story_ids=valid_ids,
|
||
story_meta=story_meta,
|
||
)
|
||
|
||
t0 = time.perf_counter()
|
||
target_story_id: str | None = None
|
||
existing_for_narrative = ""
|
||
decision_source = "fallback_no_llm" if not llm else "single_decide"
|
||
max_chars = int(settings.story_append_max_canonical_chars)
|
||
max_ver = int(settings.story_append_max_versions)
|
||
if route.decision == "append_story" and route.target_story_id:
|
||
st = session.get(Story, route.target_story_id)
|
||
if st and st.user_id == user_id:
|
||
canon = (st.canonical_markdown or "").strip()
|
||
vc = count_story_versions_sync(session, str(st.id))
|
||
if len(canon) > max_chars or vc >= max_ver:
|
||
logger.info(
|
||
"event=append_overflow_to_new story_id={} canonical_chars={} "
|
||
"versions={} decision_source=single_decide",
|
||
str(st.id),
|
||
len(canon),
|
||
vc,
|
||
)
|
||
target_story_id = None
|
||
existing_for_narrative = ""
|
||
decision_source = "forced_new_due_to_append_limit"
|
||
else:
|
||
target_story_id = st.id
|
||
existing_for_narrative = canon
|
||
|
||
raw_gen = narrative_agent.generate_narrative(
|
||
stage=chapter_category,
|
||
slots=slot_snippets,
|
||
new_content=new_content_input,
|
||
existing_content=existing_for_narrative,
|
||
user_profile=user_profile,
|
||
birth_year=user_birth_year,
|
||
llm=llm,
|
||
background_voice=background_voice,
|
||
occupation=occupation,
|
||
fallback_plain_oral=om_norm,
|
||
)
|
||
json_invalid = False
|
||
s0 = (raw_gen or "").strip()
|
||
if s0.startswith("{") and "paragraphs" in s0:
|
||
try:
|
||
json.loads(s0)
|
||
except json.JSONDecodeError:
|
||
json_invalid = True
|
||
|
||
narrative_raw, fb_gate = _gate_narrative_fidelity(
|
||
oral_for_memoir,
|
||
raw_gen,
|
||
llm,
|
||
existing_canonical=existing_for_narrative or None,
|
||
)
|
||
|
||
narrative_raw, fb_apply = _apply_narrative_fallbacks(
|
||
narrative_raw,
|
||
oral_for_memoir,
|
||
existing_for_narrative,
|
||
chapter_category=chapter_category,
|
||
)
|
||
fallback_type = _merge_fallback_type(fb_gate, fb_apply)
|
||
if json_invalid and fallback_type == "none":
|
||
fallback_type = "json_invalid"
|
||
|
||
md = _coalesce_story_markdown(
|
||
narrative_to_markdown(narrative_raw).strip(),
|
||
oral_for_memoir.strip(),
|
||
existing_for_narrative or "",
|
||
)
|
||
md, inv_fb = _apply_narrative_body_safety(
|
||
md,
|
||
oral=oral_for_memoir,
|
||
existing_for_narrative=existing_for_narrative or "",
|
||
evidence_text=evidence_text,
|
||
chapter_category=chapter_category,
|
||
)
|
||
if inv_fb != "none":
|
||
fallback_type = (
|
||
inv_fb if fallback_type == "none" else f"{fallback_type}+{inv_fb}"
|
||
)
|
||
|
||
do_append = target_story_id is not None
|
||
|
||
if do_append:
|
||
append_story_version_sync(session, str(target_story_id), md)
|
||
dispatch_ids.add(str(target_story_id))
|
||
ensure_chapter_story_link_sync(
|
||
session, chapter_id=str(chapter.id), story_id=str(target_story_id)
|
||
)
|
||
sid_log = target_story_id
|
||
is_append = True
|
||
else:
|
||
story_title = _maybe_generate_title(
|
||
narrative_agent,
|
||
chapter_category=chapter_category,
|
||
md=md,
|
||
slot_snippets=slot_snippets,
|
||
user_profile=user_profile,
|
||
user_birth_year=user_birth_year,
|
||
llm=llm,
|
||
oral_scope=om_norm,
|
||
)
|
||
st = create_story_with_version_sync(
|
||
session,
|
||
user_id=user_id,
|
||
title=story_title,
|
||
canonical_markdown=md,
|
||
stage=chapter_category,
|
||
)
|
||
dispatch_ids.add(str(st.id))
|
||
ensure_chapter_story_link_sync(
|
||
session, chapter_id=str(chapter.id), story_id=str(st.id)
|
||
)
|
||
sid_log = st.id
|
||
is_append = False
|
||
|
||
elapsed = time.perf_counter() - t0
|
||
logger.info(
|
||
"event=story_generated memoir_correlation_id={} route_type=single "
|
||
"decision_source={} route_decision={} "
|
||
"unit_segments={} used_evidence={} narrative_json_valid={} fidelity_passed={} "
|
||
"fallback_type={} oral_len={} md_len={} chapter_category={} is_append={} "
|
||
"story_id={} seconds={:.3f} oral_normalize_changed={}",
|
||
memoir_correlation_id or "",
|
||
decision_source,
|
||
route.decision,
|
||
len(category_segments),
|
||
bool(evidence_text.strip()),
|
||
_is_json_narrative(raw_gen),
|
||
fb_gate == "none",
|
||
fallback_type,
|
||
len(om_norm),
|
||
len(md.strip()),
|
||
chapter_category,
|
||
is_append,
|
||
sid_log,
|
||
elapsed,
|
||
ct_raw != om_norm,
|
||
)
|
||
|
||
reorder_chapter_story_links_by_life_order_sync(session, str(chapter.id))
|
||
mark_chapter_dirty_sync(session, str(chapter.id))
|
||
session.flush()
|
||
|
||
image_settings = MemoirImageSettings.from_env()
|
||
needs_cover = image_settings.enabled and chapter_needs_cover_enqueue(chapter)
|
||
|
||
return chapter, needs_cover, dispatch_ids
|