feat(memoir): 路由阶段不要求标题，按正文字数门闸延迟 LLM 标题

- 从 story 路由 prompt/校验中移除 new_story_title，改由叙事管线在正文足够长时生成
- 新增 story_title_min_body_chars；短正文使用章节类别占位标题
- CATEGORY_TO_CHAT_STAGE 对齐访谈 state.slots 的 stage 键
- 删除相对口述长度的叙事回退，仅保留 merge JSON 极端缩水类 fallback
- evidence_format：解析 object_json 并优化事实条目标点符号
- 更新 narrative / experience 相关单测
2026-04-02 14:38:40 +08:00
parent bb16d3a5c9
commit 3ae39838c0
8 changed files with 125 additions and 132 deletions
--- a/api/app/features/memoir/story_pipeline_sync.py
+++ b/api/app/features/memoir/story_pipeline_sync.py
@@ -19,7 +19,11 @@ from app.agents.memoir.prompts import (
    format_evidence_chunks_for_prompt,
    format_narrative_user_content,
 )
-from app.agents.stage_constants import STAGE_TO_ORDER
+from app.agents.stage_constants import (
+    CATEGORY_TO_CHAT_STAGE,
+    CHAPTER_CATEGORIES,
+    STAGE_TO_ORDER,
+)
 from app.agents.memoir.story_route_agent import (
    PLAN_BATCH_MAX_SEGMENTS,
    StoryBatchPlan,
@@ -53,6 +57,38 @@ from app.features.story.sync_write import (
 logger = get_logger(__name__)


+def _placeholder_title(chapter_category: str) -> str:
+    """Return the stand-in title for a chapter category.
+
+    Looks ``chapter_category`` up in ``CHAPTER_CATEGORIES`` and returns the
+    mapped value; when the key is absent the category key itself is returned.
+    """
+    label = CHAPTER_CATEGORIES.get(chapter_category, chapter_category)
+    return label
+
+
+def _maybe_generate_title(
+    narrative_agent: "NarrativeAgent",
+    *,
+    chapter_category: str,
+    md: str,
+    slot_snippets: dict[str, str],
+    user_profile: str,
+    user_birth_year: int | None,
+    llm: Any,
+) -> str:
+    """Return a story title, calling the LLM only when the body is long enough.
+
+    Bodies shorter than ``settings.story_title_min_body_chars`` (measured after
+    stripping surrounding whitespace) get the chapter-category placeholder and
+    ``generate_title`` is not called. Otherwise the first 300 characters of the
+    stripped body are passed to ``narrative_agent.generate_title`` as an extra
+    ``content_excerpt`` slot, unless ``slot_snippets`` already has that key.
+
+    Args:
+        narrative_agent: Agent whose ``generate_title`` produces the title.
+        chapter_category: Chapter category key; forwarded as ``stage`` and
+            used to look up the placeholder title.
+        md: Story body; a falsy value is treated as an empty body.
+        slot_snippets: Slot name to snippet mapping; copied, never mutated.
+        user_profile: Forwarded unchanged to ``generate_title``.
+        user_birth_year: Forwarded to ``generate_title`` as ``birth_year``.
+        llm: Forwarded unchanged to ``generate_title``.
+
+    Returns:
+        The generated title, or the placeholder title when the body is too
+        short or the generated title is blank.
+    """
+    # Strip once and reuse for both the length gate and the excerpt.
+    body = (md or "").strip()
+    if len(body) < settings.story_title_min_body_chars:
+        return _placeholder_title(chapter_category)
+
+    merged_slots = dict(slot_snippets)
+    # Guard on `body` so a zero threshold with an empty body adds no empty slot.
+    if body and "content_excerpt" not in merged_slots:
+        merged_slots["content_excerpt"] = body[:300]
+
+    title = narrative_agent.generate_title(
+        stage=chapter_category,
+        emotion="neutral",
+        slots=merged_slots,
+        user_profile=user_profile,
+        birth_year=user_birth_year,
+        llm=llm,
+    )
+    # Defensive: never hand a blank title back to the caller.
+    # NOTE(review): assumes generate_title can return an empty/whitespace
+    # string or None — confirm against NarrativeAgent.
+    if not (title or "").strip():
+        return _placeholder_title(chapter_category)
+    return title
+
+
 def _route_segment_texts(category_segments: list) -> list[tuple[str, str]]:
    """批量路由 plan_batch：每段仅做规则归一，避免 N 次 LLM。"""
    out: list[tuple[str, str]] = []
@@ -122,28 +158,12 @@ def _gate_narrative_fidelity(
    return _fidelity_fallback_json(o, ex), "fidelity_failed"


-def _should_fallback_to_transcript(md: str, oral: str) -> bool:
-    """模型输出相对口述极度过短时才回退（仅防极端压缩如「1999」）。"""
-    o = (oral or "").strip()
-    if not o:
-        return False
-    m = (md or "").strip()
-    if not m:
-        return True
-    if len(o) < 12:
-        return len(m) < len(o)
-    ratio = float(settings.memoir_narrative_fallback_body_ratio)
-    min_abs = int(settings.memoir_narrative_fallback_min_chars)
-    threshold = max(min_abs, int(len(o) * ratio))
-    return len(m) < threshold
-
-
 def _coalesce_story_markdown(
    md: str,
    oral: str,
    existing_for_narrative: str,
 ) -> str:
-    """落库前对齐正文：空输出或过短回退时，续写场景保留「已有故事 + 本段口述」。"""
+    """落库前对齐正文：空输出时续写场景保留「已有故事 + 本段口述」。"""
    o = (oral or "").strip()
    ex = (existing_for_narrative or "").strip()
    m = (md or "").strip()
@@ -153,10 +173,6 @@ def _coalesce_story_markdown(
        if o:
            return o
        return ex
-    if o and _should_fallback_to_transcript(m, o):
-        if ex:
-            return f"{ex}\n\n{o}"
-        return o
    return m


@@ -181,8 +197,10 @@ def _apply_narrative_fallbacks(
    *,
    chapter_category: str,
 ) -> tuple[str, str]:
-    """返回 (文本, fallback_type)；无改写时为 none。"""
-    # 整篇合并（JSON）输出异常缩水：回退为旧文 + 本段口述，避免覆盖丢失
+    """返回 (文本, fallback_type)；无改写时为 none。
+
+    仅防 merge/append 场景下模型输出极端缩水（丢旧内容），不再按口述字数比例回退。
+    """
    if existing_for_narrative and _is_json_narrative(narrative_raw):
        merged_md = narrative_to_markdown(narrative_raw).strip()
        ex = (existing_for_narrative or "").strip()
@@ -209,28 +227,6 @@ def _apply_narrative_fallbacks(
            "coalesce_to_old_plus_oral",
        )

-    md_check = narrative_to_markdown(narrative_raw).strip()
-    oral = (combined_unit_text or "").strip()
-    ex_fb = (existing_for_narrative or "").strip()
-    if oral and _should_fallback_to_transcript(md_check, oral):
-        if ex_fb:
-            logger.warning(
-                "event=narrative_fallback reason=body_too_short_vs_oral_merge "
-                "chapter_category={} oral_len={} md_len={}",
-                chapter_category,
-                len(oral),
-                len(md_check),
-            )
-            return f"{ex_fb}\n\n{oral}", "coalesce_to_old_plus_oral"
-        logger.warning(
-            "event=narrative_fallback reason=body_too_short_vs_oral "
-            "chapter_category={} oral_len={} md_len={}",
-            chapter_category,
-            len(oral),
-            len(md_check),
-        )
-        return oral, "coalesce_to_oral"
-
    return narrative_raw, "none"


@@ -404,16 +400,15 @@ def _run_batch_plan_writes(
            sid_log = target_story_id
            is_append = True
        else:
-            story_title = (unit.new_story_title or "").strip()
-            if not story_title:
-                story_title = narrative_agent.generate_title(
-                    stage=chapter_category,
-                    emotion="neutral",
-                    slots=slot_snippets,
-                    user_profile=user_profile,
-                    birth_year=user_birth_year,
-                    llm=llm,
-                )
+            story_title = _maybe_generate_title(
+                narrative_agent,
+                chapter_category=chapter_category,
+                md=md,
+                slot_snippets=slot_snippets,
+                user_profile=user_profile,
+                user_birth_year=user_birth_year,
+                llm=llm,
+            )
            st = create_story_with_version_sync(
                session,
                user_id=user_id,
@@ -519,7 +514,8 @@ def run_story_pipeline_for_category_batch(
    chapter = session.execute(stmt_chapter).unique().scalar_one_or_none()

    slot_snippets: dict[str, str] = {}
-    stage_slots = state.slots.get(chapter_category, {}) or {}
+    chat_stage = CATEGORY_TO_CHAT_STAGE.get(chapter_category, chapter_category)
+    stage_slots = state.slots.get(chat_stage, {}) or {}
    for key, value in stage_slots.items():
        snip = getattr(value, "snippet", None) or (
            value.get("snippet") if isinstance(value, dict) else None
@@ -527,17 +523,7 @@ def run_story_pipeline_for_category_batch(
        if snip:
            slot_snippets[key] = snip

-    title = chapter.title if chapter else f"{chapter_category} 回忆"
-
-    if not chapter:
-        title = narrative_agent.generate_title(
-            stage=chapter_category,
-            emotion="neutral",
-            slots=slot_snippets,
-            user_profile=user_profile,
-            birth_year=user_birth_year,
-            llm=llm,
-        )
+    title = chapter.title if chapter else _placeholder_title(chapter_category)

    # 仅同 chapter_category（story.stage）的 Story 可作为 append 候选，避免跨章节链接导致多章内容相同
    all_stories = list_active_stories_for_user_sync(session, user_id)
@@ -684,16 +670,15 @@ def run_story_pipeline_for_category_batch(
            sid_log = target_story_id
            is_append = True
        else:
-            story_title = (route.new_story_title or "").strip()
-            if not story_title:
-                story_title = narrative_agent.generate_title(
-                    stage=chapter_category,
-                    emotion="neutral",
-                    slots=slot_snippets,
-                    user_profile=user_profile,
-                    birth_year=user_birth_year,
-                    llm=llm,
-                )
+            story_title = _maybe_generate_title(
+                narrative_agent,
+                chapter_category=chapter_category,
+                md=md,
+                slot_snippets=slot_snippets,
+                user_profile=user_profile,
+                user_birth_year=user_birth_year,
+                llm=llm,
+            )
            st = create_story_with_version_sync(
                session,
                user_id=user_id,