feat(api+app): 对话阶段化、回忆录流水线与客户端会话体验

- DB: segments 用户输入文本（Alembic 0002）
- Chat: 阶段检测/阶段提示/回复限制，编排与访谈/画像 prompts 调整
- Memoir: 忠实度检查 agent，叙事与分类等链路更新
- Core: agent 日志、Alembic 启动、LangChain/日志/配置等
- Story: time_hints；Memory 检索与相关测试
- Expo: 助手头像、会话页与消息拆分、实时会话与文案/i18n
- Docs/scripts/tests: 迁移脚本、LLM JSON/记忆检索文档、新增单测
2026-03-26 12:13:36 +08:00
parent 49b089354c
commit a3f61fcc0f
94 changed files with 3332 additions and 672 deletions
--- a/api/app/features/memoir/story_pipeline_sync.py
+++ b/api/app/features/memoir/story_pipeline_sync.py
@@ -4,6 +4,7 @@ Celery 用：按批次将 transcript 写入 Story，并物化 Chapter canonical_

 from __future__ import annotations

+import json
 import uuid
 from typing import Any

@@ -29,7 +30,10 @@ from app.features.memoir.helpers import _chapter_markdown
 from app.features.memoir.memoir_images.settings import MemoirImageSettings
 from app.features.memoir.models import Chapter
 from app.features.memoir.narrative_to_markdown import narrative_to_markdown
-from app.features.memoir.repo import compose_chapter_from_story_links_sync
+from app.features.memoir.repo import (
+    compose_chapter_from_story_links_sync,
+    reorder_chapter_story_links_by_life_order_sync,
+)
 from app.features.memory.repo import retrieve_evidence_sync
 from app.features.story.models import Story
 from app.features.story.sync_write import (
@@ -42,6 +46,28 @@ from app.features.story.sync_write import (
 logger = get_logger(__name__)


+def _gate_narrative_fidelity(oral_text: str, narrative_raw: str, llm: Any) -> str:
+    """叙事 JSON 忠实度检查；不通过则回退为单段口述正文。"""
+    from app.agents.memoir.fidelity_check_agent import FidelityCheckAgent
+
+    if not settings.memoir_fidelity_check_enabled or not llm:
+        return narrative_raw
+    agent = FidelityCheckAgent()
+    if agent.passes(oral_text=oral_text, narrative_json=narrative_raw, llm=llm):
+        return narrative_raw
+    logger.warning(
+        "event=fidelity_gate_fallback oral_len={}",
+        len((oral_text or "").strip()),
+    )
+    o = (oral_text or "").strip()
+    if not o:
+        return narrative_raw
+    return json.dumps(
+        {"paragraphs": [{"content": o[:15000]}]},
+        ensure_ascii=False,
+    )
+
+
 def _should_fallback_to_transcript(md: str, oral: str) -> bool:
    """模型输出相对口述明显过短时回退为口述原文（防「1999」类压缩）。"""
    o = (oral or "").strip()
@@ -68,7 +94,7 @@ def _is_json_narrative(text: str) -> bool:
 def _ordered_text_for_segment_ids(
    category_segments: list, segment_ids: list[str]
 ) -> str:
-    id_to_text = {seg.id: (seg.transcript_text or "") for seg in category_segments}
+    id_to_text = {seg.id: (seg.user_input_text or "") for seg in category_segments}
    return "\n\n".join(id_to_text.get(sid, "") for sid in segment_ids)


@@ -80,12 +106,28 @@ def _apply_narrative_fallbacks(
    *,
    chapter_category: str,
 ) -> str:
+    # 整篇合并（JSON）输出异常缩水：回退为旧文 + 本段口述，避免覆盖丢失
+    if existing_for_narrative and _is_json_narrative(narrative_raw):
+        merged_md = narrative_to_markdown(narrative_raw).strip()
+        ex = (existing_for_narrative or "").strip()
+        if ex and len(ex) > 400 and len(merged_md) < len(ex) * 0.35:
+            logger.warning(
+                "event=narrative_fallback reason=merge_shrink action=append_oral "
+                "chapter_category={}",
+                chapter_category,
+            )
+            return f"{ex}\n\n{combined_unit_text.strip()}"
+
    if (
        existing_for_narrative
        and not _is_json_narrative(narrative_raw)
        and len(narrative_raw) < len(existing_for_narrative) * 0.8
    ):
-        logger.warning("叙事长度异常: 回退为原文追加")
+        logger.warning(
+            "event=narrative_fallback reason=length_anomaly action=append_raw "
+            "chapter_category={}",
+            chapter_category,
+        )
        return f"{existing_for_narrative}\n\n{combined_unit_text}"

    if (
@@ -95,7 +137,8 @@ def _apply_narrative_fallbacks(
        and len(narrative_raw) < len(existing_chapter_md) * 0.8
    ):
        logger.warning(
-            "章节级长度异常: 回退为 transcript 追加, category=%s",
+            "event=narrative_fallback reason=chapter_length_anomaly action=append_transcript "
+            "chapter_category={}",
            chapter_category,
        )
        return f"{existing_chapter_md}\n\n{combined_unit_text}"
@@ -104,7 +147,8 @@ def _apply_narrative_fallbacks(
    oral = (combined_unit_text or "").strip()
    if oral and _should_fallback_to_transcript(md_check, oral):
        logger.warning(
-            "叙事相对口述过短，回退为口述原文 category=%s oral_len=%s md_len=%s",
+            "event=narrative_fallback reason=body_too_short_vs_oral "
+            "chapter_category={} oral_len={} md_len={}",
            chapter_category,
            len(oral),
            len(md_check),
@@ -196,6 +240,7 @@ def _run_batch_plan_writes(
            birth_year=user_birth_year,
            llm=llm,
        )
+        narrative_raw = _gate_narrative_fidelity(unit_text, narrative_raw, llm)
        narrative_raw = _apply_narrative_fallbacks(
            narrative_raw,
            unit_text,
@@ -259,14 +304,14 @@ def run_story_pipeline_for_category_batch(
    route_agent = StoryRouteAgent()
    dispatch_ids: set[str] = set()

-    segment_texts = [seg.transcript_text or "" for seg in category_segments]
+    segment_texts = [seg.user_input_text or "" for seg in category_segments]
    combined_text = "\n\n".join(segment_texts)
    source_ids = [seg.id for seg in category_segments]

    try:
        evidence = retrieve_evidence_sync(session, user_id, combined_text, top_k=10)
    except Exception as e:
-        logger.warning("Evidence 检索跳过: %s", e)
+        logger.warning("Evidence 检索跳过: {}", e)
        evidence = {
            "relevant_chunks": [],
            "relevant_summaries": [],
@@ -332,7 +377,7 @@ def run_story_pipeline_for_category_batch(
    )
    plan: StoryBatchPlan | None = None
    if use_batch_plan:
-        segs = [(seg.id, seg.transcript_text or "") for seg in category_segments]
+        segs = [(seg.id, seg.user_input_text or "") for seg in category_segments]
        plan = route_agent.plan_batch(
            chapter_category=chapter_category,
            chapter_title=title,
@@ -394,6 +439,7 @@ def run_story_pipeline_for_category_batch(
            birth_year=user_birth_year,
            llm=llm,
        )
+        narrative_raw = _gate_narrative_fidelity(combined_text, narrative_raw, llm)

        narrative_raw = _apply_narrative_fallbacks(
            narrative_raw,
@@ -440,6 +486,7 @@ def run_story_pipeline_for_category_batch(
                session, chapter_id=chapter.id, story_id=st.id
            )

+    reorder_chapter_story_links_by_life_order_sync(session, chapter.id)
    compose_chapter_from_story_links_sync(session, chapter.id)
    session.flush()