修复：CI 部署环境与 ref 错配、迁移碎片化、图片意图 source_span、章节物化脏版式、会话历史与本地语音不一致

新增：TTS 上传 COS 与分片、章节 reading_segments 物化与快照、markdown 清洗、会话消息 repository、语音 store 重构与相关测试
2026-03-20 16:36:42 +08:00
parent 7317bf10cd
commit 8af37e5e8e
65 changed files with 1704 additions and 504 deletions
--- a/api/app/features/memoir/chapter_markdown_compose.py
+++ b/api/app/features/memoir/chapter_markdown_compose.py
@@ -1,29 +1,16 @@
 """
 按 chapter_story_links 顺序将各 story 正文物化为单一 markdown（无 LLM）。
 保留 story 内 asset:// 引用不变。
+章节级 canonical：仅正文拼接，故事间用 ---；故事标题仅存 stories.title。
+PDF 导出可单独物化「## 标题 + 正文」版本。
 """

 from typing import Any

-
-def compose_ordered_stories_to_markdown(
-    ordered: list[tuple[str, str]],
-) -> str:
-    """
-    :param ordered: (story_title, canonical_markdown) 已按阅读顺序排好
-    :return: 章节级 markdown；每个故事为 ## 标题 + 正文，故事之间用 markdown 水平线 --- 分隔
-      （配图在 story 正文中，自然落在该故事块内、--- 之前）
-    """
-    parts: list[str] = []
-    for title, md in ordered:
-        title = (title or "").strip() or "故事"
-        body = (md or "").strip()
-        parts.append(f"## {title}\n\n{body}" if body else f"## {title}")
-    return "\n\n---\n\n".join(parts)
+from app.features.memoir.markdown_sanitize import sanitize_story_for_chapter_compose


-def materialize_chapter_markdown_from_loaded_chapter(chapter: Any) -> str:
-    """要求 chapter.story_links 已 eager-load，且各 link.story 可用。"""
+def _gather_title_body_pairs(chapter: Any) -> list[tuple[str, str]]:
    links = sorted(
        list(getattr(chapter, "story_links", None) or []),
        key=lambda x: getattr(x, "order_index", 0),
@@ -36,4 +23,45 @@ def materialize_chapter_markdown_from_loaded_chapter(chapter: Any) -> str:
        title = (getattr(st, "title", None) or "").strip()
        body = (getattr(st, "canonical_markdown", None) or "").strip()
        pairs.append((title, body))
-    return compose_ordered_stories_to_markdown(pairs)
+    return pairs
+
+
+def compose_ordered_stories_to_markdown(ordered: list[tuple[str, str]]) -> str:
+    """
+    :param ordered: (story_title, canonical_markdown) pairs already in reading order; the title is only handed to the sanitizer (per the original note, for de-duplication)
+    :return: chapter-level markdown made of story bodies only; non-empty blocks are joined by \\n\\n---\\n\\n
+    """
+    bodies: list[str] = []
+    for title, md in ordered:
+        raw = (md or "").strip()  # a None body is treated as empty
+        if not raw:
+            continue  # stories without a body contribute nothing (no stray separator)
+        cleaned = sanitize_story_for_chapter_compose(raw, title)
+        if cleaned:  # the sanitizer may return an empty result; skip those too
+            bodies.append(cleaned)
+    return "\n\n---\n\n".join(bodies)
+
+
+def compose_ordered_stories_to_pdf_markdown(ordered: list[tuple[str, str]]) -> str:
+    """PDF variant: each story becomes '## title' + body, blocks joined by ---; titles come from metadata and are not written back to the chapter canonical."""
+    parts: list[str] = []
+    for title, md in ordered:
+        t = (title or "").strip() or "故事"  # fallback heading when the title is blank
+        raw = (md or "").strip()  # a None body is treated as empty
+        if not raw:
+            continue  # stories without a body are omitted entirely, heading included
+        body = sanitize_story_for_chapter_compose(raw, title)  # NOTE(review): passes the raw title, not the fallback `t` - confirm intended
+        if not body:
+            continue  # nothing left after sanitizing, so no heading is emitted either
+        parts.append(f"## {t}\n\n{body}")
+    return "\n\n---\n\n".join(parts)
+
+
+def materialize_chapter_markdown_from_loaded_chapter(chapter: Any) -> str:
+    """Requires chapter.story_links to be eager-loaded and each link.story to be available."""
+    return compose_ordered_stories_to_markdown(_gather_title_body_pairs(chapter))
+
+
+def materialize_chapter_pdf_markdown_from_loaded_chapter(chapter: Any) -> str:
+    """PDF only: the output includes a '## story title' heading for each story block."""
+    return compose_ordered_stories_to_pdf_markdown(_gather_title_body_pairs(chapter))