修复：CI 部署环境与 ref 错配、迁移碎片化、图片意图 source_span、章节物化脏版式、会话历史与本地语音不一致

新增：TTS 上传 COS 与分片、章节 reading_segments 物化与快照、markdown 清洗、会话消息 repository、语音 store 重构与相关测试
2026-03-20 16:36:42 +08:00
parent 7317bf10cd
commit 8af37e5e8e
65 changed files with 1704 additions and 504 deletions
--- a/api/app/features/memoir/reading_segment_materialize.py
+++ b/api/app/features/memoir/reading_segment_materialize.py
@@ -0,0 +1,188 @@
+"""
+章节阅读片段物化：与 canonical 同一生成时机写入 reading_segments_json（无签名 URL）；
+API 读时 hydrate 或（dirty / 无快照）回退为运行时物化。
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+from app.features.memoir.asset_resolver import (
+    collect_asset_ids_from_markdown,
+    resolve_asset_refs_in_markdown,
+)
+from app.features.memoir.markdown_sanitize import sanitize_story_for_chapter_compose
+from app.features.memoir.models import Chapter
+
+
+def _primary_story_intent_asset_id(story: Any) -> str | None:  # asset id (as str) of the story's first "primary" image intent, else None
+    for it in getattr(story, "image_intents", None) or []:  # a missing or None image_intents is treated as empty
+        if getattr(it, "intent_role", None) == "primary":
+            aid = getattr(it, "asset_id", None)
+            return str(aid) if aid else None  # returns on the first primary intent; later ones are never inspected
+    return None
+
+
+def _cover_intent_snapshot_from_story(story: Any) -> dict | None:
+    """Return URL-free metadata of the story's primary image intent, for JSON persistence; None if there is no primary intent or it has no asset_id."""
+    intents = getattr(story, "image_intents", None) or []
+    primary = None
+    for it in intents:
+        if getattr(it, "intent_role", None) == "primary":
+            primary = it
+            break  # only the first primary intent is used
+    if not primary:
+        return None
+    aid = getattr(primary, "asset_id", None)
+    if not aid:
+        return None  # no asset id, so no snapshot is produced
+    status = getattr(primary, "status", None) or "pending"  # missing or empty status falls back to "pending"
+    return {
+        "asset_id": str(aid),
+        "status": status,
+        "description": getattr(primary, "caption", None) or "故事配图",  # caption, else a fixed default label
+        "prompt": getattr(primary, "prompt_brief", None),
+        "style": getattr(primary, "style_profile", None),
+        "error": getattr(primary, "error", None),
+        "created_at": primary.created_at.isoformat() if primary.created_at else None,  # NOTE(review): direct attribute access; unlike the getattr reads above, a missing attribute raises AttributeError
+        "updated_at": primary.updated_at.isoformat() if primary.updated_at else None,  # same direct access as created_at
+    }
+
+
+def _cover_dict_from_snapshot_row(
+    snap: dict[str, Any], asset_url_map: dict[str, str]
+) -> dict:  # expand a persisted cover snapshot into the full cover-image dict, taking the URL from asset_url_map
+    aid = snap.get("asset_id")
+    url = asset_url_map.get(str(aid)) if aid else None  # looked up by stringified asset id; None when absent from the map
+    return {
+        "placeholder": "",
+        "description": snap.get("description") or "故事配图",  # same default label as used when the snapshot is built
+        "index": 0,
+        "status": snap.get("status") or "pending",
+        "prompt": snap.get("prompt"),
+        "url": url,
+        "storage_key": None,  # this and the other hard-coded None fields are not read from the snapshot
+        "provider": None,
+        "style": snap.get("style"),
+        "size": None,
+        "error": snap.get("error"),
+        "retryable": None,
+        "created_at": snap.get("created_at"),
+        "updated_at": snap.get("updated_at"),
+    }
+
+
+def build_reading_segments_snapshot(ch: Chapter) -> list[dict[str, Any]]:
+    """
+    Build the persistable reading-segment snapshot: bodies keep their unresolved asset refs; the cover holds only intent metadata and is omitted when the body already embeds that asset.
+    Per the original author's note, it is written on the same path as append_chapter_compose_version (that caller is not visible here).
+    """
+    links = sorted(
+        list(getattr(ch, "story_links", None) or []),
+        key=lambda x: getattr(x, "order_index", 0),  # links lacking order_index sort as 0
+    )
+    out: list[dict[str, Any]] = []
+    for link in links:
+        st = getattr(link, "story", None)
+        if st is None:
+            continue  # link without a story contributes no segment
+        title = (getattr(st, "title", None) or "").strip()
+        raw = (getattr(st, "canonical_markdown", None) or "").strip()
+        body = sanitize_story_for_chapter_compose(raw, title)
+        if not body:
+            continue  # stories whose sanitized body is empty are skipped
+        primary_aid = _primary_story_intent_asset_id(st)
+        inline_ids = set(collect_asset_ids_from_markdown(body))
+        cover: dict | None = None
+        if primary_aid and primary_aid not in inline_ids:  # keep a cover only when the body does not already reference the primary asset
+            cover = _cover_intent_snapshot_from_story(st)
+        out.append(
+            {
+                "story_id": st.id,
+                "body_markdown": body,  # stored without passing through resolve_asset_refs_in_markdown
+                "cover_image": cover,
+            }
+        )
+    return out
+
+
+def materialize_chapter_reading_segments(
+    ch: Chapter,
+    asset_url_map: dict[str, str] | None = None,
+) -> list[dict[str, Any]]:
+    """Materialize segments at request time, resolving asset refs through asset_url_map; per the original note, this matches the legacy build_reading_segments behavior."""
+    from app.features.memoir import helpers as h  # NOTE(review): function-scope import, presumably to avoid an import cycle; confirm
+
+    asset_url_map = asset_url_map or {}
+    resolve = lambda aid: asset_url_map.get(aid)  # noqa: E731
+    links = sorted(
+        list(getattr(ch, "story_links", None) or []),
+        key=lambda x: getattr(x, "order_index", 0),  # links lacking order_index sort as 0
+    )
+    segments: list[dict[str, Any]] = []
+    for link in links:
+        st = getattr(link, "story", None)
+        if st is None:
+            continue  # link without a story contributes no segment
+        title = (getattr(st, "title", None) or "").strip()
+        raw = (getattr(st, "canonical_markdown", None) or "").strip()
+        body = sanitize_story_for_chapter_compose(raw, title)
+        if not body:
+            continue  # stories whose sanitized body is empty are skipped
+        body_md = resolve_asset_refs_in_markdown(body, resolve)
+        img_raw = h.story_primary_cover_image_dict(st, asset_url_map=asset_url_map)
+        primary_aid = _primary_story_intent_asset_id(st)
+        inline_ids = set(collect_asset_ids_from_markdown(body))  # ids are collected from the pre-resolution body, not body_md
+        if img_raw and primary_aid and primary_aid in inline_ids:  # drop the cover when the body already references the primary asset
+            img_raw = None
+        img_norm = h.first_normalized_image_for_api(img_raw) if img_raw else None
+        segments.append(
+            {
+                "story_id": st.id,
+                "body_markdown": body_md,
+                "cover_image": img_norm,
+            }
+        )
+    return segments
+
+
+def hydrate_reading_segments_from_snapshot(
+    ch: Chapter,
+    asset_url_map: dict[str, str] | None = None,
+) -> list[dict[str, Any]]:
+    """Turn the persisted snapshot (ch.reading_segments_json) into API-shaped segments, resolving asset refs and the cover URL via asset_url_map."""
+    from app.features.memoir import helpers as h  # NOTE(review): function-scope import, presumably to avoid an import cycle; confirm
+
+    asset_url_map = asset_url_map or {}
+    resolve = lambda aid: asset_url_map.get(aid)  # noqa: E731
+    rows = getattr(ch, "reading_segments_json", None) or []  # a missing or None snapshot yields an empty result
+    out: list[dict[str, Any]] = []
+    for row in rows:
+        body = resolve_asset_refs_in_markdown(row["body_markdown"], resolve)  # required key: a row without it raises KeyError
+        ci = row.get("cover_image")
+        if ci:
+            img_raw = _cover_dict_from_snapshot_row(ci, asset_url_map)
+            img_norm = h.first_normalized_image_for_api(img_raw)
+        else:
+            img_norm = None  # no cover stored for this segment
+        out.append(
+            {
+                "story_id": row["story_id"],  # required key, same as body_markdown
+                "body_markdown": body,
+                "cover_image": img_norm,
+            }
+        )
+    return out
+
+
+def resolve_reading_segments_for_chapter_detail(
+    ch: Chapter,
+    asset_url_map: dict[str, str] | None = None,
+) -> list[dict[str, Any]]:
+    """Chapter detail: materialize at runtime when the chapter is dirty or has no snapshot; otherwise hydrate the stored snapshot."""
+    asset_url_map = asset_url_map or {}
+    dirty = getattr(ch, "markdown_compose_dirty", True)  # a chapter without the flag is treated as dirty
+    has_snapshot = getattr(ch, "reading_segments_json", None) is not None  # an empty list still counts as a snapshot
+    if has_snapshot and not dirty:
+        return hydrate_reading_segments_from_snapshot(ch, asset_url_map=asset_url_map)
+    return materialize_chapter_reading_segments(ch, asset_url_map=asset_url_map)