# api/app/features/memoir/pdf_service.py

"""
PDF 生成服务（从 services 迁入 memoir feature）
"""

from app.core.logging import get_logger
from io import BytesIO
from typing import List, Optional

import httpx
from PIL import Image
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.platypus import (
    Image as ReportLabImage,
    PageBreak,
    Paragraph,
    SimpleDocTemplate,
    Spacer,
)
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.cidfonts import UnicodeCIDFont

from app.features.memoir.asset_resolver import (
    collect_asset_ids_from_markdown,
    split_markdown_by_asset_refs,
    strip_legacy_image_placeholders,
)
from app.features.memoir.helpers import (
    _chapter_markdown,
    sections_to_content_and_images,
)
from app.features.memoir.memoir_images.parser import PLACEHOLDER_RE
from app.features.memoir.memoir_images.schema import (
    IMAGE_STATUS_COMPLETED,
    normalize_image_assets,
)
from app.features.memoir.memoir_images.storage import (
    CosDownloadUrlError,
    TencentCosStorageService,
    mark_image_delivery_unavailable,
    resolve_image_storage_key,
)

# Module-level logger for this file, obtained from the project's logging helper.
logger = get_logger(__name__)


def strip_image_placeholders(text: str) -> str:
    """Remove every ``PLACEHOLDER_RE`` match from *text* and trim the result.

    A falsy *text* (``None`` or ``""``) is treated as the empty string, so the
    function always returns a ``str``.
    """
    source = text or ""
    without_placeholders = PLACEHOLDER_RE.sub("", source)
    return without_placeholders.strip()


def split_content_blocks(content: str, images: list[dict]) -> list[dict]:
    """Split chapter text into ordered text / image blocks via image placeholders.

    Images are visited in ascending ``index`` order. For each image whose
    ``placeholder`` string still occurs in the not-yet-consumed text:

    * the text before the first occurrence becomes a text block (after
      ``strip_image_placeholders``; empty results are dropped);
    * the placeholder itself is consumed;
    * an image block is emitted only when the image's ``status`` equals
      ``IMAGE_STATUS_COMPLETED`` and it has a truthy ``url``.

    Images without a placeholder, or whose placeholder is not found, are
    skipped. Whatever text is left at the end becomes a final text block.

    Args:
        content: Chapter text. ``None`` is treated as the empty string.
        images: Image asset dicts. ``None`` is treated as no images.

    Returns:
        A list of ``{"type": "text", "value": str}`` and
        ``{"type": "image", "url": ...}`` dicts in document order.
    """
    blocks: list[dict] = []
    # Tolerate a missing body the same way strip_image_placeholders does;
    # otherwise the ``in`` test below raises TypeError on None.
    remaining = content or ""
    # ``index`` may be present but None; ``or 0`` keeps sort keys comparable.
    ordered_images = sorted(
        images or [], key=lambda item: item.get("index") or 0
    )
    for image in ordered_images:
        placeholder = image.get("placeholder")
        if not placeholder or placeholder not in remaining:
            continue
        # Split on the first occurrence only; later duplicates stay in
        # ``remaining`` and are removed by strip_image_placeholders.
        before, remaining = remaining.split(placeholder, 1)
        cleaned_before = strip_image_placeholders(before)
        if cleaned_before:
            blocks.append({"type": "text", "value": cleaned_before})
        if image.get("status") == IMAGE_STATUS_COMPLETED and image.get("url"):
            blocks.append({"type": "image", "url": image["url"]})
    cleaned_remaining = strip_image_placeholders(remaining)
    if cleaned_remaining:
        blocks.append({"type": "text", "value": cleaned_remaining})
    return blocks


def _prepare_pdf_image_assets(images: list[dict]) -> list[dict]:
    """Return copies of the normalized image assets with download URLs set.

    Each item yielded by ``normalize_image_assets(images)`` is shallow-copied.
    When the copy's ``status`` is ``IMAGE_STATUS_COMPLETED`` and
    ``resolve_image_storage_key`` yields a truthy key, ``url`` is replaced by
    ``storage.get_download_url(key)``. If that call raises, a warning is
    logged and the asset is replaced by the result of
    ``mark_image_delivery_unavailable``. All other assets pass through as-is.
    """
    cos = TencentCosStorageService.from_env()
    results: list[dict] = []
    for raw in normalize_image_assets(images):
        entry = dict(raw)
        key = resolve_image_storage_key(entry)
        signable = entry.get("status") == IMAGE_STATUS_COMPLETED and key
        if not signable:
            results.append(entry)
            continue
        try:
            entry["url"] = cos.get_download_url(key)
        except CosDownloadUrlError as err:
            # The COS-specific error carries retry / request-id details.
            logger.warning(
                "PDF 图片签名失败: key=%s, retryable=%s, request_id=%s, error=%s",
                key,
                err.retryable,
                err.request_id,
                err,
            )
            entry = mark_image_delivery_unavailable(entry)
        except Exception as err:
            logger.warning("PDF 图片签名失败: key=%s, error=%s", key, err)
            entry = mark_image_delivery_unavailable(entry)
        results.append(entry)
    return results


def _fit_image_size(
    image_bytes: bytes, max_width: float, max_height: float
) -> tuple[float, float]:
    """Compute a (width, height) that fits the image inside a bounding box.

    The image is decoded from *image_bytes* only to read its pixel size. The
    size is then multiplied by a single factor, the smaller of
    ``max_width / width`` and ``max_height / height``, so the aspect ratio is
    kept. The factor is not capped at 1, so images smaller than the box are
    enlarged to fill it.

    If the decoded size has a non-positive dimension, the bounding box itself
    is returned.
    """
    with Image.open(BytesIO(image_bytes)) as decoded:
        px_width, px_height = decoded.size
    if not (px_width > 0 and px_height > 0):
        return max_width, max_height
    factor = min(max_width / px_width, max_height / px_height)
    return px_width * factor, px_height * factor


class PDFService:
    def __init__(self):
        try:
            pdfmetrics.registerFont(UnicodeCIDFont("STSong-Light"))
            self.chinese_font = "STSong-Light"
        except Exception:
            self.chinese_font = "Helvetica"

    async def _fetch_image_bytes(self, url: str) -> bytes | None:
        try:
            async with httpx.AsyncClient(timeout=30) as client:
                response = await client.get(url)
                response.raise_for_status()
                return response.content
        except Exception as exc:
            logger.warning("PDF 图片下载失败: url=%s, error=%s", url, exc)
            return None

    async def generate_pdf(
        self,
        book,
        chapters: List,
        asset_url_map: Optional[dict[str, str]] = None,
    ) -> bytes:
        buffer = BytesIO()
        doc = SimpleDocTemplate(buffer, pagesize=A4)
        styles = getSampleStyleSheet()
        title_style = ParagraphStyle(
            "CustomTitle",
            parent=styles["Heading1"],
            fontSize=24,
            spaceAfter=30,
            alignment=1,
            fontName=self.chinese_font,
        )
        heading_style = ParagraphStyle(
            "CustomHeading",
            parent=styles["Heading1"],
            fontSize=18,
            spaceAfter=12,
            fontName=self.chinese_font,
        )
        normal_style = ParagraphStyle(
            "CustomNormal",
            parent=styles["Normal"],
            fontSize=12,
            leading=18,
            fontName=self.chinese_font,
        )
        story = []
        story.append(Paragraph(book.title, title_style))
        story.append(Spacer(1, 0.5 * inch))
        story.append(PageBreak())
        story.append(Paragraph("目录", heading_style))
        story.append(Spacer(1, 0.2 * inch))
        for i, chapter in enumerate(chapters, 1):
            story.append(Paragraph(f"{i}. {chapter.title}", normal_style))
        story.append(PageBreak())
        for chapter in chapters:
            story.append(Paragraph(chapter.title, heading_style))
            story.append(Spacer(1, 0.2 * inch))
            # 正文真源：canonical_markdown（与 API / 前端一致）
            markdown = _chapter_markdown(chapter)
            _, images_list = sections_to_content_and_images(chapter)
            if not markdown:
                markdown = getattr(chapter, "content", "") or ""
            if not images_list:
                images_list = list(getattr(chapter, "images", None) or [])
            prepared_images = _prepare_pdf_image_assets(images_list)
            blocks: list[dict]
            if asset_url_map and collect_asset_ids_from_markdown(markdown):
                blocks = split_markdown_by_asset_refs(
                    markdown,
                    lambda aid: asset_url_map.get(aid) if asset_url_map else None,
                )
                for b in blocks:
                    if b.get("type") == "text":
                        b["value"] = strip_legacy_image_placeholders(
                            b.get("value") or ""
                        )
            else:
                blocks = split_content_blocks(markdown, prepared_images)
            for block in blocks:
                if block["type"] == "text":
                    paragraphs = block["value"].split("\n\n")
                    for para in paragraphs:
                        if para.strip():
                            story.append(Paragraph(para.strip(), normal_style))
                            story.append(Spacer(1, 0.1 * inch))
                elif block["type"] == "image":
                    image_bytes = await self._fetch_image_bytes(block["url"])
                    if image_bytes:
                        try:
                            width, height = _fit_image_size(
                                image_bytes,
                                max_width=5 * inch,
                                max_height=3.75 * inch,
                            )
                            img = ReportLabImage(
                                BytesIO(image_bytes), width=width, height=height
                            )
                            story.append(img)
                            story.append(Spacer(1, 0.2 * inch))
                        except Exception as exc:
                            logger.warning("PDF 图片嵌入失败: %s", exc)
            story.append(PageBreak())
        doc.build(story)
        buffer.seek(0)
        return buffer.read()


# Shared module-level instance; constructing it runs PDFService.__init__
# (font registration) when this module is imported.
pdf_service = PDFService()
-												添加API服务模块

											
										
										
											2026-01-07 11:56:46 +08:00
+								"""
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								PDF 生成服务（从 services 迁入 memoir feature）
-												添加API服务模块

											
										
										
											2026-01-07 11:56:46 +08:00
+								"""
-												chore/ 删除无用文件

											
										
										
											2026-03-19 14:36:14 +08:00
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								from app.core.logging import get_logger
-												fix: fix various issues before merging

											
										
										
											2026-03-11 11:27:32 +08:00
+								from io import BytesIO
-												重构回忆录为 story-first / markdown-first 架构并整合图片意图与前端 UI 修复

本次 squash merge 将 codex-story-first-image-intent 的整体改动合入 development，核心内容包括：

1. 后端数据与迁移：新增 stories、story_versions、story_image_intents、chapter_cover_intents、assets 等模型与 Alembic 迁移，建立 story-first、markdown-first、asset-first 的主数据链路。

2. 生成与任务链：引入 StoryBuilderOrchestrator、ChapterComposerOrchestrator、story_image_tasks、chapter_cover_tasks，图片生成从正文占位符改为结构化 intent -> asset -> markdown 回填。

3. 并发与一致性：为 story/chapter intent 增加 claim_token、claimed_at、attempt_count，采用数据库原子 claim 为主、Redis 锁为辅，避免重复生成、锁误删和 processing 卡死。

4. Memoir 读写路径：章节 canonical_markdown 成为正文真源，列表/详情接口补齐 markdown、cover_asset、word_count 等字段，PDF 与 asset 解析链路同步升级。

5. Memory / Retrieval：扩展 transcript ingest、chunking、evidence 检索与 story 聚合基础设施，为后续 story-first RAG 与多 agent 编排提供底座。

6. App 端体验：章节页继续走 MarkdownRenderer 阅读链，同时吸收 fix3-19 的跨平台 UI glitch 修复；更新对话页、首页、文案资源与章节列表映射逻辑。

7. 测试与文档：补充 asset resolver、story image task、章节封面派发、markdown 映射等回归测试，并加入图片占位符退役设计文档。

											
										
										
											2026-03-20 10:30:07 +08:00
+								from typing import List, Optional
-												feat(api): embed memoir chapter images in PDF export and strip placeholders

Made-with: Cursor

											
										
										
											2026-03-10 16:06:09 +08:00
 								import httpx
-												fix: fix various issues before merging

											
										
										
											2026-03-11 11:27:32 +08:00
+								from PIL import Image
-												feat(api): embed memoir chapter images in PDF export and strip placeholders

Made-with: Cursor

											
										
										
											2026-03-10 16:06:09 +08:00
+								from reportlab.lib.pagesizes import A4
-												添加API服务模块

											
										
										
											2026-01-07 11:56:46 +08:00
+								from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
 								from reportlab.lib.units import inch
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								from reportlab.platypus import (
 								    Image as ReportLabImage,
 								    PageBreak,
 								    Paragraph,
 								    SimpleDocTemplate,
 								    Spacer,
 								)
-												添加API服务模块

											
										
										
											2026-01-07 11:56:46 +08:00
+								from reportlab.pdfbase import pdfmetrics
 								from reportlab.pdfbase.cidfonts import UnicodeCIDFont
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
-												重构回忆录为 story-first / markdown-first 架构并整合图片意图与前端 UI 修复

本次 squash merge 将 codex-story-first-image-intent 的整体改动合入 development，核心内容包括：

1. 后端数据与迁移：新增 stories、story_versions、story_image_intents、chapter_cover_intents、assets 等模型与 Alembic 迁移，建立 story-first、markdown-first、asset-first 的主数据链路。

2. 生成与任务链：引入 StoryBuilderOrchestrator、ChapterComposerOrchestrator、story_image_tasks、chapter_cover_tasks，图片生成从正文占位符改为结构化 intent -> asset -> markdown 回填。

3. 并发与一致性：为 story/chapter intent 增加 claim_token、claimed_at、attempt_count，采用数据库原子 claim 为主、Redis 锁为辅，避免重复生成、锁误删和 processing 卡死。

4. Memoir 读写路径：章节 canonical_markdown 成为正文真源，列表/详情接口补齐 markdown、cover_asset、word_count 等字段，PDF 与 asset 解析链路同步升级。

5. Memory / Retrieval：扩展 transcript ingest、chunking、evidence 检索与 story 聚合基础设施，为后续 story-first RAG 与多 agent 编排提供底座。

6. App 端体验：章节页继续走 MarkdownRenderer 阅读链，同时吸收 fix3-19 的跨平台 UI glitch 修复；更新对话页、首页、文案资源与章节列表映射逻辑。

7. 测试与文档：补充 asset resolver、story image task、章节封面派发、markdown 映射等回归测试，并加入图片占位符退役设计文档。

											
										
										
											2026-03-20 10:30:07 +08:00
+								from app.features.memoir.asset_resolver import (
 								    collect_asset_ids_from_markdown,
 								    split_markdown_by_asset_refs,
 								    strip_legacy_image_placeholders,
 								)
 								from app.features.memoir.helpers import (
 								    _chapter_markdown,
 								    sections_to_content_and_images,
 								)
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								from app.features.memoir.memoir_images.parser import PLACEHOLDER_RE
 								from app.features.memoir.memoir_images.schema import (
 								    IMAGE_STATUS_COMPLETED,
 								    normalize_image_assets,
 								)
 								from app.features.memoir.memoir_images.storage import (
-												修复回忆录图片重试状态透传与前端展示

											
										
										
											2026-03-11 15:20:59 +08:00
+								    CosDownloadUrlError,
 								    TencentCosStorageService,
 								    mark_image_delivery_unavailable,
 								    resolve_image_storage_key,
 								)
-												feat(api): embed memoir chapter images in PDF export and strip placeholders

Made-with: Cursor

											
										
										
											2026-03-10 16:06:09 +08:00
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								logger = get_logger(__name__)
-												feat(api): embed memoir chapter images in PDF export and strip placeholders

Made-with: Cursor

											
										
										
											2026-03-10 16:06:09 +08:00
 								def strip_image_placeholders(text: str) -> str:
 								    return PLACEHOLDER_RE.sub("", text or "").strip()
 								def split_content_blocks(content: str, images: list[dict]) -> list[dict]:
 								    blocks: list[dict] = []
 								    remaining = content
 								    for image in sorted(images or [], key=lambda item: item.get("index", 0)):
 								        placeholder = image.get("placeholder")
 								        if not placeholder or placeholder not in remaining:
 								            continue
 								        before, remaining = remaining.split(placeholder, 1)
 								        cleaned_before = strip_image_placeholders(before)
 								        if cleaned_before:
 								            blocks.append({"type": "text", "value": cleaned_before})
-												fix: fix various issues before merging

											
										
										
											2026-03-11 11:27:32 +08:00
+								        if image.get("status") == IMAGE_STATUS_COMPLETED and image.get("url"):
-												feat(api): embed memoir chapter images in PDF export and strip placeholders

Made-with: Cursor

											
										
										
											2026-03-10 16:06:09 +08:00
+								            blocks.append({"type": "image", "url": image["url"]})
 								    cleaned_remaining = strip_image_placeholders(remaining)
 								    if cleaned_remaining:
 								        blocks.append({"type": "text", "value": cleaned_remaining})
 								    return blocks
-												添加API服务模块

											
										
										
											2026-01-07 11:56:46 +08:00
-												Fix memoir image delivery and Android rendering

											
										
										
											2026-03-11 10:06:12 +08:00
+								def _prepare_pdf_image_assets(images: list[dict]) -> list[dict]:
 								    storage = TencentCosStorageService.from_env()
 								    prepared_assets: list[dict] = []
-												fix: fix various issues before merging

											
										
										
											2026-03-11 11:27:32 +08:00
+								    for item in normalize_image_assets(images):
-												Fix memoir image delivery and Android rendering

											
										
										
											2026-03-11 10:06:12 +08:00
+								        asset = dict(item)
 								        storage_key = resolve_image_storage_key(asset)
-												fix: fix various issues before merging

											
										
										
											2026-03-11 11:27:32 +08:00
+								        if asset.get("status") == IMAGE_STATUS_COMPLETED and storage_key:
-												Fix memoir image delivery and Android rendering

											
										
										
											2026-03-11 10:06:12 +08:00
+								            try:
 								                asset["url"] = storage.get_download_url(storage_key)
-												修复回忆录图片重试状态透传与前端展示

											
										
										
											2026-03-11 15:20:59 +08:00
+								            except CosDownloadUrlError as exc:
 								                logger.warning(
 								                    "PDF 图片签名失败: key=%s, retryable=%s, request_id=%s, error=%s",
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								                    storage_key,
 								                    exc.retryable,
 								                    exc.request_id,
 								                    exc,
-												修复回忆录图片重试状态透传与前端展示

											
										
										
											2026-03-11 15:20:59 +08:00
+								                )
 								                asset = mark_image_delivery_unavailable(asset)
-												Fix memoir image delivery and Android rendering

											
										
										
											2026-03-11 10:06:12 +08:00
+								            except Exception as exc:
-												修复回忆录图片重试状态透传与前端展示

											
										
										
											2026-03-11 15:20:59 +08:00
+								                logger.warning("PDF 图片签名失败: key=%s, error=%s", storage_key, exc)
 								                asset = mark_image_delivery_unavailable(asset)
-												Fix memoir image delivery and Android rendering

											
										
										
											2026-03-11 10:06:12 +08:00
+								        prepared_assets.append(asset)
 								    return prepared_assets
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								def _fit_image_size(
 								    image_bytes: bytes, max_width: float, max_height: float
 								) -> tuple[float, float]:
-												fix: fix various issues before merging

											
										
										
											2026-03-11 11:27:32 +08:00
+								    with Image.open(BytesIO(image_bytes)) as image:
 								        width, height = image.size
 								    if width <= 0 or height <= 0:
 								        return max_width, max_height
 								    scale = min(max_width / width, max_height / height)
 								    return width * scale, height * scale
-												添加API服务模块

											
										
										
											2026-01-07 11:56:46 +08:00
+								class PDFService:
 								    def __init__(self):
 								        try:
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								            pdfmetrics.registerFont(UnicodeCIDFont("STSong-Light"))
 								            self.chinese_font = "STSong-Light"
-												添加API服务模块

											
										
										
											2026-01-07 11:56:46 +08:00
+								        except Exception:
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								            self.chinese_font = "Helvetica"
-												feat(api): embed memoir chapter images in PDF export and strip placeholders

Made-with: Cursor

											
										
										
											2026-03-10 16:06:09 +08:00
 								    async def _fetch_image_bytes(self, url: str) -> bytes | None:
 								        try:
 								            async with httpx.AsyncClient(timeout=30) as client:
 								                response = await client.get(url)
 								                response.raise_for_status()
 								                return response.content
 								        except Exception as exc:
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								            logger.warning("PDF 图片下载失败: url=%s, error=%s", url, exc)
-												feat(api): embed memoir chapter images in PDF export and strip placeholders

Made-with: Cursor

											
										
										
											2026-03-10 16:06:09 +08:00
+								            return None
-												重构回忆录为 story-first / markdown-first 架构并整合图片意图与前端 UI 修复

本次 squash merge 将 codex-story-first-image-intent 的整体改动合入 development，核心内容包括：

1. 后端数据与迁移：新增 stories、story_versions、story_image_intents、chapter_cover_intents、assets 等模型与 Alembic 迁移，建立 story-first、markdown-first、asset-first 的主数据链路。

2. 生成与任务链：引入 StoryBuilderOrchestrator、ChapterComposerOrchestrator、story_image_tasks、chapter_cover_tasks，图片生成从正文占位符改为结构化 intent -> asset -> markdown 回填。

3. 并发与一致性：为 story/chapter intent 增加 claim_token、claimed_at、attempt_count，采用数据库原子 claim 为主、Redis 锁为辅，避免重复生成、锁误删和 processing 卡死。

4. Memoir 读写路径：章节 canonical_markdown 成为正文真源，列表/详情接口补齐 markdown、cover_asset、word_count 等字段，PDF 与 asset 解析链路同步升级。

5. Memory / Retrieval：扩展 transcript ingest、chunking、evidence 检索与 story 聚合基础设施，为后续 story-first RAG 与多 agent 编排提供底座。

6. App 端体验：章节页继续走 MarkdownRenderer 阅读链，同时吸收 fix3-19 的跨平台 UI glitch 修复；更新对话页、首页、文案资源与章节列表映射逻辑。

7. 测试与文档：补充 asset resolver、story image task、章节封面派发、markdown 映射等回归测试，并加入图片占位符退役设计文档。

											
										
										
											2026-03-20 10:30:07 +08:00
+								    async def generate_pdf(
 								        self,
 								        book,
 								        chapters: List,
 								        asset_url_map: Optional[dict[str, str]] = None,
 								    ) -> bytes:
-												添加API服务模块

											
										
										
											2026-01-07 11:56:46 +08:00
+								        buffer = BytesIO()
 								        doc = SimpleDocTemplate(buffer, pagesize=A4)
 								        styles = getSampleStyleSheet()
 								        title_style = ParagraphStyle(
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								            "CustomTitle",
 								            parent=styles["Heading1"],
-												添加API服务模块

											
										
										
											2026-01-07 11:56:46 +08:00
+								            fontSize=24,
 								            spaceAfter=30,
-												feat(api): embed memoir chapter images in PDF export and strip placeholders

Made-with: Cursor

											
										
										
											2026-03-10 16:06:09 +08:00
+								            alignment=1,
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								            fontName=self.chinese_font,
-												添加API服务模块

											
										
										
											2026-01-07 11:56:46 +08:00
+								        )
 								        heading_style = ParagraphStyle(
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								            "CustomHeading",
 								            parent=styles["Heading1"],
-												添加API服务模块

											
										
										
											2026-01-07 11:56:46 +08:00
+								            fontSize=18,
 								            spaceAfter=12,
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								            fontName=self.chinese_font,
-												添加API服务模块

											
										
										
											2026-01-07 11:56:46 +08:00
+								        )
 								        normal_style = ParagraphStyle(
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								            "CustomNormal",
 								            parent=styles["Normal"],
-												添加API服务模块

											
										
										
											2026-01-07 11:56:46 +08:00
+								            fontSize=12,
 								            leading=18,
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								            fontName=self.chinese_font,
-												添加API服务模块

											
										
										
											2026-01-07 11:56:46 +08:00
+								        )
 								        story = []
 								        story.append(Paragraph(book.title, title_style))
-												feat(api): embed memoir chapter images in PDF export and strip placeholders

Made-with: Cursor

											
										
										
											2026-03-10 16:06:09 +08:00
+								        story.append(Spacer(1, 0.5 * inch))
-												添加API服务模块

											
										
										
											2026-01-07 11:56:46 +08:00
+								        story.append(PageBreak())
 								        story.append(Paragraph("目录", heading_style))
-												feat(api): embed memoir chapter images in PDF export and strip placeholders

Made-with: Cursor

											
										
										
											2026-03-10 16:06:09 +08:00
+								        story.append(Spacer(1, 0.2 * inch))
-												添加API服务模块

											
										
										
											2026-01-07 11:56:46 +08:00
+								        for i, chapter in enumerate(chapters, 1):
 								            story.append(Paragraph(f"{i}. {chapter.title}", normal_style))
 								        story.append(PageBreak())
 								        for chapter in chapters:
 								            story.append(Paragraph(chapter.title, heading_style))
-												feat(api): embed memoir chapter images in PDF export and strip placeholders

Made-with: Cursor

											
										
										
											2026-03-10 16:06:09 +08:00
+								            story.append(Spacer(1, 0.2 * inch))
-												重构回忆录为 story-first / markdown-first 架构并整合图片意图与前端 UI 修复

本次 squash merge 将 codex-story-first-image-intent 的整体改动合入 development，核心内容包括：

1. 后端数据与迁移：新增 stories、story_versions、story_image_intents、chapter_cover_intents、assets 等模型与 Alembic 迁移，建立 story-first、markdown-first、asset-first 的主数据链路。

2. 生成与任务链：引入 StoryBuilderOrchestrator、ChapterComposerOrchestrator、story_image_tasks、chapter_cover_tasks，图片生成从正文占位符改为结构化 intent -> asset -> markdown 回填。

3. 并发与一致性：为 story/chapter intent 增加 claim_token、claimed_at、attempt_count，采用数据库原子 claim 为主、Redis 锁为辅，避免重复生成、锁误删和 processing 卡死。

4. Memoir 读写路径：章节 canonical_markdown 成为正文真源，列表/详情接口补齐 markdown、cover_asset、word_count 等字段，PDF 与 asset 解析链路同步升级。

5. Memory / Retrieval：扩展 transcript ingest、chunking、evidence 检索与 story 聚合基础设施，为后续 story-first RAG 与多 agent 编排提供底座。

6. App 端体验：章节页继续走 MarkdownRenderer 阅读链，同时吸收 fix3-19 的跨平台 UI glitch 修复；更新对话页、首页、文案资源与章节列表映射逻辑。

7. 测试与文档：补充 asset resolver、story image task、章节封面派发、markdown 映射等回归测试，并加入图片占位符退役设计文档。

											
										
										
											2026-03-20 10:30:07 +08:00
+								            # 正文真源：canonical_markdown（与 API / 前端一致）
 								            markdown = _chapter_markdown(chapter)
 								            _, images_list = sections_to_content_and_images(chapter)
 								            if not markdown:
 								                markdown = getattr(chapter, "content", "") or ""
 								            if not images_list:
 								                images_list = list(getattr(chapter, "images", None) or [])
 								            prepared_images = _prepare_pdf_image_assets(images_list)
 								            blocks: list[dict]
 								            if asset_url_map and collect_asset_ids_from_markdown(markdown):
 								                blocks = split_markdown_by_asset_refs(
 								                    markdown,
 								                    lambda aid: asset_url_map.get(aid) if asset_url_map else None,
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								                )
-												重构回忆录为 story-first / markdown-first 架构并整合图片意图与前端 UI 修复

本次 squash merge 将 codex-story-first-image-intent 的整体改动合入 development，核心内容包括：

1. 后端数据与迁移：新增 stories、story_versions、story_image_intents、chapter_cover_intents、assets 等模型与 Alembic 迁移，建立 story-first、markdown-first、asset-first 的主数据链路。

2. 生成与任务链：引入 StoryBuilderOrchestrator、ChapterComposerOrchestrator、story_image_tasks、chapter_cover_tasks，图片生成从正文占位符改为结构化 intent -> asset -> markdown 回填。

3. 并发与一致性：为 story/chapter intent 增加 claim_token、claimed_at、attempt_count，采用数据库原子 claim 为主、Redis 锁为辅，避免重复生成、锁误删和 processing 卡死。

4. Memoir 读写路径：章节 canonical_markdown 成为正文真源，列表/详情接口补齐 markdown、cover_asset、word_count 等字段，PDF 与 asset 解析链路同步升级。

5. Memory / Retrieval：扩展 transcript ingest、chunking、evidence 检索与 story 聚合基础设施，为后续 story-first RAG 与多 agent 编排提供底座。

6. App 端体验：章节页继续走 MarkdownRenderer 阅读链，同时吸收 fix3-19 的跨平台 UI glitch 修复；更新对话页、首页、文案资源与章节列表映射逻辑。

7. 测试与文档：补充 asset resolver、story image task、章节封面派发、markdown 映射等回归测试，并加入图片占位符退役设计文档。

											
										
										
											2026-03-20 10:30:07 +08:00
+								                for b in blocks:
 								                    if b.get("type") == "text":
 								                        b["value"] = strip_legacy_image_placeholders(
 								                            b.get("value") or ""
 								                        )
 								            else:
 								                blocks = split_content_blocks(markdown, prepared_images)
-												feat(api): embed memoir chapter images in PDF export and strip placeholders

Made-with: Cursor

											
										
										
											2026-03-10 16:06:09 +08:00
+								            for block in blocks:
 								                if block["type"] == "text":
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								                    paragraphs = block["value"].split("\n\n")
-												feat(api): embed memoir chapter images in PDF export and strip placeholders

Made-with: Cursor

											
										
										
											2026-03-10 16:06:09 +08:00
+								                    for para in paragraphs:
 								                        if para.strip():
 								                            story.append(Paragraph(para.strip(), normal_style))
 								                            story.append(Spacer(1, 0.1 * inch))
 								                elif block["type"] == "image":
 								                    image_bytes = await self._fetch_image_bytes(block["url"])
 								                    if image_bytes:
 								                        try:
-												fix: fix various issues before merging

											
										
										
											2026-03-11 11:27:32 +08:00
+								                            width, height = _fit_image_size(
 								                                image_bytes,
 								                                max_width=5 * inch,
 								                                max_height=3.75 * inch,
 								                            )
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								                            img = ReportLabImage(
 								                                BytesIO(image_bytes), width=width, height=height
 								                            )
-												feat(api): embed memoir chapter images in PDF export and strip placeholders

Made-with: Cursor

											
										
										
											2026-03-10 16:06:09 +08:00
+								                            story.append(img)
 								                            story.append(Spacer(1, 0.2 * inch))
 								                        except Exception as exc:
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								                            logger.warning("PDF 图片嵌入失败: %s", exc)
-												添加API服务模块

											
										
										
											2026-01-07 11:56:46 +08:00
+								            story.append(PageBreak())
 								        doc.build(story)
 								        buffer.seek(0)
 								        return buffer.read()
 								pdf_service = PDFService()