# life-echo/api/app/features/memoir/pdf_service.py

"""
PDF 生成服务（从 services 迁入 memoir feature）
"""

from io import BytesIO
from typing import List, Optional
from xml.sax.saxutils import escape

import httpx
from PIL import Image
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
from reportlab.lib.units import inch
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.cidfonts import UnicodeCIDFont
from reportlab.platypus import (
    Image as ReportLabImage,
)
from reportlab.platypus import (
    PageBreak,
    Paragraph,
    SimpleDocTemplate,
    Spacer,
)

from app.core.logging import get_logger
from app.features.memoir.asset_resolver import (
    collect_asset_ids_from_markdown,
    split_markdown_by_asset_refs,
    strip_image_placeholders,
)
from app.features.memoir.chapter_markdown_compose import (
    materialize_chapter_pdf_markdown_from_loaded_chapter,
)
from app.features.memoir.helpers import _chapter_markdown

# Module-level logger; used below to report image download/embedding failures.
logger = get_logger(__name__)


def _chapter_markdown_for_pdf(chapter) -> str:
    """Choose the markdown used to render *chapter* into the PDF.

    When the chapter carries story links and at least one of them has a
    truthy ``story``, the PDF uses the "## story title + body"
    materialization; otherwise the chapter's canonical markdown is used.
    """
    story_links = getattr(chapter, "story_links", None)
    if not story_links:
        return _chapter_markdown(chapter)
    for link in story_links:
        if getattr(link, "story", None):
            return materialize_chapter_pdf_markdown_from_loaded_chapter(chapter)
    return _chapter_markdown(chapter)


def _fit_image_size(
    image_bytes: bytes, max_width: float, max_height: float
) -> tuple[float, float]:
    """Return a ``(width, height)`` that fits inside the given bounding box.

    The encoded image in *image_bytes* is opened only to read its pixel
    dimensions. Both dimensions are multiplied by the same factor (the
    smaller of the two box/pixel ratios), so the aspect ratio is kept and
    the result touches the box on at least one side; images smaller than
    the box are therefore enlarged. If the image reports a non-positive
    dimension, the box itself is returned.
    """
    with Image.open(BytesIO(image_bytes)) as picture:
        pixel_width, pixel_height = picture.size
    if pixel_width > 0 and pixel_height > 0:
        ratio = min(max_width / pixel_width, max_height / pixel_height)
        return pixel_width * ratio, pixel_height * ratio
    return max_width, max_height


class PDFService:
    def __init__(self):
        try:
            pdfmetrics.registerFont(UnicodeCIDFont("STSong-Light"))
            self.chinese_font = "STSong-Light"
        except Exception:
            self.chinese_font = "Helvetica"

    async def _fetch_image_bytes(self, url: str) -> bytes | None:
        try:
            async with httpx.AsyncClient(timeout=30) as client:
                response = await client.get(url)
                response.raise_for_status()
                return response.content
        except Exception as exc:
            logger.warning("PDF 图片下载失败: url={}, error={}", url, exc)
            return None

    async def generate_pdf(
        self,
        book,
        chapters: List,
        asset_url_map: Optional[dict[str, str]] = None,
    ) -> bytes:
        buffer = BytesIO()
        doc = SimpleDocTemplate(buffer, pagesize=A4)
        styles = getSampleStyleSheet()
        title_style = ParagraphStyle(
            "CustomTitle",
            parent=styles["Heading1"],
            fontSize=24,
            spaceAfter=30,
            alignment=1,
            fontName=self.chinese_font,
        )
        heading_style = ParagraphStyle(
            "CustomHeading",
            parent=styles["Heading1"],
            fontSize=18,
            spaceAfter=12,
            fontName=self.chinese_font,
        )
        normal_style = ParagraphStyle(
            "CustomNormal",
            parent=styles["Normal"],
            fontSize=12,
            leading=18,
            fontName=self.chinese_font,
        )
        story = []
        story.append(Paragraph(book.title, title_style))
        story.append(Spacer(1, 0.5 * inch))
        story.append(PageBreak())
        story.append(Paragraph("目录", heading_style))
        story.append(Spacer(1, 0.2 * inch))
        for i, chapter in enumerate(chapters, 1):
            story.append(Paragraph(f"{i}. {chapter.title}", normal_style))
        story.append(PageBreak())
        for chapter in chapters:
            story.append(Paragraph(chapter.title, heading_style))
            story.append(Spacer(1, 0.2 * inch))
            # 有 story_links 时按章节内故事注入 ## 标题（与物化章节正文不含故事标题区分）
            markdown = _chapter_markdown_for_pdf(chapter)
            blocks: list[dict]
            if asset_url_map and collect_asset_ids_from_markdown(markdown):
                blocks = split_markdown_by_asset_refs(
                    markdown,
                    lambda aid: asset_url_map.get(aid) if asset_url_map else None,
                )
                for b in blocks:
                    if b.get("type") == "text":
                        b["value"] = strip_image_placeholders(b.get("value") or "")
            else:
                cleaned_markdown = strip_image_placeholders(markdown or "")
                blocks = (
                    [{"type": "text", "value": cleaned_markdown}]
                    if cleaned_markdown
                    else []
                )
            for block in blocks:
                if block["type"] == "text":
                    paragraphs = block["value"].split("\n\n")
                    for para in paragraphs:
                        if para.strip():
                            story.append(Paragraph(para.strip(), normal_style))
                            story.append(Spacer(1, 0.1 * inch))
                elif block["type"] == "image":
                    image_bytes = await self._fetch_image_bytes(block["url"])
                    if image_bytes:
                        try:
                            width, height = _fit_image_size(
                                image_bytes,
                                max_width=5 * inch,
                                max_height=3.75 * inch,
                            )
                            img = ReportLabImage(
                                BytesIO(image_bytes), width=width, height=height
                            )
                            story.append(img)
                            story.append(Spacer(1, 0.2 * inch))
                        except Exception as exc:
                            logger.warning("PDF 图片嵌入失败: {}", exc)
            story.append(PageBreak())
        doc.build(story)
        buffer.seek(0)
        return buffer.read()


# Shared module-level instance; constructing it registers the PDF font once at import time.
pdf_service = PDFService()