api/services/pdf_service.py

"""
PDF 生成服务
"""
import logging
import re
from io import BytesIO
from typing import List
from xml.sax.saxutils import escape

import httpx
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.cidfonts import UnicodeCIDFont
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak, Image as ReportLabImage

from services.memoir_images.storage import TencentCosStorageService, resolve_image_storage_key

logger = logging.getLogger(__name__)

# Matches an inline image placeholder token in chapter text, in either the
# four-brace form ``{{{{IMAGE:...}}}}`` or the two-brace form ``{{IMAGE:...}}``.
# The four-brace alternative is listed first so it is tried before the
# two-brace one. The body match is non-greedy and, because of re.DOTALL,
# may span line breaks.
PLACEHOLDER_RE = re.compile(r"\{\{\{\{IMAGE:.*?\}\}\}\}|\{\{IMAGE:.*?\}\}", re.DOTALL)


def strip_image_placeholders(text: str) -> str:
    """Remove every image placeholder token from *text* and trim the result.

    A falsy *text* (``None`` or the empty string) is treated as empty text,
    so the function always returns a ``str``.
    """
    source = text if text else ""
    without_tokens = PLACEHOLDER_RE.sub("", source)
    return without_tokens.strip()


def split_content_blocks(content: str, images: list[dict]) -> list[dict]:
    """Split chapter text into an ordered list of text and image blocks.

    Images are visited in ascending ``index`` order. For each image whose
    ``placeholder`` string still occurs in the not-yet-consumed text, the text
    in front of the first occurrence becomes a text block (after any other
    placeholder tokens are stripped from it). When the image has
    ``status == "completed"`` and a truthy ``url``, an image block follows.
    Whatever text is left at the end becomes a final text block.

    Args:
        content: Chapter text with embedded placeholders. ``None`` is treated
            as empty text.
        images: Image descriptors (dicts) read through the keys ``index``,
            ``placeholder``, ``status`` and ``url``. ``None`` is treated as
            an empty list.

    Returns:
        A list of ``{"type": "text", "value": ...}`` and
        ``{"type": "image", "url": ...}`` dicts in reading order. Text
        segments that are empty after stripping are omitted.
    """
    blocks: list[dict] = []
    # A chapter without a body must not crash the export: the membership test
    # below (``placeholder in remaining``) raises TypeError on None.
    remaining = content or ""
    # ``index`` may be missing or explicitly None; both sort as 0 so the sort
    # key never compares None against an int.
    ordered_images = sorted(images or [], key=lambda item: item.get("index") or 0)

    for image in ordered_images:
        placeholder = image.get("placeholder")
        if not placeholder or placeholder not in remaining:
            continue
        # Consume only the first occurrence; later text stays available for
        # the images that follow.
        before, remaining = remaining.split(placeholder, 1)
        cleaned_before = strip_image_placeholders(before)
        if cleaned_before:
            blocks.append({"type": "text", "value": cleaned_before})
        # An image that is not ready still has its placeholder removed from
        # the text, it just contributes no image block.
        if image.get("status") == "completed" and image.get("url"):
            blocks.append({"type": "image", "url": image["url"]})

    cleaned_remaining = strip_image_placeholders(remaining)
    if cleaned_remaining:
        blocks.append({"type": "text", "value": cleaned_remaining})
    return blocks


def _prepare_pdf_image_assets(images: list[dict]) -> list[dict]:
    storage = TencentCosStorageService.from_env()
    prepared_assets: list[dict] = []

    for item in images or []:
        asset = dict(item)
        storage_key = resolve_image_storage_key(asset)
        if asset.get("status") == "completed" and storage_key:
            try:
                asset["url"] = storage.get_download_url(storage_key)
            except Exception as exc:
                logger.warning(f"PDF 图片签名失败: key={storage_key}, error={exc}")
        prepared_assets.append(asset)

    return prepared_assets


class PDFService:
    """PDF 生成服务"""

    def __init__(self):
        try:
            pdfmetrics.registerFont(UnicodeCIDFont('STSong-Light'))
            self.chinese_font = 'STSong-Light'
        except Exception:
            self.chinese_font = 'Helvetica'

    async def _fetch_image_bytes(self, url: str) -> bytes | None:
        try:
            async with httpx.AsyncClient(timeout=30) as client:
                response = await client.get(url)
                response.raise_for_status()
                return response.content
        except Exception as exc:
            logger.warning(f"PDF 图片下载失败: url={url}, error={exc}")
            return None

    async def generate_pdf(self, book, chapters: List) -> bytes:
        buffer = BytesIO()
        doc = SimpleDocTemplate(buffer, pagesize=A4)

        styles = getSampleStyleSheet()
        title_style = ParagraphStyle(
            'CustomTitle',
            parent=styles['Heading1'],
            fontSize=24,
            spaceAfter=30,
            alignment=1,
            fontName=self.chinese_font
        )

        heading_style = ParagraphStyle(
            'CustomHeading',
            parent=styles['Heading1'],
            fontSize=18,
            spaceAfter=12,
            fontName=self.chinese_font
        )

        normal_style = ParagraphStyle(
            'CustomNormal',
            parent=styles['Normal'],
            fontSize=12,
            leading=18,
            fontName=self.chinese_font
        )

        story = []

        story.append(Paragraph(book.title, title_style))
        story.append(Spacer(1, 0.5 * inch))
        story.append(PageBreak())

        story.append(Paragraph("目录", heading_style))
        story.append(Spacer(1, 0.2 * inch))
        for i, chapter in enumerate(chapters, 1):
            story.append(Paragraph(f"{i}. {chapter.title}", normal_style))
        story.append(PageBreak())

        for chapter in chapters:
            story.append(Paragraph(chapter.title, heading_style))
            story.append(Spacer(1, 0.2 * inch))

            images = _prepare_pdf_image_assets(getattr(chapter, "images", None) or [])
            blocks = split_content_blocks(chapter.content, images)

            for block in blocks:
                if block["type"] == "text":
                    paragraphs = block["value"].split('\n\n')
                    for para in paragraphs:
                        if para.strip():
                            story.append(Paragraph(para.strip(), normal_style))
                            story.append(Spacer(1, 0.1 * inch))
                elif block["type"] == "image":
                    image_bytes = await self._fetch_image_bytes(block["url"])
                    if image_bytes:
                        try:
                            img = ReportLabImage(BytesIO(image_bytes), width=5 * inch, height=3.75 * inch)
                            story.append(img)
                            story.append(Spacer(1, 0.2 * inch))
                        except Exception as exc:
                            logger.warning(f"PDF 图片嵌入失败: {exc}")

            story.append(PageBreak())

        doc.build(story)
        buffer.seek(0)
        return buffer.read()


# Module-level shared instance. Constructing it runs PDFService.__init__,
# so the font registration attempt happens when this module is imported.
pdf_service = PDFService()
添加API服务模块 2026-01-07 11:56:46 +08:00			`"""`
			`PDF 生成服务`
			`"""`
feat(api): embed memoir chapter images in PDF export and strip placeholders Made-with: Cursor 2026-03-10 16:06:09 +08:00			`import logging`
			`import re`
添加API服务模块 2026-01-07 11:56:46 +08:00			`from typing import List`
feat(api): embed memoir chapter images in PDF export and strip placeholders Made-with: Cursor 2026-03-10 16:06:09 +08:00
			`import httpx`
			`from reportlab.lib.pagesizes import A4`
添加API服务模块 2026-01-07 11:56:46 +08:00			`from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle`
			`from reportlab.lib.units import inch`
feat(api): embed memoir chapter images in PDF export and strip placeholders Made-with: Cursor 2026-03-10 16:06:09 +08:00			`from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak, Image as ReportLabImage`
添加API服务模块 2026-01-07 11:56:46 +08:00			`from reportlab.pdfbase import pdfmetrics`
			`from reportlab.pdfbase.cidfonts import UnicodeCIDFont`
			`from io import BytesIO`
Fix memoir image delivery and Android rendering 2026-03-11 10:06:12 +08:00			`from services.memoir_images.storage import TencentCosStorageService, resolve_image_storage_key`
feat(api): embed memoir chapter images in PDF export and strip placeholders Made-with: Cursor 2026-03-10 16:06:09 +08:00
			`logger = logging.getLogger(__name__)`

			`PLACEHOLDER_RE = re.compile(r"\{\{\{\{IMAGE:.*?\}\}\}\}\|\{\{IMAGE:.*?\}\}", re.DOTALL)`


			`def strip_image_placeholders(text: str) -> str:`
			`return PLACEHOLDER_RE.sub("", text or "").strip()`


			`def split_content_blocks(content: str, images: list[dict]) -> list[dict]:`
			`blocks: list[dict] = []`
			`remaining = content`
			`for image in sorted(images or [], key=lambda item: item.get("index", 0)):`
			`placeholder = image.get("placeholder")`
			`if not placeholder or placeholder not in remaining:`
			`continue`
			`before, remaining = remaining.split(placeholder, 1)`
			`cleaned_before = strip_image_placeholders(before)`
			`if cleaned_before:`
			`blocks.append({"type": "text", "value": cleaned_before})`
			`if image.get("status") == "completed" and image.get("url"):`
			`blocks.append({"type": "image", "url": image["url"]})`
			`cleaned_remaining = strip_image_placeholders(remaining)`
			`if cleaned_remaining:`
			`blocks.append({"type": "text", "value": cleaned_remaining})`
			`return blocks`
添加API服务模块 2026-01-07 11:56:46 +08:00

Fix memoir image delivery and Android rendering 2026-03-11 10:06:12 +08:00			`def _prepare_pdf_image_assets(images: list[dict]) -> list[dict]:`
			`storage = TencentCosStorageService.from_env()`
			`prepared_assets: list[dict] = []`

			`for item in images or []:`
			`asset = dict(item)`
			`storage_key = resolve_image_storage_key(asset)`
			`if asset.get("status") == "completed" and storage_key:`
			`try:`
			`asset["url"] = storage.get_download_url(storage_key)`
			`except Exception as exc:`
			`logger.warning(f"PDF 图片签名失败: key={storage_key}, error={exc}")`
			`prepared_assets.append(asset)`

			`return prepared_assets`


添加API服务模块 2026-01-07 11:56:46 +08:00			`class PDFService:`
			`"""PDF 生成服务"""`
feat(api): embed memoir chapter images in PDF export and strip placeholders Made-with: Cursor 2026-03-10 16:06:09 +08:00
添加API服务模块 2026-01-07 11:56:46 +08:00			`def __init__(self):`
			`try:`
			`pdfmetrics.registerFont(UnicodeCIDFont('STSong-Light'))`
			`self.chinese_font = 'STSong-Light'`
			`except Exception:`
			`self.chinese_font = 'Helvetica'`
feat(api): embed memoir chapter images in PDF export and strip placeholders Made-with: Cursor 2026-03-10 16:06:09 +08:00
			`async def _fetch_image_bytes(self, url: str) -> bytes \| None:`
			`try:`
			`async with httpx.AsyncClient(timeout=30) as client:`
			`response = await client.get(url)`
			`response.raise_for_status()`
			`return response.content`
			`except Exception as exc:`
			`logger.warning(f"PDF 图片下载失败: url={url}, error={exc}")`
			`return None`

添加API服务模块 2026-01-07 11:56:46 +08:00			`async def generate_pdf(self, book, chapters: List) -> bytes:`
			`buffer = BytesIO()`
			`doc = SimpleDocTemplate(buffer, pagesize=A4)`
feat(api): embed memoir chapter images in PDF export and strip placeholders Made-with: Cursor 2026-03-10 16:06:09 +08:00
添加API服务模块 2026-01-07 11:56:46 +08:00			`styles = getSampleStyleSheet()`
			`title_style = ParagraphStyle(`
			`'CustomTitle',`
			`parent=styles['Heading1'],`
			`fontSize=24,`
			`spaceAfter=30,`
feat(api): embed memoir chapter images in PDF export and strip placeholders Made-with: Cursor 2026-03-10 16:06:09 +08:00			`alignment=1,`
添加API服务模块 2026-01-07 11:56:46 +08:00			`fontName=self.chinese_font`
			`)`
feat(api): embed memoir chapter images in PDF export and strip placeholders Made-with: Cursor 2026-03-10 16:06:09 +08:00
添加API服务模块 2026-01-07 11:56:46 +08:00			`heading_style = ParagraphStyle(`
			`'CustomHeading',`
			`parent=styles['Heading1'],`
			`fontSize=18,`
			`spaceAfter=12,`
			`fontName=self.chinese_font`
			`)`
feat(api): embed memoir chapter images in PDF export and strip placeholders Made-with: Cursor 2026-03-10 16:06:09 +08:00
添加API服务模块 2026-01-07 11:56:46 +08:00			`normal_style = ParagraphStyle(`
			`'CustomNormal',`
			`parent=styles['Normal'],`
			`fontSize=12,`
			`leading=18,`
			`fontName=self.chinese_font`
			`)`
feat(api): embed memoir chapter images in PDF export and strip placeholders Made-with: Cursor 2026-03-10 16:06:09 +08:00
添加API服务模块 2026-01-07 11:56:46 +08:00			`story = []`
feat(api): embed memoir chapter images in PDF export and strip placeholders Made-with: Cursor 2026-03-10 16:06:09 +08:00
添加API服务模块 2026-01-07 11:56:46 +08:00			`story.append(Paragraph(book.title, title_style))`
feat(api): embed memoir chapter images in PDF export and strip placeholders Made-with: Cursor 2026-03-10 16:06:09 +08:00			`story.append(Spacer(1, 0.5 * inch))`
添加API服务模块 2026-01-07 11:56:46 +08:00			`story.append(PageBreak())`
feat(api): embed memoir chapter images in PDF export and strip placeholders Made-with: Cursor 2026-03-10 16:06:09 +08:00
添加API服务模块 2026-01-07 11:56:46 +08:00			`story.append(Paragraph("目录", heading_style))`
feat(api): embed memoir chapter images in PDF export and strip placeholders Made-with: Cursor 2026-03-10 16:06:09 +08:00			`story.append(Spacer(1, 0.2 * inch))`
添加API服务模块 2026-01-07 11:56:46 +08:00			`for i, chapter in enumerate(chapters, 1):`
			`story.append(Paragraph(f"{i}. {chapter.title}", normal_style))`
			`story.append(PageBreak())`
feat(api): embed memoir chapter images in PDF export and strip placeholders Made-with: Cursor 2026-03-10 16:06:09 +08:00
添加API服务模块 2026-01-07 11:56:46 +08:00			`for chapter in chapters:`
			`story.append(Paragraph(chapter.title, heading_style))`
feat(api): embed memoir chapter images in PDF export and strip placeholders Made-with: Cursor 2026-03-10 16:06:09 +08:00			`story.append(Spacer(1, 0.2 * inch))`

Fix memoir image delivery and Android rendering 2026-03-11 10:06:12 +08:00			`images = _prepare_pdf_image_assets(getattr(chapter, "images", None) or [])`
feat(api): embed memoir chapter images in PDF export and strip placeholders Made-with: Cursor 2026-03-10 16:06:09 +08:00			`blocks = split_content_blocks(chapter.content, images)`

			`for block in blocks:`
			`if block["type"] == "text":`
			`paragraphs = block["value"].split('\n\n')`
			`for para in paragraphs:`
			`if para.strip():`
			`story.append(Paragraph(para.strip(), normal_style))`
			`story.append(Spacer(1, 0.1 * inch))`
			`elif block["type"] == "image":`
			`image_bytes = await self._fetch_image_bytes(block["url"])`
			`if image_bytes:`
			`try:`
			`img = ReportLabImage(BytesIO(image_bytes), width=5 * inch, height=3.75 * inch)`
			`story.append(img)`
			`story.append(Spacer(1, 0.2 * inch))`
			`except Exception as exc:`
			`logger.warning(f"PDF 图片嵌入失败: {exc}")`

添加API服务模块 2026-01-07 11:56:46 +08:00			`story.append(PageBreak())`
feat(api): embed memoir chapter images in PDF export and strip placeholders Made-with: Cursor 2026-03-10 16:06:09 +08:00
添加API服务模块 2026-01-07 11:56:46 +08:00			`doc.build(story)`
			`buffer.seek(0)`
			`return buffer.read()`


			`pdf_service = PDFService()`