feat(api): embed memoir chapter images in PDF export and strip placeholders

Made-with: Cursor
2026-03-10 16:06:09 +08:00
parent 879466fde1
commit f5afeb39ef
2 changed files with 124 additions and 47 deletions
--- a/api/services/pdf_service.py
+++ b/api/services/pdf_service.py
@@ -1,57 +1,81 @@
 """
 PDF 生成服务
 """
+import logging
+import re
 from typing import List
-from reportlab.lib.pagesizes import letter, A4
+
+import httpx
+from reportlab.lib.pagesizes import A4
 from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
 from reportlab.lib.units import inch
-from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak
+from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak, Image as ReportLabImage
 from reportlab.pdfbase import pdfmetrics
-from reportlab.pdfbase.ttfonts import TTFont
 from reportlab.pdfbase.cidfonts import UnicodeCIDFont
 from io import BytesIO
-import os
+
+logger = logging.getLogger(__name__)
+
+# Matches ``{{IMAGE:...}}`` and ``{{{{IMAGE:...}}}}`` tokens; DOTALL lets the
+# token body span line breaks.
+PLACEHOLDER_RE = re.compile(r"\{\{\{\{IMAGE:.*?\}\}\}\}|\{\{IMAGE:.*?\}\}", re.DOTALL)
+
+
+def strip_image_placeholders(text: str) -> str:
+    """Return ``text`` without image placeholder tokens, trimmed at both ends.
+
+    A falsy ``text`` (``None`` or ``""``) yields an empty string.
+    """
+    source = text if text else ""
+    without_tokens = PLACEHOLDER_RE.sub("", source)
+    return without_tokens.strip()
+
+
+def split_content_blocks(content: str, images: list[dict]) -> list[dict]:
+    """Split chapter content into an ordered list of text and image blocks.
+
+    Each image dict may carry ``placeholder`` (the token embedded in the
+    text), ``index``, ``status`` and ``url``. The text is cut at every
+    placeholder found in it; an image block is emitted at that spot only when
+    the image's status is ``"completed"`` and it has a ``url``. Placeholder
+    tokens left inside the text pieces are stripped.
+
+    Args:
+        content: Raw chapter text; ``None`` is treated as empty.
+        images: Image descriptors; ``None`` is treated as empty.
+
+    Returns:
+        Blocks shaped ``{"type": "text", "value": str}`` or
+        ``{"type": "image", "url": str}``, in reading order.
+    """
+    text = content or ""
+
+    # Locate each placeholder in the text. Images are visited in ``index``
+    # order (a missing or None index counts as 0) so that images sharing an
+    # identical placeholder string claim successive occurrences.
+    resume_at: dict[str, int] = {}
+    hits: list[tuple[int, int, dict]] = []
+    for image in sorted(images or [], key=lambda item: item.get("index") or 0):
+        placeholder = image.get("placeholder")
+        if not placeholder:
+            continue
+        start = text.find(placeholder, resume_at.get(placeholder, 0))
+        if start == -1:
+            continue
+        end = start + len(placeholder)
+        resume_at[placeholder] = end
+        hits.append((start, end, image))
+
+    # Cut by position in the text rather than by index: when the two orders
+    # disagree, cutting by index would swallow the earlier placeholder into a
+    # text piece and silently drop its image.
+    hits.sort(key=lambda hit: hit[0])
+
+    blocks: list[dict] = []
+    cursor = 0
+    for start, end, image in hits:
+        if start < cursor:
+            # Overlaps a placeholder that was already consumed.
+            continue
+        piece = strip_image_placeholders(text[cursor:start])
+        if piece:
+            blocks.append({"type": "text", "value": piece})
+        if image.get("status") == "completed" and image.get("url"):
+            blocks.append({"type": "image", "url": image["url"]})
+        cursor = end
+
+    tail = strip_image_placeholders(text[cursor:])
+    if tail:
+        blocks.append({"type": "text", "value": tail})
+    return blocks


 class PDFService:
    """PDF 生成服务"""
-    
+
    def __init__(self):
-        # 尝试注册中文字体
+        """Select the font name used for PDF text.
+
+        Registers ReportLab's ``STSong-Light`` CID font and stores its name in
+        ``self.chinese_font``; if registration raises, ``Helvetica`` is stored
+        instead.
+        """
        try:
-            # 使用系统字体或 ReportLab 内置的中文字体
-            # 如果没有中文字体文件，使用 UnicodeCIDFont
            pdfmetrics.registerFont(UnicodeCIDFont('STSong-Light'))
            self.chinese_font = 'STSong-Light'
        except Exception:
-            # 如果注册失败，使用默认字体（可能不支持中文）
+            # Fall back to the default font, which may not render Chinese text.
            self.chinese_font = 'Helvetica'
-    
+
+    async def _fetch_image_bytes(self, url: str) -> bytes | None:
+        """Download the image at ``url`` and return its raw bytes.
+
+        Any exception raised during the request, including the one raised by
+        ``raise_for_status()`` for an error response, is logged as a warning
+        and reported as ``None`` so the caller can skip the image.
+        """
+        try:
+            async with httpx.AsyncClient(timeout=30) as http:
+                reply = await http.get(url)
+                reply.raise_for_status()
+        except Exception as exc:
+            logger.warning(f"PDF 图片下载失败: url={url}, error={exc}")
+            return None
+        return reply.content
+
    async def generate_pdf(self, book, chapters: List) -> bytes:
-        """
-        生成 PDF
-        
-        Args:
-            book: 回忆录对象
-            chapters: 章节列表
-        
-        Returns:
-            PDF 字节数据
-        """
        buffer = BytesIO()
        doc = SimpleDocTemplate(buffer, pagesize=A4)
-        
-        # 创建样式
+
        styles = getSampleStyleSheet()
        title_style = ParagraphStyle(
            'CustomTitle',
            parent=styles['Heading1'],
            fontSize=24,
            spaceAfter=30,
-            alignment=1,  # 居中
+            alignment=1,
            fontName=self.chinese_font
        )
-        
+
        heading_style = ParagraphStyle(
            'CustomHeading',
            parent=styles['Heading1'],
@@ -59,7 +83,7 @@ class PDFService:
            spaceAfter=12,
            fontName=self.chinese_font
        )
-        
+
        normal_style = ParagraphStyle(
            'CustomNormal',
            parent=styles['Normal'],
@@ -67,41 +91,48 @@ class PDFService:
            leading=18,
            fontName=self.chinese_font
        )
-        
-        # 构建内容
+
        story = []
-        
-        # 封面
+
        story.append(Paragraph(book.title, title_style))
-        story.append(Spacer(1, 0.5*inch))
+        story.append(Spacer(1, 0.5 * inch))
        story.append(PageBreak())
-        
-        # 目录
+
        story.append(Paragraph("目录", heading_style))
-        story.append(Spacer(1, 0.2*inch))
+        story.append(Spacer(1, 0.2 * inch))
        for i, chapter in enumerate(chapters, 1):
            story.append(Paragraph(f"{i}. {chapter.title}", normal_style))
        story.append(PageBreak())
-        
-        # 章节内容
+
        for chapter in chapters:
            story.append(Paragraph(chapter.title, heading_style))
-            story.append(Spacer(1, 0.2*inch))
-            
-            # 分段处理内容
-            paragraphs = chapter.content.split('\n\n')
-            for para in paragraphs:
-                if para.strip():
-                    story.append(Paragraph(para.strip(), normal_style))
-                    story.append(Spacer(1, 0.1*inch))
-            
+            story.append(Spacer(1, 0.2 * inch))
+
+            images = getattr(chapter, "images", None) or []
+            blocks = split_content_blocks(chapter.content, images)
+
+            for block in blocks:
+                if block["type"] == "text":
+                    paragraphs = block["value"].split('\n\n')
+                    for para in paragraphs:
+                        if para.strip():
+                            story.append(Paragraph(para.strip(), normal_style))
+                            story.append(Spacer(1, 0.1 * inch))
+                elif block["type"] == "image":
+                    image_bytes = await self._fetch_image_bytes(block["url"])
+                    if image_bytes:
+                        try:
+                            img = ReportLabImage(BytesIO(image_bytes), width=5 * inch, height=3.75 * inch)
+                            story.append(img)
+                            story.append(Spacer(1, 0.2 * inch))
+                        except Exception as exc:
+                            logger.warning(f"PDF 图片嵌入失败: {exc}")
+
            story.append(PageBreak())
-        
-        # 生成 PDF
+
        doc.build(story)
        buffer.seek(0)
        return buffer.read()


-# 全局实例
+# Global module-level PDFService instance.
 pdf_service = PDFService()