feat(api): embed memoir chapter images in PDF export and strip placeholders

Made-with: Cursor
This commit is contained in:
Kevin
2026-03-10 16:06:09 +08:00
parent 879466fde1
commit f5afeb39ef
2 changed files with 124 additions and 47 deletions

View File

@@ -1,57 +1,81 @@
"""
PDF 生成服务
"""
import logging
import re
from typing import List
from reportlab.lib.pagesizes import letter, A4
import httpx
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak, Image as ReportLabImage
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfbase.cidfonts import UnicodeCIDFont
from io import BytesIO
import os
logger = logging.getLogger(__name__)
# Matches an image placeholder in either brace form ({{{{IMAGE:...}}}} or
# {{IMAGE:...}}); DOTALL lets the description inside span several lines.
PLACEHOLDER_RE = re.compile(r"\{\{\{\{IMAGE:.*?\}\}\}\}|\{\{IMAGE:.*?\}\}", re.DOTALL)


def strip_image_placeholders(text: str) -> str:
    """Return *text* with every image placeholder removed and the ends trimmed.

    A falsy *text* (``None`` or an empty string) yields an empty string.
    """
    source = text if text else ""
    without_markers = PLACEHOLDER_RE.sub("", source)
    return without_markers.strip()
def split_content_blocks(content: str, images: list[dict]) -> list[dict]:
    """Split chapter text into an ordered list of text and image blocks.

    Each image's ``placeholder`` string is located in *content*. The text in
    front of it becomes a ``{"type": "text", "value": ...}`` block, and the
    image becomes a ``{"type": "image", "url": ...}`` block when its
    ``status`` is ``"completed"`` and it has a ``url``. The placeholder of an
    image that is not ready is removed without emitting an image block. Any
    placeholder still present in a text segment is stripped.

    Args:
        content: Raw chapter text; ``None`` is treated as empty text.
        images: Image dicts, read through the keys ``index``,
            ``placeholder``, ``status`` and ``url``; ``None`` is treated as
            no images.

    Returns:
        Blocks in reading order. Text segments that are empty after
        stripping are omitted.
    """
    blocks: list[dict] = []
    # Missing content behaves like empty text instead of failing on the
    # substring search below.
    remaining = content or ""
    # Images that can still be placed, lowest index first. A missing or None
    # index sorts as 0 so it cannot break the comparison.
    pending = sorted(
        (image for image in images or [] if image.get("placeholder")),
        key=lambda item: item.get("index") or 0,
    )
    while pending:
        # Take the pending image whose placeholder comes first in the text
        # that is left. Following text position rather than index alone keeps
        # an image from being swallowed when the two orders disagree; ties
        # (identical placeholders) resolve to the lower index.
        chosen = -1
        chosen_pos = -1
        for slot, candidate in enumerate(pending):
            pos = remaining.find(candidate["placeholder"])
            if pos != -1 and (chosen == -1 or pos < chosen_pos):
                chosen, chosen_pos = slot, pos
        if chosen == -1:
            # None of the remaining placeholders occurs in the text.
            break
        image = pending.pop(chosen)
        before = remaining[:chosen_pos]
        remaining = remaining[chosen_pos + len(image["placeholder"]):]
        cleaned_before = strip_image_placeholders(before)
        if cleaned_before:
            blocks.append({"type": "text", "value": cleaned_before})
        if image.get("status") == "completed" and image.get("url"):
            blocks.append({"type": "image", "url": image["url"]})
    cleaned_remaining = strip_image_placeholders(remaining)
    if cleaned_remaining:
        blocks.append({"type": "text", "value": cleaned_remaining})
    return blocks
class PDFService:
"""PDF 生成服务"""
def __init__(self):
    """Register a Chinese-capable font and record the font name to use.

    ``self.chinese_font`` is set to ``'STSong-Light'`` when registration
    succeeds and to ``'Helvetica'`` otherwise.
    """
    try:
        # No Chinese font file is bundled, so rely on ReportLab's
        # UnicodeCIDFont instead of a TTF file.
        pdfmetrics.registerFont(UnicodeCIDFont('STSong-Light'))
        self.chinese_font = 'STSong-Light'
    except Exception as exc:
        # Best effort: fall back to the default font (which may not render
        # Chinese), but leave a trace so garbled output can be diagnosed.
        logger.warning(f"PDF 中文字体注册失败,回退到 Helvetica: {exc}")
        self.chinese_font = 'Helvetica'
async def _fetch_image_bytes(self, url: str) -> bytes | None:
    """Download *url* and return the response body, or ``None`` on any failure.

    Failures (including a status rejected by ``raise_for_status``) are
    logged as warnings and never propagate to the caller.
    """
    payload: bytes | None = None
    try:
        async with httpx.AsyncClient(timeout=30) as http:
            reply = await http.get(url)
            reply.raise_for_status()
            payload = reply.content
    except Exception as exc:
        logger.warning(f"PDF 图片下载失败: url={url}, error={exc}")
        # An error raised while closing the client also counts as a failure.
        payload = None
    return payload
async def generate_pdf(self, book, chapters: List) -> bytes:
"""
生成 PDF
Args:
book: 回忆录对象
chapters: 章节列表
Returns:
PDF 字节数据
"""
buffer = BytesIO()
doc = SimpleDocTemplate(buffer, pagesize=A4)
# 创建样式
styles = getSampleStyleSheet()
title_style = ParagraphStyle(
'CustomTitle',
parent=styles['Heading1'],
fontSize=24,
spaceAfter=30,
alignment=1, # 居中
alignment=1,
fontName=self.chinese_font
)
heading_style = ParagraphStyle(
'CustomHeading',
parent=styles['Heading1'],
@@ -59,7 +83,7 @@ class PDFService:
spaceAfter=12,
fontName=self.chinese_font
)
normal_style = ParagraphStyle(
'CustomNormal',
parent=styles['Normal'],
@@ -67,41 +91,48 @@ class PDFService:
leading=18,
fontName=self.chinese_font
)
# 构建内容
story = []
# 封面
story.append(Paragraph(book.title, title_style))
story.append(Spacer(1, 0.5*inch))
story.append(Spacer(1, 0.5 * inch))
story.append(PageBreak())
# 目录
story.append(Paragraph("目录", heading_style))
story.append(Spacer(1, 0.2*inch))
story.append(Spacer(1, 0.2 * inch))
for i, chapter in enumerate(chapters, 1):
story.append(Paragraph(f"{i}. {chapter.title}", normal_style))
story.append(PageBreak())
# 章节内容
for chapter in chapters:
story.append(Paragraph(chapter.title, heading_style))
story.append(Spacer(1, 0.2*inch))
# 分段处理内容
paragraphs = chapter.content.split('\n\n')
for para in paragraphs:
if para.strip():
story.append(Paragraph(para.strip(), normal_style))
story.append(Spacer(1, 0.1*inch))
story.append(Spacer(1, 0.2 * inch))
images = getattr(chapter, "images", None) or []
blocks = split_content_blocks(chapter.content, images)
for block in blocks:
if block["type"] == "text":
paragraphs = block["value"].split('\n\n')
for para in paragraphs:
if para.strip():
story.append(Paragraph(para.strip(), normal_style))
story.append(Spacer(1, 0.1 * inch))
elif block["type"] == "image":
image_bytes = await self._fetch_image_bytes(block["url"])
if image_bytes:
try:
img = ReportLabImage(BytesIO(image_bytes), width=5 * inch, height=3.75 * inch)
story.append(img)
story.append(Spacer(1, 0.2 * inch))
except Exception as exc:
logger.warning(f"PDF 图片嵌入失败: {exc}")
story.append(PageBreak())
# 生成 PDF
doc.build(story)
buffer.seek(0)
return buffer.read()
# Global instance, created once at import time
pdf_service = PDFService()

View File

@@ -0,0 +1,46 @@
import unittest
from unittest.mock import AsyncMock, patch, MagicMock
from api.services.pdf_service import PDFService
class PDFServiceImagesTest(unittest.IsolatedAsyncioTestCase):
    """Checks PDF export for a chapter that contains a completed image."""

    @patch("api.services.pdf_service.httpx.AsyncClient")
    async def test_generate_pdf_embeds_completed_images_and_removes_placeholders(self, async_client_cls):
        """A completed image is downloaded and no placeholder text reaches the PDF."""
        # Minimal valid 1x1 PNG served by the mocked HTTP client.
        png_bytes = (
            b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01"
            b"\x08\x02\x00\x00\x00\x90wS\xde\x00\x00\x00\x0cIDATx\x9cc```\x00\x00"
            b"\x00\x04\x00\x01\xf6\x178U\x00\x00\x00\x00IEND\xaeB`\x82"
        )
        mock_response = MagicMock()
        mock_response.content = png_bytes
        mock_response.raise_for_status = MagicMock()
        mock_client = AsyncMock()
        mock_client.get.return_value = mock_response
        # `async with httpx.AsyncClient(...) as client` must yield mock_client.
        async_client_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client)
        async_client_cls.return_value.__aexit__ = AsyncMock(return_value=False)

        service = PDFService()
        book = type("BookStub", (), {"title": "我的回忆录"})()
        chapter = type(
            "ChapterStub",
            (),
            {
                "title": "童年的夏天",
                "content": "那条路我一直记得。\n\n{{{{IMAGE:南方小镇的青石板路}}}}\n\n奶奶常坐在那里。",
                "images": [
                    {
                        "index": 0,
                        "placeholder": "{{{{IMAGE:南方小镇的青石板路}}}}",
                        "url": "https://cos.example.com/0.png",
                        "status": "completed",
                    }
                ],
            },
        )()

        pdf_bytes = await service.generate_pdf(book, [chapter])

        # The completed image must actually have been requested, exactly once.
        mock_client.get.assert_awaited_once_with("https://cos.example.com/0.png")
        # The output must be a PDF document, not just any non-empty bytes.
        self.assertTrue(pdf_bytes.startswith(b"%PDF"))
        self.assertGreater(len(pdf_bytes), 100)
        # NOTE(review): the PDF content stream may be compressed or encoded,
        # so this byte search is only a weak placeholder check - confirm.
        self.assertNotIn(b"IMAGE:", pdf_bytes)