feat(api): embed memoir chapter images in PDF export and strip placeholders
Made-with: Cursor
This commit is contained in:
@@ -1,57 +1,81 @@
|
||||
"""
|
||||
PDF 生成服务
|
||||
"""
|
||||
import logging
|
||||
import re
|
||||
from typing import List
|
||||
from reportlab.lib.pagesizes import letter, A4
|
||||
|
||||
import httpx
|
||||
from reportlab.lib.pagesizes import A4
|
||||
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
||||
from reportlab.lib.units import inch
|
||||
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak
|
||||
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak, Image as ReportLabImage
|
||||
from reportlab.pdfbase import pdfmetrics
|
||||
from reportlab.pdfbase.ttfonts import TTFont
|
||||
from reportlab.pdfbase.cidfonts import UnicodeCIDFont
|
||||
from io import BytesIO
|
||||
import os
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Matches an image placeholder written with either four or two surrounding
# braces; DOTALL lets the text after "IMAGE:" span line breaks.
PLACEHOLDER_RE = re.compile(r"\{\{\{\{IMAGE:.*?\}\}\}\}|\{\{IMAGE:.*?\}\}", re.DOTALL)


def strip_image_placeholders(text: str) -> str:
    """Return ``text`` without image placeholders and without outer whitespace.

    Args:
        text: Raw text that may contain ``{{IMAGE:...}}`` or
            ``{{{{IMAGE:...}}}}`` markers. A falsy value (``None`` or ``""``)
            is treated as an empty string.

    Returns:
        The text with every placeholder match removed, then stripped of
        leading and trailing whitespace.
    """
    source = text if text else ""
    without_placeholders = PLACEHOLDER_RE.sub("", source)
    return without_placeholders.strip()
|
||||
|
||||
|
||||
def split_content_blocks(content: str, images: list[dict]) -> list[dict]:
    """Split chapter text into ordered text/image blocks at image placeholders.

    Placeholders are consumed in the order they occur in ``content``. The
    ``index`` field is only used to break ties between images whose
    placeholder text is identical, so an image is not lost when its
    placeholder appears earlier in the text than that of a lower-index image.

    Args:
        content: Chapter text, possibly containing placeholder strings.
            A falsy value is treated as empty text.
        images: Dicts read via the keys ``index``, ``placeholder``, ``status``
            and ``url``. ``None`` or an empty list means "no images".

    Returns:
        A list of ``{"type": "text", "value": str}`` and
        ``{"type": "image", "url": str}`` dicts in reading order. An image
        block is emitted only when ``status == "completed"`` and ``url`` is
        truthy; otherwise its placeholder is simply dropped. Text blocks have
        any remaining placeholders stripped and are omitted when empty.
    """
    blocks: list[dict] = []
    remaining = content or ""

    def _append_text(raw: str) -> None:
        # Placeholders without a matching image record are removed here.
        cleaned = strip_image_placeholders(raw)
        if cleaned:
            blocks.append({"type": "text", "value": cleaned})

    # Index order is kept as the tie-break; a missing/None index counts as 0.
    pending = [
        image
        for image in sorted(images or [], key=lambda item: item.get("index") or 0)
        if image.get("placeholder")
    ]

    while pending:
        # Pick the pending image whose placeholder occurs first in the
        # not-yet-consumed text; strict "<" keeps the lowest index on ties.
        hit_pos = -1
        hit_slot = -1
        for slot, image in enumerate(pending):
            pos = remaining.find(image["placeholder"])
            if pos != -1 and (hit_pos == -1 or pos < hit_pos):
                hit_pos, hit_slot = pos, slot
        if hit_slot == -1:
            # None of the remaining placeholders occur in the text.
            break

        image = pending.pop(hit_slot)
        _append_text(remaining[:hit_pos])
        remaining = remaining[hit_pos + len(image["placeholder"]):]

        if image.get("status") == "completed" and image.get("url"):
            blocks.append({"type": "image", "url": image["url"]})

    _append_text(remaining)
    return blocks
|
||||
|
||||
|
||||
class PDFService:
|
||||
"""PDF 生成服务"""
|
||||
|
||||
|
||||
def __init__(self):
    """Register a Chinese-capable font and remember which font name to use.

    Tries to register the ``STSong-Light`` Unicode CID font. If registration
    raises, the failure is logged and ``Helvetica`` is used instead, which
    may not be able to render Chinese text.
    """
    try:
        pdfmetrics.registerFont(UnicodeCIDFont('STSong-Light'))
    except Exception as exc:
        # Best-effort fallback: keep the service usable, but leave a trace so
        # PDFs with missing Chinese glyphs can be diagnosed.
        logger.warning("PDF 中文字体注册失败, 回退到 Helvetica: %s", exc)
        self.chinese_font = 'Helvetica'
    else:
        self.chinese_font = 'STSong-Light'
|
||||
|
||||
|
||||
async def _fetch_image_bytes(self, url: str) -> bytes | None:
    """Download the image at ``url`` and return its raw bytes.

    Args:
        url: Address passed to an ``httpx.AsyncClient`` GET request
            (client created with ``timeout=30``).

    Returns:
        The response body, or ``None`` when the request, the status check,
        or client teardown raises; the failure is logged as a warning.
    """
    payload = None
    try:
        async with httpx.AsyncClient(timeout=30) as http:
            reply = await http.get(url)
            reply.raise_for_status()
            payload = reply.content
    except Exception as exc:
        logger.warning(f"PDF 图片下载失败: url={url}, error={exc}")
        return None
    return payload
|
||||
|
||||
async def generate_pdf(self, book, chapters: List) -> bytes:
|
||||
"""
|
||||
生成 PDF
|
||||
|
||||
Args:
|
||||
book: 回忆录对象
|
||||
chapters: 章节列表
|
||||
|
||||
Returns:
|
||||
PDF 字节数据
|
||||
"""
|
||||
buffer = BytesIO()
|
||||
doc = SimpleDocTemplate(buffer, pagesize=A4)
|
||||
|
||||
# 创建样式
|
||||
|
||||
styles = getSampleStyleSheet()
|
||||
title_style = ParagraphStyle(
|
||||
'CustomTitle',
|
||||
parent=styles['Heading1'],
|
||||
fontSize=24,
|
||||
spaceAfter=30,
|
||||
alignment=1, # 居中
|
||||
alignment=1,
|
||||
fontName=self.chinese_font
|
||||
)
|
||||
|
||||
|
||||
heading_style = ParagraphStyle(
|
||||
'CustomHeading',
|
||||
parent=styles['Heading1'],
|
||||
@@ -59,7 +83,7 @@ class PDFService:
|
||||
spaceAfter=12,
|
||||
fontName=self.chinese_font
|
||||
)
|
||||
|
||||
|
||||
normal_style = ParagraphStyle(
|
||||
'CustomNormal',
|
||||
parent=styles['Normal'],
|
||||
@@ -67,41 +91,48 @@ class PDFService:
|
||||
leading=18,
|
||||
fontName=self.chinese_font
|
||||
)
|
||||
|
||||
# 构建内容
|
||||
|
||||
story = []
|
||||
|
||||
# 封面
|
||||
|
||||
story.append(Paragraph(book.title, title_style))
|
||||
story.append(Spacer(1, 0.5*inch))
|
||||
story.append(Spacer(1, 0.5 * inch))
|
||||
story.append(PageBreak())
|
||||
|
||||
# 目录
|
||||
|
||||
story.append(Paragraph("目录", heading_style))
|
||||
story.append(Spacer(1, 0.2*inch))
|
||||
story.append(Spacer(1, 0.2 * inch))
|
||||
for i, chapter in enumerate(chapters, 1):
|
||||
story.append(Paragraph(f"{i}. {chapter.title}", normal_style))
|
||||
story.append(PageBreak())
|
||||
|
||||
# 章节内容
|
||||
|
||||
for chapter in chapters:
|
||||
story.append(Paragraph(chapter.title, heading_style))
|
||||
story.append(Spacer(1, 0.2*inch))
|
||||
|
||||
# 分段处理内容
|
||||
paragraphs = chapter.content.split('\n\n')
|
||||
for para in paragraphs:
|
||||
if para.strip():
|
||||
story.append(Paragraph(para.strip(), normal_style))
|
||||
story.append(Spacer(1, 0.1*inch))
|
||||
|
||||
story.append(Spacer(1, 0.2 * inch))
|
||||
|
||||
images = getattr(chapter, "images", None) or []
|
||||
blocks = split_content_blocks(chapter.content, images)
|
||||
|
||||
for block in blocks:
|
||||
if block["type"] == "text":
|
||||
paragraphs = block["value"].split('\n\n')
|
||||
for para in paragraphs:
|
||||
if para.strip():
|
||||
story.append(Paragraph(para.strip(), normal_style))
|
||||
story.append(Spacer(1, 0.1 * inch))
|
||||
elif block["type"] == "image":
|
||||
image_bytes = await self._fetch_image_bytes(block["url"])
|
||||
if image_bytes:
|
||||
try:
|
||||
img = ReportLabImage(BytesIO(image_bytes), width=5 * inch, height=3.75 * inch)
|
||||
story.append(img)
|
||||
story.append(Spacer(1, 0.2 * inch))
|
||||
except Exception as exc:
|
||||
logger.warning(f"PDF 图片嵌入失败: {exc}")
|
||||
|
||||
story.append(PageBreak())
|
||||
|
||||
# 生成 PDF
|
||||
|
||||
doc.build(story)
|
||||
buffer.seek(0)
|
||||
return buffer.read()
|
||||
|
||||
|
||||
# Global instance, created when this module is imported
|
||||
pdf_service = PDFService()
|
||||
|
||||
46
api/tests/test_pdf_service_images.py
Normal file
46
api/tests/test_pdf_service_images.py
Normal file
@@ -0,0 +1,46 @@
|
||||
import unittest
|
||||
from unittest.mock import AsyncMock, patch, MagicMock
|
||||
|
||||
from api.services.pdf_service import PDFService
|
||||
|
||||
|
||||
class PDFServiceImagesTest(unittest.IsolatedAsyncioTestCase):
    """Checks that PDF export downloads chapter images and drops placeholders."""

    @patch("api.services.pdf_service.httpx.AsyncClient")
    async def test_generate_pdf_embeds_completed_images_and_removes_placeholders(self, async_client_cls):
        """A completed image is fetched once and its placeholder is not rendered."""
        # Imported here so the module-level import list stays untouched.
        from api.services.pdf_service import split_content_blocks

        image_url = "https://cos.example.com/0.png"
        placeholder = "{{{{IMAGE:南方小镇的青石板路}}}}"
        content = "那条路我一直记得。\n\n" + placeholder + "\n\n奶奶常坐在那里。"
        images = [
            {
                "index": 0,
                "placeholder": placeholder,
                "url": image_url,
                "status": "completed",
            }
        ]

        # Minimal 1x1 PNG served by the mocked HTTP client.
        png_bytes = (
            b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01"
            b"\x08\x02\x00\x00\x00\x90wS\xde\x00\x00\x00\x0cIDATx\x9cc```\x00\x00"
            b"\x00\x04\x00\x01\xf6\x178U\x00\x00\x00\x00IEND\xaeB`\x82"
        )
        mock_response = MagicMock()
        mock_response.content = png_bytes
        mock_response.raise_for_status = MagicMock()

        mock_client = AsyncMock()
        mock_client.get.return_value = mock_response
        # `async with httpx.AsyncClient(...) as client` must yield mock_client.
        async_client_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client)
        async_client_cls.return_value.__aexit__ = AsyncMock(return_value=False)

        service = PDFService()
        book = type("BookStub", (), {"title": "我的回忆录"})()
        chapter = type(
            "ChapterStub",
            (),
            {
                "title": "童年的夏天",
                "content": content,
                "images": images,
            },
        )()

        pdf_bytes = await service.generate_pdf(book, [chapter])

        self.assertTrue(pdf_bytes.startswith(b"%PDF"))
        self.assertGreater(len(pdf_bytes), 100)
        # NOTE(review): PDF page streams are normally compressed/encoded, so
        # this raw-bytes check alone is weak; the block-level check below is
        # what actually pins placeholder removal.
        self.assertNotIn(b"IMAGE:", pdf_bytes)

        # The completed image must actually have been requested, exactly once.
        mock_client.get.assert_awaited_once_with(image_url)

        # The placeholder is replaced by an image block between the two texts.
        self.assertEqual(
            split_content_blocks(content, images),
            [
                {"type": "text", "value": "那条路我一直记得。"},
                {"type": "image", "url": image_url},
                {"type": "text", "value": "奶奶常坐在那里。"},
            ],
        )
|
||||
Reference in New Issue
Block a user