feat(api): embed memoir chapter images in PDF export and strip placeholders

Made-with: Cursor
This commit is contained in:
Kevin
2026-03-10 16:06:09 +08:00
parent 879466fde1
commit f5afeb39ef
2 changed files with 124 additions and 47 deletions

View File

@@ -1,57 +1,81 @@
"""
PDF 生成服务
"""
import logging
import re
from typing import List
from reportlab.lib.pagesizes import letter, A4
import httpx
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak, Image as ReportLabImage
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfbase.cidfonts import UnicodeCIDFont
from io import BytesIO
import os
logger = logging.getLogger(__name__)
# Matches both the quadruple-brace form ({{{{IMAGE:...}}}}) and the
# double-brace form ({{IMAGE:...}}). The quadruple form is listed first so it
# is tried before the shorter alternative; DOTALL lets a marker span newlines.
PLACEHOLDER_RE = re.compile(r"\{\{\{\{IMAGE:.*?\}\}\}\}|\{\{IMAGE:.*?\}\}", re.DOTALL)


def strip_image_placeholders(text: str) -> str:
    """Return *text* with every image placeholder removed and the ends trimmed.

    Falsy input (``None`` or an empty string) yields an empty string.
    """
    if not text:
        return ""
    without_markers = PLACEHOLDER_RE.sub("", text)
    return without_markers.strip()
def split_content_blocks(content: str | None, images: list[dict] | None) -> list[dict]:
    """Split chapter text into an ordered list of text and image blocks.

    Each image dict may carry ``placeholder``, ``index``, ``status`` and
    ``url`` keys. The text is cut at each known placeholder, in the order the
    placeholders actually occur in the text; ``index`` only breaks ties, for
    example when two images share the same placeholder string.

    Args:
        content: Chapter text, possibly containing image placeholders.
            ``None`` is treated as an empty string.
        images: Image records for the chapter. ``None`` is treated as empty.

    Returns:
        A list of ``{"type": "text", "value": ...}`` and
        ``{"type": "image", "url": ...}`` dicts. An image block is emitted
        only when the image has ``status == "completed"`` and a truthy
        ``url``; otherwise its placeholder is just removed. Placeholders that
        match no image record are stripped from the text blocks.
    """
    blocks: list[dict] = []
    # Guard against a missing chapter body so the substring search cannot fail.
    remaining = content or ""

    # Only string placeholders can be searched for. `or 0` also covers an
    # explicit `index: None`, which would otherwise break the sort.
    pending = sorted(
        (
            image
            for image in images or []
            if isinstance(image.get("placeholder"), str) and image["placeholder"]
        ),
        key=lambda item: item.get("index") or 0,
    )

    while pending:
        # Pick the image whose placeholder occurs earliest in the remaining
        # text. Walking in pure index order would push an earlier-placed
        # placeholder into the "before" text, where it is stripped and the
        # image silently lost.
        hit_at = -1
        hit_slot = -1
        for slot, candidate in enumerate(pending):
            found_at = remaining.find(candidate["placeholder"])
            if found_at != -1 and (hit_at == -1 or found_at < hit_at):
                hit_at, hit_slot = found_at, slot
        if hit_slot == -1:
            # None of the remaining placeholders occur in the text any more.
            break

        image = pending.pop(hit_slot)
        before = remaining[:hit_at]
        remaining = remaining[hit_at + len(image["placeholder"]):]

        cleaned_before = strip_image_placeholders(before)
        if cleaned_before:
            blocks.append({"type": "text", "value": cleaned_before})
        if image.get("status") == "completed" and image.get("url"):
            blocks.append({"type": "image", "url": image["url"]})

    cleaned_remaining = strip_image_placeholders(remaining)
    if cleaned_remaining:
        blocks.append({"type": "text", "value": cleaned_remaining})
    return blocks
class PDFService:
"""PDF 生成服务"""
def __init__(self):
    """Register a Chinese-capable font and record the font name to use.

    ``STSong-Light`` is registered through ReportLab's ``UnicodeCIDFont``,
    so no font file has to be shipped. If registration fails, the service
    falls back to ``Helvetica`` (which may not render Chinese text) and logs
    a warning so the degraded output can be traced.
    """
    try:
        pdfmetrics.registerFont(UnicodeCIDFont('STSong-Light'))
    except Exception as exc:
        # Best-effort: PDF export should still work, just without CJK glyphs.
        logger.warning("PDF 中文字体注册失败, 回退到 Helvetica: %s", exc)
        self.chinese_font = 'Helvetica'
    else:
        self.chinese_font = 'STSong-Light'
async def _fetch_image_bytes(self, url: str) -> bytes | None:
    """Download the image at *url* and return its raw bytes.

    This is best-effort: any exception raised while requesting the URL,
    including the one ``raise_for_status()`` raises for an error response,
    is logged as a warning and reported to the caller as ``None``.

    Args:
        url: Address of the image to download.

    Returns:
        The response body, or ``None`` when the download failed.
    """
    try:
        # A fresh client is opened per download, with a 30-second timeout.
        async with httpx.AsyncClient(timeout=30) as http:
            reply = await http.get(url)
            reply.raise_for_status()
            payload = reply.content
            return payload
    except Exception as exc:
        logger.warning(f"PDF 图片下载失败: url={url}, error={exc}")
    return None
async def generate_pdf(self, book, chapters: List) -> bytes:
"""
生成 PDF
Args:
book: 回忆录对象
chapters: 章节列表
Returns:
PDF 字节数据
"""
buffer = BytesIO()
doc = SimpleDocTemplate(buffer, pagesize=A4)
# 创建样式
styles = getSampleStyleSheet()
title_style = ParagraphStyle(
'CustomTitle',
parent=styles['Heading1'],
fontSize=24,
spaceAfter=30,
alignment=1, # 居中
alignment=1,
fontName=self.chinese_font
)
heading_style = ParagraphStyle(
'CustomHeading',
parent=styles['Heading1'],
@@ -59,7 +83,7 @@ class PDFService:
spaceAfter=12,
fontName=self.chinese_font
)
normal_style = ParagraphStyle(
'CustomNormal',
parent=styles['Normal'],
@@ -67,41 +91,48 @@ class PDFService:
leading=18,
fontName=self.chinese_font
)
# 构建内容
story = []
# 封面
story.append(Paragraph(book.title, title_style))
story.append(Spacer(1, 0.5*inch))
story.append(Spacer(1, 0.5 * inch))
story.append(PageBreak())
# 目录
story.append(Paragraph("目录", heading_style))
story.append(Spacer(1, 0.2*inch))
story.append(Spacer(1, 0.2 * inch))
for i, chapter in enumerate(chapters, 1):
story.append(Paragraph(f"{i}. {chapter.title}", normal_style))
story.append(PageBreak())
# 章节内容
for chapter in chapters:
story.append(Paragraph(chapter.title, heading_style))
story.append(Spacer(1, 0.2*inch))
# 分段处理内容
paragraphs = chapter.content.split('\n\n')
for para in paragraphs:
if para.strip():
story.append(Paragraph(para.strip(), normal_style))
story.append(Spacer(1, 0.1*inch))
story.append(Spacer(1, 0.2 * inch))
images = getattr(chapter, "images", None) or []
blocks = split_content_blocks(chapter.content, images)
for block in blocks:
if block["type"] == "text":
paragraphs = block["value"].split('\n\n')
for para in paragraphs:
if para.strip():
story.append(Paragraph(para.strip(), normal_style))
story.append(Spacer(1, 0.1 * inch))
elif block["type"] == "image":
image_bytes = await self._fetch_image_bytes(block["url"])
if image_bytes:
try:
img = ReportLabImage(BytesIO(image_bytes), width=5 * inch, height=3.75 * inch)
story.append(img)
story.append(Spacer(1, 0.2 * inch))
except Exception as exc:
logger.warning(f"PDF 图片嵌入失败: {exc}")
story.append(PageBreak())
# 生成 PDF
doc.build(story)
buffer.seek(0)
return buffer.read()
# 全局实例
pdf_service = PDFService()