""" PDF 生成服务 """ import logging import re from typing import List import httpx from reportlab.lib.pagesizes import A4 from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle from reportlab.lib.units import inch from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak, Image as ReportLabImage from reportlab.pdfbase import pdfmetrics from reportlab.pdfbase.cidfonts import UnicodeCIDFont from io import BytesIO logger = logging.getLogger(__name__) PLACEHOLDER_RE = re.compile(r"\{\{\{\{IMAGE:.*?\}\}\}\}|\{\{IMAGE:.*?\}\}", re.DOTALL) def strip_image_placeholders(text: str) -> str: return PLACEHOLDER_RE.sub("", text or "").strip() def split_content_blocks(content: str, images: list[dict]) -> list[dict]: blocks: list[dict] = [] remaining = content for image in sorted(images or [], key=lambda item: item.get("index", 0)): placeholder = image.get("placeholder") if not placeholder or placeholder not in remaining: continue before, remaining = remaining.split(placeholder, 1) cleaned_before = strip_image_placeholders(before) if cleaned_before: blocks.append({"type": "text", "value": cleaned_before}) if image.get("status") == "completed" and image.get("url"): blocks.append({"type": "image", "url": image["url"]}) cleaned_remaining = strip_image_placeholders(remaining) if cleaned_remaining: blocks.append({"type": "text", "value": cleaned_remaining}) return blocks class PDFService: """PDF 生成服务""" def __init__(self): try: pdfmetrics.registerFont(UnicodeCIDFont('STSong-Light')) self.chinese_font = 'STSong-Light' except Exception: self.chinese_font = 'Helvetica' async def _fetch_image_bytes(self, url: str) -> bytes | None: try: async with httpx.AsyncClient(timeout=30) as client: response = await client.get(url) response.raise_for_status() return response.content except Exception as exc: logger.warning(f"PDF 图片下载失败: url={url}, error={exc}") return None async def generate_pdf(self, book, chapters: List) -> bytes: buffer = BytesIO() doc = SimpleDocTemplate(buffer, pagesize=A4) styles = getSampleStyleSheet() title_style = ParagraphStyle( 'CustomTitle', parent=styles['Heading1'], fontSize=24, spaceAfter=30, alignment=1, fontName=self.chinese_font ) heading_style = ParagraphStyle( 'CustomHeading', parent=styles['Heading1'], fontSize=18, spaceAfter=12, fontName=self.chinese_font ) normal_style = ParagraphStyle( 'CustomNormal', parent=styles['Normal'], fontSize=12, leading=18, fontName=self.chinese_font ) story = [] story.append(Paragraph(book.title, title_style)) story.append(Spacer(1, 0.5 * inch)) story.append(PageBreak()) story.append(Paragraph("目录", heading_style)) story.append(Spacer(1, 0.2 * inch)) for i, chapter in enumerate(chapters, 1): story.append(Paragraph(f"{i}. {chapter.title}", normal_style)) story.append(PageBreak()) for chapter in chapters: story.append(Paragraph(chapter.title, heading_style)) story.append(Spacer(1, 0.2 * inch)) images = getattr(chapter, "images", None) or [] blocks = split_content_blocks(chapter.content, images) for block in blocks: if block["type"] == "text": paragraphs = block["value"].split('\n\n') for para in paragraphs: if para.strip(): story.append(Paragraph(para.strip(), normal_style)) story.append(Spacer(1, 0.1 * inch)) elif block["type"] == "image": image_bytes = await self._fetch_image_bytes(block["url"]) if image_bytes: try: img = ReportLabImage(BytesIO(image_bytes), width=5 * inch, height=3.75 * inch) story.append(img) story.append(Spacer(1, 0.2 * inch)) except Exception as exc: logger.warning(f"PDF 图片嵌入失败: {exc}") story.append(PageBreak()) doc.build(story) buffer.seek(0) return buffer.read() pdf_service = PDFService()