diff --git a/api/services/pdf_service.py b/api/services/pdf_service.py index 0f43167..bf7715d 100644 --- a/api/services/pdf_service.py +++ b/api/services/pdf_service.py @@ -1,57 +1,81 @@ """ PDF 生成服务 """ +import logging +import re from typing import List -from reportlab.lib.pagesizes import letter, A4 + +import httpx +from reportlab.lib.pagesizes import A4 from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle from reportlab.lib.units import inch -from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak +from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak, Image as ReportLabImage from reportlab.pdfbase import pdfmetrics -from reportlab.pdfbase.ttfonts import TTFont from reportlab.pdfbase.cidfonts import UnicodeCIDFont from io import BytesIO -import os + +logger = logging.getLogger(__name__) + +PLACEHOLDER_RE = re.compile(r"\{\{\{\{IMAGE:.*?\}\}\}\}|\{\{IMAGE:.*?\}\}", re.DOTALL) + + +def strip_image_placeholders(text: str) -> str: + return PLACEHOLDER_RE.sub("", text or "").strip() + + +def split_content_blocks(content: str, images: list[dict]) -> list[dict]: + blocks: list[dict] = [] + remaining = content + for image in sorted(images or [], key=lambda item: item.get("index", 0)): + placeholder = image.get("placeholder") + if not placeholder or placeholder not in remaining: + continue + before, remaining = remaining.split(placeholder, 1) + cleaned_before = strip_image_placeholders(before) + if cleaned_before: + blocks.append({"type": "text", "value": cleaned_before}) + if image.get("status") == "completed" and image.get("url"): + blocks.append({"type": "image", "url": image["url"]}) + cleaned_remaining = strip_image_placeholders(remaining) + if cleaned_remaining: + blocks.append({"type": "text", "value": cleaned_remaining}) + return blocks class PDFService: """PDF 生成服务""" - + def __init__(self): - # 尝试注册中文字体 try: - # 使用系统字体或 ReportLab 内置的中文字体 - # 如果没有中文字体文件,使用 UnicodeCIDFont pdfmetrics.registerFont(UnicodeCIDFont('STSong-Light')) self.chinese_font = 'STSong-Light' except Exception: - # 如果注册失败,使用默认字体(可能不支持中文) self.chinese_font = 'Helvetica' - + + async def _fetch_image_bytes(self, url: str) -> bytes | None: + try: + async with httpx.AsyncClient(timeout=30) as client: + response = await client.get(url) + response.raise_for_status() + return response.content + except Exception as exc: + logger.warning(f"PDF 图片下载失败: url={url}, error={exc}") + return None + async def generate_pdf(self, book, chapters: List) -> bytes: - """ - 生成 PDF - - Args: - book: 回忆录对象 - chapters: 章节列表 - - Returns: - PDF 字节数据 - """ buffer = BytesIO() doc = SimpleDocTemplate(buffer, pagesize=A4) - - # 创建样式 + styles = getSampleStyleSheet() title_style = ParagraphStyle( 'CustomTitle', parent=styles['Heading1'], fontSize=24, spaceAfter=30, - alignment=1, # 居中 + alignment=1, fontName=self.chinese_font ) - + heading_style = ParagraphStyle( 'CustomHeading', parent=styles['Heading1'], @@ -59,7 +83,7 @@ class PDFService: spaceAfter=12, fontName=self.chinese_font ) - + normal_style = ParagraphStyle( 'CustomNormal', parent=styles['Normal'], @@ -67,41 +91,48 @@ class PDFService: leading=18, fontName=self.chinese_font ) - - # 构建内容 + story = [] - - # 封面 + story.append(Paragraph(book.title, title_style)) - story.append(Spacer(1, 0.5*inch)) + story.append(Spacer(1, 0.5 * inch)) story.append(PageBreak()) - - # 目录 + story.append(Paragraph("目录", heading_style)) - story.append(Spacer(1, 0.2*inch)) + story.append(Spacer(1, 0.2 * inch)) for i, chapter in enumerate(chapters, 1): story.append(Paragraph(f"{i}. {chapter.title}", normal_style)) story.append(PageBreak()) - - # 章节内容 + for chapter in chapters: story.append(Paragraph(chapter.title, heading_style)) - story.append(Spacer(1, 0.2*inch)) - - # 分段处理内容 - paragraphs = chapter.content.split('\n\n') - for para in paragraphs: - if para.strip(): - story.append(Paragraph(para.strip(), normal_style)) - story.append(Spacer(1, 0.1*inch)) - + story.append(Spacer(1, 0.2 * inch)) + + images = getattr(chapter, "images", None) or [] + blocks = split_content_blocks(chapter.content, images) + + for block in blocks: + if block["type"] == "text": + paragraphs = block["value"].split('\n\n') + for para in paragraphs: + if para.strip(): + story.append(Paragraph(para.strip(), normal_style)) + story.append(Spacer(1, 0.1 * inch)) + elif block["type"] == "image": + image_bytes = await self._fetch_image_bytes(block["url"]) + if image_bytes: + try: + img = ReportLabImage(BytesIO(image_bytes), width=5 * inch, height=3.75 * inch) + story.append(img) + story.append(Spacer(1, 0.2 * inch)) + except Exception as exc: + logger.warning(f"PDF 图片嵌入失败: {exc}") + story.append(PageBreak()) - - # 生成 PDF + doc.build(story) buffer.seek(0) return buffer.read() -# 全局实例 pdf_service = PDFService() diff --git a/api/tests/test_pdf_service_images.py b/api/tests/test_pdf_service_images.py new file mode 100644 index 0000000..7fd5914 --- /dev/null +++ b/api/tests/test_pdf_service_images.py @@ -0,0 +1,46 @@ +import unittest +from unittest.mock import AsyncMock, patch, MagicMock + +from api.services.pdf_service import PDFService + + +class PDFServiceImagesTest(unittest.IsolatedAsyncioTestCase): + @patch("api.services.pdf_service.httpx.AsyncClient") + async def test_generate_pdf_embeds_completed_images_and_removes_placeholders(self, async_client_cls): + png_bytes = ( + b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01" + b"\x08\x02\x00\x00\x00\x90wS\xde\x00\x00\x00\x0cIDATx\x9cc```\x00\x00" + b"\x00\x04\x00\x01\xf6\x178U\x00\x00\x00\x00IEND\xaeB`\x82" + ) + mock_response = MagicMock() + mock_response.content = png_bytes + mock_response.raise_for_status = MagicMock() + + mock_client = AsyncMock() + mock_client.get.return_value = mock_response + async_client_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client) + async_client_cls.return_value.__aexit__ = AsyncMock(return_value=False) + + service = PDFService() + book = type("BookStub", (), {"title": "我的回忆录"})() + chapter = type( + "ChapterStub", + (), + { + "title": "童年的夏天", + "content": "那条路我一直记得。\n\n{{{{IMAGE:南方小镇的青石板路}}}}\n\n奶奶常坐在那里。", + "images": [ + { + "index": 0, + "placeholder": "{{{{IMAGE:南方小镇的青石板路}}}}", + "url": "https://cos.example.com/0.png", + "status": "completed", + } + ], + }, + )() + + pdf_bytes = await service.generate_pdf(book, [chapter]) + + self.assertGreater(len(pdf_bytes), 100) + self.assertNotIn(b"IMAGE:", pdf_bytes)