Files
life-echo/api/app/features/memoir/pdf_service.py
Kevin 8af37e5e8e 修复:CI 部署环境与 ref 错配、迁移碎片化、图片意图 source_span、章节物化脏版式、会话历史与本地语音不一致
新增:TTS 上传 COS 与分片、章节 reading_segments 物化与快照、markdown 清洗、会话消息 repository、语音 store 重构与相关测试
2026-03-20 16:43:02 +08:00

232 lines
8.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
PDF 生成服务(从 services 迁入 memoir feature)
"""
from app.core.logging import get_logger
from io import BytesIO
from typing import List, Optional
import httpx
from PIL import Image
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.platypus import (
Image as ReportLabImage,
PageBreak,
Paragraph,
SimpleDocTemplate,
Spacer,
)
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.cidfonts import UnicodeCIDFont
from app.features.memoir.asset_resolver import (
collect_asset_ids_from_markdown,
split_markdown_by_asset_refs,
strip_legacy_image_placeholders,
)
from app.features.memoir.chapter_markdown_compose import (
materialize_chapter_pdf_markdown_from_loaded_chapter,
)
from app.features.memoir.helpers import (
_chapter_markdown,
sections_to_content_and_images,
)
def _chapter_markdown_for_pdf(chapter) -> str:
    """Return the markdown source used to render *chapter* in the PDF.

    If at least one entry of ``chapter.story_links`` has a truthy ``story``
    attribute, the text comes from
    ``materialize_chapter_pdf_markdown_from_loaded_chapter`` (per the original
    note, that variant renders each story as ``## <story name>`` plus body).
    Otherwise the chapter's canonical markdown from ``_chapter_markdown`` is
    returned.
    """
    story_links = getattr(chapter, "story_links", None)
    if not story_links:
        return _chapter_markdown(chapter)
    for link in story_links:
        if getattr(link, "story", None):
            return materialize_chapter_pdf_markdown_from_loaded_chapter(chapter)
    return _chapter_markdown(chapter)
from app.features.memoir.memoir_images.parser import PLACEHOLDER_RE
from app.features.memoir.memoir_images.schema import (
IMAGE_STATUS_COMPLETED,
normalize_image_assets,
)
from app.features.memoir.memoir_images.storage import (
CosDownloadUrlError,
TencentCosStorageService,
mark_image_delivery_unavailable,
resolve_image_storage_key,
)
# Module-level logger, obtained from the project's logging helper and named
# after this module.
logger = get_logger(__name__)
def strip_image_placeholders(text: str) -> str:
    """Delete every ``PLACEHOLDER_RE`` match from *text* and trim whitespace.

    A falsy *text* (``None`` or an empty string) yields ``""``.
    """
    source = text if text else ""
    without_placeholders = PLACEHOLDER_RE.sub("", source)
    return without_placeholders.strip()
def split_content_blocks(content: str, images: list[dict]) -> list[dict]:
    """Split *content* into ordered text / image blocks at image placeholders.

    Images are visited in ascending ``index`` order. For each image whose
    ``placeholder`` string still occurs in the not-yet-consumed text, the text
    before the placeholder becomes a ``{"type": "text", "value": ...}`` block
    (after ``strip_image_placeholders`` cleaning, and only if non-empty).
    The image itself becomes a ``{"type": "image", "url": ...}`` block only
    when its status is ``IMAGE_STATUS_COMPLETED`` and it has a truthy ``url``.
    Whatever text remains after the last consumed placeholder is emitted as a
    final text block.

    Args:
        content: Chapter text containing placeholders. ``None`` is treated as
            an empty string.
        images: Image asset dicts (``index``, ``placeholder``, ``status``,
            ``url`` keys are read). ``None`` is treated as an empty list.

    Returns:
        The list of block dicts in document order.
    """
    blocks: list[dict] = []
    # Accept None like strip_image_placeholders does; otherwise the membership
    # test below raises TypeError as soon as one image has a placeholder.
    remaining = content or ""
    # ``or 0`` also covers an explicit ``"index": None``, which would make the
    # sort fail when compared against integer indexes.
    ordered_images = sorted(
        images or [], key=lambda item: item.get("index") or 0
    )
    for image in ordered_images:
        placeholder = image.get("placeholder")
        if not placeholder or placeholder not in remaining:
            continue
        # Consume the text up to (and including) this placeholder.
        before, remaining = remaining.split(placeholder, 1)
        cleaned_before = strip_image_placeholders(before)
        if cleaned_before:
            blocks.append({"type": "text", "value": cleaned_before})
        # Unfinished or URL-less images are dropped; their placeholder is
        # still removed from the text by the split above.
        if image.get("status") == IMAGE_STATUS_COMPLETED and image.get("url"):
            blocks.append({"type": "image", "url": image["url"]})
    cleaned_remaining = strip_image_placeholders(remaining)
    if cleaned_remaining:
        blocks.append({"type": "text", "value": cleaned_remaining})
    return blocks
def _prepare_pdf_image_assets(images: list[dict]) -> list[dict]:
    """Return copies of the normalized image assets with download URLs set.

    Each asset from ``normalize_image_assets(images)`` is shallow-copied. For
    assets whose status is ``IMAGE_STATUS_COMPLETED`` and that resolve to a
    storage key, ``url`` is replaced by the storage service's download URL.
    If obtaining that URL fails, a warning is logged and the asset is replaced
    by the result of ``mark_image_delivery_unavailable``. All assets are
    returned, in the order produced by ``normalize_image_assets``.
    """
    cos = TencentCosStorageService.from_env()
    result: list[dict] = []
    for raw in normalize_image_assets(images):
        entry = dict(raw)
        key = resolve_image_storage_key(entry)
        needs_url = entry.get("status") == IMAGE_STATUS_COMPLETED and key
        if not needs_url:
            # Nothing to sign: keep the copy untouched.
            result.append(entry)
            continue
        try:
            entry["url"] = cos.get_download_url(key)
        except CosDownloadUrlError as err:
            # Storage-specific failure: log the extra diagnostic fields.
            logger.warning(
                "PDF 图片签名失败: key=%s, retryable=%s, request_id=%s, error=%s",
                key,
                err.retryable,
                err.request_id,
                err,
            )
            entry = mark_image_delivery_unavailable(entry)
        except Exception as err:
            # Best effort: one failing image must not abort the export.
            logger.warning("PDF 图片签名失败: key=%s, error=%s", key, err)
            entry = mark_image_delivery_unavailable(entry)
        result.append(entry)
    return result
def _fit_image_size(
    image_bytes: bytes, max_width: float, max_height: float
) -> tuple[float, float]:
    """Compute a size for the image that fits the given box, keeping its ratio.

    The image is opened with Pillow only to read its pixel dimensions. Both
    dimensions are multiplied by the same factor, the smaller of the two
    box-to-image ratios, so the result touches the box on at least one axis.
    The factor is not capped at 1, so images smaller than the box are enlarged.
    If Pillow reports a non-positive width or height, the box size itself is
    returned.
    """
    with Image.open(BytesIO(image_bytes)) as picture:
        px_width, px_height = picture.size
    if px_width > 0 and px_height > 0:
        ratio = min(max_width / px_width, max_height / px_height)
        return px_width * ratio, px_height * ratio
    return max_width, max_height
class PDFService:
def __init__(self):
try:
pdfmetrics.registerFont(UnicodeCIDFont("STSong-Light"))
self.chinese_font = "STSong-Light"
except Exception:
self.chinese_font = "Helvetica"
async def _fetch_image_bytes(self, url: str) -> bytes | None:
try:
async with httpx.AsyncClient(timeout=30) as client:
response = await client.get(url)
response.raise_for_status()
return response.content
except Exception as exc:
logger.warning("PDF 图片下载失败: url=%s, error=%s", url, exc)
return None
async def generate_pdf(
self,
book,
chapters: List,
asset_url_map: Optional[dict[str, str]] = None,
) -> bytes:
buffer = BytesIO()
doc = SimpleDocTemplate(buffer, pagesize=A4)
styles = getSampleStyleSheet()
title_style = ParagraphStyle(
"CustomTitle",
parent=styles["Heading1"],
fontSize=24,
spaceAfter=30,
alignment=1,
fontName=self.chinese_font,
)
heading_style = ParagraphStyle(
"CustomHeading",
parent=styles["Heading1"],
fontSize=18,
spaceAfter=12,
fontName=self.chinese_font,
)
normal_style = ParagraphStyle(
"CustomNormal",
parent=styles["Normal"],
fontSize=12,
leading=18,
fontName=self.chinese_font,
)
story = []
story.append(Paragraph(book.title, title_style))
story.append(Spacer(1, 0.5 * inch))
story.append(PageBreak())
story.append(Paragraph("目录", heading_style))
story.append(Spacer(1, 0.2 * inch))
for i, chapter in enumerate(chapters, 1):
story.append(Paragraph(f"{i}. {chapter.title}", normal_style))
story.append(PageBreak())
for chapter in chapters:
story.append(Paragraph(chapter.title, heading_style))
story.append(Spacer(1, 0.2 * inch))
# 有 story_links 时按章节内故事注入 ## 标题(与物化章节正文不含故事标题区分)
markdown = _chapter_markdown_for_pdf(chapter)
_, images_list = sections_to_content_and_images(chapter)
if not markdown:
markdown = getattr(chapter, "content", "") or ""
if not images_list:
images_list = list(getattr(chapter, "images", None) or [])
prepared_images = _prepare_pdf_image_assets(images_list)
blocks: list[dict]
if asset_url_map and collect_asset_ids_from_markdown(markdown):
blocks = split_markdown_by_asset_refs(
markdown,
lambda aid: asset_url_map.get(aid) if asset_url_map else None,
)
for b in blocks:
if b.get("type") == "text":
b["value"] = strip_legacy_image_placeholders(
b.get("value") or ""
)
else:
blocks = split_content_blocks(markdown, prepared_images)
for block in blocks:
if block["type"] == "text":
paragraphs = block["value"].split("\n\n")
for para in paragraphs:
if para.strip():
story.append(Paragraph(para.strip(), normal_style))
story.append(Spacer(1, 0.1 * inch))
elif block["type"] == "image":
image_bytes = await self._fetch_image_bytes(block["url"])
if image_bytes:
try:
width, height = _fit_image_size(
image_bytes,
max_width=5 * inch,
max_height=3.75 * inch,
)
img = ReportLabImage(
BytesIO(image_bytes), width=width, height=height
)
story.append(img)
story.append(Spacer(1, 0.2 * inch))
except Exception as exc:
logger.warning("PDF 图片嵌入失败: %s", exc)
story.append(PageBreak())
doc.build(story)
buffer.seek(0)
return buffer.read()
# Module-level PDFService instance, created when this module is imported;
# its constructor attempts the CJK font registration at that point.
pdf_service = PDFService()