Files
life-echo/api/app/features/memoir/pdf_service.py
Kevin a3f61fcc0f feat(api+app): 对话阶段化、回忆录流水线与客户端会话体验
- DB: segments 用户输入文本(Alembic 0002)
- Chat: 阶段检测/阶段提示/回复限制,编排与访谈/画像 prompts 调整
- Memoir: 忠实度检查 agent,叙事与分类等链路更新
- Core: agent 日志、Alembic 启动、LangChain/日志/配置等
- Story: time_hints;Memory 检索与相关测试
- Expo: 助手头像、会话页与消息拆分、实时会话与文案/i18n
- Docs/scripts/tests: 迁移脚本、LLM JSON/记忆检索文档、新增单测
2026-03-26 12:13:36 +08:00

167 lines
6.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
PDF generation service (moved from services into the memoir feature).
"""
from io import BytesIO
from typing import List, Optional
from xml.sax.saxutils import escape

import httpx
from PIL import Image
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
from reportlab.lib.units import inch
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.cidfonts import UnicodeCIDFont
from reportlab.platypus import (
    Image as ReportLabImage,
)
from reportlab.platypus import (
    PageBreak,
    Paragraph,
    SimpleDocTemplate,
    Spacer,
)

from app.core.logging import get_logger
from app.features.memoir.asset_resolver import (
    collect_asset_ids_from_markdown,
    split_markdown_by_asset_refs,
    strip_image_placeholders,
)
from app.features.memoir.chapter_markdown_compose import (
    materialize_chapter_pdf_markdown_from_loaded_chapter,
)
from app.features.memoir.helpers import _chapter_markdown
# Module-level logger obtained from the project's logging helper.
logger = get_logger(__name__)
def _chapter_markdown_for_pdf(chapter) -> str:
    """Pick the markdown source used when rendering ``chapter`` into the PDF.

    If ``chapter.story_links`` is non-empty and at least one link carries a
    truthy ``story``, the chapter is materialized through
    ``materialize_chapter_pdf_markdown_from_loaded_chapter`` (per the original
    author's note, that form is "## story title + body"). Otherwise the
    chapter's canonical markdown from ``_chapter_markdown`` is returned.
    """
    story_links = getattr(chapter, "story_links", None)
    if not story_links:
        # No links at all: nothing to materialize.
        return _chapter_markdown(chapter)

    for entry in story_links:
        if getattr(entry, "story", None):
            # The first link with a loaded story is enough to switch modes.
            return materialize_chapter_pdf_markdown_from_loaded_chapter(chapter)

    return _chapter_markdown(chapter)
def _fit_image_size(
    image_bytes: bytes, max_width: float, max_height: float
) -> tuple[float, float]:
    """Return a (width, height) that fits the image inside the given box.

    The image's pixel size is read with PIL and scaled uniformly by the
    smaller of the two box/image ratios, so the aspect ratio is preserved and
    the result touches the box on at least one axis. If PIL reports a
    non-positive dimension, the box itself is returned.
    """
    stream = BytesIO(image_bytes)
    with Image.open(stream) as picture:
        px_w, px_h = picture.size

    if px_w > 0 and px_h > 0:
        factor = min(max_width / px_w, max_height / px_h)
        return px_w * factor, px_h * factor

    # Degenerate size: fall back to the full bounding box.
    return max_width, max_height
class PDFService:
def __init__(self):
try:
pdfmetrics.registerFont(UnicodeCIDFont("STSong-Light"))
self.chinese_font = "STSong-Light"
except Exception:
self.chinese_font = "Helvetica"
async def _fetch_image_bytes(self, url: str) -> bytes | None:
try:
async with httpx.AsyncClient(timeout=30) as client:
response = await client.get(url)
response.raise_for_status()
return response.content
except Exception as exc:
logger.warning("PDF 图片下载失败: url={}, error={}", url, exc)
return None
async def generate_pdf(
self,
book,
chapters: List,
asset_url_map: Optional[dict[str, str]] = None,
) -> bytes:
buffer = BytesIO()
doc = SimpleDocTemplate(buffer, pagesize=A4)
styles = getSampleStyleSheet()
title_style = ParagraphStyle(
"CustomTitle",
parent=styles["Heading1"],
fontSize=24,
spaceAfter=30,
alignment=1,
fontName=self.chinese_font,
)
heading_style = ParagraphStyle(
"CustomHeading",
parent=styles["Heading1"],
fontSize=18,
spaceAfter=12,
fontName=self.chinese_font,
)
normal_style = ParagraphStyle(
"CustomNormal",
parent=styles["Normal"],
fontSize=12,
leading=18,
fontName=self.chinese_font,
)
story = []
story.append(Paragraph(book.title, title_style))
story.append(Spacer(1, 0.5 * inch))
story.append(PageBreak())
story.append(Paragraph("目录", heading_style))
story.append(Spacer(1, 0.2 * inch))
for i, chapter in enumerate(chapters, 1):
story.append(Paragraph(f"{i}. {chapter.title}", normal_style))
story.append(PageBreak())
for chapter in chapters:
story.append(Paragraph(chapter.title, heading_style))
story.append(Spacer(1, 0.2 * inch))
# 有 story_links 时按章节内故事注入 ## 标题(与物化章节正文不含故事标题区分)
markdown = _chapter_markdown_for_pdf(chapter)
blocks: list[dict]
if asset_url_map and collect_asset_ids_from_markdown(markdown):
blocks = split_markdown_by_asset_refs(
markdown,
lambda aid: asset_url_map.get(aid) if asset_url_map else None,
)
for b in blocks:
if b.get("type") == "text":
b["value"] = strip_image_placeholders(b.get("value") or "")
else:
cleaned_markdown = strip_image_placeholders(markdown or "")
blocks = (
[{"type": "text", "value": cleaned_markdown}]
if cleaned_markdown
else []
)
for block in blocks:
if block["type"] == "text":
paragraphs = block["value"].split("\n\n")
for para in paragraphs:
if para.strip():
story.append(Paragraph(para.strip(), normal_style))
story.append(Spacer(1, 0.1 * inch))
elif block["type"] == "image":
image_bytes = await self._fetch_image_bytes(block["url"])
if image_bytes:
try:
width, height = _fit_image_size(
image_bytes,
max_width=5 * inch,
max_height=3.75 * inch,
)
img = ReportLabImage(
BytesIO(image_bytes), width=width, height=height
)
story.append(img)
story.append(Spacer(1, 0.2 * inch))
except Exception as exc:
logger.warning("PDF 图片嵌入失败: {}", exc)
story.append(PageBreak())
doc.build(story)
buffer.seek(0)
return buffer.read()
# Shared module-level instance; creating it runs PDFService.__init__ (font registration) at import time.
pdf_service = PDFService()