#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ 软著申报用:源码整理生成 PDF(reportlab) 默认扫描整个 monorepo 根目录(本脚本位于 api/scripts/,向上两级即为仓库根), 会包含后端 api、Expo 客户端 app-expo、评测 Web app-eval-web 等;也可用 --root 只扫某一子项目。 使用说明(简要): 1) 安装依赖:在 api 目录执行 uv sync(本仓库已含 reportlab) 2) 中文字体:可设置 CJK_MONO_FONT_PATH;默认可用 STSong-Light(Adobe CID)。苹方/冬青等常为 CFF, ReportLab 无法直接嵌入。使用 STSong-Light 时,ASCII(含空格、英文代码)用内置 Courier 分段绘制, 中文用 STSong,避免整行用 CID 时拉丁字距错位、挤在一起。 需要退回纯 TrueType 单字体(Menlo 等)时用 --no-cid。 3) 修改下方「配置区」常量:软件全称、版本号、必要时改 SOURCE_ROOT / 输出路径、字体路径、后缀与跳过目录 4) 运行(全仓默认,仅 .py / .ts(x) / .js(x) / .vue 等源码):cd api && uv run python scripts/copyright_source_pdf.py 仅后端示例:... --root ../api 注意: - 空行会从统计与输出中剔除;行首行尾以外的空白(缩进)保留。 - 总页数按 ceil(非空行总数/50) 估算;>60 页则取前 1500 行 + 后 1500 行,最终 60 页、页码 1–60。 - 不足 60 页时输出全部非空行;最后一页若不足 50 行,用空行补齐到 50 行。 """ from __future__ import annotations import argparse import math import os import sys from dataclasses import dataclass from pathlib import Path from typing import Iterable from reportlab.lib.pagesizes import A4 from reportlab.lib.units import cm from reportlab.pdfbase import pdfmetrics from reportlab.pdfbase.cidfonts import UnicodeCIDFont from reportlab.pdfbase.ttfonts import TTFont from reportlab.pdfgen import canvas # 本文件位于 /api/scripts/,仓库根为其上两级目录 REPO_ROOT = Path(__file__).resolve().parents[2] # ========================= # 配置区(按项目修改;也可用命令行覆盖部分项) # ========================= SOFTWARE_FULL_NAME = "岁月留书" VERSION = "V1.0.0" # 默认:整仓源码根(api + 各前端/工具子项目);仅申报名某一模块时可改为子路径或 CLI --root SOURCE_ROOT = REPO_ROOT OUTPUT_PDF = REPO_ROOT / "copyright_source_listing.pdf" # PDF 页眉/材料中出现的文件路径:将本机目录前缀替换为申报用单位根(如 上海华嘎科技有限公司/life-echo/...) 
FILE_PATH_ABSOLUTE_PREFIX = Path("/Users/kevin/Codes/hgtk")
FILE_PATH_DISPLAY_ROOT = "上海华嘎科技有限公司"

# When None, a font is auto-detected (macOS: Menlo.ttc / Songti.ttc etc.);
# a local TTF/OTF or TTC path may be given instead.
CJK_MONO_FONT_PATH: Path | None = None

# When STSong-Light (CID) is mixed with Latin text ReportLab often miscomputes
# the ASCII spacing, so ASCII runs are drawn separately with the built-in Courier.
ASCII_LATIN_FONT = "Courier"


@dataclass(frozen=True)
class RenderFonts:
    """Font names used to render one line of text.

    ``primary`` is used for Chinese and any other non-ASCII text. When
    ``latin_font`` is not None, code points U+0000-U+007F are drawn with it
    instead of ``primary``.
    """

    primary: str
    latin_font: str | None = None


def _segment_latin_cjk(line: str) -> list[tuple[str, str]]:
    """Split *line* into maximal runs of ASCII and non-ASCII characters.

    Returns a list of ``(kind, text)`` pairs in original order, where ``kind``
    is ``"lat"`` for code points below 128 and ``"cjk"`` for everything else.
    An empty line yields an empty list.
    """
    if not line:
        return []
    out: list[tuple[str, str]] = []
    buf: list[str] = []
    is_lat = ord(line[0]) < 128
    for ch in line:
        lat = ord(ch) < 128
        if lat != is_lat:
            # Character class changed: flush the finished run and start a new one.
            out.append(("lat" if is_lat else "cjk", "".join(buf)))
            buf = [ch]
            is_lat = lat
        else:
            buf.append(ch)
    out.append(("lat" if is_lat else "cjk", "".join(buf)))
    return out


def _mixed_line_width(line: str, fonts: RenderFonts, font_size: float) -> float:
    """Return the rendered width of *line*, measuring each run with the font that draws it."""
    if fonts.latin_font is None:
        return pdfmetrics.stringWidth(line, fonts.primary, font_size)
    w = 0.0
    for kind, chunk in _segment_latin_cjk(line):
        fn = fonts.latin_font if kind == "lat" else fonts.primary
        w += pdfmetrics.stringWidth(chunk, fn, font_size)
    return w


def _draw_line(
    c: canvas.Canvas,
    x: float,
    y: float,
    line: str,
    fonts: RenderFonts,
    font_size: float,
) -> None:
    """Draw *line* on canvas *c* with its baseline starting at ``(x, y)``.

    With a single font the line is drawn in one call. Otherwise each
    ASCII / non-ASCII run is drawn with its own font and the x position is
    advanced by that run's measured width.
    """
    if fonts.latin_font is None:
        c.setFont(fonts.primary, font_size)
        c.drawString(x, y, line)
        return
    xpos = x
    for kind, chunk in _segment_latin_cjk(line):
        if not chunk:
            continue
        fn = fonts.latin_font if kind == "lat" else fonts.primary
        c.setFont(fn, font_size)
        c.drawString(xpos, y, chunk)
        xpos += pdfmetrics.stringWidth(chunk, fn, font_size)


# Only "executable / compilable" source is included: this repository is a
# Python + TS/JS stack; .md, .json, .yaml and other config/docs are excluded.
# Add further suffixes to the set for other languages.
INCLUDE_SUFFIXES: set[str] = {
    ".py",
    ".pyi",
    ".ts",
    ".tsx",
    ".mts",
    ".cts",
    ".js",
    ".jsx",
    ".mjs",
    ".cjs",
    ".vue",
}

# Directory names to skip (compared against each path segment's name only).
SKIP_DIR_NAMES: set[str] = {
    ".git",
    ".svn",
    ".hg",
    "node_modules",
    "venv",
    ".venv",
    "env",
    ".env",
    ".idea",
    ".vscode",
    "__pycache__",
    ".mypy_cache",
    ".pytest_cache",
    ".ruff_cache",
    ".next",
    ".nuxt",
    ".output",
    ".turbo",
    ".parcel-cache",
    ".expo",
    "dist",
    "build",
    "target",
    "out",
    "coverage",
    "htmlcov",
    "storybook-static",
    "Pods",
    ".gradle",
    "DerivedData",
}

# Whether to insert a marker line at every file boundary.
ADD_FILE_MARKERS = True
FILE_MARKER_PREFIX = "// ===== "

LINES_PER_PAGE = 50
MAX_PAGES = 60

# On most macOS systems PingFang is a .ttc with CFF outlines that ReportLab's
# TTFont cannot load; these paths are still tried first in case of an exception.
_PINGFANG_CANDIDATES: tuple[Path, ...] = (
    Path("/System/Library/Fonts/PingFang.ttc"),
    Path(
        "/System/Library/Fonts/Hiragino Sans GB.ttc"
    ),  # Hiragino Sans GB; usually CFF like PingFang, so it mostly fails
    Path("/Library/Fonts/PingFang.ttc"),
)


def _try_register_stsong_cid() -> bool:
    """Register the STSong-Light CID font; return True on success, False on any error."""
    try:
        pdfmetrics.registerFont(UnicodeCIDFont("STSong-Light"))
        return True
    except Exception:
        return False


def _register_stsong_cid_with_notice(*, pingfang_failed: bool) -> str:
    """Register STSong-Light and report it on stderr.

    Args:
        pingfang_failed: True when a PingFang candidate file exists but could
            not be loaded; selects the longer explanatory notice.

    Returns:
        ``"STSong-Light"`` on success, or an empty string if registration failed.
    """
    if not _try_register_stsong_cid():
        return ""
    if pingfang_failed:
        print(
            "已载入 PDF 简体中文:STSong-Light(Adobe Unicode CID)。"
            "说明:系统「苹方/冬青」等常为 OpenType-CFF,ReportLab 无法写入该轮廓;"
            "已改用内置宋体轮廓以保证中文可见(非苹方字形,软著材料通常可接受)。",
            file=sys.stderr,
        )
    else:
        print(
            "已载入 PDF 简体中文:STSong-Light(Adobe Unicode CID)。",
            file=sys.stderr,
        )
    return "STSong-Light"


# macOS: .ttc files with TrueType outlines (Menlo, some Songti/Heiti);
# PingFang is mostly CFF, see the note on _PINGFANG_CANDIDATES.
_MACOS_TTC_CANDIDATES: tuple[tuple[Path, int], ...] = (
    (Path("/System/Library/Fonts/Menlo.ttc"), 0),
    (Path("/System/Library/Fonts/Supplemental/Songti.ttc"), 0),
    (Path("/System/Library/Fonts/STHeiti Light.ttc"), 0),
    (Path("/System/Library/Fonts/STHeiti Medium.ttc"), 0),
)


def _try_register_ttf_or_ttc(path: Path, font_name: str = "CodeCJK") -> bool:
    """Register the font file at *path* under *font_name*.

    For ``.ttc`` collections the ``subfontIndex`` is incremented until one
    sub-font registers or the error message indicates there are no more
    sub-fonts (at most 64 attempts).

    Returns:
        True if a font was registered, False if the file is missing or every
        attempt failed.
    """
    if not path.is_file():
        return False
    if path.suffix.lower() == ".ttc":
        for idx in range(64):
            try:
                pdfmetrics.registerFont(TTFont(font_name, str(path), subfontIndex=idx))
                return True
            except Exception as e:
                msg = str(e).lower()
                # These messages mean the index ran past the last sub-font.
                if "bad subfontindex" in msg:
                    break
                if "subfontindex" in msg and "not in" in msg:
                    break
        return False
    try:
        pdfmetrics.registerFont(TTFont(font_name, str(path)))
        return True
    except Exception:
        return False


def register_font(user_path: Path | None, *, no_cid: bool = False) -> RenderFonts:
    """Choose the body font; with the STSong CID font also return Courier for Latin runs.

    Order of attempts: the user-supplied font, PingFang candidates (macOS),
    STSong-Light CID (unless *no_cid*), macOS TrueType collections, common
    Linux Noto CJK paths, and finally plain Courier.

    Args:
        user_path: Explicit TTF/OTF/TTC path, or None to auto-detect.
        no_cid: When True, never use the STSong-Light CID font.

    Returns:
        The fonts to render with. ``latin_font`` is only set when the CID
        font was chosen.
    """
    font_tt = "CodeCJK"
    if user_path is not None:
        if _try_register_ttf_or_ttc(user_path, font_tt):
            print(f"已载入 PDF 字体:{user_path}", file=sys.stderr)
            return RenderFonts(primary=font_tt)
        # Tell the user their explicit choice was not honoured instead of
        # silently falling through to auto-detection.
        print(
            f"警告:无法载入指定字体 {user_path},将自动探测其它字体。",
            file=sys.stderr,
        )

    pingfang_failed = False
    if sys.platform == "darwin":
        for p in _PINGFANG_CANDIDATES:
            if _try_register_ttf_or_ttc(p, font_tt):
                print(f"已载入 PDF 字体(苹方/平方相关):{p}", file=sys.stderr)
                return RenderFonts(primary=font_tt)
        # Reached only when no candidate loaded; "failed" means a file was
        # present but unusable.
        pingfang_failed = any(p.is_file() for p in _PINGFANG_CANDIDATES)

    if not no_cid:
        cid_name = _register_stsong_cid_with_notice(pingfang_failed=pingfang_failed)
        if cid_name:
            return RenderFonts(primary=cid_name, latin_font=ASCII_LATIN_FONT)

    if sys.platform == "darwin":
        for ttc_path, sub in _MACOS_TTC_CANDIDATES:
            if not ttc_path.is_file():
                continue
            try:
                pdfmetrics.registerFont(
                    TTFont(font_tt, str(ttc_path), subfontIndex=sub)
                )
                label = "系统等宽" if "Menlo" in ttc_path.name else "系统字体"
                print(
                    f"已载入 PDF 字体(macOS TrueType):{ttc_path}({label})",
                    file=sys.stderr,
                )
                return RenderFonts(primary=font_tt)
            except Exception:
                continue

    for p in (
        Path("/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc"),
        Path("/usr/share/fonts/truetype/noto/NotoSansMonoCJK-Regular.ttf"),
    ):
        if _try_register_ttf_or_ttc(p, font_tt):
            print(f"已载入 PDF 字体(Linux 常见路径):{p}", file=sys.stderr)
            return RenderFonts(primary=font_tt)

    if no_cid:
        print(
            "警告:已指定 --no-cid 且未找到可用 TrueType 轮廓字体,将使用 Courier。",
            file=sys.stderr,
        )
    else:
        print(
            "警告:未找到可用的中文矢量字体,将使用 Courier(中文可能无法显示)。",
            file=sys.stderr,
        )
    return RenderFonts(primary="Courier")


@dataclass(frozen=True)
class ExtractResult:
    """Outcome of :func:`extract_for_pdf`."""

    # Lines selected for output (not yet padded to a full page).
    lines: list[str]
    # Number of pages the selected lines occupy (at least 1).
    pages: int
    # True when the input exceeded the page limit and was cut to head + tail.
    capped: bool
    # Number of lines in the input before any cutting.
    original_non_empty_lines: int
    # Page count the uncut input would need (at least 1).
    original_pages_ceil: int


def should_skip_dir(name: str) -> bool:
    """Return True if a directory called *name* must not be descended into."""
    return name in SKIP_DIR_NAMES


def iter_source_files(root: Path) -> Iterable[Path]:
    """Yield every file under *root* whose suffix is in ``INCLUDE_SUFFIXES``.

    Directories named in ``SKIP_DIR_NAMES`` are pruned and symlinked
    directories are not followed. Order is whatever ``os.walk`` produces.
    """
    for dirpath, dirnames, filenames in os.walk(root, followlinks=False):
        # In-place slice assignment is what makes os.walk prune these subtrees.
        dirnames[:] = [d for d in dirnames if not should_skip_dir(d)]
        for fn in filenames:
            p = Path(dirpath) / fn
            if p.suffix.lower() in INCLUDE_SUFFIXES:
                yield p


def path_for_pdf_listing(fp: Path) -> str:
    """Convert an absolute path to its PDF display form.

    ``<FILE_PATH_ABSOLUTE_PREFIX>/foo`` becomes ``<FILE_PATH_DISPLAY_ROOT>/foo``.
    Paths outside the prefix are returned unchanged in POSIX form.
    """
    try:
        resolved = fp.resolve()
        base = FILE_PATH_ABSOLUTE_PREFIX.expanduser().resolve()
        rel = resolved.relative_to(base)
        return f"{FILE_PATH_DISPLAY_ROOT}/{rel.as_posix()}"
    except ValueError:
        return fp.as_posix()


def read_nonempty_lines_from_files(files: list[Path]) -> list[str]:
    """Read *files* in order and return all their non-blank lines.

    When ``ADD_FILE_MARKERS`` is true, each successfully read file is preceded
    by a marker line containing its display path. Files are decoded as UTF-8
    with undecodable bytes replaced. A file that cannot be read at all (for
    example a broken symlink) is reported on stderr and skipped, and no marker
    is emitted for it.
    """
    out: list[str] = []
    for fp in files:
        try:
            text = fp.read_text(encoding="utf-8", errors="replace")
        except OSError as exc:
            # Retrying with another encoding cannot help: the failure is at
            # the filesystem level, not in decoding.
            print(f"警告:无法读取文件,已跳过:{fp}({exc})", file=sys.stderr)
            continue
        if ADD_FILE_MARKERS:
            out.append(f"{FILE_MARKER_PREFIX}{path_for_pdf_listing(fp)} =====")
        # Whitespace-only lines are dropped; indentation of kept lines is preserved.
        out.extend(line for line in text.splitlines() if line.strip())
    return out


def extract_for_pdf(
    lines: list[str],
    *,
    lines_per_page: int = LINES_PER_PAGE,
    max_pages: int = MAX_PAGES,
) -> ExtractResult:
    """Select the lines that go into the PDF, capping the result at *max_pages*.

    If the input needs more than *max_pages* pages, the head and the tail of
    the input are kept so that exactly *max_pages* pages result. For an odd
    *max_pages* the extra page goes to the head. Otherwise all lines are kept.

    Args:
        lines: Non-empty source lines.
        lines_per_page: Lines per page; must be at least 1.
        max_pages: Maximum number of pages; must be at least 1.

    Returns:
        An :class:`ExtractResult` describing the selection.

    Raises:
        ValueError: If *lines_per_page* or *max_pages* is less than 1.
    """
    if lines_per_page < 1:
        raise ValueError("lines_per_page must be >= 1")
    if max_pages < 1:
        raise ValueError("max_pages must be >= 1")

    original_non_empty_lines = len(lines)
    original_pages_ceil = max(1, math.ceil(original_non_empty_lines / lines_per_page))
    capped = original_pages_ceil > max_pages
    if capped:
        tail_pages = max_pages // 2
        head_pages = max_pages - tail_pages
        first_n = lines_per_page * head_pages
        last_n = lines_per_page * tail_pages
        # lines[-0:] would be the whole list, so an empty tail needs its own branch.
        tail = lines[-last_n:] if last_n else []
        selected = lines[:first_n] + tail
    else:
        selected = list(lines)
    pages = max(1, math.ceil(len(selected) / lines_per_page))
    return ExtractResult(
        lines=selected,
        pages=pages,
        capped=capped,
        original_non_empty_lines=original_non_empty_lines,
        original_pages_ceil=original_pages_ceil,
    )


def pad_to_multiple(lines: list[str], lines_per_page: int) -> list[str]:
    """Pad *lines* with empty strings up to a whole number of pages.

    An empty input yields one full page of empty lines. If the length is
    already a multiple of *lines_per_page* the input list itself is returned.
    """
    if not lines:
        return [""] * lines_per_page
    remainder = len(lines) % lines_per_page
    if remainder == 0:
        return lines
    return lines + [""] * (lines_per_page - remainder)


def fit_line_to_width(
    line: str, fonts: RenderFonts, font_size: float, max_width: float
) -> str:
    """Return *line* unchanged if it fits *max_width*, else a truncated copy ending in "…".

    If not even one character fits next to the ellipsis, the ellipsis alone
    is returned.
    """
    if _mixed_line_width(line, fonts, font_size) <= max_width:
        return line
    # Binary search for the longest prefix that fits without the ellipsis.
    lo, hi = 0, len(line)
    while lo < hi:
        mid = (lo + hi + 1) // 2
        if _mixed_line_width(line[:mid], fonts, font_size) <= max_width:
            lo = mid
        else:
            hi = mid - 1
    ell = "…"
    ell_w = _mixed_line_width(ell, fonts, font_size)
    # Then shrink further until the ellipsis fits too.
    while lo > 0 and _mixed_line_width(line[:lo], fonts, font_size) + ell_w > max_width:
        lo -= 1
    if lo <= 0:
        return ell
    return line[:lo] + ell


def draw_pdf(
    out_path: Path,
    all_lines: list[str],
    *,
    fonts: RenderFonts,
    lines_per_page: int,
    software_name: str,
    version: str,
    font_size: float = 8.5,
) -> None:
    """Render *all_lines* into an A4 PDF at *out_path*.

    Each page has a header with "<software_name> <version>" on the left and
    the page number on the right, a separator rule, and *lines_per_page* body
    lines spread evenly over the content area. Lines that are too wide are
    truncated with an ellipsis, and the number of truncated lines is reported
    on stderr.

    Args:
        out_path: Destination PDF file.
        all_lines: Body lines; the length must be a multiple of *lines_per_page*.
        fonts: Fonts as returned by :func:`register_font`.
        lines_per_page: Number of body lines per page; must be at least 1.
        software_name: Name shown in the header and PDF metadata.
        version: Version string shown in the header and PDF title.
        font_size: Font size in points for header and body. The default here
            is 8.5; ``main`` always passes the CLI value instead.

    Raises:
        ValueError: If *lines_per_page* is less than 1 or ``len(all_lines)``
            is not a multiple of it.
    """
    if lines_per_page < 1:
        raise ValueError("lines_per_page must be >= 1")

    page_w, page_h = A4
    left = 2 * cm
    right = 2 * cm
    top = 2 * cm
    bottom = 2 * cm
    sep_gap = 0.25 * cm

    c = canvas.Canvas(str(out_path), pagesize=A4)
    c.setTitle(f"{software_name} {version} — 源代码节选")
    c.setAuthor(software_name)

    header_baseline_y = page_h - top - 0.35 * cm
    sep_y = header_baseline_y - 0.35 * cm
    content_top = sep_y - sep_gap
    content_bottom = bottom + 0.5 * cm
    if lines_per_page <= 1:
        line_step = max(content_top - content_bottom, 1.0)
    else:
        # First line sits at content_top, last line at content_bottom.
        line_step = (content_top - content_bottom) / (lines_per_page - 1)

    max_text_width = page_w - left - right
    truncated_count = 0

    n = len(all_lines)
    if n % lines_per_page != 0:
        raise ValueError("all_lines 长度必须是 lines_per_page 的整数倍(请先 pad)")
    total_pages = n // lines_per_page

    for page_no in range(1, total_pages + 1):
        left_text = f"{software_name} {version}"
        _draw_line(c, left, header_baseline_y, left_text, fonts, font_size)

        # Page numbers are digits only, so prefer the Latin font when there is one.
        page_label = str(page_no)
        page_font = fonts.latin_font or fonts.primary
        pw = pdfmetrics.stringWidth(page_label, page_font, font_size)
        c.setFont(page_font, font_size)
        c.drawString(page_w - right - pw, header_baseline_y, page_label)

        c.setLineWidth(0.3)
        c.line(left, sep_y, page_w - right, sep_y)

        start = (page_no - 1) * lines_per_page
        page_slice = all_lines[start : start + lines_per_page]
        for i, raw in enumerate(page_slice):
            y = content_top - i * line_step
            fitted = fit_line_to_width(raw, fonts, font_size, max_text_width)
            if fitted != raw and raw:
                truncated_count += 1
            _draw_line(c, left, y, fitted, fonts, font_size)
        c.showPage()

    if truncated_count:
        print(
            f"提示:共有 {truncated_count} 行因超宽被截断(可减小字号或换更窄字体)。",
            file=sys.stderr,
        )
    c.save()


def parse_args() -> argparse.Namespace:
    """Parse the command line; unset options are None so ``main`` can fall back to the constants."""
    p = argparse.ArgumentParser(
        description="将源码目录整理为软著申报用 PDF(reportlab)",
    )
    p.add_argument(
        "--root",
        type=Path,
        default=None,
        help="源码根目录(默认使用脚本内 SOURCE_ROOT)",
    )
    p.add_argument(
        "--out",
        type=Path,
        default=None,
        help="输出 PDF 路径(默认使用脚本内 OUTPUT_PDF)",
    )
    p.add_argument(
        "--font",
        type=Path,
        default=None,
        help="CJK 等宽字体 TTF/OTF(默认使用脚本内 CJK_MONO_FONT_PATH)",
    )
    p.add_argument(
        "--name",
        default=None,
        help="软件全称(默认脚本内 SOFTWARE_FULL_NAME)",
    )
    p.add_argument(
        "--version",
        dest="version_str",
        default=None,
        help="版本号(默认脚本内 VERSION)",
    )
    p.add_argument(
        "--font-size",
        type=float,
        default=8.3,
        help="正文字号(略小可减少超长行截断,默认 8.3)",
    )
    p.add_argument(
        "--no-cid",
        action="store_true",
        help="不使用 Adobe STSong-Light(CID),仅尝试 TrueType 轮廓(Menlo 等),中文更易缺失",
    )
    return p.parse_args()


def main() -> int:
    """Collect the sources, build the PDF and print a summary.

    Returns:
        0 on success, 1 if the source root is not a directory.
    """
    args = parse_args()
    root = args.root or SOURCE_ROOT
    out_pdf = args.out or OUTPUT_PDF
    font_path = args.font
    if font_path is None:
        font_path = CJK_MONO_FONT_PATH
    name = args.name or SOFTWARE_FULL_NAME
    ver = args.version_str or VERSION

    if not root.is_dir():
        print(f"错误:源码根目录不存在或不是目录:{root}", file=sys.stderr)
        return 1

    # Sort so the listing order is deterministic regardless of os.walk order.
    files = sorted(iter_source_files(root), key=lambda p: p.as_posix())
    lines = read_nonempty_lines_from_files(files)
    result = extract_for_pdf(lines)
    padded = pad_to_multiple(result.lines, LINES_PER_PAGE)

    fonts = register_font(font_path, no_cid=args.no_cid)
    out_pdf.parent.mkdir(parents=True, exist_ok=True)
    draw_pdf(
        out_pdf,
        padded,
        fonts=fonts,
        lines_per_page=LINES_PER_PAGE,
        software_name=name,
        version=ver,
        font_size=args.font_size,
    )

    out_pages = len(padded) // LINES_PER_PAGE
    print(
        "完成:",
        f"源非空行 {result.original_non_empty_lines}"
        f"(按 {LINES_PER_PAGE} 行/页约 {result.original_pages_ceil} 页)",
        f"节选后 {len(result.lines)} 行",
        f"输出 {out_pdf} 共 {out_pages} 页",
        f" capped={result.capped}",
    )
    return 0


if __name__ == "__main__":
    raise SystemExit(main())