""" 从标题/正文推断 Story 的人生时间键,供章节内按发生顺序排序。 """ from __future__ import annotations import re from datetime import datetime from typing import Any _YEAR_IN_TITLE = re.compile(r"(?:^|[\s·,,])(?P(?:19|20)\d{2})(?:年|\s|·|$)") _YEAR_ANYWHERE = re.compile(r"(?:19|20)\d{2}") def parse_year_from_title(title: str | None) -> int | None: """从「1999年 · 标题」或标题中的四位年份提取年份。""" if not title: return None m = _YEAR_IN_TITLE.search(title.strip()) if m: try: return int(m.group("y")) except ValueError: return None return None def parse_year_from_text(text: str | None, *, max_chars: int = 4000) -> int | None: """从正文前若干字中取第一个 plausible 四位年份。""" if not text: return None chunk = (text or "")[:max_chars] m = _YEAR_ANYWHERE.search(chunk) if m: try: y = int(m.group(0)) if 1900 <= y <= 2100: return y except (ValueError, TypeError): return None return None def parse_time_start_year(time_start: str | None) -> int | None: """time_start 存 'YYYY' 或 'YYYY-MM' 等,取年份用于排序。""" if not time_start or not str(time_start).strip(): return None s = str(time_start).strip() m = re.match(r"^(\d{4})", s) if m: try: y = int(m.group(1)) if 1900 <= y <= 2100: return y except ValueError: return None return None def apply_infer_story_time_start_to_model(story: Any) -> None: """将推断的 YYYY 写入 `story.time_start`(已有合法值则保留)。""" ts = infer_story_time_start( title=getattr(story, "title", None), canonical_markdown=getattr(story, "canonical_markdown", None), existing_time_start=getattr(story, "time_start", None), ) if ts: story.time_start = ts def infer_story_time_start( *, title: str | None, canonical_markdown: str | None, existing_time_start: str | None = None, ) -> str | None: """ 返回统一 `YYYY` 字符串;若无法推断则返回 None。 已有 time_start 合法则保留。 """ y_existing = parse_time_start_year(existing_time_start) if y_existing is not None: return str(y_existing) y_title = parse_year_from_title(title) if y_title is not None: return str(y_title) y_body = parse_year_from_text(canonical_markdown) if y_body is not None: return str(y_body) return None def life_sort_key_parts( *, time_start: str | None, title: str | None, created_at: datetime | None, story_id: str, ) -> tuple[int, int, str]: """ 用于 sorted(...): 人生发生顺序(早→晚)。 无年份时排在后面(9999),再以创建时间、id 稳定 tie-break。 """ y = parse_time_start_year(time_start) if y is None: y = parse_year_from_title(title) if y is None: y = 9999 sub = 0 if created_at is not None: sub = int(created_at.timestamp()) return (y, sub, story_id)