# life-echo/api/app/features/story/time_hints.py

"""
从标题/正文推断 Story 的人生时间键，供章节内按发生顺序排序。
"""

from __future__ import annotations

import re
from datetime import datetime
from typing import Any


# Title year: a 19xx/20xx number that starts the title or follows whitespace,
# "·" or a comma, and is followed by "年", whitespace, "·" or the end of the title.
_YEAR_IN_TITLE = re.compile(r"(?:^|[\s·，,])(?P<y>(?:19|20)\d{2})(?:年|\s|·|$)")
# Body year: a 19xx/20xx number that is NOT part of a longer digit run, so that
# phone numbers, ID numbers, order numbers and the like are not read as years.
_YEAR_ANYWHERE = re.compile(r"(?<!\d)(?:19|20)\d{2}(?!\d)")


def parse_year_from_title(title: str | None) -> int | None:
    """Extract a year from a title such as "1999年 · Title".

    The year must be delimited as described on ``_YEAR_IN_TITLE``; a year glued
    to other text (e.g. directly after a CJK character) is not accepted.

    Args:
        title: Story title, or ``None``.

    Returns:
        The year (1900-2099) as an int, or ``None`` if the title is empty or
        contains no delimited year.
    """
    if not title:
        return None
    match = _YEAR_IN_TITLE.search(title.strip())
    # The pattern only captures decimal digits, so int() cannot fail here.
    return int(match.group("y")) if match else None


def parse_year_from_text(text: str | None, *, max_chars: int = 4000) -> int | None:
    """Return the first plausible four-digit year near the start of ``text``.

    Only the first ``max_chars`` characters are scanned. A candidate must be a
    standalone 19xx/20xx number: digits embedded in a longer run of digits
    (phone numbers, ID numbers, compact dates such as 20080808) are skipped.

    Args:
        text: Story body, or ``None``.
        max_chars: Length of the prefix of ``text`` that is scanned.

    Returns:
        The year (1900-2099) as an int, or ``None`` if ``text`` is empty or no
        standalone year is found in the scanned prefix.
    """
    if not text:
        return None
    match = _YEAR_ANYWHERE.search(text[:max_chars])
    # The pattern already restricts matches to 1900-2099, so no extra range
    # check or int() error handling is required.
    return int(match.group(0)) if match else None


def parse_time_start_year(time_start: str | None) -> int | None:
    """Read the leading year out of a stored ``time_start`` value.

    ``time_start`` holds values such as ``'YYYY'`` or ``'YYYY-MM'``; only the
    first four digits (after trimming surrounding whitespace) are used, which
    is all that sorting needs.

    Args:
        time_start: Stored value, or ``None``.

    Returns:
        The year as an int when the value starts with four digits in the range
        1900-2100; otherwise ``None``.
    """
    if not time_start:
        return None
    trimmed = str(time_start).strip()
    leading = re.match(r"^(\d{4})", trimmed)
    if leading is None:
        # Covers blank strings as well as values that do not start with digits.
        return None
    try:
        year = int(leading.group(1))
    except ValueError:
        return None
    return year if 1900 <= year <= 2100 else None


def apply_infer_story_time_start_to_model(story: Any) -> None:
    """Infer a year for ``story`` and store it on ``story.time_start``.

    Reads ``title``, ``canonical_markdown`` and ``time_start`` from the object
    (a missing attribute counts as ``None``) and passes them to
    ``infer_story_time_start``. When nothing can be inferred the object is left
    untouched.

    NOTE(review): when the existing ``time_start`` is already valid, the value
    written back is the four-digit string returned by
    ``infer_story_time_start`` - confirm this is intended for stored values
    that carry more than the year (e.g. 'YYYY-MM').

    Args:
        story: Any object exposing the attributes listed above.
    """
    story_title = getattr(story, "title", None)
    story_body = getattr(story, "canonical_markdown", None)
    current_value = getattr(story, "time_start", None)
    inferred = infer_story_time_start(
        title=story_title,
        canonical_markdown=story_body,
        existing_time_start=current_value,
    )
    if not inferred:
        return
    story.time_start = inferred


def infer_story_time_start(
    *,
    title: str | None,
    canonical_markdown: str | None,
    existing_time_start: str | None = None,
) -> str | None:
    """Work out a story's year and return it as a ``YYYY`` string.

    Sources are consulted in priority order and the first one that yields a
    year wins:

    1. ``existing_time_start`` (a valid stored value takes precedence),
    2. a year found in ``title``,
    3. a year found in ``canonical_markdown``.

    Args:
        title: Story title, or ``None``.
        canonical_markdown: Story body, or ``None``.
        existing_time_start: Value already stored for the story, if any.

    Returns:
        The year formatted with ``str()``, or ``None`` when no source yields one.
    """
    year = parse_time_start_year(existing_time_start)
    if year is None:
        year = parse_year_from_title(title)
    if year is None:
        year = parse_year_from_text(canonical_markdown)
    return None if year is None else str(year)


def life_sort_key_parts(
    *,
    time_start: str | None,
    title: str | None,
    created_at: datetime | None,
    story_id: str,
) -> tuple[int, int, str]:
    """Build a ``sorted(...)`` key that orders stories from earliest to latest.

    The key is ``(year, created_seconds, story_id)``:

    * ``year`` comes from ``time_start`` or, failing that, from ``title``;
      stories without any year get 9999 so they sort last.
    * ``created_seconds`` is ``created_at.timestamp()`` truncated to an int, or
      0 when ``created_at`` is ``None``.
    * ``story_id`` is the final tie-breaker that keeps the order stable.

    Args:
        time_start: Stored time value of the story, or ``None``.
        title: Story title, or ``None``.
        created_at: Creation time of the story, or ``None``.
        story_id: Identifier of the story.

    Returns:
        The three-part sort key described above.
    """
    year = parse_time_start_year(time_start)
    if year is None:
        year = parse_year_from_title(title)
    order_year = 9999 if year is None else year
    created_seconds = int(created_at.timestamp()) if created_at is not None else 0
    return (order_year, created_seconds, story_id)