数据库与模型:新增多版迁移(章节证据快照、对话血缘、记忆事实/时间线 lineage 等),把「成稿 ↔ 对话/记忆」的溯源信息落到表结构里。 业务链路:会话与 WS、回忆录/故事流水线、记忆写入与 enrichment 等跟着接上线索与快照;新增章节证据快照与评测侧 EvalTraceService 等模块,方便组评审用的证据包。 内部评测:自动化 run 与手工 memoir 评审共用可追溯证据;rubric/ judge 相关脚本与文档有配套调整。 app-eval-web:Memoir/实验详情里能展开看证据摘要与 evidence_trace(含对话轮次 id);Vite 代理与 development.sh 注入的 API 端口与当前默认内部评测端口一致,避免改端口后页面连错服务。 工程杂项:GitHub Actions / 仓库说明有更新;各适配器与支付/配额/plan 等多处为小改动或跟随主改动的收尾;新增/扩充了?
116 lines
3.2 KiB
Python
116 lines
3.2 KiB
Python
"""
|
||
从标题/正文推断 Story 的人生时间键,供章节内按发生顺序排序。
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import re
|
||
from datetime import datetime
|
||
from typing import Any
|
||
|
||
# Year inside a title such as "1999年 · 标题": a 19xx/20xx number that starts
# the title or follows whitespace, "·" or a comma (ASCII or full-width), and is
# followed by "年", whitespace, "·" or the end of the title.
_YEAR_IN_TITLE = re.compile(r"(?:^|[\s·,,])(?P<y>(?:19|20)\d{2})(?:年|\s|·|$)")
# Stand-alone 19xx/20xx year anywhere in free text. The digit lookarounds stop
# the pattern from matching inside a longer digit run (e.g. "2000" in "12000"
# or "1999" in a phone number), which would otherwise be reported as a year.
_YEAR_ANYWHERE = re.compile(r"(?<!\d)(?:19|20)\d{2}(?!\d)")
|
||
|
||
|
||
def parse_year_from_title(title: str | None) -> int | None:
    """Extract a four-digit year from a story title.

    Handles titles shaped like "1999年 · 标题" as well as a 19xx/20xx year
    elsewhere in the title, as recognized by the module-level
    ``_YEAR_IN_TITLE`` pattern. The title is stripped before matching.

    Args:
        title: Story title; ``None`` or an empty string yields ``None``.

    Returns:
        The year as an ``int``, or ``None`` when no year is recognized.
    """
    if not title:
        return None
    found = _YEAR_IN_TITLE.search(title.strip())
    if found is None:
        return None
    try:
        return int(found.group("y"))
    except ValueError:
        return None
|
||
|
||
|
||
def parse_year_from_text(text: str | None, *, max_chars: int = 4000) -> int | None:
    """Return the first plausible four-digit year near the start of *text*.

    Only the first ``max_chars`` characters are scanned, and only the first
    match of the module-level ``_YEAR_ANYWHERE`` pattern is considered.

    Args:
        text: Body text; ``None`` or an empty string yields ``None``.
        max_chars: Number of leading characters of *text* to scan.

    Returns:
        The matched year when it lies in 1900..2100, otherwise ``None``.
    """
    if not text:
        return None
    hit = _YEAR_ANYWHERE.search(text[:max_chars])
    if hit is None:
        return None
    try:
        year = int(hit.group(0))
    except (ValueError, TypeError):
        return None
    return year if 1900 <= year <= 2100 else None
|
||
|
||
|
||
def parse_time_start_year(time_start: str | None) -> int | None:
    """Return the year encoded at the start of a ``time_start`` value.

    ``time_start`` holds values such as ``'YYYY'`` or ``'YYYY-MM'``; only the
    leading four digits are used, so the result is suitable for sorting.

    Args:
        time_start: Stored value; falsy or whitespace-only yields ``None``.

    Returns:
        The leading year when it lies in 1900..2100, otherwise ``None``.
    """
    raw = str(time_start).strip() if time_start else ""
    if not raw:
        return None
    lead = re.match(r"^(\d{4})", raw)
    if lead is None:
        return None
    try:
        year = int(lead.group(1))
    except ValueError:
        return None
    return year if 1900 <= year <= 2100 else None
|
||
|
||
|
||
def apply_infer_story_time_start_to_model(story: Any) -> None:
    """Write the inferred ``YYYY`` year onto ``story.time_start``.

    Reads ``title``, ``canonical_markdown`` and ``time_start`` from *story*
    (a missing attribute is treated as ``None``) and hands them to
    ``infer_story_time_start``. ``story.time_start`` is assigned only when
    that call returns a non-empty value; otherwise it is left untouched.

    NOTE(review): because ``infer_story_time_start`` returns the year of a
    valid existing value, a stored ``'YYYY-MM'`` is rewritten as ``'YYYY'``
    here -- confirm this normalization is intended.
    """
    title = getattr(story, "title", None)
    body = getattr(story, "canonical_markdown", None)
    current = getattr(story, "time_start", None)
    inferred = infer_story_time_start(
        title=title,
        canonical_markdown=body,
        existing_time_start=current,
    )
    if not inferred:
        return
    story.time_start = inferred
|
||
|
||
|
||
def infer_story_time_start(
    *,
    title: str | None,
    canonical_markdown: str | None,
    existing_time_start: str | None = None,
) -> str | None:
    """Infer a story's start year as a uniform ``YYYY`` string.

    Sources are tried in priority order and the first one that yields a year
    wins: a valid ``existing_time_start``, then the title, then the body text.

    Args:
        title: Story title, parsed with ``parse_year_from_title``.
        canonical_markdown: Story body, parsed with ``parse_year_from_text``.
        existing_time_start: Current stored value, parsed with
            ``parse_time_start_year``; its year is kept when valid.

    Returns:
        The year as a string, or ``None`` when no source yields a year.
    """
    year = parse_time_start_year(existing_time_start)
    if year is None:
        year = parse_year_from_title(title)
    if year is None:
        year = parse_year_from_text(canonical_markdown)
    return None if year is None else str(year)
|
||
|
||
|
||
def life_sort_key_parts(
    *,
    time_start: str | None,
    title: str | None,
    created_at: datetime | None,
    story_id: str,
) -> tuple[int, int, str]:
    """Build a ``sorted(...)`` key that orders stories by life chronology.

    Stories sort from earliest to latest year. The year comes from
    ``time_start`` and falls back to the title; stories with no year get
    9999 so they sort last. Ties are broken by creation time, then by id.

    Args:
        time_start: Stored start value, parsed with ``parse_time_start_year``.
        title: Story title, used only when ``time_start`` yields no year.
        created_at: Creation time; ``None`` contributes 0 to the key.
        story_id: Story identifier, the final tie-breaker.

    Returns:
        ``(year, created_at as whole-second timestamp, story_id)``.
    """
    year = parse_time_start_year(time_start)
    if year is None:
        year = parse_year_from_title(title)
    year_key = 9999 if year is None else year
    created_key = 0 if created_at is None else int(created_at.timestamp())
    return (year_key, created_key, story_id)
|