Files
life-echo/api/app/features/story/time_hints.py
Kevin 309a051038 feat: 回忆录证据血缘与内部评测可追溯,顺带对齐本地评测台与 CI
数据库与模型:新增多版迁移(章节证据快照、对话血缘、记忆事实/时间线 lineage 等),把「成稿 ↔ 对话/记忆」的溯源信息落到表结构里。
业务链路:会话与 WS、回忆录/故事流水线、记忆写入与 enrichment 等跟着接上线索与快照;新增章节证据快照与评测侧 EvalTraceService 等模块,方便组评审用的证据包。
内部评测:自动化 run 与手工 memoir 评审共用可追溯证据;rubric/ judge 相关脚本与文档有配套调整。
app-eval-web:Memoir/实验详情里能展开看证据摘要与 evidence_trace(含对话轮次 id);Vite 代理与 development.sh 注入的 API 端口与当前默认内部评测端口一致,避免改端口后页面连错服务。
工程杂项:GitHub Actions / 仓库说明有更新;各适配器与支付/配额/plan 等多处为小改动或跟随主改动的收尾;新增/扩充了?
2026-04-08 15:37:09 +08:00

116 lines
3.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
从标题/正文推断 Story 的人生时间键,供章节内按发生顺序排序。
"""
from __future__ import annotations
import re
from datetime import datetime
from typing import Any
# Year inside a title: a 19xx/20xx number located at the very start of the
# string or immediately after a comma, and followed by "年", whitespace, "·"
# or the end of the string. The four digits are captured in named group "y".
# NOTE(review): the class `[\,]` contains only a comma (the backslash is a
# redundant escape), so a year preceded by a space is not matched — confirm
# this is intended and that no character was lost from the class.
_YEAR_IN_TITLE = re.compile(r"(?:^|[\,])(?P<y>(?:19|20)\d{2})(?:年|\s|·|$)")
# Any 19xx/20xx four-digit run. There are no boundary checks, so this also
# matches inside a longer sequence of digits.
_YEAR_ANYWHERE = re.compile(r"(?:19|20)\d{2}")
def parse_year_from_title(title: str | None) -> int | None:
    """Extract a four-digit year from a story title such as "1999年 · Title".

    The title is stripped of surrounding whitespace and searched with
    ``_YEAR_IN_TITLE``; the digits captured in group ``y`` are returned as
    an integer.

    Args:
        title: Story title; ``None`` or an empty string yields ``None``.

    Returns:
        The year as an ``int``, or ``None`` when the title has no match.
    """
    if not title:
        return None

    found = _YEAR_IN_TITLE.search(title.strip())
    if found is None:
        return None

    try:
        year = int(found.group("y"))
    except ValueError:
        return None
    return year
# Four-digit 19xx/20xx year that is not part of a longer digit run, so that
# amounts, phone numbers and IDs such as "120000" are not misread as a year.
_YEAR_STANDALONE = re.compile(r"(?<!\d)(?:19|20)\d{2}(?!\d)")


def parse_year_from_text(text: str | None, *, max_chars: int = 4000) -> int | None:
    """Return the first standalone four-digit year near the start of *text*.

    Only the first ``max_chars`` characters are scanned. A candidate must be
    a 19xx or 20xx number that is neither preceded nor followed by another
    digit; digits embedded in a longer number are ignored.

    Args:
        text: Body text to scan; ``None`` or an empty string yields ``None``.
        max_chars: Length of the leading slice of ``text`` that is searched.

    Returns:
        The year as an ``int`` (1900-2099), or ``None`` when nothing matches.
    """
    if not text:
        return None

    found = _YEAR_STANDALONE.search(text[:max_chars])
    if found is None:
        return None

    # The pattern only admits 19xx/20xx, so no extra range check is needed.
    return int(found.group(0))
def parse_time_start_year(time_start: str | None) -> int | None:
    """Take the leading year of a ``time_start`` value for sorting.

    ``time_start`` holds values such as ``'YYYY'`` or ``'YYYY-MM'``. The
    value is converted with ``str()``, stripped, and its first four digits
    are read as the year.

    Args:
        time_start: Stored time value; falsy or whitespace-only input
            yields ``None``.

    Returns:
        The year when the value starts with four digits in the range
        1900-2100 (inclusive), otherwise ``None``.
    """
    if not time_start:
        return None

    cleaned = str(time_start).strip()
    if not cleaned:
        return None

    leading = re.match(r"^(\d{4})", cleaned)
    if leading is None:
        return None

    try:
        year = int(leading.group(1))
    except ValueError:
        return None
    return year if 1900 <= year <= 2100 else None
def apply_infer_story_time_start_to_model(story: Any) -> None:
    """Write the inferred ``YYYY`` string to ``story.time_start``.

    Reads ``title``, ``canonical_markdown`` and ``time_start`` from *story*
    (each defaulting to ``None`` when the attribute is missing) and passes
    them to ``infer_story_time_start``. The attribute is assigned only when
    that call returns a non-empty value; otherwise *story* is left untouched.

    NOTE(review): a valid existing value is kept only as its year — an
    existing ``'YYYY-MM'`` is rewritten to ``'YYYY'``. Confirm that dropping
    the month is intended.

    Args:
        story: Object exposing the attributes listed above; mutated in place.
    """
    story_title = getattr(story, "title", None)
    story_body = getattr(story, "canonical_markdown", None)
    current_value = getattr(story, "time_start", None)

    inferred = infer_story_time_start(
        title=story_title,
        canonical_markdown=story_body,
        existing_time_start=current_value,
    )
    if not inferred:
        return
    story.time_start = inferred
def infer_story_time_start(
    *,
    title: str | None,
    canonical_markdown: str | None,
    existing_time_start: str | None = None,
) -> str | None:
    """Infer a story's start year as a uniform ``YYYY`` string.

    Sources are tried in priority order and the first one that yields a year
    wins: the existing ``time_start`` value, then the title, then the body
    text. Later sources are not consulted once a year has been found.

    Args:
        title: Story title.
        canonical_markdown: Story body text.
        existing_time_start: Currently stored value; when it parses to a
            valid year, that year is returned.

    Returns:
        The year formatted with ``str()``, or ``None`` when no source
        yields a year.
    """
    year = parse_time_start_year(existing_time_start)
    if year is None:
        year = parse_year_from_title(title)
    if year is None:
        year = parse_year_from_text(canonical_markdown)

    return None if year is None else str(year)
def life_sort_key_parts(
    *,
    time_start: str | None,
    title: str | None,
    created_at: datetime | None,
    story_id: str,
) -> tuple[int, int, str]:
    """Build a ``sorted(...)`` key that orders stories by life chronology.

    Stories sort from earliest to latest year. The year comes from
    ``time_start`` when it parses, otherwise from the title; stories with no
    year get 9999 so they sort last. Creation time and then the story id
    break ties so the order is stable.

    Args:
        time_start: Stored time value of the story.
        title: Story title, used as a fallback source for the year.
        created_at: Creation time; ``None`` contributes 0 to the key.
        story_id: Identifier used as the final tie-breaker.

    Returns:
        ``(year, created_at as whole-second timestamp, story_id)``.
    """
    year = parse_time_start_year(time_start)
    if year is None:
        title_year = parse_year_from_title(title)
        year = 9999 if title_year is None else title_year

    created_ts = 0 if created_at is None else int(created_at.timestamp())
    return (year, created_ts, story_id)