feat/ eval

This commit is contained in:
Kevin
2026-04-06 23:19:20 +08:00
parent 2fded6fbd9
commit 29dec8fe32
13 changed files with 2266 additions and 683 deletions

View File

@@ -49,3 +49,47 @@ def extract_dialogue_turns_from_export_md(text: str) -> list[tuple[str, str]]:
raw_ai = ((ai_m.group(1) if ai_m else "") or "").strip()
out.append((u, _normalize_export_ai_block(raw_ai)))
return out
_MEMOIR_SECTION_HEADER = re.compile(
r"^##\s*回忆录章节(生成正文)\s*$",
re.MULTILINE | re.IGNORECASE,
)
_IMAGE_REF = re.compile(r"\{\{IMAGE:[^}]*\}\}\s*", re.DOTALL)
def extract_source_user_id_from_export_md(text: str) -> str | None:
"""匹配导出头 ``**User ID:** `uuid` ``。"""
m = re.search(r"\*\*User ID:\*\*\s*`([0-9a-fA-F-]{36})`", text)
if not m:
return None
return m.group(1).strip()
def extract_memoir_chapter_sections_from_export_md(text: str) -> list[tuple[str, str]]:
    """Split the memoir-chapter part of an export into ``(title, body)`` pairs.

    Everything after the first ``_MEMOIR_SECTION_HEADER`` match is cut at each
    newline that is directly followed by a ``##`` or ``###`` heading. For each
    resulting chunk the first line (minus leading ``#`` characters) is the
    title and the rest is the body, with ``{{IMAGE:...}}`` placeholders
    removed and runs of three or more newlines collapsed to a blank line.

    Returns an empty list when the header is missing or nothing follows it.
    Chunks that do not start with ``#``, or whose title or cleaned body is
    empty, are dropped.
    """
    header = _MEMOIR_SECTION_HEADER.search(text)
    if header is None:
        return []

    remainder = text[header.end():].strip()
    if not remainder:
        return []

    sections: list[tuple[str, str]] = []
    # Split on the newline only (lookahead), so each chunk keeps its heading.
    for chunk in re.split(r"\n(?=(?:###\s|##\s+))", remainder):
        chunk = chunk.strip()
        # Text preceding the first heading is not a chapter.
        if not chunk.startswith("#"):
            continue

        # A heading-only chunk yields an empty remainder and is dropped below.
        heading, _, rest = chunk.partition("\n")
        title = heading.lstrip("#").strip()

        body = _IMAGE_REF.sub("", rest.strip())
        body = re.sub(r"\n{3,}", "\n\n", body).strip()

        if title and body:
            sections.append((title, body))
    return sections