feat/ eval
This commit is contained in:
@@ -49,3 +49,47 @@ def extract_dialogue_turns_from_export_md(text: str) -> list[tuple[str, str]]:
|
||||
raw_ai = ((ai_m.group(1) if ai_m else "") or "").strip()
|
||||
out.append((u, _normalize_export_ai_block(raw_ai)))
|
||||
return out
|
||||
|
||||
|
||||
# Level-2 heading that opens the generated memoir section of an export.
# The brackets around "生成正文" are matched literally and may be full-width
# (U+FF08 / U+FF09), ASCII, or absent; a bare "(...)" in the pattern would be
# a capture group and would never match a heading that contains parentheses.
_MEMOIR_SECTION_HEADER = re.compile(
    r"^##\s*回忆录章节[\uff08(]?生成正文[\uff09)]?\s*$",
    re.MULTILINE | re.IGNORECASE,
)
|
||||
|
||||
# An ``{{IMAGE:...}}`` placeholder together with any whitespace that follows it.
_IMAGE_REF = re.compile(
    r"\{\{IMAGE:[^}]*\}\}\s*",
    re.DOTALL,
)
|
||||
|
||||
|
||||
def extract_source_user_id_from_export_md(text: str) -> str | None:
|
||||
"""匹配导出头 ``**User ID:** `uuid` ``。"""
|
||||
m = re.search(r"\*\*User ID:\*\*\s*`([0-9a-fA-F-]{36})`", text)
|
||||
if not m:
|
||||
return None
|
||||
return m.group(1).strip()
|
||||
|
||||
|
||||
def extract_memoir_chapter_sections_from_export_md(text: str) -> list[tuple[str, str]]:
    """Split the memoir part of an export into ``(title, body)`` pairs.

    Everything after the first match of ``_MEMOIR_SECTION_HEADER`` is cut at
    each line that starts with ``## `` or ``### ``. For every resulting chunk
    that begins with ``#``, the first line (minus its leading ``#`` characters)
    is the title and the remainder is the body. ``_IMAGE_REF`` placeholders are
    removed from the body and runs of three or more newlines are collapsed to
    a single blank line.

    Returns an empty list when the header is missing or nothing follows it.
    Chunks whose title or cleaned body is empty are dropped.
    """
    header = _MEMOIR_SECTION_HEADER.search(text)
    if header is None:
        return []

    remainder = text[header.end():].strip()
    if not remainder:
        return []

    sections: list[tuple[str, str]] = []
    # The lookahead keeps the heading marker at the start of each chunk.
    for chunk in re.split(r"\n(?=(?:###\s|##\s+))", remainder):
        chunk = chunk.strip()
        if not chunk.startswith("#"):
            # Text before the first heading is not a chapter.
            continue

        # A chunk with no newline is a bare heading: its body is empty.
        heading, _, rest = chunk.partition("\n")
        title = heading.lstrip("#").strip()

        body = _IMAGE_REF.sub("", rest.strip())
        body = re.sub(r"\n{3,}", "\n\n", body).strip()

        if title and body:
            sections.append((title, body))
    return sections
|
||||
|
||||
Reference in New Issue
Block a user