feat/ 导出开发容器内的数据用于评估

This commit is contained in:
Kevin
2026-04-03 14:44:46 +08:00
parent 828a29748e
commit b75edacb5f
51 changed files with 5999 additions and 57 deletions

View File

@@ -0,0 +1,23 @@
"""JSON 脚本导入评测用例。"""
from __future__ import annotations
import json
from typing import Any
def parse_script_json(raw: str | bytes) -> tuple[list[str], dict[str, Any]]:
data = json.loads(raw if isinstance(raw, str) else raw.decode("utf-8"))
if isinstance(data, list):
utterances = [str(x).strip() for x in data if str(x).strip()]
return utterances, {}
if isinstance(data, dict):
u = data.get("utterances") or data.get("user_utterances") or []
if not isinstance(u, list):
raise ValueError("utterances 必须是数组")
utterances = [str(x).strip() for x in u if str(x).strip()]
meta = {
k: v for k, v in data.items() if k not in ("utterances", "user_utterances")
}
return utterances, meta
raise ValueError("根须为数组或对象")

View File

@@ -0,0 +1,19 @@
"""从 extract_sql_to_user_md 产出的 Markdown 中提取用户轮次。"""
from __future__ import annotations
import re
def extract_user_utterances_from_export_md(text: str) -> list[str]:
    """Return the body of every ``**用户:**`` block found in *text*.

    A block starts at a ``**用户:**`` marker followed by at least one newline
    and runs until the next ``**AI:**`` marker (matched case-insensitively),
    the next ``####`` heading, or the end of the text. Bodies are stripped;
    empty bodies and the placeholder ``(空)`` are skipped.

    NOTE(review): the body group needs at least one character, so a marker
    followed directly by ``**AI:**`` would capture the following text
    instead of an empty body. Confirm the exporter always writes the
    placeholder for empty turns.
    """
    turn_pattern = re.compile(
        r"\*\*用户:\*\*\s*\n+(.+?)(?=\n+\*\*AI:\*\*|\n+####|\Z)",
        flags=re.DOTALL | re.IGNORECASE,
    )
    bodies = (
        (match.group(1) or "").strip() for match in turn_pattern.finditer(text)
    )
    return [body for body in bodies if body and body != "(空)"]