feat/ 导出开发容器内的数据用于评估
This commit is contained in:
23
api/app/features/evaluation/importers/script_json.py
Normal file
23
api/app/features/evaluation/importers/script_json.py
Normal file
@@ -0,0 +1,23 @@
|
||||
"""JSON 脚本导入评测用例。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
|
||||
def parse_script_json(raw: str | bytes) -> tuple[list[str], dict[str, Any]]:
|
||||
data = json.loads(raw if isinstance(raw, str) else raw.decode("utf-8"))
|
||||
if isinstance(data, list):
|
||||
utterances = [str(x).strip() for x in data if str(x).strip()]
|
||||
return utterances, {}
|
||||
if isinstance(data, dict):
|
||||
u = data.get("utterances") or data.get("user_utterances") or []
|
||||
if not isinstance(u, list):
|
||||
raise ValueError("utterances 必须是数组")
|
||||
utterances = [str(x).strip() for x in u if str(x).strip()]
|
||||
meta = {
|
||||
k: v for k, v in data.items() if k not in ("utterances", "user_utterances")
|
||||
}
|
||||
return utterances, meta
|
||||
raise ValueError("根须为数组或对象")
|
||||
@@ -0,0 +1,19 @@
|
||||
"""从 extract_sql_to_user_md 产出的 Markdown 中提取用户轮次。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
|
||||
def extract_user_utterances_from_export_md(text: str) -> list[str]:
    """Return the body text of every ``**用户:**`` block found in *text*.

    A block starts after the ``**用户:**`` marker and the newline(s) that
    follow it, and runs until the next ``**AI:**`` marker (matched
    case-insensitively), the next ``####`` heading, or the end of the text.
    Each body is stripped of surrounding whitespace; bodies that are empty
    or equal to the placeholder ``(空)`` are omitted.
    """
    user_block = re.compile(
        r"\*\*用户:\*\*\s*\n+(.+?)(?=\n+\*\*AI:\*\*|\n+####|\Z)",
        re.DOTALL | re.IGNORECASE,
    )
    bodies = (match.group(1).strip() for match in user_block.finditer(text))
    return [body for body in bodies if body not in ("", "(空)")]
|
||||
Reference in New Issue
Block a user