feat(evaluation): session catalog, user export import, and eval web UI

- Extend evaluation API: schemas, router, repo, admin and execution services
- Improve user export markdown importer; add fixtures and importer tests
- Session catalog repo/service updates; internal app wiring and docs
- Add internal-eval.sh helper; refresh app-eval-web (App, styles, Vite)
2026-04-06 13:45:04 +08:00
parent b75edacb5f
commit ca8bcc8489
17 changed files with 2062 additions and 296 deletions
--- a/api/app/features/evaluation/importers/user_export_markdown.py
+++ b/api/app/features/evaluation/importers/user_export_markdown.py
@@ -17,3 +17,30 @@ def extract_user_utterances_from_export_md(text: str) -> list[str]:
        if chunk and chunk != "（空）":
            out.append(chunk)
    return out
+
+
+def extract_dialogue_turns_from_export_md(text: str) -> list[tuple[str, str]]:
+    """Extract (user, AI) pairs per "轮次" (round) from Markdown exported by extract_sql_to_user_md.
+    Meant for side-by-side comparison in the evaluation console; unmatched AI text is an empty string.
+    """
+    chunks = re.split(r"\n####\s*轮次\s*\d+[^\n]*", text)  # split at each "#### 轮次 <n>" heading that follows a newline
+    out: list[tuple[str, str]] = []
+    for chunk in chunks[1:]:  # chunks[0] is whatever precedes the first matched round heading
+        user_m = re.search(
+            r"\*\*用户:\*\*\s*\n+(.+?)(?=\n\*\*AI:\*\*)",  # user text: after the user marker, up to the AI marker
+            chunk,
+            flags=re.DOTALL | re.IGNORECASE,
+        )
+        ai_m = re.search(
+            r"\*\*AI:\*\*\s*\n+(.+?)(?=\n####\s|\n###\s+[^#]|\Z)",  # AI text: ends at the next ###/#### heading or chunk end
+            chunk,
+            flags=re.DOTALL | re.IGNORECASE,
+        )
+        if not user_m:  # no user section followed by an AI marker in this round: skip it
+            continue
+        u = (user_m.group(1) or "").strip()
+        if not u or u == "（空）":  # "（空）" ("empty") is treated the same as a blank utterance
+            continue
+        a = ((ai_m.group(1) if ai_m else "") or "").strip()  # AI text is optional; fall back to ""
+        out.append((u, a))
+    return out