feat(evaluation): session catalog, user export import, and eval web UI

- Extend evaluation API: schemas, router, repo, admin and execution services
- Improve user export markdown importer; add fixtures and importer tests
- Session catalog repo/service updates; internal app wiring and docs
- Add internal-eval.sh helper; refresh app-eval-web (App, styles, Vite)
2026-04-06 13:45:04 +08:00
parent b75edacb5f
commit ca8bcc8489
17 changed files with 2062 additions and 296 deletions
--- a/api/app/features/evaluation/repo.py
+++ b/api/app/features/evaluation/repo.py
@@ -204,6 +204,27 @@ async def list_runs_for_experiment(
    return list(res.scalars().all())


+async def list_runs_for_source_conversation(
+    db: AsyncSession,
+    *,
+    source_conversation_id: str,
+    limit: int = 80,
+) -> list[tuple[EvalRun, EvalCase, EvalExperiment]]:
+    """Return up to ``limit`` (run, case, experiment) rows for one source conversation."""
+    # Newest first by completion, then by start; NULL timestamps sort last.
+    newest_first = (
+        EvalRun.completed_at.desc().nulls_last(),
+        EvalRun.started_at.desc().nulls_last(),
+    )
+    query = select(EvalRun, EvalCase, EvalExperiment)
+    query = query.join(EvalCase, EvalRun.case_id == EvalCase.id)
+    query = query.join(EvalExperiment, EvalRun.experiment_id == EvalExperiment.id)
+    query = query.where(EvalCase.source_conversation_id == source_conversation_id)
+    query = query.order_by(*newest_first).limit(limit)
+    rows = await db.execute(query)
+    return list(rows.all())
+
+
 async def update_run(
    db: AsyncSession,
    run: EvalRun,