feat(evaluation): session catalog, user export import, and eval web UI

- Extend evaluation API: schemas, router, repo, admin and execution services
- Improve user export markdown importer; add fixtures and importer tests
- Session catalog repo/service updates; internal app wiring and docs
- Add internal-eval.sh helper; refresh app-eval-web (App, styles, Vite)
This commit is contained in:
Kevin
2026-04-06 13:45:04 +08:00
parent b75edacb5f
commit ca8bcc8489
17 changed files with 2062 additions and 296 deletions

View File

@@ -204,6 +204,27 @@ async def list_runs_for_experiment(
return list(res.scalars().all())
async def list_runs_for_source_conversation(
    db: AsyncSession,
    *,
    source_conversation_id: str,
    limit: int = 80,
) -> list[tuple[EvalRun, EvalCase, EvalExperiment]]:
    """Fetch runs whose case originates from the given source conversation.

    Each result row pairs an ``EvalRun`` with the ``EvalCase`` it belongs to
    (matched on ``EvalRun.case_id``) and its ``EvalExperiment`` (matched on
    ``EvalRun.experiment_id``). Only cases whose ``source_conversation_id``
    equals the argument are included.

    Rows are ordered by ``completed_at`` descending, then ``started_at``
    descending; NULL timestamps sort after non-NULL ones in both keys.

    Args:
        db: Session used to execute the query.
        source_conversation_id: Value matched against
            ``EvalCase.source_conversation_id``.
        limit: Maximum number of rows to return.

    Returns:
        A list of at most ``limit`` (run, case, experiment) rows.
    """
    run_belongs_to_case = EvalRun.case_id == EvalCase.id
    run_belongs_to_experiment = EvalRun.experiment_id == EvalExperiment.id
    from_conversation = EvalCase.source_conversation_id == source_conversation_id

    # Most recently completed first; start time is the secondary sort key.
    newest_first = (
        EvalRun.completed_at.desc().nulls_last(),
        EvalRun.started_at.desc().nulls_last(),
    )

    query = select(EvalRun, EvalCase, EvalExperiment)
    query = query.join(EvalCase, run_belongs_to_case)
    query = query.join(EvalExperiment, run_belongs_to_experiment)
    query = query.where(from_conversation)
    query = query.order_by(*newest_first)
    query = query.limit(limit)

    result = await db.execute(query)
    return list(result.all())
async def update_run(
db: AsyncSession,
run: EvalRun,