"""评测域数据访问。""" from __future__ import annotations import uuid from typing import Any from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession from app.features.evaluation.models import ( EvalCase, EvalExperiment, EvalGateVerdict, EvalRegressionSet, EvalRun, EvalRunTurn, EvalVersion, ) def _id() -> str: return str(uuid.uuid4()).replace("-", "")[:32] async def list_regression_sets(db: AsyncSession) -> list[EvalRegressionSet]: res = await db.execute( select(EvalRegressionSet).order_by(EvalRegressionSet.created_at.desc()) ) return list(res.scalars().unique().all()) async def get_regression_set(db: AsyncSession, sid: str) -> EvalRegressionSet | None: return await db.get(EvalRegressionSet, sid) async def create_regression_set( db: AsyncSession, *, name: str, description: str | None = None ) -> EvalRegressionSet: row = EvalRegressionSet(id=_id(), name=name, description=description) db.add(row) await db.flush() return row async def list_cases(db: AsyncSession, regression_set_id: str) -> list[EvalCase]: res = await db.execute( select(EvalCase) .where(EvalCase.regression_set_id == regression_set_id) .order_by(EvalCase.created_at.asc()) ) return list(res.scalars().all()) async def create_case( db: AsyncSession, *, regression_set_id: str, user_utterances: list[str], title: str | None = None, source_conversation_id: str | None = None, source_user_id: str | None = None, reference_memoir_markdown: str | None = None, is_protected: bool = False, meta: dict[str, Any] | None = None, ) -> EvalCase: row = EvalCase( id=_id(), regression_set_id=regression_set_id, source_conversation_id=source_conversation_id, source_user_id=source_user_id, title=title, user_utterances=list(user_utterances), reference_memoir_markdown=reference_memoir_markdown, is_protected=is_protected, meta=meta, ) db.add(row) await db.flush() return row async def get_case(db: AsyncSession, case_id: str) -> EvalCase | None: return await db.get(EvalCase, case_id) async def list_versions(db: AsyncSession) -> list[EvalVersion]: res = await db.execute(select(EvalVersion).order_by(EvalVersion.created_at.desc())) return list(res.scalars().all()) async def create_version( db: AsyncSession, *, name: str, runner_kind: str = "llm_chat_v1", config_json: dict[str, Any] | None = None, ) -> EvalVersion: row = EvalVersion( id=_id(), name=name, runner_kind=runner_kind, config_json=config_json, ) db.add(row) await db.flush() return row async def get_version(db: AsyncSession, vid: str) -> EvalVersion | None: return await db.get(EvalVersion, vid) async def create_experiment( db: AsyncSession, *, name: str, regression_set_id: str, baseline_version_id: str, candidate_version_id: str, rubric_pack: str = "conversation_v1+memoir_v1", composite_weights_json: dict[str, Any] | None = None, ) -> EvalExperiment: row = EvalExperiment( id=_id(), name=name, regression_set_id=regression_set_id, baseline_version_id=baseline_version_id, candidate_version_id=candidate_version_id, rubric_pack=rubric_pack, composite_weights_json=composite_weights_json, status="pending", ) db.add(row) await db.flush() return row async def get_experiment(db: AsyncSession, eid: str) -> EvalExperiment | None: return await db.get(EvalExperiment, eid) async def list_experiments(db: AsyncSession, limit: int = 50) -> list[EvalExperiment]: res = await db.execute( select(EvalExperiment).order_by(EvalExperiment.created_at.desc()).limit(limit) ) return list(res.scalars().all()) async def update_experiment( db: AsyncSession, exp: EvalExperiment, *, status: str | None = None, error_message: str | None = None, completed_at: Any | None = ..., ) -> None: if status is not None: exp.status = status if error_message is not None: exp.error_message = error_message if completed_at is not ...: exp.completed_at = completed_at async def get_run( db: AsyncSession, experiment_id: str, case_id: str, side: str ) -> EvalRun | None: res = await db.execute( select(EvalRun).where( EvalRun.experiment_id == experiment_id, EvalRun.case_id == case_id, EvalRun.side == side, ) ) return res.scalars().first() async def create_run( db: AsyncSession, *, experiment_id: str, case_id: str, side: str, ) -> EvalRun: row = EvalRun( id=_id(), experiment_id=experiment_id, case_id=case_id, side=side, status="pending", ) db.add(row) await db.flush() return row async def list_runs_for_experiment( db: AsyncSession, experiment_id: str ) -> list[EvalRun]: res = await db.execute( select(EvalRun).where(EvalRun.experiment_id == experiment_id) ) return list(res.scalars().all()) async def list_runs_for_source_conversation( db: AsyncSession, *, source_conversation_id: str, limit: int = 80, ) -> list[tuple[EvalRun, EvalCase, EvalExperiment]]: stmt = ( select(EvalRun, EvalCase, EvalExperiment) .join(EvalCase, EvalRun.case_id == EvalCase.id) .join(EvalExperiment, EvalRun.experiment_id == EvalExperiment.id) .where(EvalCase.source_conversation_id == source_conversation_id) .order_by( EvalRun.completed_at.desc().nulls_last(), EvalRun.started_at.desc().nulls_last(), ) .limit(limit) ) res = await db.execute(stmt) return list(res.all()) async def update_run( db: AsyncSession, run: EvalRun, *, status: str | None = None, error_message: str | None = None, memoir_markdown: str | None = None, conversation_score_total: float | None = None, memoir_score_total: float | None = None, composite_score: float | None = None, judge_bundle_json: dict[str, Any] | None = None, started_at: Any | None = ..., completed_at: Any | None = ..., ) -> None: if status is not None: run.status = status if error_message is not None: run.error_message = error_message if memoir_markdown is not None: run.memoir_markdown = memoir_markdown if conversation_score_total is not None: run.conversation_score_total = conversation_score_total if memoir_score_total is not None: run.memoir_score_total = memoir_score_total if composite_score is not None: run.composite_score = composite_score if judge_bundle_json is not None: run.judge_bundle_json = judge_bundle_json if started_at is not ...: run.started_at = started_at if completed_at is not ...: run.completed_at = completed_at async def add_turn( db: AsyncSession, *, run_id: str, turn_index: int, user_utterance: str, assistant_reply: str | None, duration_ms: int | None, judge_scores_json: dict[str, Any] | None, judge_rationale: str | None, ) -> EvalRunTurn: row = EvalRunTurn( id=_id(), run_id=run_id, turn_index=turn_index, user_utterance=user_utterance, assistant_reply=assistant_reply, duration_ms=duration_ms, judge_scores_json=judge_scores_json, judge_rationale=judge_rationale, ) db.add(row) await db.flush() return row async def list_turns(db: AsyncSession, run_id: str) -> list[EvalRunTurn]: res = await db.execute( select(EvalRunTurn) .where(EvalRunTurn.run_id == run_id) .order_by(EvalRunTurn.turn_index.asc()) ) return list(res.scalars().all()) async def upsert_gate_verdict( db: AsyncSession, *, experiment_id: str, passed: bool, mean_composite_delta: float | None, protected_regressions_json: list[dict[str, Any]] | None, details_json: dict[str, Any] | None, ) -> EvalGateVerdict: res = await db.execute( select(EvalGateVerdict).where(EvalGateVerdict.experiment_id == experiment_id) ) row = res.scalars().first() if row: row.passed = passed row.mean_composite_delta = mean_composite_delta row.protected_regressions_json = protected_regressions_json row.details_json = details_json await db.flush() return row row = EvalGateVerdict( id=_id(), experiment_id=experiment_id, passed=passed, mean_composite_delta=mean_composite_delta, protected_regressions_json=protected_regressions_json, details_json=details_json, ) db.add(row) await db.flush() return row async def get_gate_verdict( db: AsyncSession, experiment_id: str ) -> EvalGateVerdict | None: res = await db.execute( select(EvalGateVerdict).where(EvalGateVerdict.experiment_id == experiment_id) ) return res.scalars().first()