"""平台门禁:均分提升 + 受保护 session 无明显退步。""" from __future__ import annotations from dataclasses import dataclass from typing import Any from app.core.config import settings from app.features.evaluation.models import EvalCase, EvalRun @dataclass class GateResult: passed: bool mean_baseline_composite: float mean_candidate_composite: float mean_delta: float protected_regressions: list[dict[str, Any]] per_case: list[dict[str, Any]] def compute_gate( *, cases: list[EvalCase], runs: list[EvalRun], regression_threshold: float | None = None, ) -> GateResult: thr = ( regression_threshold if regression_threshold is not None else settings.eval_gate_protected_regression_threshold ) by_case: dict[str, dict[str, EvalRun]] = {} for r in runs: if r.status != "completed": continue by_case.setdefault(r.case_id, {})[r.side] = r per_case: list[dict[str, Any]] = [] base_scores: list[float] = [] cand_scores: list[float] = [] protected_regs: list[dict[str, Any]] = [] case_map = {c.id: c for c in cases} for cid, sides in by_case.items(): b = sides.get("baseline") c_run = sides.get("candidate") if not b or not c_run: continue if b.composite_score is None or c_run.composite_score is None: continue bs = float(b.composite_score) cs = float(c_run.composite_score) delta = cs - bs base_scores.append(bs) cand_scores.append(cs) ec = case_map.get(cid) protected = bool(ec and ec.is_protected) row = { "case_id": cid, "title": ec.title if ec else None, "baseline_composite": bs, "candidate_composite": cs, "delta": delta, "protected": protected, } per_case.append(row) if protected and delta < -thr: protected_regs.append( { "case_id": cid, "title": ec.title if ec else None, "delta": delta, "threshold": thr, } ) mean_b = sum(base_scores) / len(base_scores) if base_scores else 0.0 mean_c = sum(cand_scores) / len(cand_scores) if cand_scores else 0.0 mean_delta = mean_c - mean_b passed = ( mean_c > mean_b + 1e-6 and len(protected_regs) == 0 and len(base_scores) > 0 ) return GateResult( passed=passed, mean_baseline_composite=mean_b, mean_candidate_composite=mean_c, mean_delta=mean_delta, protected_regressions=protected_regs, per_case=per_case, )