"""Tests for ``compute_gate`` from ``app.features.evaluation.gating_service``."""
from unittest.mock import MagicMock
|
|
|
|
from app.features.evaluation.gating_service import compute_gate
|
|
|
|
|
|
def _case(cid: str, protected: bool = False):
    """Build a mock evaluation case for the gate tests.

    Args:
        cid: Value exposed as the mock's ``id`` attribute.
        protected: Value exposed as the mock's ``is_protected`` attribute.

    Returns:
        A ``MagicMock`` with ``id``, ``title`` (always ``None``) and
        ``is_protected`` set.
    """
    # Keyword arguments to MagicMock are applied as attributes on the mock.
    return MagicMock(id=cid, title=None, is_protected=protected)
|
|
|
|
|
|
def _run(case_id: str, side: str, composite: float, status: str = "completed"):
    """Build a mock evaluation run for the gate tests.

    Args:
        case_id: Value exposed as the mock's ``case_id`` attribute.
        side: Value exposed as ``side``; the tests in this file use
            ``"baseline"`` and ``"candidate"``.
        composite: Value exposed as ``composite_score``.
        status: Value exposed as ``status``.

    Returns:
        A ``MagicMock`` carrying the four attributes above.
    """
    attrs = {
        "case_id": case_id,
        "side": side,
        "status": status,
        "composite_score": composite,
    }
    # Keyword arguments to MagicMock are applied as attributes on the mock.
    return MagicMock(**attrs)
|
|
|
|
|
|
def test_gate_passes_when_mean_up_and_no_protected_regression() -> None:
    """Gate passes when both unprotected cases score higher on the candidate side.

    Case "1" goes 50 -> 60 and case "2" goes 40 -> 55; the result is expected
    to be marked as passed with a positive ``mean_delta``.
    """
    # case id -> (baseline score, candidate score)
    scores = {"1": (50, 60), "2": (40, 55)}

    cases = [_case(cid) for cid in scores]
    runs = []
    for cid, (before, after) in scores.items():
        runs.append(_run(cid, "baseline", before))
        runs.append(_run(cid, "candidate", after))

    result = compute_gate(cases=cases, runs=runs, regression_threshold=2.0)

    assert result.passed
    assert result.mean_delta > 0
|
|
|
|
|
|
def test_gate_fails_on_protected_regression() -> None:
    """Gate fails when the single protected case drops from 80.0 to 75.0.

    The 5-point drop is larger than the ``regression_threshold`` of 2.0 passed
    in; the result is expected to be not passed and to list exactly one
    protected regression.
    """
    protected_case = _case("1", protected=True)
    baseline = _run("1", "baseline", 80.0)
    candidate = _run("1", "candidate", 75.0)

    outcome = compute_gate(
        cases=[protected_case],
        runs=[baseline, candidate],
        regression_threshold=2.0,
    )

    assert not outcome.passed
    assert len(outcome.protected_regressions) == 1
|
|
|
|
|
|
def test_gate_fails_when_mean_not_higher() -> None:
    """Gate fails when the only (unprotected) case scores lower on the candidate.

    The candidate score of 69.0 is below the baseline of 70.0, so the result
    is expected to be not passed.
    """
    side_scores = (("baseline", 70.0), ("candidate", 69.0))

    verdict = compute_gate(
        cases=[_case("1")],
        runs=[_run("1", side, score) for side, score in side_scores],
        regression_threshold=2.0,
    )

    assert not verdict.passed
|