feat/ 导出开发容器内的数据用于评估
This commit is contained in:
54
api/tests/evaluation/test_gating_service.py
Normal file
54
api/tests/evaluation/test_gating_service.py
Normal file
@@ -0,0 +1,54 @@
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from app.features.evaluation.gating_service import compute_gate
|
||||
|
||||
|
||||
def _case(cid: str, protected: bool = False):
|
||||
c = MagicMock()
|
||||
c.id = cid
|
||||
c.title = None
|
||||
c.is_protected = protected
|
||||
return c
|
||||
|
||||
|
||||
def _run(case_id: str, side: str, composite: float, status: str = "completed"):
|
||||
r = MagicMock()
|
||||
r.case_id = case_id
|
||||
r.side = side
|
||||
r.status = status
|
||||
r.composite_score = composite
|
||||
return r
|
||||
|
||||
|
||||
def test_gate_passes_when_mean_up_and_no_protected_regression() -> None:
    """Gate passes when both unprotected cases score higher on the candidate.

    Case "1" goes 50 -> 60 and case "2" goes 40 -> 55; neither case is
    marked protected.
    """
    cases = [_case("1"), _case("2")]
    runs = [
        _run("1", "baseline", 50),
        _run("1", "candidate", 60),
        _run("2", "baseline", 40),
        _run("2", "candidate", 55),
    ]
    g = compute_gate(cases=cases, runs=runs, regression_threshold=2.0)
    assert g.passed
    assert g.mean_delta > 0
|
||||
|
||||
|
||||
def test_gate_fails_on_protected_regression() -> None:
    """Gate fails and reports one regression for a protected case.

    The single protected case drops from 80.0 (baseline) to 75.0
    (candidate) with ``regression_threshold=2.0``.
    """
    cases = [_case("1", protected=True)]
    runs = [
        _run("1", "baseline", 80.0),
        _run("1", "candidate", 75.0),
    ]
    g = compute_gate(cases=cases, runs=runs, regression_threshold=2.0)
    assert not g.passed
    assert len(g.protected_regressions) == 1
|
||||
|
||||
|
||||
def test_gate_fails_when_mean_not_higher() -> None:
    """Gate fails when the candidate scores below the baseline.

    The single unprotected case goes from 70.0 (baseline) to 69.0
    (candidate).
    """
    cases = [_case("1")]
    runs = [
        _run("1", "baseline", 70.0),
        _run("1", "candidate", 69.0),
    ]
    g = compute_gate(cases=cases, runs=runs, regression_threshold=2.0)
    assert not g.passed
|
||||
29
api/tests/evaluation/test_importers.py
Normal file
29
api/tests/evaluation/test_importers.py
Normal file
@@ -0,0 +1,29 @@
|
||||
from app.features.evaluation.importers.script_json import parse_script_json
|
||||
from app.features.evaluation.importers.user_export_markdown import (
|
||||
extract_user_utterances_from_export_md,
|
||||
)
|
||||
|
||||
|
||||
def test_parse_script_json_list() -> None:
    """A bare JSON array yields its items as utterances and empty metadata."""
    u, meta = parse_script_json('["a", "b"]')
    assert u == ["a", "b"]
    assert meta == {}
|
||||
|
||||
|
||||
def test_parse_script_json_object() -> None:
    """A JSON object yields ``utterances`` plus the remaining keys as metadata."""
    u, meta = parse_script_json('{"utterances":["x"],"foo":1}')
    assert u == ["x"]
    assert meta == {"foo": 1}
|
||||
|
||||
|
||||
def test_extract_user_lines_from_export_md() -> None:
    """Only the text under the user marker is extracted from an export.

    The fixture holds one user turn ("hello") followed by one AI turn
    ("hi"); only the user turn is expected back.
    """
    # NOTE(review): the fixture lines are written flush-left here; confirm
    # this matches the leading whitespace the exporter actually produces.
    md = """
**用户:**

hello

**AI:**

hi
"""
    assert extract_user_utterances_from_export_md(md) == ["hello"]
|
||||
65
api/tests/evaluation/test_internal_router_auth.py
Normal file
65
api/tests/evaluation/test_internal_router_auth.py
Normal file
@@ -0,0 +1,65 @@
|
||||
"""内部路由在未配密钥时应 503。"""
|
||||
|
||||
import pytest
|
||||
from httpx import ASGITransport, AsyncClient
|
||||
|
||||
from app.features.evaluation.internal_auth import get_internal_eval_principal
|
||||
from app.features.evaluation.router import router
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_internal_eval_list_sets_requires_config(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Listing regression sets answers 503 when the internal eval key is empty."""
    from fastapi import FastAPI

    # Blank out the key for this test only; raising=False tolerates the
    # attribute being absent from settings.
    monkeypatch.setattr(
        "app.core.config.settings.internal_eval_api_key",
        "",
        raising=False,
    )
    app = FastAPI()
    app.include_router(router, prefix="/internal/api/evaluation")
    transport = ASGITransport(app=app)
    async with AsyncClient(transport=transport, base_url="http://t") as client:
        r = await client.get("/internal/api/evaluation/regression-sets")
    assert r.status_code == 503
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_internal_eval_with_override_lists_empty(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Listing regression sets returns ``[]`` with auth and DB overridden.

    The auth dependency is replaced with one returning a bare
    ``InternalEvalPrincipal`` and the DB dependency with a mocked session
    whose query result is an empty list.
    """
    from fastapi import FastAPI

    monkeypatch.setattr(
        "app.core.config.settings.internal_eval_api_key",
        "secret",
        raising=False,
    )
    app = FastAPI()
    app.include_router(router, prefix="/internal/api/evaluation")

    async def _override_auth():
        from app.features.evaluation.internal_auth import InternalEvalPrincipal

        return InternalEvalPrincipal()

    app.dependency_overrides[get_internal_eval_principal] = _override_auth
    from app.core.db import get_async_db
    from unittest.mock import AsyncMock, MagicMock

    mock_session = AsyncMock()
    mock_result = MagicMock()
    # Makes result.scalars().unique().all() evaluate to an empty list.
    mock_result.scalars.return_value.unique.return_value.all.return_value = []
    mock_session.execute = AsyncMock(return_value=mock_result)

    async def _db():
        yield mock_session

    app.dependency_overrides[get_async_db] = _db

    transport = ASGITransport(app=app)
    async with AsyncClient(transport=transport, base_url="http://t") as client:
        r = await client.get(
            "/internal/api/evaluation/regression-sets",
            headers={"X-Internal-Eval-Key": "secret"},
        )
    assert r.status_code == 200
    assert r.json() == []
|
||||
Reference in New Issue
Block a user