2026-04-03 14:44:46 +08:00
|
|
|
"""内部评测 REST API。"""
|
|
|
|
|
|
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
from typing import Annotated
|
|
|
|
|
|
|
|
|
|
from fastapi import APIRouter, Depends, HTTPException, Query
|
|
|
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
|
|
|
|
|
|
from app.core.db import get_async_db
|
|
|
|
|
from app.features.evaluation.admin_service import EvaluationAdminService
|
|
|
|
|
from app.features.evaluation.deps import get_evaluation_admin_service
|
|
|
|
|
from app.features.evaluation.errors import (
|
|
|
|
|
EvaluationBadRequestError,
|
|
|
|
|
EvaluationNotFoundError,
|
|
|
|
|
)
|
|
|
|
|
from app.features.evaluation.internal_auth import InternalEvalAuth
|
|
|
|
|
from app.features.evaluation.presenters import case_out, run_out
|
|
|
|
|
from app.features.evaluation.schemas import (
|
|
|
|
|
CaseCreate,
|
|
|
|
|
CaseOut,
|
|
|
|
|
EvalRunOut,
|
|
|
|
|
ExperimentCreate,
|
|
|
|
|
ExperimentDetailOut,
|
|
|
|
|
ExperimentOut,
|
|
|
|
|
GateVerdictOut,
|
|
|
|
|
ImportJsonCaseBody,
|
|
|
|
|
ImportMarkdownBody,
|
|
|
|
|
RegressionSetCreate,
|
|
|
|
|
RegressionSetOut,
|
2026-04-06 13:45:04 +08:00
|
|
|
SessionDialogueOut,
|
|
|
|
|
SessionEvalRunsOut,
|
2026-04-03 14:44:46 +08:00
|
|
|
SessionListItem,
|
|
|
|
|
SessionListResponse,
|
|
|
|
|
SessionTranscriptOut,
|
|
|
|
|
SnapshotFromConversationBody,
|
2026-04-06 13:45:04 +08:00
|
|
|
UserExportFixtureDetailOut,
|
|
|
|
|
UserExportFixtureListOut,
|
|
|
|
|
UserExportFixtureTurnOut,
|
2026-04-03 14:44:46 +08:00
|
|
|
VersionCreate,
|
|
|
|
|
VersionOut,
|
|
|
|
|
)
|
|
|
|
|
from app.features.evaluation.session_catalog_service import SessionCatalogService
|
|
|
|
|
|
|
|
|
|
# Router that collects every endpoint declared in this module; all of its
# routes are grouped under the "internal-evaluation" tag.
router = APIRouter(tags=["internal-evaluation"])
|
|
|
|
|
|
|
|
|
|
|
2026-04-06 13:45:04 +08:00
|
|
|
@router.get("/ping", include_in_schema=False)
async def eval_api_ping() -> dict[str, str | bool]:
    """Unauthenticated liveness probe.

    A successful response confirms that the current process is
    ``internal_main`` and that this router has been mounted.

    Returns:
        A fixed payload with ``ok`` set to ``True`` and the service name.
    """
    payload: dict[str, str | bool] = {
        "ok": True,
        "service": "life-echo-internal-eval",
    }
    return payload
|
|
|
|
|
|
|
|
|
|
|
2026-04-03 14:44:46 +08:00
|
|
|
def _eval_http_exc(
    e: EvaluationNotFoundError | EvaluationBadRequestError,
) -> HTTPException:
    """Translate an evaluation domain error into an ``HTTPException``.

    Args:
        e: The domain error to convert.

    Returns:
        An ``HTTPException`` carrying ``e.detail``, with status 404 when ``e``
        is an ``EvaluationNotFoundError`` and status 400 otherwise. The
        exception is returned rather than raised so that call sites can write
        ``raise _eval_http_exc(e) from e`` and keep the original cause.
    """
    status_code = 404 if isinstance(e, EvaluationNotFoundError) else 400
    return HTTPException(status_code=status_code, detail=e.detail)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.get("/regression-sets", response_model=list[RegressionSetOut])
async def list_regression_sets(
    _auth: InternalEvalAuth,
    svc: Annotated[EvaluationAdminService, Depends(get_evaluation_admin_service)],
):
    # GET /regression-sets
    # Returns the rows produced by svc.list_regression_sets(), each validated
    # into a RegressionSetOut.
    # NOTE(review): _auth is never read here; presumably InternalEvalAuth is a
    # dependency alias that enforces internal auth - confirm in internal_auth.
    regression_sets = await svc.list_regression_sets()
    return list(map(RegressionSetOut.model_validate, regression_sets))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.post("/regression-sets", response_model=RegressionSetOut)
async def create_regression_set(
    body: RegressionSetCreate,
    _auth: InternalEvalAuth,
    svc: Annotated[EvaluationAdminService, Depends(get_evaluation_admin_service)],
):
    # POST /regression-sets
    # Delegates creation to svc.create_regression_set(body) and returns the new
    # row validated into a RegressionSetOut.
    # Evaluation not-found / bad-request errors are mapped to HTTP 404 / 400 by
    # _eval_http_exc.
    try:
        created = await svc.create_regression_set(body)
    except (EvaluationNotFoundError, EvaluationBadRequestError) as err:
        raise _eval_http_exc(err) from err
    else:
        # Validation stays outside the try body so only the service call is
        # covered by the error mapping.
        return RegressionSetOut.model_validate(created)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.get("/regression-sets/{set_id}/cases", response_model=list[CaseOut])
async def list_cases(
    set_id: str,
    _auth: InternalEvalAuth,
    svc: Annotated[EvaluationAdminService, Depends(get_evaluation_admin_service)],
):
    # GET /regression-sets/{set_id}/cases
    # Returns the cases that svc.list_cases(set_id) yields, each converted with
    # the case_out presenter. An EvaluationNotFoundError from the service is
    # turned into HTTP 404 by _eval_http_exc.
    try:
        case_rows = await svc.list_cases(set_id)
    except EvaluationNotFoundError as err:
        raise _eval_http_exc(err) from err
    else:
        return list(map(case_out, case_rows))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.post("/regression-sets/{set_id}/cases", response_model=CaseOut)
async def create_case(
    set_id: str,
    body: CaseCreate,
    _auth: InternalEvalAuth,
    svc: Annotated[EvaluationAdminService, Depends(get_evaluation_admin_service)],
):
    # POST /regression-sets/{set_id}/cases
    # Creates a case through svc.create_case(set_id, body) and returns it via
    # the case_out presenter. Evaluation not-found / bad-request errors become
    # HTTP 404 / 400 through _eval_http_exc.
    try:
        new_case = await svc.create_case(set_id, body)
    except (EvaluationNotFoundError, EvaluationBadRequestError) as err:
        raise _eval_http_exc(err) from err
    else:
        return case_out(new_case)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.post(
    "/regression-sets/{set_id}/snapshot-from-conversation/{conversation_id}",
    response_model=CaseOut,
)
async def snapshot_from_conversation(
    set_id: str,
    conversation_id: str,
    body: SnapshotFromConversationBody,
    _auth: InternalEvalAuth,
    svc: Annotated[EvaluationAdminService, Depends(get_evaluation_admin_service)],
):
    # POST /regression-sets/{set_id}/snapshot-from-conversation/{conversation_id}
    # Forwards the set id, conversation id and request body to
    # svc.snapshot_from_conversation and returns the resulting row through the
    # case_out presenter. Evaluation not-found / bad-request errors become
    # HTTP 404 / 400 through _eval_http_exc.
    try:
        snapshot_case = await svc.snapshot_from_conversation(
            set_id, conversation_id, body
        )
    except (EvaluationNotFoundError, EvaluationBadRequestError) as err:
        raise _eval_http_exc(err) from err
    else:
        return case_out(snapshot_case)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.get("/sessions", response_model=SessionListResponse)
async def list_sessions(
    _auth: InternalEvalAuth,
    db: Annotated[AsyncSession, Depends(get_async_db)],
    offset: int = Query(0, ge=0),
    limit: int = Query(50, ge=1, le=200),
    user_id: str | None = Query(None),
    q: str | None = Query(None),
    status: str | None = Query(
        None,
        description="按会话 status 过滤,如 active",
    ),
):
    # GET /sessions
    # Pages through sessions using a SessionCatalogService bound to the request
    # DB session. Query parameters:
    #   offset  - >= 0, default 0
    #   limit   - 1..200, default 50
    #   user_id - optional, passed through to the catalog unchanged
    #   q       - optional, passed through to the catalog unchanged
    #   status  - optional session status filter (e.g. "active")
    # The catalog returns (rows, total); each row is copied field by field into
    # a SessionListItem and wrapped in a SessionListResponse.
    session_rows, total_count = await SessionCatalogService(db).list_sessions(
        offset=offset, limit=limit, user_id=user_id, q=q, status=status
    )
    items = [
        SessionListItem(
            id=row.id,
            user_id=row.user_id,
            user_phone=row.user_phone,
            started_at=row.started_at,
            last_message_at=row.last_message_at,
            conversation_stage=row.conversation_stage,
            current_topic=row.current_topic,
            status=row.status,
        )
        for row in session_rows
    ]
    return SessionListResponse(items=items, total=total_count)
|
|
|
|
|
|
|
|
|
|
|
2026-04-06 13:45:04 +08:00
|
|
|
@router.get(
    "/sessions/{conversation_id}/dialogue",
    response_model=SessionDialogueOut,
)
async def get_session_dialogue(
    conversation_id: str,
    _auth: InternalEvalAuth,
    db: Annotated[AsyncSession, Depends(get_async_db)],
):
    # GET /sessions/{conversation_id}/dialogue
    # Returns whatever SessionCatalogService.get_session_dialogue yields for
    # the conversation; a falsy result is reported as HTTP 404.
    dialogue = await SessionCatalogService(db).get_session_dialogue(conversation_id)
    if dialogue:
        return dialogue
    raise HTTPException(status_code=404, detail="conversation not found")
|
|
|
|
|
|
|
|
|
|
|
2026-04-03 14:44:46 +08:00
|
|
|
@router.get(
    "/sessions/{conversation_id}/transcript", response_model=SessionTranscriptOut
)
async def get_session_transcript(
    conversation_id: str,
    _auth: InternalEvalAuth,
    db: Annotated[AsyncSession, Depends(get_async_db)],
):
    # GET /sessions/{conversation_id}/transcript
    # Loads the transcript via SessionCatalogService.get_transcript and copies
    # its fields into a SessionTranscriptOut; a falsy result is reported as
    # HTTP 404.
    transcript = await SessionCatalogService(db).get_transcript(conversation_id)
    if transcript:
        return SessionTranscriptOut(
            conversation_id=transcript.conversation_id,
            user_id=transcript.user_id,
            user_utterances_from_segments=transcript.user_utterances_from_segments,
            user_utterances_from_messages=transcript.user_utterances_from_messages,
        )
    raise HTTPException(status_code=404, detail="conversation not found")
|
|
|
|
|
|
|
|
|
|
|
2026-04-06 13:45:04 +08:00
|
|
|
@router.get(
    "/sessions/{conversation_id}/evaluation-runs",
    response_model=SessionEvalRunsOut,
)
async def list_session_evaluation_runs(
    conversation_id: str,
    _auth: InternalEvalAuth,
    svc: Annotated[EvaluationAdminService, Depends(get_evaluation_admin_service)],
):
    # GET /sessions/{conversation_id}/evaluation-runs
    # Returns the service result unchanged; FastAPI shapes it using the
    # declared response_model.
    # NOTE(review): unlike the sibling handlers, no evaluation error is mapped
    # to an HTTP status here - confirm the service cannot raise one.
    session_runs = await svc.list_session_evaluation_runs(conversation_id)
    return session_runs
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.get(
    "/fixtures/user-exports",
    response_model=UserExportFixtureListOut,
)
async def list_user_export_fixtures(
    _auth: InternalEvalAuth,
    svc: Annotated[EvaluationAdminService, Depends(get_evaluation_admin_service)],
):
    # GET /fixtures/user-exports
    # Wraps the names returned by svc.list_user_export_fixture_names() in a
    # UserExportFixtureListOut. The service call is synchronous (not awaited).
    fixture_names = svc.list_user_export_fixture_names()
    return UserExportFixtureListOut(items=fixture_names)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.get(
    "/fixtures/user-exports/{filename}",
    response_model=UserExportFixtureDetailOut,
)
async def get_user_export_fixture(
    filename: str,
    _auth: InternalEvalAuth,
    svc: Annotated[EvaluationAdminService, Depends(get_evaluation_admin_service)],
):
    # GET /fixtures/user-exports/{filename}
    # Loads the fixture's turns through svc.load_user_export_fixture_turns and
    # returns them as UserExportFixtureTurnOut entries.
    #   ValueError        -> HTTP 400 "invalid fixture filename"
    #   FileNotFoundError -> HTTP 404 "fixture not found"
    # Both are raised with `from None`, so the underlying exception is not
    # chained onto the HTTP error.
    try:
        turn_pairs = svc.load_user_export_fixture_turns(filename)
    except ValueError:
        raise HTTPException(
            status_code=400, detail="invalid fixture filename"
        ) from None
    except FileNotFoundError:
        raise HTTPException(status_code=404, detail="fixture not found") from None

    # Each loaded turn unpacks into a (user, ai) pair.
    turn_outs = [
        UserExportFixtureTurnOut(user=user_text, ai=ai_text)
        for user_text, ai_text in turn_pairs
    ]
    return UserExportFixtureDetailOut(filename=filename, turns=turn_outs)
|
|
|
|
|
|
|
|
|
|
|
2026-04-03 14:44:46 +08:00
|
|
|
@router.post("/regression-sets/{set_id}/import-markdown", response_model=CaseOut)
async def import_markdown_case(
    set_id: str,
    body: ImportMarkdownBody,
    _auth: InternalEvalAuth,
    svc: Annotated[EvaluationAdminService, Depends(get_evaluation_admin_service)],
):
    # POST /regression-sets/{set_id}/import-markdown
    # Hands the request body to svc.import_markdown_case for the given set and
    # returns the resulting row through the case_out presenter. Evaluation
    # not-found / bad-request errors become HTTP 404 / 400 via _eval_http_exc.
    try:
        imported_case = await svc.import_markdown_case(set_id, body)
    except (EvaluationNotFoundError, EvaluationBadRequestError) as err:
        raise _eval_http_exc(err) from err
    else:
        return case_out(imported_case)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.post("/import/json-case", response_model=CaseOut)
async def import_json_case(
    body: ImportJsonCaseBody,
    _auth: InternalEvalAuth,
    svc: Annotated[EvaluationAdminService, Depends(get_evaluation_admin_service)],
):
    # POST /import/json-case
    # Hands the request body to svc.import_json_case and returns the resulting
    # row through the case_out presenter. Evaluation not-found / bad-request
    # errors become HTTP 404 / 400 via _eval_http_exc.
    try:
        imported_case = await svc.import_json_case(body)
    except (EvaluationNotFoundError, EvaluationBadRequestError) as err:
        raise _eval_http_exc(err) from err
    else:
        return case_out(imported_case)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.get("/versions", response_model=list[VersionOut])
async def list_versions(
    _auth: InternalEvalAuth,
    svc: Annotated[EvaluationAdminService, Depends(get_evaluation_admin_service)],
):
    # GET /versions
    # Returns the rows produced by svc.list_versions(), each validated into a
    # VersionOut.
    version_rows = await svc.list_versions()
    return list(map(VersionOut.model_validate, version_rows))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.post("/versions", response_model=VersionOut)
async def create_version(
    body: VersionCreate,
    _auth: InternalEvalAuth,
    svc: Annotated[EvaluationAdminService, Depends(get_evaluation_admin_service)],
):
    # POST /versions
    # Creates a version through svc.create_version(body) and returns the new
    # row validated into a VersionOut. Evaluation not-found / bad-request
    # errors become HTTP 404 / 400 via _eval_http_exc.
    try:
        created = await svc.create_version(body)
    except (EvaluationNotFoundError, EvaluationBadRequestError) as err:
        raise _eval_http_exc(err) from err
    else:
        return VersionOut.model_validate(created)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.get("/experiments", response_model=list[ExperimentOut])
async def list_experiments(
    _auth: InternalEvalAuth,
    svc: Annotated[EvaluationAdminService, Depends(get_evaluation_admin_service)],
    limit: int = Query(50, ge=1, le=200),
):
    # GET /experiments
    # Returns the rows from svc.list_experiments(limit=limit), each validated
    # into an ExperimentOut. `limit` is a query parameter constrained to
    # 1..200 with a default of 50.
    experiment_rows = await svc.list_experiments(limit=limit)
    return list(map(ExperimentOut.model_validate, experiment_rows))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.post("/experiments", response_model=ExperimentOut)
async def create_experiment(
    body: ExperimentCreate,
    _auth: InternalEvalAuth,
    svc: Annotated[EvaluationAdminService, Depends(get_evaluation_admin_service)],
):
    # POST /experiments
    # Creates an experiment through svc.create_experiment(body) and returns the
    # new row validated into an ExperimentOut. Evaluation not-found /
    # bad-request errors become HTTP 404 / 400 via _eval_http_exc.
    try:
        created = await svc.create_experiment(body)
    except (EvaluationNotFoundError, EvaluationBadRequestError) as err:
        raise _eval_http_exc(err) from err
    else:
        return ExperimentOut.model_validate(created)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.get("/experiments/{experiment_id}", response_model=ExperimentDetailOut)
async def get_experiment_detail(
    experiment_id: str,
    _auth: InternalEvalAuth,
    svc: Annotated[EvaluationAdminService, Depends(get_evaluation_admin_service)],
):
    # GET /experiments/{experiment_id}
    # Fetches the experiment bundle from svc.get_experiment_detail and
    # assembles an ExperimentDetailOut from its three parts:
    #   run_rows   - (run, turns) pairs, each converted with run_out
    #   gate       - validated into a GateVerdictOut when truthy, else None
    #   experiment - validated into an ExperimentOut
    # An EvaluationNotFoundError from the service becomes HTTP 404.
    try:
        detail = await svc.get_experiment_detail(experiment_id)
    except EvaluationNotFoundError as err:
        raise _eval_http_exc(err) from err

    runs: list[EvalRunOut] = [
        run_out(run_row, turn_rows) for run_row, turn_rows in detail.run_rows
    ]

    gate_out = None
    if detail.gate:
        gate_out = GateVerdictOut.model_validate(detail.gate)

    return ExperimentDetailOut(
        experiment=ExperimentOut.model_validate(detail.experiment),
        runs=runs,
        gate=gate_out,
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.post("/experiments/{experiment_id}/run", response_model=ExperimentOut)
async def enqueue_experiment_run(
    experiment_id: str,
    _auth: InternalEvalAuth,
    svc: Annotated[EvaluationAdminService, Depends(get_evaluation_admin_service)],
):
    # POST /experiments/{experiment_id}/run
    # Asks svc.enqueue_experiment_run to enqueue the experiment and returns the
    # experiment it hands back, validated into an ExperimentOut. An
    # EvaluationNotFoundError from the service becomes HTTP 404.
    try:
        experiment = await svc.enqueue_experiment_run(experiment_id)
    except EvaluationNotFoundError as err:
        raise _eval_http_exc(err) from err
    else:
        return ExperimentOut.model_validate(experiment)
|