refactor(eval+memoir):精简内部评测路由与服务,composite/对话摘要与 judge 能力补强
- 访谈:新增 interview_state_hints,联动 orchestrator 与提示词
- 回忆录:story_pipeline_sync/state/memory/post_commit 与 Celery 任务调整
- 基建:开发用 celery broker、compose/development 脚本、依赖注入
- eval-web:移除数据集/实验/版本等页面与流式轮询,突出 Playground
- 文档与单测同步
This commit is contained in:
@@ -28,45 +28,32 @@ from app.features.evaluation.importers.user_export_markdown import (
|
||||
from app.features.evaluation.internal_auth import InternalEvalAuth
|
||||
from app.features.evaluation.judge_manual_service import EvalJudgeManualService
|
||||
from app.features.evaluation.memoir_readiness_service import MemoirReadinessService
|
||||
from app.features.evaluation.presenters import case_out, run_out
|
||||
from app.features.evaluation.replay_service import ReplayConversationService
|
||||
from app.features.evaluation.schemas import (
|
||||
CaseCreate,
|
||||
CaseOut,
|
||||
EvalRunOut,
|
||||
EvalSandboxOut,
|
||||
ExperimentCreate,
|
||||
ExperimentDetailOut,
|
||||
ExperimentOut,
|
||||
GateVerdictOut,
|
||||
ImportJsonCaseBody,
|
||||
ImportMarkdownBody,
|
||||
ManualJudgeConversationBody,
|
||||
ManualJudgeConversationOut,
|
||||
ManualJudgeConversationStreamBody,
|
||||
ManualJudgeMemoirBody,
|
||||
ManualJudgeMemoirOut,
|
||||
PlaygroundConversationJudgeOut,
|
||||
MemoirPhase1ReadyOut,
|
||||
MemoirSectionBaselineOut,
|
||||
RegressionSetCreate,
|
||||
RegressionSetOut,
|
||||
MemoirSubmitOut,
|
||||
PlaygroundConversationJudgeOut,
|
||||
ReplayBootstrapBody,
|
||||
ReplayBootstrapOut,
|
||||
ReplayConversationBody,
|
||||
ReplayConversationOut,
|
||||
RetryBaselineJudgeBody,
|
||||
RetryBaselineJudgeOut,
|
||||
SessionDialogueOut,
|
||||
SessionEvalRunsOut,
|
||||
SessionListItem,
|
||||
SessionListResponse,
|
||||
SessionTranscriptOut,
|
||||
SnapshotFromConversationBody,
|
||||
UserExportFixtureDetailOut,
|
||||
UserExportFixtureListOut,
|
||||
UserExportFixtureTurnOut,
|
||||
UserMemoirSnapshotOut,
|
||||
VersionCreate,
|
||||
VersionOut,
|
||||
)
|
||||
from app.features.evaluation.session_catalog_service import SessionCatalogService
|
||||
from app.features.evaluation.user_export_fixtures import read_user_export_fixture
|
||||
@@ -88,73 +75,6 @@ def _eval_http_exc(
|
||||
return HTTPException(status_code=400, detail=e.detail)
|
||||
|
||||
|
||||
@router.get("/regression-sets", response_model=list[RegressionSetOut])
|
||||
async def list_regression_sets(
|
||||
_auth: InternalEvalAuth,
|
||||
svc: Annotated[EvaluationAdminService, Depends(get_evaluation_admin_service)],
|
||||
):
|
||||
rows = await svc.list_regression_sets()
|
||||
return [RegressionSetOut.model_validate(r) for r in rows]
|
||||
|
||||
|
||||
@router.post("/regression-sets", response_model=RegressionSetOut)
|
||||
async def create_regression_set(
|
||||
body: RegressionSetCreate,
|
||||
_auth: InternalEvalAuth,
|
||||
svc: Annotated[EvaluationAdminService, Depends(get_evaluation_admin_service)],
|
||||
):
|
||||
try:
|
||||
row = await svc.create_regression_set(body)
|
||||
except (EvaluationNotFoundError, EvaluationBadRequestError) as e:
|
||||
raise _eval_http_exc(e) from e
|
||||
return RegressionSetOut.model_validate(row)
|
||||
|
||||
|
||||
@router.get("/regression-sets/{set_id}/cases", response_model=list[CaseOut])
|
||||
async def list_cases(
|
||||
set_id: str,
|
||||
_auth: InternalEvalAuth,
|
||||
svc: Annotated[EvaluationAdminService, Depends(get_evaluation_admin_service)],
|
||||
):
|
||||
try:
|
||||
rows = await svc.list_cases(set_id)
|
||||
except EvaluationNotFoundError as e:
|
||||
raise _eval_http_exc(e) from e
|
||||
return [case_out(r) for r in rows]
|
||||
|
||||
|
||||
@router.post("/regression-sets/{set_id}/cases", response_model=CaseOut)
|
||||
async def create_case(
|
||||
set_id: str,
|
||||
body: CaseCreate,
|
||||
_auth: InternalEvalAuth,
|
||||
svc: Annotated[EvaluationAdminService, Depends(get_evaluation_admin_service)],
|
||||
):
|
||||
try:
|
||||
row = await svc.create_case(set_id, body)
|
||||
except (EvaluationNotFoundError, EvaluationBadRequestError) as e:
|
||||
raise _eval_http_exc(e) from e
|
||||
return case_out(row)
|
||||
|
||||
|
||||
@router.post(
|
||||
"/regression-sets/{set_id}/snapshot-from-conversation/{conversation_id}",
|
||||
response_model=CaseOut,
|
||||
)
|
||||
async def snapshot_from_conversation(
|
||||
set_id: str,
|
||||
conversation_id: str,
|
||||
body: SnapshotFromConversationBody,
|
||||
_auth: InternalEvalAuth,
|
||||
svc: Annotated[EvaluationAdminService, Depends(get_evaluation_admin_service)],
|
||||
):
|
||||
try:
|
||||
row = await svc.snapshot_from_conversation(set_id, conversation_id, body)
|
||||
except (EvaluationNotFoundError, EvaluationBadRequestError) as e:
|
||||
raise _eval_http_exc(e) from e
|
||||
return case_out(row)
|
||||
|
||||
|
||||
@router.get("/sessions", response_model=SessionListResponse)
|
||||
async def list_sessions(
|
||||
_auth: InternalEvalAuth,
|
||||
@@ -275,16 +195,25 @@ async def memoir_phase1_ready(
|
||||
raise _eval_http_exc(e) from e
|
||||
|
||||
|
||||
@router.get(
|
||||
"/sessions/{conversation_id}/evaluation-runs",
|
||||
response_model=SessionEvalRunsOut,
|
||||
@router.post(
|
||||
"/sessions/{conversation_id}/memoir-submit",
|
||||
response_model=MemoirSubmitOut,
|
||||
)
|
||||
async def list_session_evaluation_runs(
|
||||
async def memoir_submit_phase1(
|
||||
conversation_id: str,
|
||||
_auth: InternalEvalAuth,
|
||||
svc: Annotated[EvaluationAdminService, Depends(get_evaluation_admin_service)],
|
||||
svc: Annotated[
|
||||
MemoirReadinessService, Depends(get_memoir_readiness_service)
|
||||
],
|
||||
):
|
||||
return await svc.list_session_evaluation_runs(conversation_id)
|
||||
try:
|
||||
return await svc.submit_memoir_phase1_for_conversation(
|
||||
conversation_id=conversation_id,
|
||||
)
|
||||
except EvaluationNotFoundError as e:
|
||||
raise _eval_http_exc(e) from e
|
||||
except EvaluationBadRequestError as e:
|
||||
raise _eval_http_exc(e) from e
|
||||
|
||||
|
||||
@router.post("/sessions/replay-bootstrap", response_model=ReplayBootstrapOut)
|
||||
@@ -342,6 +271,7 @@ async def replay_conversation(
|
||||
conversation_id=body.conversation_id,
|
||||
fixture_filename=fn,
|
||||
flush_memoir_after=body.flush_memoir_after,
|
||||
skip_memoir=body.skip_memoir,
|
||||
skip_tts=body.skip_tts,
|
||||
)
|
||||
elif body.user_utterances is not None:
|
||||
@@ -352,6 +282,7 @@ async def replay_conversation(
|
||||
conversation_id=body.conversation_id,
|
||||
utterances=utt,
|
||||
flush_memoir_after=body.flush_memoir_after,
|
||||
skip_memoir=body.skip_memoir,
|
||||
skip_tts=body.skip_tts,
|
||||
)
|
||||
echo = utt
|
||||
@@ -383,6 +314,8 @@ async def judge_conversation_manual(
|
||||
payload = await judge_svc.judge_conversation(
|
||||
body.conversation_id,
|
||||
body.fixture_filename,
|
||||
judge_provider=body.judge_provider,
|
||||
judge_model=body.judge_model,
|
||||
)
|
||||
except EvaluationNotFoundError as e:
|
||||
raise _eval_http_exc(e) from e
|
||||
@@ -406,6 +339,8 @@ async def judge_conversation_manual_stream(
|
||||
body.fixture_filename,
|
||||
include_turn_judges=body.include_turn_judges,
|
||||
include_baseline_turn_judges=body.include_baseline_turn_judges,
|
||||
judge_provider=body.judge_provider,
|
||||
judge_model=body.judge_model,
|
||||
):
|
||||
yield f"data: {json.dumps(evt, ensure_ascii=False)}\n\n"
|
||||
except Exception as e:
|
||||
@@ -427,6 +362,32 @@ async def judge_conversation_manual_stream(
|
||||
)
|
||||
|
||||
|
||||
@router.post(
|
||||
"/judge/conversation-retry-baseline",
|
||||
response_model=RetryBaselineJudgeOut,
|
||||
)
|
||||
async def retry_baseline_conversation_judge(
|
||||
body: RetryBaselineJudgeBody,
|
||||
_auth: InternalEvalAuth,
|
||||
judge_svc: Annotated[
|
||||
EvalJudgeManualService, Depends(get_eval_judge_manual_service)
|
||||
],
|
||||
):
|
||||
try:
|
||||
payload = await judge_svc.retry_baseline_conversation_judge(
|
||||
body.conversation_id,
|
||||
body.fixture_filename,
|
||||
include_baseline_turn_judges=body.include_baseline_turn_judges,
|
||||
judge_provider=body.judge_provider,
|
||||
judge_model=body.judge_model,
|
||||
)
|
||||
except EvaluationNotFoundError as e:
|
||||
raise _eval_http_exc(e) from e
|
||||
except EvaluationBadRequestError as e:
|
||||
raise _eval_http_exc(e) from e
|
||||
return RetryBaselineJudgeOut.model_validate(payload)
|
||||
|
||||
|
||||
@router.post("/judge/memoir-chapters", response_model=ManualJudgeMemoirOut)
|
||||
async def judge_memoir_chapters_manual(
|
||||
body: ManualJudgeMemoirBody,
|
||||
@@ -439,6 +400,8 @@ async def judge_memoir_chapters_manual(
|
||||
payload = await judge_svc.judge_memoir_for_user(
|
||||
body.user_id,
|
||||
body.baseline_sections,
|
||||
judge_provider=body.judge_provider,
|
||||
judge_model=body.judge_model,
|
||||
)
|
||||
except EvaluationBadRequestError as e:
|
||||
raise _eval_http_exc(e) from e
|
||||
@@ -496,107 +459,3 @@ async def get_user_export_fixture(
|
||||
MemoirSectionBaselineOut(title=t, body=b) for t, b in memoir_tuples
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
@router.post("/regression-sets/{set_id}/import-markdown", response_model=CaseOut)
|
||||
async def import_markdown_case(
|
||||
set_id: str,
|
||||
body: ImportMarkdownBody,
|
||||
_auth: InternalEvalAuth,
|
||||
svc: Annotated[EvaluationAdminService, Depends(get_evaluation_admin_service)],
|
||||
):
|
||||
try:
|
||||
row = await svc.import_markdown_case(set_id, body)
|
||||
except (EvaluationNotFoundError, EvaluationBadRequestError) as e:
|
||||
raise _eval_http_exc(e) from e
|
||||
return case_out(row)
|
||||
|
||||
|
||||
@router.post("/import/json-case", response_model=CaseOut)
|
||||
async def import_json_case(
|
||||
body: ImportJsonCaseBody,
|
||||
_auth: InternalEvalAuth,
|
||||
svc: Annotated[EvaluationAdminService, Depends(get_evaluation_admin_service)],
|
||||
):
|
||||
try:
|
||||
row = await svc.import_json_case(body)
|
||||
except (EvaluationNotFoundError, EvaluationBadRequestError) as e:
|
||||
raise _eval_http_exc(e) from e
|
||||
return case_out(row)
|
||||
|
||||
|
||||
@router.get("/versions", response_model=list[VersionOut])
|
||||
async def list_versions(
|
||||
_auth: InternalEvalAuth,
|
||||
svc: Annotated[EvaluationAdminService, Depends(get_evaluation_admin_service)],
|
||||
):
|
||||
rows = await svc.list_versions()
|
||||
return [VersionOut.model_validate(r) for r in rows]
|
||||
|
||||
|
||||
@router.post("/versions", response_model=VersionOut)
|
||||
async def create_version(
|
||||
body: VersionCreate,
|
||||
_auth: InternalEvalAuth,
|
||||
svc: Annotated[EvaluationAdminService, Depends(get_evaluation_admin_service)],
|
||||
):
|
||||
try:
|
||||
row = await svc.create_version(body)
|
||||
except (EvaluationNotFoundError, EvaluationBadRequestError) as e:
|
||||
raise _eval_http_exc(e) from e
|
||||
return VersionOut.model_validate(row)
|
||||
|
||||
|
||||
@router.get("/experiments", response_model=list[ExperimentOut])
|
||||
async def list_experiments(
|
||||
_auth: InternalEvalAuth,
|
||||
svc: Annotated[EvaluationAdminService, Depends(get_evaluation_admin_service)],
|
||||
limit: int = Query(50, ge=1, le=200),
|
||||
):
|
||||
rows = await svc.list_experiments(limit=limit)
|
||||
return [ExperimentOut.model_validate(r) for r in rows]
|
||||
|
||||
|
||||
@router.post("/experiments", response_model=ExperimentOut)
|
||||
async def create_experiment(
|
||||
body: ExperimentCreate,
|
||||
_auth: InternalEvalAuth,
|
||||
svc: Annotated[EvaluationAdminService, Depends(get_evaluation_admin_service)],
|
||||
):
|
||||
try:
|
||||
row = await svc.create_experiment(body)
|
||||
except (EvaluationNotFoundError, EvaluationBadRequestError) as e:
|
||||
raise _eval_http_exc(e) from e
|
||||
return ExperimentOut.model_validate(row)
|
||||
|
||||
|
||||
@router.get("/experiments/{experiment_id}", response_model=ExperimentDetailOut)
|
||||
async def get_experiment_detail(
|
||||
experiment_id: str,
|
||||
_auth: InternalEvalAuth,
|
||||
svc: Annotated[EvaluationAdminService, Depends(get_evaluation_admin_service)],
|
||||
):
|
||||
try:
|
||||
bundle = await svc.get_experiment_detail(experiment_id)
|
||||
except EvaluationNotFoundError as e:
|
||||
raise _eval_http_exc(e) from e
|
||||
run_outs: list[EvalRunOut] = [run_out(r, turns) for r, turns in bundle.run_rows]
|
||||
gate = GateVerdictOut.model_validate(bundle.gate) if bundle.gate else None
|
||||
return ExperimentDetailOut(
|
||||
experiment=ExperimentOut.model_validate(bundle.experiment),
|
||||
runs=run_outs,
|
||||
gate=gate,
|
||||
)
|
||||
|
||||
|
||||
@router.post("/experiments/{experiment_id}/run", response_model=ExperimentOut)
|
||||
async def enqueue_experiment_run(
|
||||
experiment_id: str,
|
||||
_auth: InternalEvalAuth,
|
||||
svc: Annotated[EvaluationAdminService, Depends(get_evaluation_admin_service)],
|
||||
):
|
||||
try:
|
||||
exp = await svc.enqueue_experiment_run(experiment_id)
|
||||
except EvaluationNotFoundError as e:
|
||||
raise _eval_http_exc(e) from e
|
||||
return ExperimentOut.model_validate(exp)
|
||||
|
||||
Reference in New Issue
Block a user