- Extend evaluation API: schemas, router, repo, admin and execution services
- Improve user export markdown importer; add fixtures and importer tests
- Session catalog repo/service updates; internal app wiring and docs
- Add internal-eval.sh helper; refresh app-eval-web (App, styles, Vite)
217 lines
4.9 KiB
Python
217 lines
4.9 KiB
Python
"""HTTP / OpenAPI 模型。"""
|
||
|
||
from __future__ import annotations
|
||
|
||
from datetime import datetime
|
||
from typing import Any
|
||
|
||
from pydantic import BaseModel, ConfigDict, Field
|
||
|
||
|
||
class RegressionSetCreate(BaseModel):
    # Input fields for creating a regression set: a required name plus an
    # optional free-text description (omitted -> None).
    name: str
    description: str | None = None
|
||
|
||
|
||
class RegressionSetOut(BaseModel):
    # Output representation of a regression set.
    # from_attributes=True lets pydantic build the model by reading
    # attributes off an object rather than requiring a mapping.
    model_config = ConfigDict(from_attributes=True)

    id: str
    name: str
    # Nullable, but declared without a default, so a value (possibly None)
    # must always be supplied.
    description: str | None
    created_at: datetime
|
||
|
||
|
||
class CaseCreate(BaseModel):
    # Input fields for creating a case. Only `user_utterances` is required;
    # every other field falls back to None / False when omitted.
    title: str | None = None
    user_utterances: list[str]
    # Optional provenance identifiers for the originating conversation/user.
    source_conversation_id: str | None = None
    source_user_id: str | None = None
    reference_memoir_markdown: str | None = None
    # NOTE(review): presumably flags the case as "protected" for gate
    # evaluation -- confirm against the service that consumes it.
    is_protected: bool = False
    # Free-form key/value payload; its schema is not constrained here.
    meta: dict[str, Any] | None = None
|
||
|
||
|
||
class CaseOut(BaseModel):
    # Output representation of a case; can be populated from an object's
    # attributes thanks to from_attributes=True.
    model_config = ConfigDict(from_attributes=True)

    id: str
    regression_set_id: str
    # The three nullable fields below have no default: a value (or an
    # explicit None) must be present on the source.
    source_conversation_id: str | None
    source_user_id: str | None
    title: str | None
    # Element type is intentionally loose here (list[Any]), unlike the
    # list[str] accepted by CaseCreate.
    user_utterances: list[Any]
    is_protected: bool
    created_at: datetime
|
||
|
||
|
||
class VersionCreate(BaseModel):
    # Input fields for creating a version: a required name, a runner kind
    # that defaults to "llm_chat_v1", and an optional free-form config dict.
    name: str
    runner_kind: str = "llm_chat_v1"
    config_json: dict[str, Any] | None = None
|
||
|
||
|
||
class VersionOut(BaseModel):
    # Output representation of a version; from_attributes=True allows
    # construction from an attribute-bearing object.
    model_config = ConfigDict(from_attributes=True)

    id: str
    name: str
    runner_kind: str
    # Nullable without a default: must be supplied, may be None.
    config_json: dict[str, Any] | None
    created_at: datetime
|
||
|
||
|
||
class ExperimentCreate(BaseModel):
    # Input fields for creating an experiment that references one
    # regression set and two versions (baseline and candidate).
    name: str
    regression_set_id: str
    baseline_version_id: str
    candidate_version_id: str
    rubric_pack: str = "conversation_v1+memoir_v1"
    # The field itself defaults to None. The (Chinese) description text
    # states the default weights as {"conversation":0.5,"memoir":0.5};
    # NOTE(review): that fallback is not applied in this class -- presumably
    # done by the consumer, confirm.
    composite_weights_json: dict[str, Any] | None = Field(
        default=None,
        description='默认 {"conversation":0.5,"memoir":0.5}',
    )
|
||
|
||
|
||
class ExperimentOut(BaseModel):
    # Output representation of an experiment; from_attributes=True allows
    # construction from an attribute-bearing object.
    model_config = ConfigDict(from_attributes=True)

    id: str
    name: str
    regression_set_id: str
    baseline_version_id: str
    candidate_version_id: str
    rubric_pack: str
    # Plain string; the set of allowed status values is not constrained here.
    status: str
    # Nullable fields without defaults: must be supplied, may be None.
    error_message: str | None
    created_at: datetime
    completed_at: datetime | None
|
||
|
||
|
||
class SessionDialogueMessageOut(BaseModel):
    # One dialogue message: role, text content, and an optional timestamp
    # (None when omitted). Can be built from an object's attributes.
    model_config = ConfigDict(from_attributes=True)

    role: str
    content: str
    created_at: datetime | None = None
|
||
|
||
|
||
class SessionDialogueOut(BaseModel):
    # A conversation id together with its list of dialogue messages.
    conversation_id: str
    messages: list[SessionDialogueMessageOut]
|
||
|
||
|
||
class SessionListItem(BaseModel):
    # One row of a session listing.
    id: str
    user_id: str
    # Description text (Chinese) reads: "users.phone, used for list display".
    user_phone: str | None = Field(default=None, description="users.phone,列表展示用")
    # Nullable but without a default -> must be supplied explicitly.
    started_at: datetime | None
    # Unlike started_at, this one may be omitted entirely (defaults to None).
    last_message_at: datetime | None = None
    # Remaining nullable fields likewise have no default.
    conversation_stage: str | None
    current_topic: str | None
    status: str | None
|
||
|
||
|
||
class SessionListResponse(BaseModel):
    # Session listing envelope: the items plus an integer total.
    # NOTE(review): `total` is presumably the unpaginated match count --
    # confirm against the repository query.
    items: list[SessionListItem]
    total: int
|
||
|
||
|
||
class SessionTranscriptOut(BaseModel):
    # User utterances for one conversation, exposed as two separate lists.
    # NOTE(review): going by the field names, one list is derived from
    # "segments" and the other from "messages" -- confirm with the service
    # that fills them in.
    conversation_id: str
    user_id: str
    user_utterances_from_segments: list[str]
    user_utterances_from_messages: list[str]
|
||
|
||
|
||
class UserExportFixtureTurnOut(BaseModel):
    # A single fixture turn: the `user` text paired with the `ai` text.
    user: str
    ai: str
|
||
|
||
|
||
class UserExportFixtureListOut(BaseModel):
    # Listing of fixtures as plain strings.
    # NOTE(review): presumably fixture filenames -- confirm with the router.
    items: list[str]
|
||
|
||
|
||
class UserExportFixtureDetailOut(BaseModel):
    # Detail view of one fixture: its filename and the list of turns.
    filename: str
    turns: list[UserExportFixtureTurnOut]
|
||
|
||
|
||
class SnapshotFromConversationBody(BaseModel):
    # Request body with three optional settings; all have defaults, so an
    # empty JSON object is a valid payload.
    title: str | None = None
    # NOTE(review): presumably selects messages rather than segments as the
    # utterance source -- confirm with the handler.
    use_messages: bool = False
    is_protected: bool = False
|
||
|
||
|
||
class ImportMarkdownBody(BaseModel):
    # Request body carrying markdown text to import; `markdown` is the only
    # required field.
    markdown: str
    title: str | None = None
    is_protected: bool = False
|
||
|
||
|
||
class ImportJsonCaseBody(BaseModel):
    # Request body for importing a case from JSON into a regression set.
    regression_set_id: str
    utterances: list[str] | None = None
    # Description text (Chinese) reads: "choose either this or utterances:
    # an object containing an `utterances` key, or a root array".
    # NOTE(review): nothing in this class enforces that either/or rule;
    # both fields may be None or both set -- presumably validated by the
    # caller, confirm.
    raw_json: dict[str, Any] | list[Any] | None = Field(
        default=None,
        description="与 utterances 二选一:对象含 utterances 键或根数组",
    )
    title: str | None = None
    is_protected: bool = False
|
||
|
||
|
||
class RunTurnOut(BaseModel):
    # Output representation of one turn within a run; from_attributes=True
    # allows construction from an attribute-bearing object.
    model_config = ConfigDict(from_attributes=True)

    id: str
    turn_index: int
    user_utterance: str
    # All remaining fields are nullable but carry no default, so each must
    # be supplied (possibly as None).
    assistant_reply: str | None
    duration_ms: int | None
    judge_scores_json: dict[str, Any] | None
    judge_rationale: str | None
|
||
|
||
|
||
class EvalRunOut(BaseModel):
    # Output representation of a single evaluation run, including its
    # scores and per-turn details.
    model_config = ConfigDict(from_attributes=True)

    id: str
    experiment_id: str
    case_id: str
    # Plain string. NOTE(review): presumably distinguishes the baseline run
    # from the candidate run -- allowed values are not constrained here.
    side: str
    status: str
    # Nullable, no default: must be supplied, may be None.
    error_message: str | None
    memoir_markdown: str | None
    conversation_score_total: float | None
    memoir_score_total: float | None
    composite_score: float | None
    # The two fields below are optional and may be omitted entirely.
    judge_bundle_json: dict[str, Any] | None = None
    # A list literal is acceptable as a pydantic field default: pydantic
    # copies the default for each instance instead of sharing one list.
    turns: list[RunTurnOut] = []
|
||
|
||
|
||
class SessionEvalRunItem(BaseModel):
    # Pairs an evaluation run with the name of the experiment it is listed
    # under.
    experiment_name: str
    run: EvalRunOut
|
||
|
||
|
||
class SessionEvalRunsOut(BaseModel):
    # Evaluation-run items grouped under one conversation id.
    conversation_id: str
    items: list[SessionEvalRunItem]
|
||
|
||
|
||
class GateVerdictOut(BaseModel):
    # Output representation of a gate verdict; from_attributes=True allows
    # construction from an attribute-bearing object.
    model_config = ConfigDict(from_attributes=True)

    passed: bool
    # Nullable, no default: must be supplied, may be None.
    mean_composite_delta: float | None
    # List of free-form dicts; the per-entry schema is not defined here.
    protected_regressions_json: list[dict[str, Any]] | None
    details_json: dict[str, Any] | None
    computed_at: datetime
|
||
|
||
|
||
class ExperimentDetailOut(BaseModel):
    # Aggregate view of an experiment: the experiment itself, its runs, and
    # the gate verdict.
    experiment: ExperimentOut
    runs: list[EvalRunOut]
    # Required but nullable: pass None explicitly when no verdict exists.
    gate: GateVerdictOut | None
|