feat(eval): Playground GLM 评分落库并可恢复

在 conversations 表增加 playground_conversation_judge_json 字段,流式/非流式对话评审结束后写入最近一次快照(整体分、逐轮分、对比文案、错误与基线文件名等)。新增只读 GET 接口供前端按会话拉取;评测台 Playground 切换会话时自动恢复,并提示当前基线是否与评审时一致。
This commit is contained in:
Kevin
2026-04-08 16:50:53 +08:00
parent 309a051038
commit 78b61c076e
8 changed files with 361 additions and 91 deletions

View File

@@ -46,6 +46,7 @@ from app.features.evaluation.schemas import (
ManualJudgeConversationStreamBody,
ManualJudgeMemoirBody,
ManualJudgeMemoirOut,
PlaygroundConversationJudgeOut,
MemoirPhase1ReadyOut,
MemoirSectionBaselineOut,
RegressionSetCreate,
@@ -225,6 +226,26 @@ async def get_session_transcript(
)
@router.get(
    "/sessions/{conversation_id}/playground-conversation-judge",
    response_model=PlaygroundConversationJudgeOut,
)
async def get_playground_conversation_judge(
    conversation_id: str,
    _auth: InternalEvalAuth,
    db: Annotated[AsyncSession, Depends(get_async_db)],
):
    """Return the stored Playground conversation-judge payload for a session.

    The handler only calls getters on ``SessionCatalogService``: it first
    looks up the transcript to confirm the conversation exists, then loads
    the judge JSON for the same conversation id.

    Args:
        conversation_id: Path parameter identifying the conversation.
        _auth: Internal-eval auth dependency; unused in the body, declared
            so the dependency is enforced for this route.
        db: Async database session injected via ``get_async_db``.

    Returns:
        A ``PlaygroundConversationJudgeOut`` echoing ``conversation_id`` and
        carrying whatever the catalog returned as ``judge``.

    Raises:
        HTTPException: 404 with detail ``"conversation not found"`` when the
            transcript lookup yields a falsy result.
    """
    session_catalog = SessionCatalogService(db)

    # The transcript lookup doubles as the existence check; the judge payload
    # is only fetched once the conversation is known to exist.
    transcript = await session_catalog.get_transcript(conversation_id)
    if not transcript:
        raise HTTPException(status_code=404, detail="conversation not found")

    judge_snapshot = await session_catalog.get_playground_conversation_judge_json(
        conversation_id
    )
    return PlaygroundConversationJudgeOut(
        conversation_id=conversation_id,
        judge=judge_snapshot,
    )
@router.get(
"/sessions/{conversation_id}/memoir-phase1-ready",
response_model=MemoirPhase1ReadyOut,