2026-04-03 14:44:46 +08:00
|
|
|
|
"""内部评测 REST API。"""
|
|
|
|
|
|
|
|
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
2026-04-06 23:19:20 +08:00
|
|
|
|
import json
|
2026-04-03 14:44:46 +08:00
|
|
|
|
from typing import Annotated
|
|
|
|
|
|
|
|
|
|
|
|
from fastapi import APIRouter, Depends, HTTPException, Query
|
2026-04-06 23:19:20 +08:00
|
|
|
|
from fastapi.responses import StreamingResponse
|
2026-04-03 14:44:46 +08:00
|
|
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
|
|
|
|
|
|
|
|
from app.core.db import get_async_db
|
|
|
|
|
|
from app.features.evaluation.admin_service import EvaluationAdminService
|
2026-04-06 23:19:20 +08:00
|
|
|
|
from app.features.evaluation.deps import (
|
|
|
|
|
|
get_eval_judge_manual_service,
|
|
|
|
|
|
get_evaluation_admin_service,
|
2026-04-08 09:38:07 +08:00
|
|
|
|
get_memoir_readiness_service,
|
2026-04-06 23:19:20 +08:00
|
|
|
|
get_replay_conversation_service,
|
|
|
|
|
|
)
|
2026-04-03 14:44:46 +08:00
|
|
|
|
from app.features.evaluation.errors import (
|
|
|
|
|
|
EvaluationBadRequestError,
|
|
|
|
|
|
EvaluationNotFoundError,
|
|
|
|
|
|
)
|
2026-04-06 23:19:20 +08:00
|
|
|
|
from app.features.evaluation.importers.user_export_markdown import (
|
|
|
|
|
|
extract_memoir_chapter_sections_from_export_md,
|
|
|
|
|
|
extract_source_user_id_from_export_md,
|
|
|
|
|
|
)
|
2026-04-03 14:44:46 +08:00
|
|
|
|
from app.features.evaluation.internal_auth import InternalEvalAuth
|
2026-04-06 23:19:20 +08:00
|
|
|
|
from app.features.evaluation.judge_manual_service import EvalJudgeManualService
|
2026-04-08 09:38:07 +08:00
|
|
|
|
from app.features.evaluation.memoir_readiness_service import MemoirReadinessService
|
2026-04-06 23:19:20 +08:00
|
|
|
|
from app.features.evaluation.replay_service import ReplayConversationService
|
2026-04-03 14:44:46 +08:00
|
|
|
|
from app.features.evaluation.schemas import (
|
2026-04-06 23:19:20 +08:00
|
|
|
|
EvalSandboxOut,
|
|
|
|
|
|
ManualJudgeConversationBody,
|
|
|
|
|
|
ManualJudgeConversationOut,
|
|
|
|
|
|
ManualJudgeConversationStreamBody,
|
|
|
|
|
|
ManualJudgeMemoirBody,
|
|
|
|
|
|
ManualJudgeMemoirOut,
|
2026-04-08 15:37:09 +08:00
|
|
|
|
MemoirPhase1ReadyOut,
|
2026-04-06 23:19:20 +08:00
|
|
|
|
MemoirSectionBaselineOut,
|
2026-04-08 21:36:12 +08:00
|
|
|
|
MemoirSubmitOut,
|
|
|
|
|
|
PlaygroundConversationJudgeOut,
|
2026-04-06 23:19:20 +08:00
|
|
|
|
ReplayBootstrapBody,
|
|
|
|
|
|
ReplayBootstrapOut,
|
|
|
|
|
|
ReplayConversationBody,
|
|
|
|
|
|
ReplayConversationOut,
|
2026-04-08 21:36:12 +08:00
|
|
|
|
RetryBaselineJudgeBody,
|
|
|
|
|
|
RetryBaselineJudgeOut,
|
2026-04-06 13:45:04 +08:00
|
|
|
|
SessionDialogueOut,
|
2026-04-03 14:44:46 +08:00
|
|
|
|
SessionListItem,
|
|
|
|
|
|
SessionListResponse,
|
|
|
|
|
|
SessionTranscriptOut,
|
2026-04-06 13:45:04 +08:00
|
|
|
|
UserExportFixtureDetailOut,
|
|
|
|
|
|
UserExportFixtureListOut,
|
|
|
|
|
|
UserExportFixtureTurnOut,
|
2026-04-06 23:19:20 +08:00
|
|
|
|
UserMemoirSnapshotOut,
|
2026-04-03 14:44:46 +08:00
|
|
|
|
)
|
|
|
|
|
|
from app.features.evaluation.session_catalog_service import SessionCatalogService
|
2026-04-06 23:19:20 +08:00
|
|
|
|
from app.features.evaluation.user_export_fixtures import read_user_export_fixture
|
2026-04-03 14:44:46 +08:00
|
|
|
|
|
|
|
|
|
|
router = APIRouter(tags=["internal-evaluation"])
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-04-06 13:45:04 +08:00
|
|
|
|
@router.get("/ping", include_in_schema=False)
async def eval_api_ping() -> dict[str, str | bool]:
    """Unauthenticated probe for the internal evaluation API.

    Confirms that the current process is ``internal_main`` and that this
    router has been mounted. The route is excluded from the OpenAPI schema.
    """
    payload: dict[str, str | bool] = {
        "ok": True,
        "service": "life-echo-internal-eval",
    }
    return payload
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-04-03 14:44:46 +08:00
|
|
|
|
def _eval_http_exc(
    e: EvaluationNotFoundError | EvaluationBadRequestError,
) -> HTTPException:
    """Translate an evaluation-domain error into an ``HTTPException``.

    An ``EvaluationNotFoundError`` becomes a 404; any other error passed in
    becomes a 400. The error's ``detail`` attribute is forwarded unchanged.

    The exception is returned rather than raised so that call sites can
    write ``raise _eval_http_exc(e) from e`` and keep the original cause.
    """
    status_code = 404 if isinstance(e, EvaluationNotFoundError) else 400
    return HTTPException(status_code=status_code, detail=e.detail)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.get("/sessions", response_model=SessionListResponse)
async def list_sessions(
    _auth: InternalEvalAuth,
    db: Annotated[AsyncSession, Depends(get_async_db)],
    offset: int = Query(0, ge=0),
    limit: int = Query(50, ge=1, le=200),
    user_id: str | None = Query(None),
    q: str | None = Query(None),
    status: str | None = Query(
        None,
        description="按会话 status 过滤,如 active",
    ),
):
    """Return one page of sessions together with the service's ``total``.

    The paging and filter arguments (``offset``, ``limit``, ``user_id``,
    ``q``, ``status``) are handed unchanged to
    ``SessionCatalogService.list_sessions``. Every row it returns is copied
    field by field into a ``SessionListItem``.
    """
    records, total_count = await SessionCatalogService(db).list_sessions(
        offset=offset,
        limit=limit,
        user_id=user_id,
        q=q,
        status=status,
    )

    items: list[SessionListItem] = []
    for record in records:
        items.append(
            SessionListItem(
                id=record.id,
                user_id=record.user_id,
                user_phone=record.user_phone,
                started_at=record.started_at,
                last_message_at=record.last_message_at,
                conversation_stage=record.conversation_stage,
                current_topic=record.current_topic,
                status=record.status,
            )
        )
    return SessionListResponse(items=items, total=total_count)
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-04-06 13:45:04 +08:00
|
|
|
|
@router.get(
    "/sessions/{conversation_id}/dialogue",
    response_model=SessionDialogueOut,
)
async def get_session_dialogue(
    conversation_id: str,
    _auth: InternalEvalAuth,
    db: Annotated[AsyncSession, Depends(get_async_db)],
):
    """Return the dialogue for one conversation.

    Whatever ``SessionCatalogService.get_session_dialogue`` returns is sent
    back as-is when it is truthy.

    Raises:
        HTTPException: 404 when the service returns a falsy result.
    """
    dialogue = await SessionCatalogService(db).get_session_dialogue(
        conversation_id
    )
    if dialogue:
        return dialogue
    raise HTTPException(status_code=404, detail="conversation not found")
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-04-03 14:44:46 +08:00
|
|
|
|
@router.get(
    "/sessions/{conversation_id}/transcript",
    response_model=SessionTranscriptOut,
)
async def get_session_transcript(
    conversation_id: str,
    _auth: InternalEvalAuth,
    db: Annotated[AsyncSession, Depends(get_async_db)],
):
    """Return the user-utterance transcript of one conversation.

    The transcript comes from ``SessionCatalogService.get_transcript`` and
    is re-packed into a ``SessionTranscriptOut`` carrying both utterance
    lists (from segments and from messages).

    Raises:
        HTTPException: 404 when the service returns a falsy result.
    """
    transcript = await SessionCatalogService(db).get_transcript(conversation_id)
    if transcript:
        return SessionTranscriptOut(
            conversation_id=transcript.conversation_id,
            user_id=transcript.user_id,
            user_utterances_from_segments=transcript.user_utterances_from_segments,
            user_utterances_from_messages=transcript.user_utterances_from_messages,
        )
    raise HTTPException(status_code=404, detail="conversation not found")
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-04-08 16:50:53 +08:00
|
|
|
|
@router.get(
    "/sessions/{conversation_id}/playground-conversation-judge",
    response_model=PlaygroundConversationJudgeOut,
)
async def get_playground_conversation_judge(
    conversation_id: str,
    _auth: InternalEvalAuth,
    db: Annotated[AsyncSession, Depends(get_async_db)],
):
    """Return the stored playground judge JSON for a conversation.

    Raises:
        HTTPException: 404 when ``get_transcript`` returns a falsy result.
    """
    service = SessionCatalogService(db)

    # The transcript is fetched only to decide between 404 and success;
    # its contents are not used in the response.
    transcript = await service.get_transcript(conversation_id)
    if not transcript:
        raise HTTPException(status_code=404, detail="conversation not found")

    judge_json = await service.get_playground_conversation_judge_json(
        conversation_id
    )
    return PlaygroundConversationJudgeOut(
        conversation_id=conversation_id,
        judge=judge_json,
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-04-08 09:38:07 +08:00
|
|
|
|
@router.get(
    "/sessions/{conversation_id}/memoir-phase1-ready",
    response_model=MemoirPhase1ReadyOut,
)
async def memoir_phase1_ready(
    conversation_id: str,
    _auth: InternalEvalAuth,
    svc: Annotated[
        MemoirReadinessService, Depends(get_memoir_readiness_service)
    ],
    segment_ids: Annotated[
        list[str],
        Query(
            min_length=1,
            description="本批待检查的 segment id,可重复 query 参数 segment_ids=id1&segment_ids=id2",
        ),
    ],
):
    """Report memoir phase-1 readiness for a batch of segments.

    Delegates to ``MemoirReadinessService.memoir_phase1_ready_for_segments``
    and returns its result unchanged. ``segment_ids`` is a repeatable query
    parameter declared with ``min_length=1``.

    Raises:
        HTTPException: 404 / 400, as mapped by ``_eval_http_exc`` from the
            service's not-found / bad-request errors.
    """
    try:
        readiness = await svc.memoir_phase1_ready_for_segments(
            conversation_id=conversation_id,
            segment_ids=segment_ids,
        )
    except (EvaluationNotFoundError, EvaluationBadRequestError) as err:
        # _eval_http_exc picks the status code from the error type.
        raise _eval_http_exc(err) from err
    return readiness
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-04-08 21:36:12 +08:00
|
|
|
|
@router.post(
    "/sessions/{conversation_id}/memoir-submit",
    response_model=MemoirSubmitOut,
)
async def memoir_submit_phase1(
    conversation_id: str,
    _auth: InternalEvalAuth,
    svc: Annotated[
        MemoirReadinessService, Depends(get_memoir_readiness_service)
    ],
):
    """Submit memoir phase 1 for a conversation.

    Delegates to
    ``MemoirReadinessService.submit_memoir_phase1_for_conversation`` and
    returns its result unchanged.

    Raises:
        HTTPException: 404 / 400, as mapped by ``_eval_http_exc`` from the
            service's not-found / bad-request errors.
    """
    try:
        submitted = await svc.submit_memoir_phase1_for_conversation(
            conversation_id=conversation_id,
        )
    except (EvaluationNotFoundError, EvaluationBadRequestError) as err:
        # _eval_http_exc picks the status code from the error type.
        raise _eval_http_exc(err) from err
    return submitted
|
2026-04-06 13:45:04 +08:00
|
|
|
|
|
|
|
|
|
|
|
2026-04-06 23:19:20 +08:00
|
|
|
|
@router.post("/sessions/replay-bootstrap", response_model=ReplayBootstrapOut)
async def replay_bootstrap(
    body: ReplayBootstrapBody,
    _auth: InternalEvalAuth,
    replay: Annotated[
        ReplayConversationService, Depends(get_replay_conversation_service)
    ],
):
    """Bootstrap a conversation for replay and return its id.

    Calls ``ReplayConversationService.bootstrap_conversation`` with
    ``body.user_id`` and wraps the returned id in a ``ReplayBootstrapOut``.

    Raises:
        HTTPException: built by ``_eval_http_exc`` when the service raises
            ``EvaluationBadRequestError``.
    """
    try:
        new_conversation_id = await replay.bootstrap_conversation(body.user_id)
    except EvaluationBadRequestError as err:
        raise _eval_http_exc(err) from err
    else:
        return ReplayBootstrapOut(conversation_id=new_conversation_id)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.post("/sessions/eval-sandbox", response_model=EvalSandboxOut)
async def create_eval_sandbox(
    _auth: InternalEvalAuth,
    replay: Annotated[
        ReplayConversationService, Depends(get_replay_conversation_service)
    ],
):
    """Create an evaluation sandbox and return its identifiers.

    ``ReplayConversationService.create_eval_sandbox`` yields a 4-tuple that
    is unpacked here as (user id, conversation id, phone, nickname) and
    returned as an ``EvalSandboxOut``.

    Raises:
        HTTPException: built by ``_eval_http_exc`` when the service raises
            ``EvaluationBadRequestError``.
    """
    try:
        sandbox = await replay.create_eval_sandbox()
    except EvaluationBadRequestError as err:
        raise _eval_http_exc(err) from err

    sandbox_user_id, sandbox_conversation_id, phone, nickname = sandbox
    return EvalSandboxOut(
        user_id=sandbox_user_id,
        conversation_id=sandbox_conversation_id,
        phone=phone,
        nickname=nickname,
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.post("/replay/conversation", response_model=ReplayConversationOut)
async def replay_conversation(
    body: ReplayConversationBody,
    _auth: InternalEvalAuth,
    replay: Annotated[
        ReplayConversationService, Depends(get_replay_conversation_service)
    ],
):
    """Replay user turns into a conversation from a fixture or a literal list.

    Exactly one input source is expected:

    * ``body.fixture_filename`` -- stripped and passed to
      ``ReplayConversationService.replay_fixture``.
    * ``body.user_utterances`` -- each item is stringified, blank items are
      dropped, and the rest go to ``replay_utterances``; the filtered list
      is echoed back in the response.

    Raises:
        HTTPException: 400 when both sources are supplied (truthy), when
            neither is supplied, or when every utterance is blank; 404 / 400
            as mapped by ``_eval_http_exc`` from service errors.
    """
    both_given = bool(body.fixture_filename) and bool(body.user_utterances)
    if both_given:
        raise HTTPException(
            status_code=400,
            detail="provide only one of fixture_filename or user_utterances",
        )

    try:
        if body.fixture_filename:
            fixture_name = body.fixture_filename.strip()
            (
                turns_replayed,
                utterances_echo,
                segment_ids,
                timing,
            ) = await replay.replay_fixture(
                conversation_id=body.conversation_id,
                fixture_filename=fixture_name,
                flush_memoir_after=body.flush_memoir_after,
                skip_memoir=body.skip_memoir,
                skip_tts=body.skip_tts,
            )
        elif body.user_utterances is None:
            raise EvaluationBadRequestError(
                "fixture_filename or user_utterances required"
            )
        else:
            # Keep only utterances that contain non-whitespace text.
            utterances_echo = [
                str(item) for item in body.user_utterances if str(item).strip()
            ]
            if not utterances_echo:
                raise EvaluationBadRequestError("user_utterances is empty")
            turns_replayed, segment_ids, timing = await replay.replay_utterances(
                conversation_id=body.conversation_id,
                utterances=utterances_echo,
                flush_memoir_after=body.flush_memoir_after,
                skip_memoir=body.skip_memoir,
                skip_tts=body.skip_tts,
            )
    except (EvaluationNotFoundError, EvaluationBadRequestError) as err:
        # Covers both service errors and the bad-request errors raised above.
        raise _eval_http_exc(err) from err

    # The timing fields are reported as None when the service gave no timing.
    if timing:
        started_at_utc = timing.started_at_utc
        finished_at_utc = timing.finished_at_utc
        elapsed_ms = timing.elapsed_ms
    else:
        started_at_utc = finished_at_utc = elapsed_ms = None

    return ReplayConversationOut(
        conversation_id=body.conversation_id,
        turns_replayed=turns_replayed,
        utterances_echo=utterances_echo,
        segment_ids=segment_ids,
        started_at_utc=started_at_utc,
        finished_at_utc=finished_at_utc,
        elapsed_ms=elapsed_ms,
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.post("/judge/conversation", response_model=ManualJudgeConversationOut)
async def judge_conversation_manual(
    body: ManualJudgeConversationBody,
    _auth: InternalEvalAuth,
    judge_svc: Annotated[
        EvalJudgeManualService, Depends(get_eval_judge_manual_service)
    ],
):
    """Run a manual judge over one conversation and return the validated result.

    Calls ``EvalJudgeManualService.judge_conversation`` with the conversation
    id, fixture filename and the judge provider/model from the request body,
    then validates the payload into ``ManualJudgeConversationOut``.

    Raises:
        HTTPException: 404 / 400, as mapped by ``_eval_http_exc`` from the
            service's not-found / bad-request errors.
    """
    try:
        result = await judge_svc.judge_conversation(
            body.conversation_id,
            body.fixture_filename,
            judge_provider=body.judge_provider,
            judge_model=body.judge_model,
        )
    except (EvaluationNotFoundError, EvaluationBadRequestError) as err:
        raise _eval_http_exc(err) from err
    return ManualJudgeConversationOut.model_validate(result)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.post("/judge/conversation-stream")
async def judge_conversation_manual_stream(
    body: ManualJudgeConversationStreamBody,
    _auth: InternalEvalAuth,
    judge_svc: Annotated[
        EvalJudgeManualService, Depends(get_eval_judge_manual_service)
    ],
):
    """Stream conversation-judge progress as Server-Sent Events.

    Every event yielded by
    ``EvalJudgeManualService.iter_conversation_judge_sse`` is JSON-encoded
    (non-ASCII kept verbatim) and sent as one ``data:`` frame. If the
    iteration fails, a single ``{"event": "error", "phase": "server", ...}``
    frame is sent instead of aborting the stream. A final
    ``{"event": "done"}`` frame is always sent last.

    Returns:
        A ``StreamingResponse`` with media type ``text/event-stream`` and
        headers that ask caches and proxies not to buffer the stream.
    """
    # Function-scope import so the block does not depend on a file-level
    # ``import logging``.
    import logging

    log = logging.getLogger(__name__)

    async def event_iter():
        try:
            async for evt in judge_svc.iter_conversation_judge_sse(
                body.conversation_id,
                body.fixture_filename,
                include_turn_judges=body.include_turn_judges,
                include_baseline_turn_judges=body.include_baseline_turn_judges,
                judge_provider=body.judge_provider,
                judge_model=body.judge_model,
            ):
                yield f"data: {json.dumps(evt, ensure_ascii=False)}\n\n"
        except Exception as e:
            # Deliberately broad: the failure is reported in-band as an SSE
            # error frame. Log it too, otherwise the traceback is lost on
            # the server side.
            log.exception(
                "conversation judge stream failed (conversation_id=%s)",
                body.conversation_id,
            )
            err = json.dumps(
                {"event": "error", "phase": "server", "message": str(e)},
                ensure_ascii=False,
            )
            yield f"data: {err}\n\n"
        # Terminal marker, sent after success and after an error alike.
        yield f"data: {json.dumps({'event': 'done'}, ensure_ascii=False)}\n\n"

    return StreamingResponse(
        event_iter(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "X-Accel-Buffering": "no",
        },
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-04-08 21:36:12 +08:00
|
|
|
|
@router.post(
    "/judge/conversation-retry-baseline",
    response_model=RetryBaselineJudgeOut,
)
async def retry_baseline_conversation_judge(
    body: RetryBaselineJudgeBody,
    _auth: InternalEvalAuth,
    judge_svc: Annotated[
        EvalJudgeManualService, Depends(get_eval_judge_manual_service)
    ],
):
    """Re-run the baseline conversation judge and return the validated result.

    Calls ``EvalJudgeManualService.retry_baseline_conversation_judge`` with
    the conversation id, fixture filename, the baseline turn-judge flag and
    the judge provider/model from the request body, then validates the
    payload into ``RetryBaselineJudgeOut``.

    Raises:
        HTTPException: 404 / 400, as mapped by ``_eval_http_exc`` from the
            service's not-found / bad-request errors.
    """
    try:
        result = await judge_svc.retry_baseline_conversation_judge(
            body.conversation_id,
            body.fixture_filename,
            include_baseline_turn_judges=body.include_baseline_turn_judges,
            judge_provider=body.judge_provider,
            judge_model=body.judge_model,
        )
    except (EvaluationNotFoundError, EvaluationBadRequestError) as err:
        raise _eval_http_exc(err) from err
    return RetryBaselineJudgeOut.model_validate(result)
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-04-06 23:19:20 +08:00
|
|
|
|
@router.post("/judge/memoir-chapters", response_model=ManualJudgeMemoirOut)
async def judge_memoir_chapters_manual(
    body: ManualJudgeMemoirBody,
    _auth: InternalEvalAuth,
    judge_svc: Annotated[
        EvalJudgeManualService, Depends(get_eval_judge_manual_service)
    ],
):
    """Run a manual judge over a user's memoir and return the validated result.

    Calls ``EvalJudgeManualService.judge_memoir_for_user`` with the user id,
    the baseline sections and the judge provider/model from the request
    body, then validates the payload into ``ManualJudgeMemoirOut``.

    Raises:
        HTTPException: built by ``_eval_http_exc`` when the service raises
            ``EvaluationBadRequestError``.
    """
    try:
        result = await judge_svc.judge_memoir_for_user(
            body.user_id,
            body.baseline_sections,
            judge_provider=body.judge_provider,
            judge_model=body.judge_model,
        )
    except EvaluationBadRequestError as err:
        raise _eval_http_exc(err) from err
    else:
        return ManualJudgeMemoirOut.model_validate(result)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.get(
    "/users/{user_id}/memoir-snapshot",
    response_model=UserMemoirSnapshotOut,
)
async def get_user_memoir_snapshot(
    user_id: str,
    _auth: InternalEvalAuth,
    judge_svc: Annotated[
        EvalJudgeManualService, Depends(get_eval_judge_manual_service)
    ],
):
    """Return a memoir snapshot for one user.

    The payload from ``EvalJudgeManualService.memoir_snapshot`` is validated
    into ``UserMemoirSnapshotOut``.

    Raises:
        HTTPException: built by ``_eval_http_exc`` when the service raises
            ``EvaluationBadRequestError``.
    """
    try:
        snapshot = await judge_svc.memoir_snapshot(user_id)
    except EvaluationBadRequestError as err:
        raise _eval_http_exc(err) from err
    else:
        return UserMemoirSnapshotOut.model_validate(snapshot)
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-04-06 13:45:04 +08:00
|
|
|
|
@router.get(
    "/fixtures/user-exports",
    response_model=UserExportFixtureListOut,
)
async def list_user_export_fixtures(
    _auth: InternalEvalAuth,
    svc: Annotated[EvaluationAdminService, Depends(get_evaluation_admin_service)],
):
    """List the user-export fixture names reported by the admin service."""
    fixture_names = svc.list_user_export_fixture_names()
    return UserExportFixtureListOut(items=fixture_names)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.get(
    "/fixtures/user-exports/{filename}",
    response_model=UserExportFixtureDetailOut,
)
async def get_user_export_fixture(
    filename: str,
    _auth: InternalEvalAuth,
):
    """Return the parsed contents of one user-export fixture.

    ``read_user_export_fixture`` supplies the (user, ai) turn pairs and the
    raw markdown. The source user id and the memoir chapter sections are
    then extracted from that markdown.

    Raises:
        HTTPException: 400 when the reader raises ``ValueError``; 404 when
            it raises ``FileNotFoundError``. Both are raised ``from None``,
            so the original exception is not chained.
    """
    try:
        turn_pairs, markdown = read_user_export_fixture(filename)
    except ValueError:
        raise HTTPException(
            status_code=400, detail="invalid fixture filename"
        ) from None
    except FileNotFoundError:
        raise HTTPException(status_code=404, detail="fixture not found") from None

    section_pairs = extract_memoir_chapter_sections_from_export_md(markdown)

    turn_models = [
        UserExportFixtureTurnOut(user=user_text, ai=ai_text)
        for user_text, ai_text in turn_pairs
    ]
    source_user_id = extract_source_user_id_from_export_md(markdown)
    section_models = [
        MemoirSectionBaselineOut(title=title, body=section_body)
        for title, section_body in section_pairs
    ]
    return UserExportFixtureDetailOut(
        filename=filename,
        turns=turn_models,
        source_user_id=source_user_id,
        memoir_sections=section_models,
    )
|