Files
life-echo/api/app/features/evaluation/router.py
Sully 53e0065e3e refactor(api): TOML 配置 SSOT、统一错误契约、Auth/事务加固与可观测性 (#33)
配置 SSOT(TOML + .env)
统一错误契约
Auth 与事务边界
Redis / Celery 可靠性:业务 Redis(DB/0)与 Celery broker/backend(DB/1)显式拆分;连接池、sync client
可观测性(OpenTelemetry + LGTM)
2026-05-22 13:44:50 +08:00

478 lines
15 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""内部评测 REST API。"""
from __future__ import annotations
import json
from typing import Annotated
from fastapi import APIRouter, Depends, Query
from fastapi.responses import StreamingResponse
from app.core.deps_types import DbDep
from app.core.errors import BadRequestError, NotFoundError
from app.core.memoir_pipeline_progress import get_pipeline_run_for_eval
from app.features.evaluation.admin_service import EvaluationAdminService
from app.features.evaluation.deps import (
get_eval_judge_manual_service,
get_evaluation_admin_service,
get_memoir_readiness_service,
get_replay_conversation_service,
get_session_catalog_service,
)
from app.features.evaluation.errors import EvaluationBadRequestError
from app.features.evaluation.importers.user_export_markdown import (
extract_memoir_chapter_sections_from_export_md,
extract_source_user_id_from_export_md,
)
from app.features.evaluation.internal_auth import InternalEvalAuth
from app.features.evaluation.judge_manual_service import EvalJudgeManualService
from app.features.evaluation.memoir_readiness_service import MemoirReadinessService
from app.features.evaluation.replay_service import ReplayConversationService
from app.features.evaluation.schemas import (
EvalSandboxOut,
ManualJudgeConversationBody,
ManualJudgeConversationOut,
ManualJudgeConversationStreamBody,
ManualJudgeMemoirBody,
ManualJudgeMemoirOut,
MemoirPhase1ReadyOut,
MemoirPipelineRunOut,
MemoirSectionBaselineOut,
MemoirSubmitOut,
PlaygroundConversationJudgeOut,
ReplayBootstrapBody,
ReplayBootstrapOut,
ReplayConversationBody,
ReplayConversationOut,
RetryBaselineJudgeBody,
RetryBaselineJudgeOut,
SessionDialogueOut,
SessionListItem,
SessionListResponse,
SessionTranscriptOut,
UserExportFixtureDetailOut,
UserExportFixtureListOut,
UserExportFixtureTurnOut,
UserMemoirSnapshotOut,
)
from app.features.evaluation.session_catalog_service import SessionCatalogService
from app.features.evaluation.user_export_fixtures import read_user_export_fixture
# Dependency alias: resolves a SessionCatalogService through get_session_catalog_service.
SessionCatalogDep = Annotated[SessionCatalogService, Depends(get_session_catalog_service)]
# Router that the endpoints in this module register on; tagged "internal-evaluation".
router = APIRouter(tags=["internal-evaluation"])
@router.get("/ping", include_in_schema=False)
async def eval_api_ping() -> dict[str, str | bool]:
    """Unauthenticated probe.

    Confirms that the current process is internal_main and that this router
    is mounted. Takes no auth dependency and is excluded from the schema.
    """
    payload: dict[str, str | bool] = {
        "ok": True,
        "service": "life-echo-internal-eval",
    }
    return payload
@router.get("/sessions", response_model=SessionListResponse)
async def list_sessions(
    _auth: InternalEvalAuth,
    catalog: SessionCatalogDep,
    offset: int = Query(0, ge=0),
    limit: int = Query(50, ge=1, le=200),
    user_id: str | None = Query(None),
    q: str | None = Query(None),
    status: str | None = Query(
        None,
        description="按会话 status 过滤,如 active",
    ),
):
    """Return one page of sessions plus the total reported by the catalog.

    Args:
        offset: Page start, validated as >= 0 (default 0).
        limit: Page size, validated to 1..200 (default 50).
        user_id: Optional filter, forwarded unchanged to the catalog.
        q: Optional filter, forwarded unchanged to the catalog.
        status: Optional session status filter (e.g. ``active``).

    Returns:
        SessionListResponse with one SessionListItem per catalog row.
    """
    page, total_count = await catalog.list_sessions(
        offset=offset,
        limit=limit,
        user_id=user_id,
        q=q,
        status=status,
    )
    # Copy the exposed fields of each catalog row onto the response item.
    listed = [
        SessionListItem(
            id=row.id,
            user_id=row.user_id,
            user_phone=row.user_phone,
            started_at=row.started_at,
            last_message_at=row.last_message_at,
            conversation_stage=row.conversation_stage,
            current_topic=row.current_topic,
            status=row.status,
        )
        for row in page
    ]
    return SessionListResponse(items=listed, total=total_count)
@router.get(
    "/sessions/{conversation_id}/dialogue",
    response_model=SessionDialogueOut,
)
async def get_session_dialogue(
    conversation_id: str,
    _auth: InternalEvalAuth,
    catalog: SessionCatalogDep,
):
    """Return the dialogue the catalog holds for ``conversation_id``.

    Raises:
        NotFoundError: The catalog returned a falsy result.
    """
    dialogue = await catalog.get_session_dialogue(conversation_id)
    if dialogue:
        return dialogue
    raise NotFoundError("conversation not found")
@router.get(
    "/sessions/{conversation_id}/transcript", response_model=SessionTranscriptOut
)
async def get_session_transcript(
    conversation_id: str,
    _auth: InternalEvalAuth,
    catalog: SessionCatalogDep,
):
    """Return the user utterances recorded for a conversation.

    The response carries both the segment-derived and the message-derived
    utterance lists exactly as the catalog transcript exposes them.

    Raises:
        NotFoundError: The catalog returned a falsy transcript.
    """
    transcript = await catalog.get_transcript(conversation_id)
    if transcript:
        return SessionTranscriptOut(
            conversation_id=transcript.conversation_id,
            user_id=transcript.user_id,
            user_utterances_from_segments=transcript.user_utterances_from_segments,
            user_utterances_from_messages=transcript.user_utterances_from_messages,
        )
    raise NotFoundError("conversation not found")
@router.get(
    "/sessions/{conversation_id}/playground-conversation-judge",
    response_model=PlaygroundConversationJudgeOut,
)
async def get_playground_conversation_judge(
    conversation_id: str,
    _auth: InternalEvalAuth,
    catalog: SessionCatalogDep,
):
    """Return the playground conversation judge JSON for a conversation.

    Raises:
        NotFoundError: The catalog returned a falsy transcript for
            ``conversation_id``.
    """
    # The transcript is fetched only as an existence check; its content is unused.
    transcript = await catalog.get_transcript(conversation_id)
    if not transcript:
        raise NotFoundError("conversation not found")

    judge_json = await catalog.get_playground_conversation_judge_json(conversation_id)
    return PlaygroundConversationJudgeOut(
        conversation_id=conversation_id,
        judge=judge_json,
    )
@router.get(
    "/users/{user_id}/memoir-pipeline-run",
    response_model=MemoirPipelineRunOut,
)
async def get_memoir_pipeline_run(
    user_id: str,
    _auth: InternalEvalAuth,
    phase1_task_id: Annotated[
        str | None,
        Query(description="Phase1 Celery task id与 memoir-submit 返回一致)"),
    ] = None,
    memoir_correlation_id: Annotated[
        str | None,
        Query(description="流水线聚合根 ID与日志 memoir_correlation_id 一致)"),
    ] = None,
):
    """Return the memoir pipeline run snapshot for a user.

    Exactly one of ``phase1_task_id`` / ``memoir_correlation_id`` must be
    supplied. ``user_id`` is stripped of surrounding whitespace before lookup.

    Raises:
        BadRequestError: Neither or both identifiers were supplied.
        NotFoundError: The lookup returned a falsy snapshot.
    """
    by_task = bool(phase1_task_id)
    by_correlation = bool(memoir_correlation_id)
    if not (by_task or by_correlation):
        raise BadRequestError("provide phase1_task_id or memoir_correlation_id")
    if by_task and by_correlation:
        raise BadRequestError("provide only one of phase1_task_id or memoir_correlation_id")

    # Called synchronously (not awaited), as in the original handler.
    snapshot = get_pipeline_run_for_eval(
        user_id.strip(),
        phase1_task_id=phase1_task_id,
        memoir_correlation_id=memoir_correlation_id,
    )
    if snapshot:
        return MemoirPipelineRunOut.model_validate(snapshot)
    raise NotFoundError("pipeline snapshot not found")
@router.get(
    "/sessions/{conversation_id}/memoir-phase1-ready",
    response_model=MemoirPhase1ReadyOut,
)
async def memoir_phase1_ready(
    conversation_id: str,
    _auth: InternalEvalAuth,
    svc: Annotated[MemoirReadinessService, Depends(get_memoir_readiness_service)],
    segment_ids: Annotated[
        list[str],
        Query(
            min_length=1,
            description="本批待检查的 segment id可重复 query 参数 segment_ids=id1&segment_ids=id2",
        ),
    ],
):
    """Report memoir phase-1 readiness for a batch of segments.

    Args:
        conversation_id: Conversation the segments belong to.
        segment_ids: Repeated query parameter; at least one id is required.

    Returns:
        Whatever the readiness service returns for this conversation/segment
        batch, serialized as MemoirPhase1ReadyOut.
    """
    readiness = await svc.memoir_phase1_ready_for_segments(
        conversation_id=conversation_id,
        segment_ids=segment_ids,
    )
    return readiness
@router.post(
    "/sessions/{conversation_id}/memoir-submit",
    response_model=MemoirSubmitOut,
)
async def memoir_submit_phase1(
    conversation_id: str,
    _auth: InternalEvalAuth,
    svc: Annotated[MemoirReadinessService, Depends(get_memoir_readiness_service)],
):
    """Submit memoir phase 1 for a conversation via the readiness service.

    Returns:
        The service result, serialized as MemoirSubmitOut.
    """
    submission = await svc.submit_memoir_phase1_for_conversation(
        conversation_id=conversation_id,
    )
    return submission
@router.post("/sessions/replay-bootstrap", response_model=ReplayBootstrapOut)
async def replay_bootstrap(
    body: ReplayBootstrapBody,
    _auth: InternalEvalAuth,
    replay: Annotated[
        ReplayConversationService, Depends(get_replay_conversation_service)
    ],
):
    """Bootstrap a conversation for ``body.user_id`` and return its id."""
    new_conversation_id = await replay.bootstrap_conversation(body.user_id)
    return ReplayBootstrapOut(conversation_id=new_conversation_id)
@router.post("/sessions/eval-sandbox", response_model=EvalSandboxOut)
async def create_eval_sandbox(
    _auth: InternalEvalAuth,
    replay: Annotated[
        ReplayConversationService, Depends(get_replay_conversation_service)
    ],
):
    """Create an evaluation sandbox and return its identifiers.

    The replay service yields a 4-tuple that is mapped, in order, onto
    ``user_id``, ``conversation_id``, ``phone`` and ``nickname``.
    """
    sandbox = await replay.create_eval_sandbox()
    sandbox_user_id, sandbox_conversation_id, sandbox_phone, sandbox_nickname = sandbox
    return EvalSandboxOut(
        user_id=sandbox_user_id,
        conversation_id=sandbox_conversation_id,
        phone=sandbox_phone,
        nickname=sandbox_nickname,
    )
@router.post("/replay/conversation", response_model=ReplayConversationOut)
async def replay_conversation(
    body: ReplayConversationBody,
    _auth: InternalEvalAuth,
    replay: Annotated[
        ReplayConversationService, Depends(get_replay_conversation_service)
    ],
):
    """Replay user turns into a conversation from a fixture or an explicit list.

    Exactly one input source is expected: ``body.fixture_filename`` or
    ``body.user_utterances``. Utterances are stringified and blank entries
    are dropped before replay.

    Raises:
        BadRequestError: Both a fixture filename and utterances were given.
        EvaluationBadRequestError: No source was given, or every utterance
            was blank.
    """
    # NOTE(review): this guard raises BadRequestError while the other two
    # validation failures raise EvaluationBadRequestError — confirm intended.
    if body.fixture_filename and body.user_utterances:
        raise BadRequestError("provide only one of fixture_filename or user_utterances")
    if not body.fixture_filename and body.user_utterances is None:
        raise EvaluationBadRequestError(
            "fixture_filename or user_utterances required"
        )

    if body.fixture_filename:
        turn_count, echoed, replayed_segment_ids, run_timing = await replay.replay_fixture(
            conversation_id=body.conversation_id,
            fixture_filename=body.fixture_filename.strip(),
            flush_memoir_after=body.flush_memoir_after,
            skip_memoir=body.skip_memoir,
            skip_tts=body.skip_tts,
        )
    else:
        # The filtered utterances are both replayed and echoed back.
        echoed = [str(item) for item in body.user_utterances if str(item).strip()]
        if not echoed:
            raise EvaluationBadRequestError("user_utterances is empty")
        turn_count, replayed_segment_ids, run_timing = await replay.replay_utterances(
            conversation_id=body.conversation_id,
            utterances=echoed,
            flush_memoir_after=body.flush_memoir_after,
            skip_memoir=body.skip_memoir,
            skip_tts=body.skip_tts,
        )

    # Timing fields are reported as None when the service gives no timing.
    if run_timing:
        started_at = run_timing.started_at_utc
        finished_at = run_timing.finished_at_utc
        elapsed = run_timing.elapsed_ms
    else:
        started_at = finished_at = elapsed = None

    return ReplayConversationOut(
        conversation_id=body.conversation_id,
        turns_replayed=turn_count,
        utterances_echo=echoed,
        segment_ids=replayed_segment_ids,
        started_at_utc=started_at,
        finished_at_utc=finished_at,
        elapsed_ms=elapsed,
    )
@router.post("/judge/conversation", response_model=ManualJudgeConversationOut)
async def judge_conversation_manual(
    body: ManualJudgeConversationBody,
    _auth: InternalEvalAuth,
    judge_svc: Annotated[
        EvalJudgeManualService, Depends(get_eval_judge_manual_service)
    ],
):
    """Run a manual conversation judge and return the validated result.

    The conversation id and fixture filename are passed positionally; the
    judge provider and model come from the request body.
    """
    judged = await judge_svc.judge_conversation(
        body.conversation_id,
        body.fixture_filename,
        judge_provider=body.judge_provider,
        judge_model=body.judge_model,
    )
    result = ManualJudgeConversationOut.model_validate(judged)
    return result
@router.post("/judge/conversation-stream")
async def judge_conversation_manual_stream(
    body: ManualJudgeConversationStreamBody,
    _auth: InternalEvalAuth,
    judge_svc: Annotated[
        EvalJudgeManualService, Depends(get_eval_judge_manual_service)
    ],
):
    """Stream conversation-judge events as Server-Sent Events.

    Each event from the judge service is emitted as a ``data: <json>`` frame.
    If the service raises, one ``{"event": "error", "phase": "server"}`` frame
    carrying ``str(exc)`` is emitted. A final ``{"event": "done"}`` frame
    follows in both the success and the error case.

    Returns:
        StreamingResponse with ``text/event-stream`` media type and headers
        that disable caching and proxy buffering.
    """
    # Imported locally so this handler does not rely on a module-level import.
    import logging

    log = logging.getLogger(__name__)

    def sse_frame(payload) -> str:
        """Serialize one payload as a single SSE ``data:`` frame."""
        return f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"

    async def event_iter():
        try:
            async for evt in judge_svc.iter_conversation_judge_sse(
                body.conversation_id,
                body.fixture_filename,
                include_turn_judges=body.include_turn_judges,
                include_baseline_turn_judges=body.include_baseline_turn_judges,
                judge_provider=body.judge_provider,
                judge_model=body.judge_model,
            ):
                yield sse_frame(evt)
        except Exception as e:
            # Response headers are already sent once streaming starts, so the
            # failure can only be reported in-band. Log it here as well:
            # otherwise the traceback is lost on the server side.
            log.exception(
                "conversation judge SSE stream failed (conversation_id=%s)",
                body.conversation_id,
            )
            yield sse_frame({"event": "error", "phase": "server", "message": str(e)})
        yield sse_frame({"event": "done"})

    return StreamingResponse(
        event_iter(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "X-Accel-Buffering": "no",
        },
    )
@router.post(
    "/judge/conversation-retry-baseline",
    response_model=RetryBaselineJudgeOut,
)
async def retry_baseline_conversation_judge(
    body: RetryBaselineJudgeBody,
    _auth: InternalEvalAuth,
    judge_svc: Annotated[
        EvalJudgeManualService, Depends(get_eval_judge_manual_service)
    ],
):
    """Retry the baseline conversation judge and return the validated result.

    Forwards the conversation id, fixture filename, the baseline turn-judge
    flag and the judge provider/model from the request body to the service.
    """
    retried = await judge_svc.retry_baseline_conversation_judge(
        body.conversation_id,
        body.fixture_filename,
        include_baseline_turn_judges=body.include_baseline_turn_judges,
        judge_provider=body.judge_provider,
        judge_model=body.judge_model,
    )
    result = RetryBaselineJudgeOut.model_validate(retried)
    return result
@router.post("/judge/memoir-chapters", response_model=ManualJudgeMemoirOut)
async def judge_memoir_chapters_manual(
    body: ManualJudgeMemoirBody,
    _auth: InternalEvalAuth,
    judge_svc: Annotated[
        EvalJudgeManualService, Depends(get_eval_judge_manual_service)
    ],
):
    """Run a manual memoir judge for a user and return the validated result.

    The user id and baseline sections are passed positionally; the judge
    provider and model come from the request body.
    """
    judged = await judge_svc.judge_memoir_for_user(
        body.user_id,
        body.baseline_sections,
        judge_provider=body.judge_provider,
        judge_model=body.judge_model,
    )
    result = ManualJudgeMemoirOut.model_validate(judged)
    return result
@router.post("/judge/memoir-chapters-stream")
async def judge_memoir_chapters_stream(
    body: ManualJudgeMemoirBody,
    _auth: InternalEvalAuth,
    judge_svc: Annotated[
        EvalJudgeManualService, Depends(get_eval_judge_manual_service)
    ],
):
    """Stream memoir-chapter judge events as Server-Sent Events.

    Each event from the judge service is emitted as a ``data: <json>`` frame.
    If the service raises, one ``{"event": "error", "phase": "server"}`` frame
    carrying ``str(exc)`` is emitted. A final ``{"event": "done"}`` frame
    follows in both the success and the error case.

    Returns:
        StreamingResponse with ``text/event-stream`` media type and headers
        that disable caching and proxy buffering.
    """
    # Imported locally so this handler does not rely on a module-level import.
    import logging

    log = logging.getLogger(__name__)

    def sse_frame(payload) -> str:
        """Serialize one payload as a single SSE ``data:`` frame."""
        return f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"

    async def event_iter():
        try:
            async for evt in judge_svc.iter_memoir_chapter_judge_sse(
                body.user_id,
                body.baseline_sections,
                judge_provider=body.judge_provider,
                judge_model=body.judge_model,
            ):
                yield sse_frame(evt)
        except Exception as e:
            # Response headers are already sent once streaming starts, so the
            # failure can only be reported in-band. Log it here as well:
            # otherwise the traceback is lost on the server side.
            log.exception(
                "memoir chapter judge SSE stream failed (user_id=%s)",
                body.user_id,
            )
            yield sse_frame({"event": "error", "phase": "server", "message": str(e)})
        yield sse_frame({"event": "done"})

    return StreamingResponse(
        event_iter(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "X-Accel-Buffering": "no",
        },
    )
@router.get("/users/{user_id}/memoir-snapshot", response_model=UserMemoirSnapshotOut)
async def get_user_memoir_snapshot(
    user_id: str,
    _auth: InternalEvalAuth,
    judge_svc: Annotated[
        EvalJudgeManualService, Depends(get_eval_judge_manual_service)
    ],
):
    """Return the memoir snapshot the judge service produces for ``user_id``."""
    snapshot = await judge_svc.memoir_snapshot(user_id)
    result = UserMemoirSnapshotOut.model_validate(snapshot)
    return result
@router.get(
    "/fixtures/user-exports",
    response_model=UserExportFixtureListOut,
)
async def list_user_export_fixtures(
    _auth: InternalEvalAuth,
    svc: Annotated[EvaluationAdminService, Depends(get_evaluation_admin_service)],
):
    """List the user-export fixture names reported by the admin service."""
    # The service call is synchronous (not awaited), as in the original handler.
    fixture_names = svc.list_user_export_fixture_names()
    return UserExportFixtureListOut(items=fixture_names)
@router.get(
    "/fixtures/user-exports/{filename}",
    response_model=UserExportFixtureDetailOut,
)
async def get_user_export_fixture(
    filename: str,
    _auth: InternalEvalAuth,
):
    """Return the parsed content of one user-export fixture.

    The fixture is read into dialogue turns plus its raw markdown; the source
    user id and the memoir chapter sections are then extracted from that
    markdown.

    Raises:
        BadRequestError: ``read_user_export_fixture`` raised ValueError for
            ``filename``.
        NotFoundError: ``read_user_export_fixture`` raised FileNotFoundError.
    """
    try:
        turns, raw_md = read_user_export_fixture(filename)
    except ValueError:
        raise BadRequestError("invalid fixture filename") from None
    except FileNotFoundError:
        # `from None` matches the ValueError branch: the API error replaces the
        # low-level exception rather than being chained to it.
        raise NotFoundError("fixture not found") from None

    memoir_tuples = extract_memoir_chapter_sections_from_export_md(raw_md)
    return UserExportFixtureDetailOut(
        filename=filename,
        turns=[UserExportFixtureTurnOut(user=u, ai=a) for u, a in turns],
        source_user_id=extract_source_user_id_from_export_md(raw_md),
        memoir_sections=[
            MemoirSectionBaselineOut(title=t, body=b) for t, b in memoir_tuples
        ],
    )