Files
life-echo/api/app/features/evaluation/router.py

536 lines
18 KiB
Python
Raw Normal View History

"""内部评测 REST API。"""
from __future__ import annotations
2026-04-06 23:19:20 +08:00
import json
from typing import Annotated
from fastapi import APIRouter, Depends, HTTPException, Query
2026-04-06 23:19:20 +08:00
from fastapi.responses import StreamingResponse
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.db import get_async_db
from app.core.memoir_pipeline_progress import get_pipeline_run_for_eval
from app.features.evaluation.admin_service import EvaluationAdminService
2026-04-06 23:19:20 +08:00
from app.features.evaluation.deps import (
get_eval_judge_manual_service,
get_evaluation_admin_service,
get_memoir_readiness_service,
2026-04-06 23:19:20 +08:00
get_replay_conversation_service,
)
from app.features.evaluation.errors import (
EvaluationBadRequestError,
EvaluationNotFoundError,
)
2026-04-06 23:19:20 +08:00
from app.features.evaluation.importers.user_export_markdown import (
extract_memoir_chapter_sections_from_export_md,
extract_source_user_id_from_export_md,
)
from app.features.evaluation.internal_auth import InternalEvalAuth
2026-04-06 23:19:20 +08:00
from app.features.evaluation.judge_manual_service import EvalJudgeManualService
from app.features.evaluation.memoir_readiness_service import MemoirReadinessService
2026-04-06 23:19:20 +08:00
from app.features.evaluation.replay_service import ReplayConversationService
from app.features.evaluation.schemas import (
2026-04-06 23:19:20 +08:00
EvalSandboxOut,
ManualJudgeConversationBody,
ManualJudgeConversationOut,
ManualJudgeConversationStreamBody,
ManualJudgeMemoirBody,
ManualJudgeMemoirOut,
MemoirPhase1ReadyOut,
MemoirPipelineRunOut,
2026-04-06 23:19:20 +08:00
MemoirSectionBaselineOut,
MemoirSubmitOut,
PlaygroundConversationJudgeOut,
2026-04-06 23:19:20 +08:00
ReplayBootstrapBody,
ReplayBootstrapOut,
ReplayConversationBody,
ReplayConversationOut,
RetryBaselineJudgeBody,
RetryBaselineJudgeOut,
SessionDialogueOut,
SessionListItem,
SessionListResponse,
SessionTranscriptOut,
UserExportFixtureDetailOut,
UserExportFixtureListOut,
UserExportFixtureTurnOut,
2026-04-06 23:19:20 +08:00
UserMemoirSnapshotOut,
)
from app.features.evaluation.session_catalog_service import SessionCatalogService
2026-04-06 23:19:20 +08:00
from app.features.evaluation.user_export_fixtures import read_user_export_fixture
# Router that collects every endpoint defined in this module; all routes
# registered on it carry the "internal-evaluation" tag.
router = APIRouter(tags=["internal-evaluation"])
@router.get("/ping", include_in_schema=False)
async def eval_api_ping() -> dict[str, str | bool]:
    """Unauthenticated probe: confirms this process is internal_main and the router is mounted."""
    payload: dict[str, str | bool] = {
        "ok": True,
        "service": "life-echo-internal-eval",
    }
    return payload
def _eval_http_exc(
    e: EvaluationNotFoundError | EvaluationBadRequestError,
) -> HTTPException:
    """Translate an evaluation domain error into an ``HTTPException``.

    An ``EvaluationNotFoundError`` becomes a 404; any other error passed in
    becomes a 400. The error's ``detail`` is forwarded unchanged. The
    exception is returned, not raised, so callers can ``raise ... from e``.
    """
    status_code = 404 if isinstance(e, EvaluationNotFoundError) else 400
    return HTTPException(status_code=status_code, detail=e.detail)
@router.get("/sessions", response_model=SessionListResponse)
async def list_sessions(
    _auth: InternalEvalAuth,
    db: Annotated[AsyncSession, Depends(get_async_db)],
    offset: int = Query(0, ge=0),
    limit: int = Query(50, ge=1, le=200),
    user_id: str | None = Query(None),
    q: str | None = Query(None),
    status: str | None = Query(
        None,
        description="按会话 status 过滤,如 active",
    ),
):
    """Return one page of sessions together with the total reported by the catalog.

    All filtering and paging arguments are forwarded as-is to
    ``SessionCatalogService.list_sessions``; each returned row is copied
    field by field into a ``SessionListItem``.
    """
    page, total_count = await SessionCatalogService(db).list_sessions(
        offset=offset, limit=limit, user_id=user_id, q=q, status=status
    )

    items: list[SessionListItem] = []
    for row in page:
        items.append(
            SessionListItem(
                id=row.id,
                user_id=row.user_id,
                user_phone=row.user_phone,
                started_at=row.started_at,
                last_message_at=row.last_message_at,
                conversation_stage=row.conversation_stage,
                current_topic=row.current_topic,
                status=row.status,
            )
        )
    return SessionListResponse(items=items, total=total_count)
@router.get(
    "/sessions/{conversation_id}/dialogue",
    response_model=SessionDialogueOut,
)
async def get_session_dialogue(
    conversation_id: str,
    _auth: InternalEvalAuth,
    db: Annotated[AsyncSession, Depends(get_async_db)],
):
    """Return the dialogue of one conversation as produced by the session catalog.

    Raises:
        HTTPException: 404 when the catalog returns a falsy result.
    """
    dialogue = await SessionCatalogService(db).get_session_dialogue(conversation_id)
    if dialogue:
        return dialogue
    raise HTTPException(status_code=404, detail="conversation not found")
@router.get(
    "/sessions/{conversation_id}/transcript",
    response_model=SessionTranscriptOut,
)
async def get_session_transcript(
    conversation_id: str,
    _auth: InternalEvalAuth,
    db: Annotated[AsyncSession, Depends(get_async_db)],
):
    """Return the user utterances of a conversation (from segments and from messages).

    Raises:
        HTTPException: 404 when the catalog has no transcript for the id.
    """
    transcript = await SessionCatalogService(db).get_transcript(conversation_id)
    if transcript:
        return SessionTranscriptOut(
            conversation_id=transcript.conversation_id,
            user_id=transcript.user_id,
            user_utterances_from_segments=transcript.user_utterances_from_segments,
            user_utterances_from_messages=transcript.user_utterances_from_messages,
        )
    raise HTTPException(status_code=404, detail="conversation not found")
@router.get(
    "/sessions/{conversation_id}/playground-conversation-judge",
    response_model=PlaygroundConversationJudgeOut,
)
async def get_playground_conversation_judge(
    conversation_id: str,
    _auth: InternalEvalAuth,
    db: Annotated[AsyncSession, Depends(get_async_db)],
):
    """Return the playground conversation-judge JSON for a conversation.

    The transcript lookup is used only as an existence check before the
    judge JSON is fetched.

    Raises:
        HTTPException: 404 when the catalog has no transcript for the id.
    """
    service = SessionCatalogService(db)
    transcript = await service.get_transcript(conversation_id)
    if not transcript:
        raise HTTPException(status_code=404, detail="conversation not found")

    judge_json = await service.get_playground_conversation_judge_json(conversation_id)
    return PlaygroundConversationJudgeOut(
        conversation_id=conversation_id,
        judge=judge_json,
    )
@router.get(
    "/users/{user_id}/memoir-pipeline-run",
    response_model=MemoirPipelineRunOut,
)
async def get_memoir_pipeline_run(
    user_id: str,
    _auth: InternalEvalAuth,
    phase1_task_id: Annotated[
        str | None,
        Query(description="Phase1 Celery task id与 memoir-submit 返回一致)"),
    ] = None,
    memoir_correlation_id: Annotated[
        str | None,
        Query(description="流水线聚合根 ID与日志 memoir_correlation_id 一致)"),
    ] = None,
):
    """Look up a memoir pipeline run snapshot for a user.

    Exactly one of ``phase1_task_id`` / ``memoir_correlation_id`` must be
    supplied (non-empty); ``user_id`` is stripped of surrounding whitespace
    before the lookup.

    Raises:
        HTTPException: 400 when neither or both identifiers are given;
            404 when no snapshot is found.
    """
    has_task_id = bool(phase1_task_id)
    has_correlation_id = bool(memoir_correlation_id)

    if not (has_task_id or has_correlation_id):
        raise HTTPException(
            status_code=400,
            detail="provide phase1_task_id or memoir_correlation_id",
        )
    if has_task_id and has_correlation_id:
        raise HTTPException(
            status_code=400,
            detail="provide only one of phase1_task_id or memoir_correlation_id",
        )

    snapshot = get_pipeline_run_for_eval(
        user_id.strip(),
        memoir_correlation_id=memoir_correlation_id,
        phase1_task_id=phase1_task_id,
    )
    if snapshot:
        return MemoirPipelineRunOut.model_validate(snapshot)
    raise HTTPException(status_code=404, detail="pipeline snapshot not found")
@router.get(
    "/sessions/{conversation_id}/memoir-phase1-ready",
    response_model=MemoirPhase1ReadyOut,
)
async def memoir_phase1_ready(
    conversation_id: str,
    _auth: InternalEvalAuth,
    svc: Annotated[MemoirReadinessService, Depends(get_memoir_readiness_service)],
    segment_ids: Annotated[
        list[str],
        Query(
            min_length=1,
            description="本批待检查的 segment id可重复 query 参数 segment_ids=id1&segment_ids=id2",
        ),
    ],
):
    """Ask the readiness service about memoir phase 1 for the given segments.

    Raises:
        HTTPException: 404 / 400 translated from the service's
            ``EvaluationNotFoundError`` / ``EvaluationBadRequestError``.
    """
    try:
        readiness = await svc.memoir_phase1_ready_for_segments(
            conversation_id=conversation_id,
            segment_ids=segment_ids,
        )
    except (EvaluationNotFoundError, EvaluationBadRequestError) as err:
        raise _eval_http_exc(err) from err
    return readiness
@router.post(
    "/sessions/{conversation_id}/memoir-submit",
    response_model=MemoirSubmitOut,
)
async def memoir_submit_phase1(
    conversation_id: str,
    _auth: InternalEvalAuth,
    svc: Annotated[MemoirReadinessService, Depends(get_memoir_readiness_service)],
):
    """Submit memoir phase 1 for a conversation through the readiness service.

    Raises:
        HTTPException: 404 / 400 translated from the service's
            ``EvaluationNotFoundError`` / ``EvaluationBadRequestError``.
    """
    try:
        submitted = await svc.submit_memoir_phase1_for_conversation(
            conversation_id=conversation_id,
        )
    except (EvaluationNotFoundError, EvaluationBadRequestError) as err:
        raise _eval_http_exc(err) from err
    return submitted
2026-04-06 23:19:20 +08:00
@router.post("/sessions/replay-bootstrap", response_model=ReplayBootstrapOut)
async def replay_bootstrap(
    body: ReplayBootstrapBody,
    _auth: InternalEvalAuth,
    replay: Annotated[
        ReplayConversationService, Depends(get_replay_conversation_service)
    ],
):
    """Bootstrap a conversation for ``body.user_id`` and return its id.

    Raises:
        HTTPException: 400 translated from ``EvaluationBadRequestError``.
    """
    try:
        conversation_id = await replay.bootstrap_conversation(body.user_id)
    except EvaluationBadRequestError as err:
        raise _eval_http_exc(err) from err
    else:
        return ReplayBootstrapOut(conversation_id=conversation_id)
@router.post("/sessions/eval-sandbox", response_model=EvalSandboxOut)
async def create_eval_sandbox(
    _auth: InternalEvalAuth,
    replay: Annotated[
        ReplayConversationService, Depends(get_replay_conversation_service)
    ],
):
    """Create an evaluation sandbox and return its user, conversation, phone and nickname.

    Raises:
        HTTPException: 400 translated from ``EvaluationBadRequestError``.
    """
    try:
        sandbox = await replay.create_eval_sandbox()
        # The service result is unpacked as a 4-tuple, in this order.
        user_id, conversation_id, phone, nickname = sandbox
    except EvaluationBadRequestError as err:
        raise _eval_http_exc(err) from err
    return EvalSandboxOut(
        user_id=user_id,
        conversation_id=conversation_id,
        phone=phone,
        nickname=nickname,
    )
@router.post("/replay/conversation", response_model=ReplayConversationOut)
async def replay_conversation(
    body: ReplayConversationBody,
    _auth: InternalEvalAuth,
    replay: Annotated[
        ReplayConversationService, Depends(get_replay_conversation_service)
    ],
):
    """Replay user turns into a conversation, from a fixture file or an inline list.

    Exactly one input source is accepted: ``body.fixture_filename`` or
    ``body.user_utterances``. Inline utterances are stringified and blank
    entries are discarded before replay.

    Raises:
        HTTPException: 400 when both sources are given, when neither is
            given, or when the inline list has no non-blank entry; 404 / 400
            translated from the replay service's evaluation errors.
    """
    if body.fixture_filename and body.user_utterances:
        raise HTTPException(
            status_code=400,
            detail="provide only one of fixture_filename or user_utterances",
        )

    try:
        if body.fixture_filename:
            fixture_name = body.fixture_filename.strip()
            turns, echoed, segment_ids, timing = await replay.replay_fixture(
                conversation_id=body.conversation_id,
                fixture_filename=fixture_name,
                flush_memoir_after=body.flush_memoir_after,
                skip_memoir=body.skip_memoir,
                skip_tts=body.skip_tts,
            )
        elif body.user_utterances is None:
            raise EvaluationBadRequestError(
                "fixture_filename or user_utterances required"
            )
        else:
            cleaned = [str(item) for item in body.user_utterances if str(item).strip()]
            if not cleaned:
                raise EvaluationBadRequestError("user_utterances is empty")
            turns, segment_ids, timing = await replay.replay_utterances(
                conversation_id=body.conversation_id,
                utterances=cleaned,
                flush_memoir_after=body.flush_memoir_after,
                skip_memoir=body.skip_memoir,
                skip_tts=body.skip_tts,
            )
            # The utterance path echoes back the cleaned input it replayed.
            echoed = cleaned
    except (EvaluationNotFoundError, EvaluationBadRequestError) as err:
        raise _eval_http_exc(err) from err

    # Timing is optional: every timing field falls back to None without it.
    if timing:
        started_at = timing.started_at_utc
        finished_at = timing.finished_at_utc
        elapsed = timing.elapsed_ms
    else:
        started_at = finished_at = elapsed = None

    return ReplayConversationOut(
        conversation_id=body.conversation_id,
        turns_replayed=turns,
        utterances_echo=echoed,
        segment_ids=segment_ids,
        started_at_utc=started_at,
        finished_at_utc=finished_at,
        elapsed_ms=elapsed,
    )
@router.post("/judge/conversation", response_model=ManualJudgeConversationOut)
async def judge_conversation_manual(
    body: ManualJudgeConversationBody,
    _auth: InternalEvalAuth,
    judge_svc: Annotated[
        EvalJudgeManualService, Depends(get_eval_judge_manual_service)
    ],
):
    """Run the manual conversation judge and return its validated result.

    Raises:
        HTTPException: 404 / 400 translated from the judge service's
            ``EvaluationNotFoundError`` / ``EvaluationBadRequestError``.
    """
    try:
        judged = await judge_svc.judge_conversation(
            body.conversation_id,
            body.fixture_filename,
            judge_provider=body.judge_provider,
            judge_model=body.judge_model,
        )
    except (EvaluationNotFoundError, EvaluationBadRequestError) as err:
        raise _eval_http_exc(err) from err
    return ManualJudgeConversationOut.model_validate(judged)
@router.post("/judge/conversation-stream")
async def judge_conversation_manual_stream(
    body: ManualJudgeConversationStreamBody,
    _auth: InternalEvalAuth,
    judge_svc: Annotated[
        EvalJudgeManualService, Depends(get_eval_judge_manual_service)
    ],
):
    """Stream conversation-judge events to the client as Server-Sent Events.

    Each event from ``judge_svc.iter_conversation_judge_sse`` is emitted as
    one ``data: <json>`` frame. If the iterator raises, the failure is
    logged server-side, an ``error`` event carrying the message is emitted,
    and a final ``done`` event closes the stream.
    """
    # Function-scope import: the module does not import logging at file level.
    import logging

    log = logging.getLogger(__name__)

    async def event_iter():
        try:
            async for evt in judge_svc.iter_conversation_judge_sse(
                body.conversation_id,
                body.fixture_filename,
                include_turn_judges=body.include_turn_judges,
                include_baseline_turn_judges=body.include_baseline_turn_judges,
                judge_provider=body.judge_provider,
                judge_model=body.judge_model,
            ):
                yield f"data: {json.dumps(evt, ensure_ascii=False)}\n\n"
        except Exception as e:
            # The failure is reported in-band to the client; log it here as
            # well so the traceback is not lost on the server.
            log.exception(
                "conversation judge SSE stream failed (conversation_id=%s)",
                body.conversation_id,
            )
            err = json.dumps(
                {"event": "error", "phase": "server", "message": str(e)},
                ensure_ascii=False,
            )
            yield f"data: {err}\n\n"
            yield f"data: {json.dumps({'event': 'done'}, ensure_ascii=False)}\n\n"

    return StreamingResponse(
        event_iter(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "X-Accel-Buffering": "no",
        },
    )
@router.post(
    "/judge/conversation-retry-baseline",
    response_model=RetryBaselineJudgeOut,
)
async def retry_baseline_conversation_judge(
    body: RetryBaselineJudgeBody,
    _auth: InternalEvalAuth,
    judge_svc: Annotated[
        EvalJudgeManualService, Depends(get_eval_judge_manual_service)
    ],
):
    """Re-run the baseline conversation judge and return its validated result.

    Raises:
        HTTPException: 404 / 400 translated from the judge service's
            ``EvaluationNotFoundError`` / ``EvaluationBadRequestError``.
    """
    try:
        retried = await judge_svc.retry_baseline_conversation_judge(
            body.conversation_id,
            body.fixture_filename,
            include_baseline_turn_judges=body.include_baseline_turn_judges,
            judge_provider=body.judge_provider,
            judge_model=body.judge_model,
        )
    except (EvaluationNotFoundError, EvaluationBadRequestError) as err:
        raise _eval_http_exc(err) from err
    return RetryBaselineJudgeOut.model_validate(retried)
2026-04-06 23:19:20 +08:00
@router.post("/judge/memoir-chapters", response_model=ManualJudgeMemoirOut)
async def judge_memoir_chapters_manual(
    body: ManualJudgeMemoirBody,
    _auth: InternalEvalAuth,
    judge_svc: Annotated[
        EvalJudgeManualService, Depends(get_eval_judge_manual_service)
    ],
):
    """Run the manual memoir judge for a user against the supplied baseline sections.

    Raises:
        HTTPException: 400 translated from ``EvaluationBadRequestError``.
    """
    try:
        judged = await judge_svc.judge_memoir_for_user(
            body.user_id,
            body.baseline_sections,
            judge_provider=body.judge_provider,
            judge_model=body.judge_model,
        )
    except EvaluationBadRequestError as err:
        raise _eval_http_exc(err) from err
    else:
        return ManualJudgeMemoirOut.model_validate(judged)
@router.post("/judge/memoir-chapters-stream")
async def judge_memoir_chapters_stream(
    body: ManualJudgeMemoirBody,
    _auth: InternalEvalAuth,
    judge_svc: Annotated[
        EvalJudgeManualService, Depends(get_eval_judge_manual_service)
    ],
):
    """Stream memoir-chapter judge events to the client as Server-Sent Events.

    Each event from ``judge_svc.iter_memoir_chapter_judge_sse`` is emitted as
    one ``data: <json>`` frame. If the iterator raises, the failure is
    logged server-side, an ``error`` event carrying the message is emitted,
    and a final ``done`` event closes the stream.
    """
    # Function-scope import: the module does not import logging at file level.
    import logging

    log = logging.getLogger(__name__)

    async def event_iter():
        try:
            async for evt in judge_svc.iter_memoir_chapter_judge_sse(
                body.user_id,
                body.baseline_sections,
                judge_provider=body.judge_provider,
                judge_model=body.judge_model,
            ):
                yield f"data: {json.dumps(evt, ensure_ascii=False)}\n\n"
        except Exception as e:
            # The failure is reported in-band to the client; log it here as
            # well so the traceback is not lost on the server.
            log.exception(
                "memoir chapter judge SSE stream failed (user_id=%s)",
                body.user_id,
            )
            err = json.dumps(
                {"event": "error", "phase": "server", "message": str(e)},
                ensure_ascii=False,
            )
            yield f"data: {err}\n\n"
            yield f"data: {json.dumps({'event': 'done'}, ensure_ascii=False)}\n\n"

    return StreamingResponse(
        event_iter(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "X-Accel-Buffering": "no",
        },
    )
2026-04-06 23:19:20 +08:00
@router.get("/users/{user_id}/memoir-snapshot", response_model=UserMemoirSnapshotOut)
async def get_user_memoir_snapshot(
    user_id: str,
    _auth: InternalEvalAuth,
    judge_svc: Annotated[
        EvalJudgeManualService, Depends(get_eval_judge_manual_service)
    ],
):
    """Return the memoir snapshot the judge service produces for a user.

    Raises:
        HTTPException: 400 translated from ``EvaluationBadRequestError``.
    """
    try:
        snapshot = await judge_svc.memoir_snapshot(user_id)
    except EvaluationBadRequestError as err:
        raise _eval_http_exc(err) from err
    else:
        return UserMemoirSnapshotOut.model_validate(snapshot)
@router.get(
    "/fixtures/user-exports",
    response_model=UserExportFixtureListOut,
)
async def list_user_export_fixtures(
    _auth: InternalEvalAuth,
    svc: Annotated[EvaluationAdminService, Depends(get_evaluation_admin_service)],
):
    """List the user-export fixture names reported by the admin service."""
    fixture_names = svc.list_user_export_fixture_names()
    return UserExportFixtureListOut(items=fixture_names)
@router.get(
    "/fixtures/user-exports/{filename}",
    response_model=UserExportFixtureDetailOut,
)
async def get_user_export_fixture(
    filename: str,
    _auth: InternalEvalAuth,
):
    """Return the parsed content of one user-export fixture.

    The response carries the fixture's (user, ai) turns, the source user id
    extracted from the raw markdown, and the memoir chapter sections
    extracted from the same markdown.

    Raises:
        HTTPException: 400 when reading the fixture raises ``ValueError``;
            404 when it raises ``FileNotFoundError``.
    """
    try:
        fixture_turns, markdown = read_user_export_fixture(filename)
    except ValueError:
        raise HTTPException(
            status_code=400, detail="invalid fixture filename"
        ) from None
    except FileNotFoundError:
        raise HTTPException(status_code=404, detail="fixture not found") from None

    chapter_pairs = extract_memoir_chapter_sections_from_export_md(markdown)
    turn_items = [
        UserExportFixtureTurnOut(user=user_text, ai=ai_text)
        for user_text, ai_text in fixture_turns
    ]
    source_user_id = extract_source_user_id_from_export_md(markdown)
    section_items = [
        MemoirSectionBaselineOut(title=title, body=section_body)
        for title, section_body in chapter_pairs
    ]
    return UserExportFixtureDetailOut(
        filename=filename,
        turns=turn_items,
        source_user_id=source_user_id,
        memoir_sections=section_items,
    )