Files
life-echo/api/app/features/evaluation/router.py
Kevin 064ad2161d refactor(eval+memoir):精简内部评测路由与服务,composite/对话摘要与 judge 能力补强
- 访谈:新增 interview_state_hints,联动 orchestrator 与提示词
- 回忆录:story_pipeline_sync/state/memory/post_commit 与 Celery 任务调整
- 基建:开发用 celery broker、compose/development 脚本、依赖注入
- eval-web:移除数据集/实验/版本等页面与流式轮询,突出 Playground
- 文档与单测同步
2026-04-08 21:36:12 +08:00

462 lines
15 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""内部评测 REST API。"""
from __future__ import annotations
import json
from typing import Annotated
from fastapi import APIRouter, Depends, HTTPException, Query
from fastapi.responses import StreamingResponse
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.db import get_async_db
from app.features.evaluation.admin_service import EvaluationAdminService
from app.features.evaluation.deps import (
get_eval_judge_manual_service,
get_evaluation_admin_service,
get_memoir_readiness_service,
get_replay_conversation_service,
)
from app.features.evaluation.errors import (
EvaluationBadRequestError,
EvaluationNotFoundError,
)
from app.features.evaluation.importers.user_export_markdown import (
extract_memoir_chapter_sections_from_export_md,
extract_source_user_id_from_export_md,
)
from app.features.evaluation.internal_auth import InternalEvalAuth
from app.features.evaluation.judge_manual_service import EvalJudgeManualService
from app.features.evaluation.memoir_readiness_service import MemoirReadinessService
from app.features.evaluation.replay_service import ReplayConversationService
from app.features.evaluation.schemas import (
EvalSandboxOut,
ManualJudgeConversationBody,
ManualJudgeConversationOut,
ManualJudgeConversationStreamBody,
ManualJudgeMemoirBody,
ManualJudgeMemoirOut,
MemoirPhase1ReadyOut,
MemoirSectionBaselineOut,
MemoirSubmitOut,
PlaygroundConversationJudgeOut,
ReplayBootstrapBody,
ReplayBootstrapOut,
ReplayConversationBody,
ReplayConversationOut,
RetryBaselineJudgeBody,
RetryBaselineJudgeOut,
SessionDialogueOut,
SessionListItem,
SessionListResponse,
SessionTranscriptOut,
UserExportFixtureDetailOut,
UserExportFixtureListOut,
UserExportFixtureTurnOut,
UserMemoirSnapshotOut,
)
from app.features.evaluation.session_catalog_service import SessionCatalogService
from app.features.evaluation.user_export_fixtures import read_user_export_fixture
# Router collecting the internal evaluation endpoints defined in this module;
# every route registered on it is tagged "internal-evaluation".
router = APIRouter(tags=["internal-evaluation"])
@router.get("/ping", include_in_schema=False)
async def eval_api_ping() -> dict[str, str | bool]:
    """Unauthenticated probe: confirms this process is internal_main and the router is mounted."""
    return dict(ok=True, service="life-echo-internal-eval")
def _eval_http_exc(
    e: EvaluationNotFoundError | EvaluationBadRequestError,
) -> HTTPException:
    """Build the HTTPException matching an evaluation domain error.

    Not-found errors map to status 404; anything else passed in maps to 400.
    The error's ``detail`` is forwarded unchanged. The exception is returned,
    not raised, so callers can ``raise ... from e``.
    """
    status_code = 404 if isinstance(e, EvaluationNotFoundError) else 400
    return HTTPException(status_code=status_code, detail=e.detail)
@router.get("/sessions", response_model=SessionListResponse)
async def list_sessions(
    _auth: InternalEvalAuth,
    db: Annotated[AsyncSession, Depends(get_async_db)],
    offset: int = Query(0, ge=0),
    limit: int = Query(50, ge=1, le=200),
    user_id: str | None = Query(None),
    q: str | None = Query(None),
    status: str | None = Query(
        None,
        description="按会话 status 过滤,如 active",
    ),
):
    """Return one page of sessions plus the total reported by the catalog service.

    The paging and filter query parameters are forwarded unchanged to
    ``SessionCatalogService.list_sessions``; each returned row is copied
    field by field into a ``SessionListItem``.
    """
    session_catalog = SessionCatalogService(db)
    page_rows, total_count = await session_catalog.list_sessions(
        offset=offset, limit=limit, user_id=user_id, q=q, status=status
    )
    page_items = [
        SessionListItem(
            id=row.id,
            user_id=row.user_id,
            user_phone=row.user_phone,
            started_at=row.started_at,
            last_message_at=row.last_message_at,
            conversation_stage=row.conversation_stage,
            current_topic=row.current_topic,
            status=row.status,
        )
        for row in page_rows
    ]
    return SessionListResponse(items=page_items, total=total_count)
@router.get(
    "/sessions/{conversation_id}/dialogue",
    response_model=SessionDialogueOut,
)
async def get_session_dialogue(
    conversation_id: str,
    _auth: InternalEvalAuth,
    db: Annotated[AsyncSession, Depends(get_async_db)],
):
    """Return the dialogue the catalog service has for ``conversation_id``.

    Raises:
        HTTPException: 404 when the service returns a falsy result.
    """
    dialogue = await SessionCatalogService(db).get_session_dialogue(conversation_id)
    if dialogue:
        return dialogue
    raise HTTPException(status_code=404, detail="conversation not found")
@router.get(
    "/sessions/{conversation_id}/transcript", response_model=SessionTranscriptOut
)
async def get_session_transcript(
    conversation_id: str,
    _auth: InternalEvalAuth,
    db: Annotated[AsyncSession, Depends(get_async_db)],
):
    """Return the user-utterance transcript of a conversation.

    The transcript comes from ``SessionCatalogService.get_transcript`` and is
    re-packed into a ``SessionTranscriptOut``.

    Raises:
        HTTPException: 404 when the service returns a falsy transcript.
    """
    transcript = await SessionCatalogService(db).get_transcript(conversation_id)
    if not transcript:
        raise HTTPException(status_code=404, detail="conversation not found")

    response_fields = {
        "conversation_id": transcript.conversation_id,
        "user_id": transcript.user_id,
        "user_utterances_from_segments": transcript.user_utterances_from_segments,
        "user_utterances_from_messages": transcript.user_utterances_from_messages,
    }
    return SessionTranscriptOut(**response_fields)
@router.get(
    "/sessions/{conversation_id}/playground-conversation-judge",
    response_model=PlaygroundConversationJudgeOut,
)
async def get_playground_conversation_judge(
    conversation_id: str,
    _auth: InternalEvalAuth,
    db: Annotated[AsyncSession, Depends(get_async_db)],
):
    """Return the playground conversation-judge JSON for a conversation.

    The transcript lookup is used only as an existence check; its result is
    not part of the response.

    Raises:
        HTTPException: 404 when the transcript lookup returns a falsy result.
    """
    session_catalog = SessionCatalogService(db)
    # Existence check first, so the judge lookup only runs for known conversations.
    if not await session_catalog.get_transcript(conversation_id):
        raise HTTPException(status_code=404, detail="conversation not found")

    judge_json = await session_catalog.get_playground_conversation_judge_json(
        conversation_id
    )
    return PlaygroundConversationJudgeOut(
        conversation_id=conversation_id,
        judge=judge_json,
    )
@router.get(
    "/sessions/{conversation_id}/memoir-phase1-ready",
    response_model=MemoirPhase1ReadyOut,
)
async def memoir_phase1_ready(
    conversation_id: str,
    _auth: InternalEvalAuth,
    svc: Annotated[
        MemoirReadinessService, Depends(get_memoir_readiness_service)
    ],
    segment_ids: Annotated[
        list[str],
        Query(
            min_length=1,
            description="本批待检查的 segment id可重复 query 参数 segment_ids=id1&segment_ids=id2",
        ),
    ],
):
    """Return the readiness service's phase-1 result for the given segments.

    ``segment_ids`` is a repeatable query parameter with at least one value.

    Raises:
        HTTPException: 404 / 400 when the service raises an evaluation
            not-found / bad-request error.
    """
    try:
        readiness = await svc.memoir_phase1_ready_for_segments(
            conversation_id=conversation_id,
            segment_ids=segment_ids,
        )
    except (EvaluationNotFoundError, EvaluationBadRequestError) as err:
        raise _eval_http_exc(err) from err
    return readiness
@router.post(
    "/sessions/{conversation_id}/memoir-submit",
    response_model=MemoirSubmitOut,
)
async def memoir_submit_phase1(
    conversation_id: str,
    _auth: InternalEvalAuth,
    svc: Annotated[
        MemoirReadinessService, Depends(get_memoir_readiness_service)
    ],
):
    """Run the readiness service's phase-1 memoir submit for a conversation.

    Returns whatever ``submit_memoir_phase1_for_conversation`` returns.

    Raises:
        HTTPException: 404 / 400 when the service raises an evaluation
            not-found / bad-request error.
    """
    try:
        submit_result = await svc.submit_memoir_phase1_for_conversation(
            conversation_id=conversation_id,
        )
    except (EvaluationNotFoundError, EvaluationBadRequestError) as err:
        raise _eval_http_exc(err) from err
    return submit_result
@router.post("/sessions/replay-bootstrap", response_model=ReplayBootstrapOut)
async def replay_bootstrap(
    body: ReplayBootstrapBody,
    _auth: InternalEvalAuth,
    replay: Annotated[
        ReplayConversationService, Depends(get_replay_conversation_service)
    ],
):
    """Bootstrap a conversation for ``body.user_id`` and return its id.

    Raises:
        HTTPException: produced by ``_eval_http_exc`` when the replay service
            raises an evaluation bad-request error.
    """
    try:
        new_conversation_id = await replay.bootstrap_conversation(body.user_id)
    except EvaluationBadRequestError as err:
        raise _eval_http_exc(err) from err
    else:
        return ReplayBootstrapOut(conversation_id=new_conversation_id)
@router.post("/sessions/eval-sandbox", response_model=EvalSandboxOut)
async def create_eval_sandbox(
    _auth: InternalEvalAuth,
    replay: Annotated[
        ReplayConversationService, Depends(get_replay_conversation_service)
    ],
):
    """Create an evaluation sandbox through the replay service.

    The service's 4-tuple is unpacked as (user id, conversation id, phone,
    nickname) and returned as an ``EvalSandboxOut``.

    Raises:
        HTTPException: produced by ``_eval_http_exc`` when the replay service
            raises an evaluation bad-request error.
    """
    try:
        (
            sandbox_user_id,
            sandbox_conversation_id,
            sandbox_phone,
            sandbox_nickname,
        ) = await replay.create_eval_sandbox()
    except EvaluationBadRequestError as err:
        raise _eval_http_exc(err) from err
    else:
        return EvalSandboxOut(
            user_id=sandbox_user_id,
            conversation_id=sandbox_conversation_id,
            phone=sandbox_phone,
            nickname=sandbox_nickname,
        )
@router.post("/replay/conversation", response_model=ReplayConversationOut)
async def replay_conversation(
    body: ReplayConversationBody,
    _auth: InternalEvalAuth,
    replay: Annotated[
        ReplayConversationService, Depends(get_replay_conversation_service)
    ],
):
    """Replay user utterances into a conversation from a fixture or an inline list.

    Exactly one source is expected: ``body.fixture_filename`` (stripped before
    use) or ``body.user_utterances`` (items stringified, blank ones dropped).

    Raises:
        HTTPException: 400 when both sources are supplied, when neither is,
            or when the inline list has no non-blank entries; otherwise
            404 / 400 when the replay service raises an evaluation
            not-found / bad-request error.
    """
    if body.fixture_filename and body.user_utterances:
        raise HTTPException(
            status_code=400,
            detail="provide only one of fixture_filename or user_utterances",
        )

    # Options that both replay entry points accept identically.
    shared_options = {
        "conversation_id": body.conversation_id,
        "flush_memoir_after": body.flush_memoir_after,
        "skip_memoir": body.skip_memoir,
        "skip_tts": body.skip_tts,
    }

    try:
        if body.fixture_filename:
            turns_replayed, echoed, replayed_segment_ids = await replay.replay_fixture(
                fixture_filename=body.fixture_filename.strip(),
                **shared_options,
            )
        elif body.user_utterances is not None:
            as_text = (str(item) for item in body.user_utterances)
            echoed = [text for text in as_text if text.strip()]
            if not echoed:
                raise EvaluationBadRequestError("user_utterances is empty")
            turns_replayed, replayed_segment_ids = await replay.replay_utterances(
                utterances=echoed,
                **shared_options,
            )
        else:
            raise EvaluationBadRequestError(
                "fixture_filename or user_utterances required"
            )
    except (EvaluationNotFoundError, EvaluationBadRequestError) as err:
        raise _eval_http_exc(err) from err

    return ReplayConversationOut(
        conversation_id=body.conversation_id,
        turns_replayed=turns_replayed,
        utterances_echo=echoed,
        segment_ids=replayed_segment_ids,
    )
@router.post("/judge/conversation", response_model=ManualJudgeConversationOut)
async def judge_conversation_manual(
    body: ManualJudgeConversationBody,
    _auth: InternalEvalAuth,
    judge_svc: Annotated[
        EvalJudgeManualService, Depends(get_eval_judge_manual_service)
    ],
):
    """Run a manual conversation judge and return the validated result.

    The service payload is validated into ``ManualJudgeConversationOut``.

    Raises:
        HTTPException: 404 / 400 when the judge service raises an evaluation
            not-found / bad-request error.
    """
    try:
        judge_payload = await judge_svc.judge_conversation(
            body.conversation_id,
            body.fixture_filename,
            judge_provider=body.judge_provider,
            judge_model=body.judge_model,
        )
    except (EvaluationNotFoundError, EvaluationBadRequestError) as err:
        raise _eval_http_exc(err) from err
    else:
        return ManualJudgeConversationOut.model_validate(judge_payload)
@router.post("/judge/conversation-stream")
async def judge_conversation_manual_stream(
    body: ManualJudgeConversationStreamBody,
    _auth: InternalEvalAuth,
    judge_svc: Annotated[
        EvalJudgeManualService, Depends(get_eval_judge_manual_service)
    ],
):
    """Stream conversation-judge events as Server-Sent Events.

    Each event yielded by ``iter_conversation_judge_sse`` is sent as one
    ``data: <json>`` frame. An exception raised while streaming is reported
    in-band as an ``error`` event instead of propagating. A final ``done``
    event follows the service events or the error event.
    """

    def _sse_frame(payload: object) -> str:
        # One SSE message: a "data:" line holding JSON, ended by a blank line.
        return f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"

    async def event_iter():
        try:
            service_events = judge_svc.iter_conversation_judge_sse(
                body.conversation_id,
                body.fixture_filename,
                include_turn_judges=body.include_turn_judges,
                include_baseline_turn_judges=body.include_baseline_turn_judges,
                judge_provider=body.judge_provider,
                judge_model=body.judge_model,
            )
            async for service_event in service_events:
                yield _sse_frame(service_event)
        except Exception as exc:
            # Headers are already sent once streaming starts, so report in-band.
            yield _sse_frame(
                {"event": "error", "phase": "server", "message": str(exc)}
            )
        yield _sse_frame({"event": "done"})

    sse_headers = {
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "X-Accel-Buffering": "no",
    }
    return StreamingResponse(
        event_iter(),
        media_type="text/event-stream",
        headers=sse_headers,
    )
@router.post(
    "/judge/conversation-retry-baseline",
    response_model=RetryBaselineJudgeOut,
)
async def retry_baseline_conversation_judge(
    body: RetryBaselineJudgeBody,
    _auth: InternalEvalAuth,
    judge_svc: Annotated[
        EvalJudgeManualService, Depends(get_eval_judge_manual_service)
    ],
):
    """Re-run the baseline conversation judge and return the validated result.

    The service payload is validated into ``RetryBaselineJudgeOut``.

    Raises:
        HTTPException: 404 / 400 when the judge service raises an evaluation
            not-found / bad-request error.
    """
    try:
        retry_payload = await judge_svc.retry_baseline_conversation_judge(
            body.conversation_id,
            body.fixture_filename,
            include_baseline_turn_judges=body.include_baseline_turn_judges,
            judge_provider=body.judge_provider,
            judge_model=body.judge_model,
        )
    except (EvaluationNotFoundError, EvaluationBadRequestError) as err:
        raise _eval_http_exc(err) from err
    else:
        return RetryBaselineJudgeOut.model_validate(retry_payload)
@router.post("/judge/memoir-chapters", response_model=ManualJudgeMemoirOut)
async def judge_memoir_chapters_manual(
    body: ManualJudgeMemoirBody,
    _auth: InternalEvalAuth,
    judge_svc: Annotated[
        EvalJudgeManualService, Depends(get_eval_judge_manual_service)
    ],
):
    """Run the manual memoir judge for a user and return the validated result.

    ``body.user_id`` and ``body.baseline_sections`` are passed to
    ``judge_memoir_for_user``; its payload is validated into
    ``ManualJudgeMemoirOut``.

    Raises:
        HTTPException: produced by ``_eval_http_exc`` when the judge service
            raises an evaluation bad-request error.
    """
    try:
        memoir_payload = await judge_svc.judge_memoir_for_user(
            body.user_id,
            body.baseline_sections,
            judge_provider=body.judge_provider,
            judge_model=body.judge_model,
        )
    except EvaluationBadRequestError as err:
        raise _eval_http_exc(err) from err
    else:
        return ManualJudgeMemoirOut.model_validate(memoir_payload)
@router.get("/users/{user_id}/memoir-snapshot", response_model=UserMemoirSnapshotOut)
async def get_user_memoir_snapshot(
    user_id: str,
    _auth: InternalEvalAuth,
    judge_svc: Annotated[
        EvalJudgeManualService, Depends(get_eval_judge_manual_service)
    ],
):
    """Return the judge service's memoir snapshot for ``user_id``.

    The service payload is validated into ``UserMemoirSnapshotOut``.

    Raises:
        HTTPException: produced by ``_eval_http_exc`` when the judge service
            raises an evaluation bad-request error.
    """
    try:
        snapshot_payload = await judge_svc.memoir_snapshot(user_id)
    except EvaluationBadRequestError as err:
        raise _eval_http_exc(err) from err
    else:
        return UserMemoirSnapshotOut.model_validate(snapshot_payload)
@router.get(
    "/fixtures/user-exports",
    response_model=UserExportFixtureListOut,
)
async def list_user_export_fixtures(
    _auth: InternalEvalAuth,
    svc: Annotated[EvaluationAdminService, Depends(get_evaluation_admin_service)],
):
    """Return the user-export fixture names reported by the admin service."""
    fixture_names = svc.list_user_export_fixture_names()
    return UserExportFixtureListOut(items=fixture_names)
@router.get(
    "/fixtures/user-exports/{filename}",
    response_model=UserExportFixtureDetailOut,
)
async def get_user_export_fixture(
    filename: str,
    _auth: InternalEvalAuth,
):
    """Return one user-export fixture: its turns, source user id and memoir sections.

    The turns come from ``read_user_export_fixture``; the source user id and
    memoir sections are extracted from the fixture's raw markdown.

    Raises:
        HTTPException: 400 when reading the fixture raises ``ValueError``,
            404 when it raises ``FileNotFoundError``.
    """
    try:
        turn_pairs, markdown_text = read_user_export_fixture(filename)
    except ValueError:
        raise HTTPException(
            status_code=400, detail="invalid fixture filename"
        ) from None
    except FileNotFoundError:
        raise HTTPException(status_code=404, detail="fixture not found") from None

    section_pairs = extract_memoir_chapter_sections_from_export_md(markdown_text)
    turn_models = [
        UserExportFixtureTurnOut(user=user_text, ai=ai_text)
        for user_text, ai_text in turn_pairs
    ]
    source_user_id = extract_source_user_id_from_export_md(markdown_text)
    section_models = [
        MemoirSectionBaselineOut(title=section_title, body=section_body)
        for section_title, section_body in section_pairs
    ]
    return UserExportFixtureDetailOut(
        filename=filename,
        turns=turn_models,
        source_user_id=source_user_id,
        memoir_sections=section_models,
    )