feat(eval): memoir A/B chapter judging and eval-web parity with dialogue
- Judge baseline excerpt and library chapter separately; build_memoir_compare_summary for gate, nine-dim and leaf deltas.
- Memoir SSE chapter payload: baseline_judge, compare_summary, baseline_judge_error.
- MemoirJudgeOutput: loose score coercion and post-validate clamp; memoir judge prompt caps from settings.
- app-eval-web: two-column MemoirScoreCard layout, MemoirCompareSummary, chapter blocks and CSS.
- Add memoir_compare_summary, log_events, celery_log_context, memoir_pipeline_progress; tests and migration 0014.
- Misc: memory/evidence and enrichment paths, task/orchestrator updates, internal-eval docs, env examples.
This commit is contained in:
@@ -10,6 +10,7 @@ from fastapi.responses import StreamingResponse
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.core.db import get_async_db
|
||||
from app.core.memoir_pipeline_progress import get_pipeline_run_for_eval
|
||||
from app.features.evaluation.admin_service import EvaluationAdminService
|
||||
from app.features.evaluation.deps import (
|
||||
get_eval_judge_manual_service,
|
||||
@@ -37,6 +38,7 @@ from app.features.evaluation.schemas import (
|
||||
ManualJudgeMemoirBody,
|
||||
ManualJudgeMemoirOut,
|
||||
MemoirPhase1ReadyOut,
|
||||
MemoirPipelineRunOut,
|
||||
MemoirSectionBaselineOut,
|
||||
MemoirSubmitOut,
|
||||
PlaygroundConversationJudgeOut,
|
||||
@@ -166,6 +168,42 @@ async def get_playground_conversation_judge(
|
||||
)
|
||||
|
||||
|
||||
@router.get(
    "/users/{user_id}/memoir-pipeline-run",
    response_model=MemoirPipelineRunOut,
)
async def get_memoir_pipeline_run(
    user_id: str,
    _auth: InternalEvalAuth,
    phase1_task_id: Annotated[
        str | None,
        Query(description="Phase1 Celery task id(与 memoir-submit 返回一致)"),
    ] = None,
    memoir_correlation_id: Annotated[
        str | None,
        Query(description="流水线聚合根 ID(与日志 memoir_correlation_id 一致)"),
    ] = None,
):
    """Return the memoir pipeline snapshot for one user.

    Exactly one of the two lookup keys must be supplied as a query parameter.

    Args:
        user_id: Path parameter; surrounding whitespace is stripped before
            the lookup.
        _auth: Internal-eval auth dependency; not used in the body.
        phase1_task_id: Phase-1 Celery task id to look the run up by.
        memoir_correlation_id: Pipeline correlation id to look the run up by.

    Returns:
        The snapshot from ``get_pipeline_run_for_eval`` validated into
        ``MemoirPipelineRunOut``.

    Raises:
        HTTPException: 400 when neither or both lookup keys are given,
            404 when the lookup yields a falsy result.
    """
    has_task_id = bool(phase1_task_id)
    has_correlation_id = bool(memoir_correlation_id)

    # Equal flags mean "neither" or "both"; either way the request is invalid.
    if has_task_id == has_correlation_id:
        detail = (
            "provide only one of phase1_task_id or memoir_correlation_id"
            if has_task_id
            else "provide phase1_task_id or memoir_correlation_id"
        )
        raise HTTPException(status_code=400, detail=detail)

    snapshot = get_pipeline_run_for_eval(
        user_id.strip(),
        memoir_correlation_id=memoir_correlation_id,
        phase1_task_id=phase1_task_id,
    )
    if snapshot:
        return MemoirPipelineRunOut.model_validate(snapshot)
    raise HTTPException(status_code=404, detail="pipeline snapshot not found")
|
||||
|
||||
|
||||
@router.get(
|
||||
"/sessions/{conversation_id}/memoir-phase1-ready",
|
||||
response_model=MemoirPhase1ReadyOut,
|
||||
@@ -412,6 +450,42 @@ async def judge_memoir_chapters_manual(
|
||||
return ManualJudgeMemoirOut.model_validate(payload)
|
||||
|
||||
|
||||
@router.post("/judge/memoir-chapters-stream")
async def judge_memoir_chapters_stream(
    body: ManualJudgeMemoirBody,
    _auth: InternalEvalAuth,
    judge_svc: Annotated[
        EvalJudgeManualService, Depends(get_eval_judge_manual_service)
    ],
):
    """Stream memoir chapter judging events as Server-Sent Events.

    Each event yielded by ``judge_svc.iter_memoir_chapter_judge_sse`` is
    JSON-encoded and sent as one ``data:`` frame. If the iterator raises, the
    failure is logged with its traceback and reported to the client as an
    ``{"event": "error", "phase": "server", ...}`` frame instead of aborting
    the response.

    Args:
        body: Request payload; ``user_id``, ``baseline_sections``,
            ``judge_provider`` and ``judge_model`` are forwarded to the
            judge service.
        _auth: Internal-eval auth dependency; not used in the body.
        judge_svc: Judge service resolved through
            ``get_eval_judge_manual_service``.

    Returns:
        A ``StreamingResponse`` with media type ``text/event-stream``.
    """
    # Function-scope import keeps this block independent of file-level imports.
    import logging

    log = logging.getLogger(__name__)

    def _sse_frame(payload) -> str:
        """Encode one payload as a single SSE ``data:`` frame."""
        return f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"

    async def event_iter():
        try:
            async for evt in judge_svc.iter_memoir_chapter_judge_sse(
                body.user_id,
                body.baseline_sections,
                judge_provider=body.judge_provider,
                judge_model=body.judge_model,
            ):
                yield _sse_frame(evt)
        except Exception as e:
            # The HTTP status is already sent once streaming starts, so the
            # error can only be reported in-band; log it so the traceback is
            # not lost on the server side.
            log.exception("memoir chapter judge stream failed")
            yield _sse_frame(
                {"event": "error", "phase": "server", "message": str(e)}
            )
        # NOTE(review): the terminal "done" frame is assumed to be sent on
        # both the success and the error path — confirm against the original
        # indentation.
        yield _sse_frame({"event": "done"})

    return StreamingResponse(
        event_iter(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            # Asks buffering reverse proxies (e.g. nginx) to pass frames
            # through immediately.
            "X-Accel-Buffering": "no",
        },
    )
|
||||
|
||||
|
||||
@router.get("/users/{user_id}/memoir-snapshot", response_model=UserMemoirSnapshotOut)
|
||||
async def get_user_memoir_snapshot(
|
||||
user_id: str,
|
||||
|
||||
Reference in New Issue
Block a user