feat(eval): memoir A/B chapter judging and eval-web parity with dialogue
- Judge the baseline excerpt and the library chapter separately; add build_memoir_compare_summary for gate, nine-dim and leaf deltas.
- Memoir SSE chapter payload: baseline_judge, compare_summary, baseline_judge_error.
- MemoirJudgeOutput: loose score coercion and post-validate clamp; memoir judge prompt caps are read from settings.
- app-eval-web: two-column MemoirScoreCard layout, MemoirCompareSummary, chapter blocks and CSS.
- Add memoir_compare_summary, log_events, celery_log_context, memoir_pipeline_progress; tests and migration 0014.
- Misc: memory/evidence and enrichment paths, task/orchestrator updates, internal-eval docs, env examples.
This commit is contained in:
@@ -134,6 +134,19 @@ class MemoirSubmitOut(BaseModel):
|
||||
elapsed_ms: int | None = Field(default=None, ge=0)
|
||||
|
||||
|
||||
class MemoirPipelineRunOut(BaseModel):
    """Redis pipeline snapshot (memoir_pipeline_run:*); fields are extended as the pipeline iterates."""

    # NOTE(review): extra="allow" presumably lets snapshot keys that are not
    # declared below pass through as the payload grows (see the docstring) —
    # confirm against Pydantic's ConfigDict documentation.
    model_config = ConfigDict(extra="allow")

    # The only field without a default; every field below is optional.
    memoir_correlation_id: str
    user_id: str | None = None
    # Kept as a plain string; NOTE(review): the name suggests a UTC timestamp,
    # but the exact format is not visible from this model.
    started_at_utc: str | None = None
    # Free-form payloads: this model does not constrain their inner schema.
    phase1: dict[str, Any] | None = None
    # default_factory gives each instance its own empty container.
    phase2: list[Any] = Field(default_factory=list)
    fanout: dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
|
||||
class ManualJudgeConversationBody(BaseModel):
|
||||
conversation_id: str
|
||||
"""与当前评测台选中的 MD 一致,供基准 transcript / 整体打分。"""
|
||||
@@ -202,8 +215,15 @@ class ManualJudgeMemoirBody(BaseModel):
|
||||
|
||||
class ManualJudgeMemoirOut(BaseModel):
    # NOTE(review): presumably the response model paired with
    # ManualJudgeMemoirBody for a manual memoir judge request — confirm at the
    # route that returns it.
    user_id: str
    judge_provider: EvalJudgeProviderLiteral = "zhipu"
    # Model id actually resolved for this request (consistent with
    # `build_eval_judge_llm_spec`).
    judge_model: str = ""
    """本次请求实际解析后的模型 id(与 `build_eval_judge_llm_spec` 一致)。"""
    # Per-item judge results; each entry is a free-form dict whose keys are not
    # fixed by this model.
    chapter_results: list[dict[str, Any]] = Field(default_factory=list)
    story_results: list[dict[str, Any]] = Field(default_factory=list)
    # Human-readable reasons when judging a single chapter/story, or loading
    # the list, fails (the HTTP status is still 200).
    errors: list[str] = Field(default_factory=list)
    """单条章节/故事评审或列表加载失败时的可读原因(HTTP 仍为 200)。"""
    # Hints for the case where nothing failed but no item was judged (for
    # example, every finished draft is empty).
    warnings: list[str] = Field(default_factory=list)
    """无失败但未评到任何条目时的提示(例如成稿均为空)。"""
|
||||
|
||||
|
||||
class MemoirChapterSnapOut(BaseModel):
|
||||
|
||||
Reference in New Issue
Block a user