refactor(eval+memoir):精简内部评测路由与服务,composite/对话摘要与 judge 能力补强

- 访谈:新增 interview_state_hints,联动 orchestrator 与提示词
- 回忆录:story_pipeline_sync/state/memory/post_commit 与 Celery 任务调整
- 基建:开发用 celery broker、compose/development 脚本、依赖注入
- eval-web:移除数据集/实验/版本等页面与流式轮询,突出 Playground
- 文档与单测同步
This commit is contained in:
Kevin
2026-04-08 21:36:12 +08:00
parent 2a0c80987d
commit 064ad2161d
64 changed files with 3412 additions and 3068 deletions

View File

@@ -0,0 +1,34 @@
"""评测合成记分(与批量实验 Celery 解耦后的纯函数,供单测保留)。"""
from __future__ import annotations
from typing import Any
def composite_score(
conv: float | None,
mem: float | None,
weights: dict[str, Any] | None,
) -> float | None:
"""合成总分;缺失的一侧不计为 0避免把评审失败误标为极差。
仅一侧有分:返回该侧原始分(不乘权重),表示当前 run 仅完成了部分评审维度。
"""
w = weights or {}
wc = float(w.get("conversation", 0.5))
wm = float(w.get("memoir", 0.5))
has_c = conv is not None
has_m = mem is not None
if not has_c and not has_m:
return None
if has_c and has_m:
return float(wc) * float(conv) + float(wm) * float(mem)
if has_c:
return float(conv)
return float(mem)
# 兼容旧测试中的私有名
_composite = composite_score
__all__ = ["composite_score", "_composite"]