- Merge internal-eval into development.sh (single Celery/infra); internal-eval.sh wraps with LIFE_ECHO_WITH_INTERNAL_EVAL; EVAL_ATTACH_ONLY for attaching 8001 when :8000 is already up; document in api/docs/internal-eval.md. - Evaluation: transcript_for_judge, judge error surfacing, rubric/schema tweaks, execution_service and router updates; tests for judge and composite eval. - Memory: ingest nested transaction for embedding/enrichment rollback safety. - Conversation WS: logger.exception for pipeline errors (avoid loguru KeyError). - app-eval-web: Playground saved replays, dialogue turns helper, hash user_id for Memoir; Memoir chapter baseline↔DB row compare with title heuristics; Stories page (#memoir-stories); Markdown + copy buttons; toolbar/panel UI; react-markdown; development proxy and fixture updates.
19 lines
751 B
Python
19 lines
751 B
Python
"""评测合成分:评审缺失侧不得被当作 0 分。"""
|
|
|
|
from app.features.evaluation.execution_service import _composite
|
|
|
|
|
|
def test_composite_none_when_both_missing() -> None:
    """Expect ``_composite`` to return None when neither score is supplied.

    Checked both without a weights mapping and with an explicit one, so the
    presence of weights alone must not produce a numeric result.
    """
    weight_options = (None, {"conversation": 0.7, "memoir": 0.3})
    for weights in weight_options:
        assert _composite(None, None, weights) is None
|
|
|
|
|
|
def test_composite_weighted_when_both_present() -> None:
    """Expect a weight-proportional blend when both scores are supplied."""
    even_split = {"conversation": 0.5, "memoir": 0.5}
    # 80 * 0.5 + 60 * 0.5
    assert _composite(80.0, 60.0, even_split) == 70.0

    skewed = {"conversation": 0.8, "memoir": 0.2}
    # 100 * 0.8 + 0 * 0.2 -- an actual 0.0 score still takes part in the blend
    assert _composite(100.0, 0.0, skewed) == 80.0
|
|
|
|
|
|
def test_composite_single_side_uses_raw_score() -> None:
    """Expect the lone available score back unchanged when the other is None.

    A missing side must not be counted as 0, so the weights are expected to
    have no effect on the result here.
    """
    half = 0.5

    first_only = _composite(77.0, None, {"conversation": half, "memoir": half})
    assert first_only == 77.0

    second_only = _composite(None, 55.5, {"conversation": half, "memoir": half})
    assert second_only == 55.5
|