feat(evaluation): 成稿 100 分 rubric、证据评审与评测台调整
- 回忆录细项上限收紧为合计 100 分,去掉 110 折算与 raw_dimension_total
- judge_memoir 拼接原始访谈与可选导出基线;无证据时提示保守打真实性相关分
- 自动评测 run 与手动章节/故事评审统一带 transcript 证据(会话/用户聚合、截断)
- 访谈打分仍为情绪强化版 15 细项、总分 100
- 评测台默认基准改为 zuckxu 导出 MD;移除逐轮用户句对齐表及相关逻辑
- 新增 judge schema 与 memoir prompt 组装的单元测试
This commit is contained in:
27
api/tests/test_judge_service.py
Normal file
27
api/tests/test_judge_service.py
Normal file
@@ -0,0 +1,27 @@
|
||||
"""Evaluation judge prompt assembly tests."""
|
||||
|
||||
from app.features.evaluation.judge_service import _build_memoir_judge_prompt
|
||||
|
||||
|
||||
def test_build_memoir_prompt_includes_source_and_reference_evidence() -> None:
    """Verify the memoir judge prompt embeds every supplied evidence section.

    The prompt is built with a memoir body, a source transcript, a reference
    memoir and evidence notes. The assembled text must then contain each of
    the expected section markers as well as the transcript line verbatim.
    """
    transcript_line = "用户: 我后来去了深圳工作。"
    rendered = _build_memoir_judge_prompt(
        memoir_markdown="# 当前正文\n他后来去了南方。",
        source_transcript=transcript_line,
        reference_memoir_markdown="# 导出基线\n他去了深圳。",
        evidence_notes="必须严格核对真实性。",
    )

    # Checked in this order so the first missing fragment is the one reported.
    expected_fragments = (
        "【评审说明】",
        "【原始访谈/证据】",
        transcript_line,
        "【参考基线/导出成稿】",
        "【当前回忆录正文】",
    )
    for fragment in expected_fragments:
        assert fragment in rendered
|
||||
|
||||
|
||||
def test_build_memoir_prompt_requires_conservative_scoring_without_evidence() -> None:
    """Verify the prompt built from a memoir alone carries the fallback wording.

    Only ``memoir_markdown`` is passed; the resulting prompt must contain both
    the "no source evidence" notice and the conservative-scoring instruction.
    """
    rendered = _build_memoir_judge_prompt(
        memoir_markdown="# 当前正文\n他后来去了南方。",
    )

    # Same order as the individual checks: notice first, then the instruction.
    for fragment in ("无可用原始访谈证据", "必须保守打分"):
        assert fragment in rendered
|
||||
Reference in New Issue
Block a user