Files
life-echo/api/tests/evaluation_calibration/fixtures.json

46 lines
1.6 KiB
JSON
Raw Normal View History

{
"rubric_id": "conversation_v1_memoir_v1_hardened",
"description": "定性标定用例:用于人工或半自动 spot-check非 CI 黄金分值。",
"conversation_cases": [
{
"id": "cold_skip_emotion",
"summary": "用户表达艰难AI 直接追问事实、无情绪承接",
"transcript_hint": "用户: 那段时间挺难熬的。\\nAI: 你当时做什么工作?",
"expected_band": {
"emotion_carry": [0, 4],
"non_leading": [0, 3]
},
"must_flag_issues": ["情绪", "承接"]
},
{
"id": "strong_reflect",
"summary": "先承接情绪再具体追问",
"transcript_hint": "用户: 我很愧疚没陪父亲最后一段。\\nAI: 听起来那份愧疚一直在你心里。你愿意说说最后一次见面时发生了什么吗?",
"expected_band": {
"emotion_carry": [6, 10],
"empathy_depth": [5, 8]
},
"must_flag_issues": []
}
],
"memoir_cases": [
{
"id": "hallucination_risk",
"summary": "成稿出现证据 transcript 未提及的具体职务/数字",
"expected_band": {
"mem_fidelity": [0, 5],
"mem_traceability": [0, 3]
},
"must_flag_issues": ["编造", "追溯"]
},
{
"id": "single_chapter_scope",
"summary": "仅单章节节选时 coh_cross_chapter 应保守或标 insufficient_evidence",
"notes": "评审 prompt 已要求单节选不臆造全书关联;人工检查 rationale / insufficient_evidence。",
"expected_band": {
"coh_cross_chapter": [0, 2]
}
}
]
}