- Merge internal-eval into development.sh (single Celery/infra); internal-eval.sh wraps with LIFE_ECHO_WITH_INTERNAL_EVAL; EVAL_ATTACH_ONLY for attaching 8001 when :8000 is already up; document in api/docs/internal-eval.md. - Evaluation: transcript_for_judge, judge error surfacing, rubric/schema tweaks, execution_service and router updates; tests for judge and composite eval. - Memory: ingest nested transaction for embedding/enrichment rollback safety. - Conversation WS: logger.exception for pipeline errors (avoid loguru KeyError). - app-eval-web: Playground saved replays, dialogue turns helper, hash user_id for Memoir; Memoir chapter baseline↔DB row compare with title heuristics; Stories page (#memoir-stories); Markdown + copy buttons; toolbar/panel UI; react-markdown; development proxy and fixture updates.
46 lines
1.6 KiB
JSON
46 lines
1.6 KiB
JSON
{
  "rubric_id": "conversation_v1_memoir_v1_hardened",
  "description": "定性标定用例:用于人工或半自动 spot-check,非 CI 黄金分值。",
  "conversation_cases": [
    {
      "id": "cold_skip_emotion",
      "summary": "用户表达艰难,AI 直接追问事实、无情绪承接",
      "transcript_hint": "用户: 那段时间挺难熬的。\\nAI: 你当时做什么工作?",
      "expected_band": {
        "emotion_carry": [0, 4],
        "non_leading": [0, 3]
      },
      "must_flag_issues": ["情绪", "承接"]
    },
    {
      "id": "strong_reflect",
      "summary": "先承接情绪再具体追问",
      "transcript_hint": "用户: 我很愧疚没陪父亲最后一段。\\nAI: 听起来那份愧疚一直在你心里。你愿意说说最后一次见面时发生了什么吗?",
      "expected_band": {
        "emotion_carry": [6, 10],
        "empathy_depth": [5, 8]
      },
      "must_flag_issues": []
    }
  ],
  "memoir_cases": [
    {
      "id": "hallucination_risk",
      "summary": "成稿出现证据 transcript 未提及的具体职务/数字",
      "expected_band": {
        "mem_fidelity": [0, 5],
        "mem_traceability": [0, 3]
      },
      "must_flag_issues": ["编造", "追溯"]
    },
    {
      "id": "single_chapter_scope",
      "summary": "仅单章节节选时 coh_cross_chapter 应保守或标 insufficient_evidence",
      "notes": "评审 prompt 已要求单节选不臆造全书关联;人工检查 rationale / insufficient_evidence。",
      "expected_band": {
        "coh_cross_chapter": [0, 2]
      }
    }
  ]
}