- 回忆录细项上限收紧为合计 100 分,去掉 110 折算与 raw_dimension_total - judge_memoir 拼接原始访谈与可选导出基线;无证据时提示保守打真实性相关分 - 自动评测 run 与手动章节/故事评审统一带 transcript 证据(会话/用户聚合、截断) - 访谈打分仍为情绪强化版 15 细项、总分 100 - 评测台默认基准改为 zuckxu 导出 MD;移除逐轮用户句对齐表及相关逻辑 - 新增 judge schema 与 memoir prompt 组装的单元测试
200 lines
9.0 KiB
Python
200 lines
9.0 KiB
Python
"""评审 LLM 结构化输出(json_object)。

成稿(回忆录)子项上限已自洽为 **总分 100**(由原 110 分表等比例收紧整数档,见附件 rubric)。
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
from typing import Self
|
||
|
||
from pydantic import BaseModel, Field, model_validator
|
||
|
||
|
||
class TurnJudgeOutput(BaseModel):
    """Quality score sheet for a single turn or a whole conversation.

    Emotion-weighted rubric worth 100 points, split into 16 detail items in
    five groups. After field validation the five ``*_score`` group aggregates
    are recomputed from the detail items, and ``total_score`` must agree with
    the sum of all detail items.
    """

    # NOTE(review): the previous docstring said "15 detail items", but 16 are
    # declared below. pydantic also exposes the class docstring as the
    # JSON-schema description; confirm nothing relies on the earlier wording.

    # Group 1 - emotional value and companionship (max 30)
    emotion_carry: float = Field(ge=0, le=10, description="情绪承接能力")
    empathy_depth: float = Field(ge=0, le=8, description="共情深度")
    emotion_safety: float = Field(ge=0, le=6, description="情绪安全感")
    emotion_guidance: float = Field(ge=0, le=6, description="情绪引导能力")

    # Group 2 - information gathering (max 25)
    fact_mining: float = Field(ge=0, le=8, description="关键事实挖掘")
    info_completeness_guide: float = Field(ge=0, le=8, description="信息完整性引导")
    info_depth_mining: float = Field(ge=0, le=9, description="信息深度挖掘")

    # Group 3 - persona modelling (max 15)
    persona_understanding: float = Field(ge=0, le=7, description="人物理解")
    persona_consistency_verify: float = Field(ge=0, le=4, description="人物一致性验证")
    persona_expression_guide: float = Field(ge=0, le=4, description="人物表达引导")

    # Group 4 - structured guidance (max 15)
    interview_structure: float = Field(ge=0, le=6, description="访谈结构")
    context_memory: float = Field(ge=0, le=5, description="上下文记忆")
    rhythm_control: float = Field(ge=0, le=4, description="节奏控制")

    # Group 5 - question quality (max 15)
    question_quality: float = Field(ge=0, le=7, description="问题质量")
    follow_up_depth: float = Field(ge=0, le=5, description="追问能力")
    non_leading: float = Field(ge=0, le=3, description="非引导性")

    total_score: float = Field(ge=0, le=100)
    rationale: str = ""

    # Top-level group aggregates, kept so the payload lines up with historical
    # JSON. They may be omitted or filled in by the model; either way the
    # validator below overwrites them with values derived from the detail items.
    emotion_score: float = Field(default=0, ge=0, le=30)
    information_score: float = Field(default=0, ge=0, le=25)
    persona_score: float = Field(default=0, ge=0, le=15)
    structure_score: float = Field(default=0, ge=0, le=15)
    question_score: float = Field(default=0, ge=0, le=15)

    @model_validator(mode="after")
    def _sync_aggregates_and_total(self) -> Self:
        """Recompute the group aggregates and cross-check ``total_score``.

        Returns:
            This instance, with the five ``*_score`` aggregates replaced by
            the sums of their detail items. ``total_score`` itself is only
            checked, never rewritten.

        Raises:
            ValueError: If ``total_score`` differs from the sum of all detail
                items by more than 0.51.
        """
        # Aggregate field name -> sum of its detail items. Insertion order is
        # the order in which the subtotals are added up and written back.
        subtotals = {
            "emotion_score": (
                self.emotion_carry
                + self.empathy_depth
                + self.emotion_safety
                + self.emotion_guidance
            ),
            "information_score": (
                self.fact_mining
                + self.info_completeness_guide
                + self.info_depth_mining
            ),
            "persona_score": (
                self.persona_understanding
                + self.persona_consistency_verify
                + self.persona_expression_guide
            ),
            "structure_score": (
                self.interview_structure + self.context_memory + self.rhythm_control
            ),
            "question_score": (
                self.question_quality + self.follow_up_depth + self.non_leading
            ),
        }

        # Plain left-to-right float addition, seeded with the first subtotal.
        parts = iter(subtotals.values())
        detail_sum = next(parts)
        for part in parts:
            detail_sum = detail_sum + part

        # Presumably the slack absorbs rounding of the reported total to the
        # nearest whole point - TODO confirm against the rubric.
        tolerance = 0.51
        if abs(detail_sum - self.total_score) > tolerance:
            raise ValueError(
                f"total_score ({self.total_score}) 与细项合计 ({detail_sum:.2f}) 不一致"
            )

        # Written through object.__setattr__, i.e. bypassing the model's own
        # attribute-assignment handling.
        for field_name, subtotal in subtotals.items():
            object.__setattr__(self, field_name, subtotal)
        return self
|
||
|
||
|
||
# 整条 transcript 与单轮使用同一套细项
|
||
# Whole-transcript judging reuses the single-turn schema: this binds a second
# name to the very same class rather than defining a subclass.
ConversationJudgeOutput = TurnJudgeOutput
|
||
|
||
|
||
class MemoirJudgeOutput(BaseModel):
    """Score sheet for a finished memoir draft (100 points in total).

    Detail items are organised into nine groups whose caps add up to 100; the
    per-item caps follow the rubric. After field validation the nine
    ``*_score`` group aggregates are recomputed from the detail items, and
    ``total_score`` must agree with the sum of all detail items.
    """

    # NOTE(review): pydantic exposes the class docstring as the JSON-schema
    # description; confirm nothing relies on the earlier wording.

    # Group 1 - authenticity and coverage (subtotal max 23; tightened from 25)
    mem_fidelity: float = Field(ge=0, le=9, description="记忆忠实度")
    mem_factual_accuracy: float = Field(ge=0, le=5, description="事实准确性")
    mem_factual_coverage: float = Field(ge=0, le=5, description="事实覆盖率")
    mem_traceability: float = Field(ge=0, le=4, description="记忆可追溯性")

    # Group 2 - information quality (subtotal max 14; tightened from 15)
    info_slot_coverage: float = Field(ge=0, le=6, description="槽位覆盖度")
    info_sufficiency: float = Field(ge=0, le=4, description="信息充分性")
    info_density: float = Field(ge=0, le=4, description="信息密度")

    # Group 3 - narrative structure (subtotal max 14; tightened from 15)
    narr_structure: float = Field(ge=0, le=6, description="故事结构")
    narr_paragraphs: float = Field(ge=0, le=5, description="段落组织")
    narr_pacing: float = Field(ge=0, le=3, description="节奏控制")

    # Group 4 - language and prose (subtotal max 18; tightened from 20,
    # together with the caps of its six items)
    lang_fluency: float = Field(ge=0, le=3, description="语言流畅度")
    lang_conciseness: float = Field(ge=0, le=3, description="表达精炼度")
    lang_literary: float = Field(ge=0, le=4, description="文笔质量")
    lang_controlled_expansion: float = Field(
        ge=0, le=4, description="控制性扩写能力"
    )
    lang_detail: float = Field(ge=0, le=2, description="细节还原与强化")
    lang_style: float = Field(ge=0, le=2, description="风格一致性")

    # Group 5 - emotional expression (subtotal max 9; tightened from 10)
    emo_authenticity: float = Field(ge=0, le=5, description="情感真实度")
    emo_depth: float = Field(ge=0, le=4, description="情感深度")

    # Group 6 - character modelling (subtotal max 9; tightened from 10)
    char_understanding: float = Field(ge=0, le=4, description="人物理解")
    char_consistency: float = Field(ge=0, le=3, description="人物一致性")
    char_integration: float = Field(ge=0, le=2, description="人物融入度")

    # Group 7 - coherence (subtotal max 4; tightened from 5)
    coh_timeline: float = Field(ge=0, le=2, description="时间线一致性")
    coh_cross_chapter: float = Field(ge=0, le=2, description="跨章节关联")

    # Group 8 - expressive richness (subtotal max 5)
    rich_analogy: float = Field(ge=0, le=3, description="类比与引用")
    rich_diversity: float = Field(ge=0, le=2, description="表达多样性")

    # Group 9 - publication readiness (subtotal max 4; tightened from 5)
    pub_editorial_cost: float = Field(ge=0, le=2, description="编辑成本")
    pub_completeness: float = Field(ge=0, le=2, description="完整度")

    total_score: float = Field(ge=0, le=100)
    rationale: str = ""

    # Group aggregates. Optional on input; the validator below overwrites them
    # with values derived from the detail items.
    authenticity_score: float = Field(default=0, ge=0, le=23)
    information_score: float = Field(default=0, ge=0, le=14)
    narrative_score: float = Field(default=0, ge=0, le=14)
    language_score: float = Field(default=0, ge=0, le=18)
    emotion_score: float = Field(default=0, ge=0, le=9)
    character_score: float = Field(default=0, ge=0, le=9)
    coherence_score: float = Field(default=0, ge=0, le=4)
    richness_score: float = Field(default=0, ge=0, le=5)
    publish_ready_score: float = Field(default=0, ge=0, le=4)

    @model_validator(mode="after")
    def _sync_aggregates_and_total(self) -> Self:
        """Recompute the group aggregates and cross-check ``total_score``.

        Returns:
            This instance, with the nine ``*_score`` aggregates replaced by
            the sums of their detail items. ``total_score`` itself is only
            checked, never rewritten.

        Raises:
            ValueError: If ``total_score`` differs from the sum of all detail
                items by more than 0.51.
        """
        # Aggregate field name -> sum of its detail items. Insertion order is
        # the order in which the subtotals are added up and written back.
        subtotals = {
            "authenticity_score": (
                self.mem_fidelity
                + self.mem_factual_accuracy
                + self.mem_factual_coverage
                + self.mem_traceability
            ),
            "information_score": (
                self.info_slot_coverage + self.info_sufficiency + self.info_density
            ),
            "narrative_score": (
                self.narr_structure + self.narr_paragraphs + self.narr_pacing
            ),
            "language_score": (
                self.lang_fluency
                + self.lang_conciseness
                + self.lang_literary
                + self.lang_controlled_expansion
                + self.lang_detail
                + self.lang_style
            ),
            "emotion_score": self.emo_authenticity + self.emo_depth,
            "character_score": (
                self.char_understanding
                + self.char_consistency
                + self.char_integration
            ),
            "coherence_score": self.coh_timeline + self.coh_cross_chapter,
            "richness_score": self.rich_analogy + self.rich_diversity,
            "publish_ready_score": self.pub_editorial_cost + self.pub_completeness,
        }

        # Plain left-to-right float addition, seeded with the first subtotal.
        parts = iter(subtotals.values())
        detail_sum = next(parts)
        for part in parts:
            detail_sum = detail_sum + part

        # Presumably the slack absorbs rounding of the reported total to the
        # nearest whole point - TODO confirm against the rubric.
        tolerance = 0.51
        if abs(detail_sum - self.total_score) > tolerance:
            raise ValueError(
                f"total_score ({self.total_score}) 与分项合计 ({detail_sum:.2f}) 不一致"
            )

        # Written through object.__setattr__, i.e. bypassing the model's own
        # attribute-assignment handling.
        for field_name, subtotal in subtotals.items():
            object.__setattr__(self, field_name, subtotal)
        return self
|