"""评审 LLM 结构化输出(json_object)。 成稿(回忆录)子项上限已自洽为 **总分 100**(由原 110 分表等比例收紧整数档,见附件 rubric)。 """ from __future__ import annotations from typing import Self from pydantic import BaseModel, Field, model_validator class TurnJudgeOutput(BaseModel): """单轮 / 整段对话质量(情绪强化版 100 分,15 个细项)。""" # 一、情绪价值与陪伴感(30) emotion_carry: float = Field(ge=0, le=10, description="情绪承接能力") empathy_depth: float = Field(ge=0, le=8, description="共情深度") emotion_safety: float = Field(ge=0, le=6, description="情绪安全感") emotion_guidance: float = Field(ge=0, le=6, description="情绪引导能力") # 二、信息获取能力(25) fact_mining: float = Field(ge=0, le=8, description="关键事实挖掘") info_completeness_guide: float = Field(ge=0, le=8, description="信息完整性引导") info_depth_mining: float = Field(ge=0, le=9, description="信息深度挖掘") # 三、人物建模能力(15) persona_understanding: float = Field(ge=0, le=7, description="人物理解") persona_consistency_verify: float = Field(ge=0, le=4, description="人物一致性验证") persona_expression_guide: float = Field(ge=0, le=4, description="人物表达引导") # 四、结构化引导(15) interview_structure: float = Field(ge=0, le=6, description="访谈结构") context_memory: float = Field(ge=0, le=5, description="上下文记忆") rhythm_control: float = Field(ge=0, le=4, description="节奏控制") # 五、提问质量(15) question_quality: float = Field(ge=0, le=7, description="问题质量") follow_up_depth: float = Field(ge=0, le=5, description="追问能力") non_leading: float = Field(ge=0, le=3, description="非引导性") total_score: float = Field(ge=0, le=100) rationale: str = "" # 与历史 JSON 对齐的一级聚合分(由细项派生,可缺省由模型填写) emotion_score: float = Field(default=0, ge=0, le=30) information_score: float = Field(default=0, ge=0, le=25) persona_score: float = Field(default=0, ge=0, le=15) structure_score: float = Field(default=0, ge=0, le=15) question_score: float = Field(default=0, ge=0, le=15) @model_validator(mode="after") def _sync_aggregates_and_total(self) -> Self: emotion = ( self.emotion_carry + self.empathy_depth + self.emotion_safety + self.emotion_guidance ) information = ( self.fact_mining + self.info_completeness_guide + self.info_depth_mining ) persona = ( self.persona_understanding + self.persona_consistency_verify + self.persona_expression_guide ) structure = self.interview_structure + self.context_memory + self.rhythm_control question = self.question_quality + self.follow_up_depth + self.non_leading expected = emotion + information + persona + structure + question if abs(expected - self.total_score) > 0.51: raise ValueError( f"total_score ({self.total_score}) 与细项合计 ({expected:.2f}) 不一致" ) object.__setattr__(self, "emotion_score", emotion) object.__setattr__(self, "information_score", information) object.__setattr__(self, "persona_score", persona) object.__setattr__(self, "structure_score", structure) object.__setattr__(self, "question_score", question) return self # 整条 transcript 与单轮使用同一套细项 ConversationJudgeOutput = TurnJudgeOutput class MemoirJudgeOutput(BaseModel): """成稿回忆录评分(总分 100,子项上限见 rubric)。""" # 一、真实性与覆盖(小计最高 23;由原 25 收紧) mem_fidelity: float = Field(ge=0, le=9, description="记忆忠实度") mem_factual_accuracy: float = Field(ge=0, le=5, description="事实准确性") mem_factual_coverage: float = Field(ge=0, le=5, description="事实覆盖率") mem_traceability: float = Field(ge=0, le=4, description="记忆可追溯性") # 二、信息质量(小计最高 14;由原 15 收紧) info_slot_coverage: float = Field(ge=0, le=6, description="槽位覆盖度") info_sufficiency: float = Field(ge=0, le=4, description="信息充分性") info_density: float = Field(ge=0, le=4, description="信息密度") # 三、叙事结构(小计最高 14;由原 15 收紧) narr_structure: float = Field(ge=0, le=6, description="故事结构") narr_paragraphs: float = Field(ge=0, le=5, description="段落组织") narr_pacing: float = Field(ge=0, le=3, description="节奏控制") # 四、语言与文笔(小计最高 18;由原 20 及六项上限一并收紧) lang_fluency: float = Field(ge=0, le=3, description="语言流畅度") lang_conciseness: float = Field(ge=0, le=3, description="表达精炼度") lang_literary: float = Field(ge=0, le=4, description="文笔质量") lang_controlled_expansion: float = Field(ge=0, le=4, description="控制性扩写能力") lang_detail: float = Field(ge=0, le=2, description="细节还原与强化") lang_style: float = Field(ge=0, le=2, description="风格一致性") # 五、情感表达(小计最高 9;由原 10 收紧) emo_authenticity: float = Field(ge=0, le=5, description="情感真实度") emo_depth: float = Field(ge=0, le=4, description="情感深度") # 六、人物建模(小计最高 9;由原 10 收紧) char_understanding: float = Field(ge=0, le=4, description="人物理解") char_consistency: float = Field(ge=0, le=3, description="人物一致性") char_integration: float = Field(ge=0, le=2, description="人物融入度") # 七、连贯性(小计最高 4;由原 5 收紧) coh_timeline: float = Field(ge=0, le=2, description="时间线一致性") coh_cross_chapter: float = Field(ge=0, le=2, description="跨章节关联") # 八、表达丰富度(小计最高 5) rich_analogy: float = Field(ge=0, le=3, description="类比与引用") rich_diversity: float = Field(ge=0, le=2, description="表达多样性") # 九、出版就绪度(小计最高 4;由原 5 收紧) pub_editorial_cost: float = Field(ge=0, le=2, description="编辑成本") pub_completeness: float = Field(ge=0, le=2, description="完整度") total_score: float = Field(ge=0, le=100) rationale: str = "" authenticity_score: float = Field(default=0, ge=0, le=23) information_score: float = Field(default=0, ge=0, le=14) narrative_score: float = Field(default=0, ge=0, le=14) language_score: float = Field(default=0, ge=0, le=18) emotion_score: float = Field(default=0, ge=0, le=9) character_score: float = Field(default=0, ge=0, le=9) coherence_score: float = Field(default=0, ge=0, le=4) richness_score: float = Field(default=0, ge=0, le=5) publish_ready_score: float = Field(default=0, ge=0, le=4) @model_validator(mode="after") def _sync_aggregates_and_total(self) -> Self: authenticity = ( self.mem_fidelity + self.mem_factual_accuracy + self.mem_factual_coverage + self.mem_traceability ) information = ( self.info_slot_coverage + self.info_sufficiency + self.info_density ) narrative = self.narr_structure + self.narr_paragraphs + self.narr_pacing language = ( self.lang_fluency + self.lang_conciseness + self.lang_literary + self.lang_controlled_expansion + self.lang_detail + self.lang_style ) emotion = self.emo_authenticity + self.emo_depth character = ( self.char_understanding + self.char_consistency + self.char_integration ) coherence = self.coh_timeline + self.coh_cross_chapter richness = self.rich_analogy + self.rich_diversity publish = self.pub_editorial_cost + self.pub_completeness expected = ( authenticity + information + narrative + language + emotion + character + coherence + richness + publish ) if abs(expected - self.total_score) > 0.51: raise ValueError( f"total_score ({self.total_score}) 与分项合计 ({expected:.2f}) 不一致" ) object.__setattr__(self, "authenticity_score", authenticity) object.__setattr__(self, "information_score", information) object.__setattr__(self, "narrative_score", narrative) object.__setattr__(self, "language_score", language) object.__setattr__(self, "emotion_score", emotion) object.__setattr__(self, "character_score", character) object.__setattr__(self, "coherence_score", coherence) object.__setattr__(self, "richness_score", richness) object.__setattr__(self, "publish_ready_score", publish) return self