"""ORM:内部回归评测(与生产 conversation 表隔离)。""" from __future__ import annotations from sqlalchemy import ( JSON, Boolean, Column, DateTime, Float, ForeignKey, Integer, String, Text, UniqueConstraint, ) from sqlalchemy.orm import relationship from app.core.db import Base, utc_now class EvalRegressionSet(Base): __tablename__ = "eval_regression_sets" id = Column(String, primary_key=True) name = Column(String, nullable=False) description = Column(Text, nullable=True) created_at = Column(DateTime(timezone=True), default=utc_now, nullable=False) cases = relationship( "EvalCase", back_populates="regression_set", cascade="all, delete-orphan", ) experiments = relationship("EvalExperiment", back_populates="regression_set") class EvalCase(Base): """从真实会话快照或导入脚本的不可变用户轮次列表。""" __tablename__ = "eval_cases" id = Column(String, primary_key=True) regression_set_id = Column( String, ForeignKey("eval_regression_sets.id"), nullable=False ) source_conversation_id = Column(String, nullable=True, index=True) source_user_id = Column(String, nullable=True, index=True) title = Column(String, nullable=True) user_utterances = Column(JSON, nullable=False) reference_memoir_markdown = Column(Text, nullable=True) is_protected = Column( Boolean, nullable=False, default=False, server_default="false" ) meta = Column(JSON, nullable=True) created_at = Column(DateTime(timezone=True), default=utc_now, nullable=False) regression_set = relationship("EvalRegressionSet", back_populates="cases") runs = relationship("EvalRun", back_populates="case") class EvalVersion(Base): """基线或候选:回放配置(模型、system 补充等)。""" __tablename__ = "eval_versions" id = Column(String, primary_key=True) name = Column(String, nullable=False) runner_kind = Column(String, nullable=False, default="llm_chat_v1") config_json = Column(JSON, nullable=True) created_at = Column(DateTime(timezone=True), default=utc_now, nullable=False) experiments_as_baseline = relationship( "EvalExperiment", foreign_keys="EvalExperiment.baseline_version_id", back_populates="baseline_version", ) experiments_as_candidate = relationship( "EvalExperiment", foreign_keys="EvalExperiment.candidate_version_id", back_populates="candidate_version", ) class EvalExperiment(Base): __tablename__ = "eval_experiments" id = Column(String, primary_key=True) name = Column(String, nullable=False) regression_set_id = Column( String, ForeignKey("eval_regression_sets.id"), nullable=False ) baseline_version_id = Column(String, ForeignKey("eval_versions.id"), nullable=False) candidate_version_id = Column( String, ForeignKey("eval_versions.id"), nullable=False ) rubric_pack = Column(String, nullable=False, default="conversation_v1+memoir_v1") composite_weights_json = Column(JSON, nullable=True) status = Column(String, nullable=False, default="pending") error_message = Column(Text, nullable=True) created_at = Column(DateTime(timezone=True), default=utc_now, nullable=False) completed_at = Column(DateTime(timezone=True), nullable=True) regression_set = relationship("EvalRegressionSet", back_populates="experiments") baseline_version = relationship( "EvalVersion", foreign_keys=[baseline_version_id], ) candidate_version = relationship( "EvalVersion", foreign_keys=[candidate_version_id], ) runs = relationship( "EvalRun", back_populates="experiment", cascade="all, delete-orphan" ) gate_verdict = relationship( "EvalGateVerdict", back_populates="experiment", uselist=False, cascade="all, delete-orphan", ) class EvalRun(Base): """单次:某 experiment × 某 case × baseline 或 candidate。""" __tablename__ = "eval_runs" id = Column(String, primary_key=True) experiment_id = Column(String, ForeignKey("eval_experiments.id"), nullable=False) case_id = Column(String, ForeignKey("eval_cases.id"), nullable=False) side = Column(String, nullable=False) status = Column(String, nullable=False, default="pending") error_message = Column(Text, nullable=True) memoir_markdown = Column(Text, nullable=True) conversation_score_total = Column(Float, nullable=True) memoir_score_total = Column(Float, nullable=True) composite_score = Column(Float, nullable=True) judge_bundle_json = Column(JSON, nullable=True) started_at = Column(DateTime(timezone=True), nullable=True) completed_at = Column(DateTime(timezone=True), nullable=True) experiment = relationship("EvalExperiment", back_populates="runs") case = relationship("EvalCase", back_populates="runs") turns = relationship( "EvalRunTurn", back_populates="run", cascade="all, delete-orphan", order_by="EvalRunTurn.turn_index", ) __table_args__ = ( UniqueConstraint( "experiment_id", "case_id", "side", name="uq_eval_run_experiment_case_side", ), ) class EvalRunTurn(Base): __tablename__ = "eval_run_turns" id = Column(String, primary_key=True) run_id = Column(String, ForeignKey("eval_runs.id"), nullable=False) turn_index = Column(Integer, nullable=False) user_utterance = Column(Text, nullable=False) assistant_reply = Column(Text, nullable=True) duration_ms = Column(Integer, nullable=True) judge_scores_json = Column(JSON, nullable=True) judge_rationale = Column(Text, nullable=True) run = relationship("EvalRun", back_populates="turns") __table_args__ = ( UniqueConstraint("run_id", "turn_index", name="uq_eval_run_turn_index"), ) class EvalGateVerdict(Base): __tablename__ = "eval_gate_verdicts" id = Column(String, primary_key=True) experiment_id = Column( String, ForeignKey("eval_experiments.id"), nullable=False, unique=True ) passed = Column(Boolean, nullable=False) mean_composite_delta = Column(Float, nullable=True) protected_regressions_json = Column(JSON, nullable=True) details_json = Column(JSON, nullable=True) computed_at = Column(DateTime(timezone=True), default=utc_now, nullable=False) experiment = relationship("EvalExperiment", back_populates="gate_verdict")