195 lines
6.5 KiB
Python
195 lines
6.5 KiB
Python
"""ORM:内部回归评测(与生产 conversation 表隔离)。"""
|
||
|
||
from __future__ import annotations
|
||
|
||
from sqlalchemy import (
|
||
JSON,
|
||
Boolean,
|
||
Column,
|
||
DateTime,
|
||
Float,
|
||
ForeignKey,
|
||
Integer,
|
||
String,
|
||
Text,
|
||
UniqueConstraint,
|
||
)
|
||
from sqlalchemy.orm import relationship
|
||
|
||
from app.core.db import Base, utc_now
|
||
|
||
|
||
class EvalRegressionSet(Base):
    """Named collection of evaluation cases that experiments reference."""

    __tablename__ = "eval_regression_sets"

    id = Column(String, primary_key=True)
    name = Column(String, nullable=False)
    description = Column(Text, nullable=True)
    created_at = Column(DateTime(timezone=True), nullable=False, default=utc_now)

    # Cases are owned by the set: the ORM cascade removes them together with
    # the set and deletes any case detached from this collection.
    cases = relationship(
        "EvalCase", cascade="all, delete-orphan", back_populates="regression_set"
    )
    # Experiments only reference the set; no delete cascade is configured here.
    experiments = relationship(
        "EvalExperiment",
        back_populates="regression_set",
    )
|
||
|
||
|
||
class EvalCase(Base):
    """Immutable list of user turns, taken from a snapshot of a real
    conversation or produced by an import script."""

    __tablename__ = "eval_cases"

    id = Column(String, primary_key=True)
    regression_set_id = Column(
        String,
        ForeignKey("eval_regression_sets.id"),
        nullable=False,
    )
    # Plain indexed strings (no ForeignKey) recording where the case came from.
    source_conversation_id = Column(String, index=True, nullable=True)
    source_user_id = Column(String, index=True, nullable=True)
    title = Column(String, nullable=True)
    # Required JSON payload; presumably a list of utterance strings -- the
    # exact schema is not visible in this module.
    user_utterances = Column(JSON, nullable=False)
    reference_memoir_markdown = Column(Text, nullable=True)
    # NOTE(review): a plain-string server_default is rendered as a quoted SQL
    # literal; confirm the target database accepts it for a BOOLEAN column.
    is_protected = Column(
        Boolean,
        default=False,
        server_default="false",
        nullable=False,
    )
    meta = Column(JSON, nullable=True)
    created_at = Column(DateTime(timezone=True), nullable=False, default=utc_now)

    regression_set = relationship(
        "EvalRegressionSet",
        back_populates="cases",
    )
    runs = relationship(
        "EvalRun",
        back_populates="case",
    )
|
||
|
||
|
||
class EvalVersion(Base):
    """Baseline or candidate: replay configuration (model, system-prompt
    supplement, etc.)."""

    __tablename__ = "eval_versions"

    id = Column(String, primary_key=True)
    name = Column(String, nullable=False)
    runner_kind = Column(String, default="llm_chat_v1", nullable=False)
    config_json = Column(JSON, nullable=True)
    created_at = Column(DateTime(timezone=True), nullable=False, default=utc_now)

    # EvalExperiment holds two foreign keys to this table, so each collection
    # names the column it joins on to keep the two relationships unambiguous.
    experiments_as_baseline = relationship(
        "EvalExperiment",
        back_populates="baseline_version",
        foreign_keys="EvalExperiment.baseline_version_id",
    )
    experiments_as_candidate = relationship(
        "EvalExperiment",
        back_populates="candidate_version",
        foreign_keys="EvalExperiment.candidate_version_id",
    )
|
||
|
||
|
||
class EvalExperiment(Base):
    """Comparison of a baseline and a candidate version over one regression set.

    Owns its runs and its (at most one) gate verdict: both are removed with the
    experiment through the ORM ``delete-orphan`` cascade.
    """

    __tablename__ = "eval_experiments"

    id = Column(String, primary_key=True)
    name = Column(String, nullable=False)
    regression_set_id = Column(
        String, ForeignKey("eval_regression_sets.id"), nullable=False
    )
    # Two foreign keys to the same table; the relationships below pick one each.
    baseline_version_id = Column(String, ForeignKey("eval_versions.id"), nullable=False)
    candidate_version_id = Column(
        String, ForeignKey("eval_versions.id"), nullable=False
    )
    rubric_pack = Column(String, nullable=False, default="conversation_v1+memoir_v1")
    composite_weights_json = Column(JSON, nullable=True)
    # Starts as "pending"; the other status values are set elsewhere.
    status = Column(String, nullable=False, default="pending")
    error_message = Column(Text, nullable=True)
    created_at = Column(DateTime(timezone=True), default=utc_now, nullable=False)
    completed_at = Column(DateTime(timezone=True), nullable=True)

    regression_set = relationship("EvalRegressionSet", back_populates="experiments")
    # back_populates mirrors the declarations on EvalVersion so the pairing is
    # symmetric: assigning a version here also updates the version's
    # experiments_as_baseline / experiments_as_candidate collection in memory.
    baseline_version = relationship(
        "EvalVersion",
        foreign_keys=[baseline_version_id],
        back_populates="experiments_as_baseline",
    )
    candidate_version = relationship(
        "EvalVersion",
        foreign_keys=[candidate_version_id],
        back_populates="experiments_as_candidate",
    )
    runs = relationship(
        "EvalRun", back_populates="experiment", cascade="all, delete-orphan"
    )
    # One-to-one: uselist=False exposes a single object instead of a list.
    gate_verdict = relationship(
        "EvalGateVerdict",
        back_populates="experiment",
        uselist=False,
        cascade="all, delete-orphan",
    )
|
||
|
||
|
||
class EvalRun(Base):
    """A single run: one experiment x one case x baseline or candidate."""

    __tablename__ = "eval_runs"
    # At most one run per (experiment, case, side) combination.
    __table_args__ = (
        UniqueConstraint(
            "experiment_id", "case_id", "side",
            name="uq_eval_run_experiment_case_side",
        ),
    )

    id = Column(String, primary_key=True)
    experiment_id = Column(
        String,
        ForeignKey("eval_experiments.id"),
        nullable=False,
    )
    case_id = Column(
        String,
        ForeignKey("eval_cases.id"),
        nullable=False,
    )
    # Presumably "baseline" or "candidate"; the allowed values are not
    # enforced in this module.
    side = Column(String, nullable=False)
    status = Column(String, default="pending", nullable=False)
    error_message = Column(Text, nullable=True)
    memoir_markdown = Column(Text, nullable=True)
    conversation_score_total = Column(Float, nullable=True)
    memoir_score_total = Column(Float, nullable=True)
    composite_score = Column(Float, nullable=True)
    judge_bundle_json = Column(JSON, nullable=True)
    started_at = Column(DateTime(timezone=True), nullable=True)
    completed_at = Column(DateTime(timezone=True), nullable=True)

    experiment = relationship(
        "EvalExperiment",
        back_populates="runs",
    )
    case = relationship(
        "EvalCase",
        back_populates="runs",
    )
    # Turns are owned by the run and are loaded ordered by turn_index.
    turns = relationship(
        "EvalRunTurn",
        order_by="EvalRunTurn.turn_index",
        cascade="all, delete-orphan",
        back_populates="run",
    )
|
||
|
||
|
||
class EvalRunTurn(Base):
    """One user-utterance / assistant-reply exchange recorded for a run."""

    __tablename__ = "eval_run_turns"
    # A turn index may appear only once within a given run.
    __table_args__ = (
        UniqueConstraint(
            "run_id",
            "turn_index",
            name="uq_eval_run_turn_index",
        ),
    )

    id = Column(String, primary_key=True)
    run_id = Column(
        String,
        ForeignKey("eval_runs.id"),
        nullable=False,
    )
    turn_index = Column(Integer, nullable=False)
    user_utterance = Column(Text, nullable=False)
    # The reply, timing and judge fields are all optional.
    assistant_reply = Column(Text, nullable=True)
    duration_ms = Column(Integer, nullable=True)
    judge_scores_json = Column(JSON, nullable=True)
    judge_rationale = Column(Text, nullable=True)

    run = relationship(
        "EvalRun",
        back_populates="turns",
    )
|
||
|
||
|
||
class EvalGateVerdict(Base):
    """Pass/fail verdict stored for an experiment (at most one per experiment)."""

    __tablename__ = "eval_gate_verdicts"

    id = Column(String, primary_key=True)
    # unique=True is what limits each experiment to a single verdict row.
    experiment_id = Column(
        String,
        ForeignKey("eval_experiments.id"),
        unique=True,
        nullable=False,
    )
    passed = Column(Boolean, nullable=False)
    mean_composite_delta = Column(Float, nullable=True)
    protected_regressions_json = Column(JSON, nullable=True)
    details_json = Column(JSON, nullable=True)
    computed_at = Column(DateTime(timezone=True), nullable=False, default=utc_now)

    experiment = relationship(
        "EvalExperiment",
        back_populates="gate_verdict",
    )
|