Files
life-echo/api/app/features/evaluation/models.py

195 lines
6.5 KiB
Python
Raw Normal View History

"""ORM内部回归评测与生产 conversation 表隔离)。"""
from __future__ import annotations
from sqlalchemy import (
JSON,
Boolean,
Column,
DateTime,
Float,
ForeignKey,
Integer,
String,
Text,
UniqueConstraint,
)
from sqlalchemy.orm import relationship
from app.core.db import Base, utc_now
class EvalRegressionSet(Base):
    """A named collection of evaluation cases that experiments are run against."""

    __tablename__ = "eval_regression_sets"

    # --- columns ---
    id = Column(String, primary_key=True)
    name = Column(String, nullable=False)
    description = Column(Text, nullable=True)
    # Filled in on the Python side via ``utc_now`` when no value is supplied.
    created_at = Column(DateTime(timezone=True), nullable=False, default=utc_now)

    # --- relationships ---
    # Cases are owned by the set: ORM-level deletes cascade to them and a case
    # removed from this collection is deleted as an orphan.
    cases = relationship(
        "EvalCase", cascade="all, delete-orphan", back_populates="regression_set"
    )
    # Experiments use the default cascade (no delete / delete-orphan).
    experiments = relationship("EvalExperiment", back_populates="regression_set")
class EvalCase(Base):
    """Immutable list of user turns, taken from a real-conversation snapshot or an import script."""

    __tablename__ = "eval_cases"

    # --- columns ---
    id = Column(String, primary_key=True)
    regression_set_id = Column(
        String, ForeignKey("eval_regression_sets.id"), nullable=False
    )
    # Provenance fields: plain indexed strings, deliberately not ForeignKeys.
    source_conversation_id = Column(String, index=True, nullable=True)
    source_user_id = Column(String, index=True, nullable=True)
    title = Column(String, nullable=True)
    # JSON payload holding the user turns (exact schema is defined by the
    # writers of this column -- not visible in this module).
    user_utterances = Column(JSON, nullable=False)
    reference_memoir_markdown = Column(Text, nullable=True)
    # Defaults to False both in Python (``default``) and in the DDL
    # (``server_default``).
    is_protected = Column(
        Boolean, default=False, server_default="false", nullable=False
    )
    meta = Column(JSON, nullable=True)
    created_at = Column(DateTime(timezone=True), nullable=False, default=utc_now)

    # --- relationships ---
    regression_set = relationship("EvalRegressionSet", back_populates="cases")
    runs = relationship("EvalRun", back_populates="case")
class EvalVersion(Base):
    """Baseline or candidate replay configuration (model, system supplement, etc.)."""

    __tablename__ = "eval_versions"

    # --- columns ---
    id = Column(String, primary_key=True)
    name = Column(String, nullable=False)
    runner_kind = Column(String, default="llm_chat_v1", nullable=False)
    config_json = Column(JSON, nullable=True)
    created_at = Column(DateTime(timezone=True), nullable=False, default=utc_now)

    # --- relationships ---
    # EvalExperiment has two foreign keys into this table, so each collection
    # names the column it joins on.
    experiments_as_baseline = relationship(
        "EvalExperiment",
        back_populates="baseline_version",
        foreign_keys="EvalExperiment.baseline_version_id",
    )
    experiments_as_candidate = relationship(
        "EvalExperiment",
        back_populates="candidate_version",
        foreign_keys="EvalExperiment.candidate_version_id",
    )
class EvalExperiment(Base):
    """One comparison of a baseline version against a candidate version on a regression set.

    Deleting an experiment through the ORM also deletes its runs and its gate
    verdict (both relationships use ``cascade="all, delete-orphan"``).
    """

    __tablename__ = "eval_experiments"

    # --- columns ---
    id = Column(String, primary_key=True)
    name = Column(String, nullable=False)
    regression_set_id = Column(
        String, ForeignKey("eval_regression_sets.id"), nullable=False
    )
    # Two foreign keys into eval_versions: one per side of the comparison.
    baseline_version_id = Column(String, ForeignKey("eval_versions.id"), nullable=False)
    candidate_version_id = Column(
        String, ForeignKey("eval_versions.id"), nullable=False
    )
    rubric_pack = Column(String, nullable=False, default="conversation_v1+memoir_v1")
    composite_weights_json = Column(JSON, nullable=True)
    status = Column(String, nullable=False, default="pending")
    error_message = Column(Text, nullable=True)
    created_at = Column(DateTime(timezone=True), default=utc_now, nullable=False)
    completed_at = Column(DateTime(timezone=True), nullable=True)

    # --- relationships ---
    regression_set = relationship("EvalRegressionSet", back_populates="experiments")
    # ``back_populates`` mirrors the declaration on EvalVersion so that both
    # directions stay in sync in-session; previously only the EvalVersion side
    # declared the pairing, so assigning ``baseline_version`` /
    # ``candidate_version`` here did not update the version's collections.
    # ``foreign_keys`` disambiguates the two FKs that target eval_versions.
    baseline_version = relationship(
        "EvalVersion",
        foreign_keys=[baseline_version_id],
        back_populates="experiments_as_baseline",
    )
    candidate_version = relationship(
        "EvalVersion",
        foreign_keys=[candidate_version_id],
        back_populates="experiments_as_candidate",
    )
    runs = relationship(
        "EvalRun", back_populates="experiment", cascade="all, delete-orphan"
    )
    # ``uselist=False`` exposes the verdict as a scalar (one-to-one).
    gate_verdict = relationship(
        "EvalGateVerdict",
        back_populates="experiment",
        uselist=False,
        cascade="all, delete-orphan",
    )
class EvalRun(Base):
    """A single run: one experiment x one case x baseline or candidate."""

    __tablename__ = "eval_runs"
    # At most one run per (experiment, case, side) combination.
    __table_args__ = (
        UniqueConstraint(
            "experiment_id", "case_id", "side", name="uq_eval_run_experiment_case_side"
        ),
    )

    # --- identity ---
    id = Column(String, primary_key=True)
    experiment_id = Column(String, ForeignKey("eval_experiments.id"), nullable=False)
    case_id = Column(String, ForeignKey("eval_cases.id"), nullable=False)
    # Which side of the comparison this run belongs to
    # (presumably "baseline" / "candidate" -- confirm against the writers).
    side = Column(String, nullable=False)

    # --- lifecycle and outputs ---
    status = Column(String, default="pending", nullable=False)
    error_message = Column(Text, nullable=True)
    memoir_markdown = Column(Text, nullable=True)

    # --- scores ---
    conversation_score_total = Column(Float, nullable=True)
    memoir_score_total = Column(Float, nullable=True)
    composite_score = Column(Float, nullable=True)
    judge_bundle_json = Column(JSON, nullable=True)

    # --- timing ---
    started_at = Column(DateTime(timezone=True), nullable=True)
    completed_at = Column(DateTime(timezone=True), nullable=True)

    # --- relationships ---
    experiment = relationship("EvalExperiment", back_populates="runs")
    case = relationship("EvalCase", back_populates="runs")
    # Turns are owned by the run and are loaded ordered by ``turn_index``.
    turns = relationship(
        "EvalRunTurn",
        order_by="EvalRunTurn.turn_index",
        cascade="all, delete-orphan",
        back_populates="run",
    )
class EvalRunTurn(Base):
    """One user-utterance / assistant-reply exchange recorded within a run."""

    __tablename__ = "eval_run_turns"
    # A given turn index may appear only once within a run.
    __table_args__ = (
        UniqueConstraint("run_id", "turn_index", name="uq_eval_run_turn_index"),
    )

    # --- columns ---
    id = Column(String, primary_key=True)
    run_id = Column(String, ForeignKey("eval_runs.id"), nullable=False)
    turn_index = Column(Integer, nullable=False)
    user_utterance = Column(Text, nullable=False)
    assistant_reply = Column(Text, nullable=True)
    duration_ms = Column(Integer, nullable=True)
    judge_scores_json = Column(JSON, nullable=True)
    judge_rationale = Column(Text, nullable=True)

    # --- relationships ---
    run = relationship("EvalRun", back_populates="turns")
class EvalGateVerdict(Base):
    """Pass/fail gate result stored for an experiment."""

    __tablename__ = "eval_gate_verdicts"

    # --- columns ---
    id = Column(String, primary_key=True)
    # ``unique=True``: at most one verdict row per experiment.
    experiment_id = Column(
        String, ForeignKey("eval_experiments.id"), unique=True, nullable=False
    )
    passed = Column(Boolean, nullable=False)
    mean_composite_delta = Column(Float, nullable=True)
    protected_regressions_json = Column(JSON, nullable=True)
    details_json = Column(JSON, nullable=True)
    computed_at = Column(DateTime(timezone=True), nullable=False, default=utc_now)

    # --- relationships ---
    experiment = relationship("EvalExperiment", back_populates="gate_verdict")