feat/ 导出开发容器内的数据用于评估
This commit is contained in:
194
api/app/features/evaluation/models.py
Normal file
194
api/app/features/evaluation/models.py
Normal file
@@ -0,0 +1,194 @@
|
||||
"""ORM:内部回归评测(与生产 conversation 表隔离)。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from sqlalchemy import (
|
||||
JSON,
|
||||
Boolean,
|
||||
Column,
|
||||
DateTime,
|
||||
Float,
|
||||
ForeignKey,
|
||||
Integer,
|
||||
String,
|
||||
Text,
|
||||
UniqueConstraint,
|
||||
)
|
||||
from sqlalchemy.orm import relationship
|
||||
|
||||
from app.core.db import Base, utc_now
|
||||
|
||||
|
||||
class EvalRegressionSet(Base):
    """A named regression set that groups evaluation cases and experiments."""

    __tablename__ = "eval_regression_sets"

    id = Column(String, primary_key=True)
    name = Column(String, nullable=False)
    description = Column(Text, nullable=True)
    # Timestamp is filled in on the Python side via ``utc_now`` at insert time.
    created_at = Column(
        DateTime(timezone=True),
        nullable=False,
        default=utc_now,
    )

    # Cases cascade with the set ("all, delete-orphan"): removing a case from
    # this collection, or deleting the set, deletes the case row as well.
    cases = relationship(
        "EvalCase",
        cascade="all, delete-orphan",
        back_populates="regression_set",
    )
    # Experiments carry no cascade here; they only reference the set.
    experiments = relationship(
        "EvalExperiment",
        back_populates="regression_set",
    )
|
||||
|
||||
|
||||
class EvalCase(Base):
    """Immutable list of user turns, taken from a real conversation snapshot or an import script."""

    __tablename__ = "eval_cases"

    id = Column(String, primary_key=True)
    regression_set_id = Column(
        String,
        ForeignKey("eval_regression_sets.id"),
        nullable=False,
    )
    # Provenance columns: plain indexed strings with no foreign key.
    source_conversation_id = Column(String, index=True, nullable=True)
    source_user_id = Column(String, index=True, nullable=True)
    title = Column(String, nullable=True)
    # JSON payload holding the user turns -- presumably a list of strings;
    # TODO confirm the exact shape against the import/snapshot code.
    user_utterances = Column(JSON, nullable=False)
    reference_memoir_markdown = Column(Text, nullable=True)
    # Defaults to False both in Python (``default``) and in the DDL
    # (``server_default``).
    is_protected = Column(
        Boolean,
        default=False,
        server_default="false",
        nullable=False,
    )
    meta = Column(JSON, nullable=True)
    created_at = Column(
        DateTime(timezone=True),
        nullable=False,
        default=utc_now,
    )

    regression_set = relationship(
        "EvalRegressionSet",
        back_populates="cases",
    )
    runs = relationship(
        "EvalRun",
        back_populates="case",
    )
|
||||
|
||||
|
||||
class EvalVersion(Base):
    """Baseline or candidate: replay configuration (model, system-prompt additions, etc.)."""

    __tablename__ = "eval_versions"

    id = Column(String, primary_key=True)
    name = Column(String, nullable=False)
    runner_kind = Column(String, default="llm_chat_v1", nullable=False)
    config_json = Column(JSON, nullable=True)
    created_at = Column(
        DateTime(timezone=True),
        nullable=False,
        default=utc_now,
    )

    # EvalExperiment has two foreign keys into this table, so each collection
    # names the column it joins on explicitly.
    experiments_as_baseline = relationship(
        "EvalExperiment",
        back_populates="baseline_version",
        foreign_keys="EvalExperiment.baseline_version_id",
    )
    experiments_as_candidate = relationship(
        "EvalExperiment",
        back_populates="candidate_version",
        foreign_keys="EvalExperiment.candidate_version_id",
    )
|
||||
|
||||
|
||||
class EvalExperiment(Base):
    """An experiment comparing a baseline and a candidate version on one regression set."""

    __tablename__ = "eval_experiments"

    id = Column(String, primary_key=True)
    name = Column(String, nullable=False)
    regression_set_id = Column(
        String, ForeignKey("eval_regression_sets.id"), nullable=False
    )
    # Two separate foreign keys into eval_versions: one per compared side.
    baseline_version_id = Column(String, ForeignKey("eval_versions.id"), nullable=False)
    candidate_version_id = Column(
        String, ForeignKey("eval_versions.id"), nullable=False
    )
    rubric_pack = Column(String, nullable=False, default="conversation_v1+memoir_v1")
    composite_weights_json = Column(JSON, nullable=True)
    status = Column(String, nullable=False, default="pending")
    error_message = Column(Text, nullable=True)
    created_at = Column(DateTime(timezone=True), default=utc_now, nullable=False)
    completed_at = Column(DateTime(timezone=True), nullable=True)

    regression_set = relationship("EvalRegressionSet", back_populates="experiments")
    # ``foreign_keys`` disambiguates the two FKs to eval_versions.
    # ``back_populates`` mirrors the declaration already present on
    # EvalVersion so the pairing is symmetric: assigning a version here also
    # updates the version's in-memory experiments collection, not only the
    # other way round.
    baseline_version = relationship(
        "EvalVersion",
        foreign_keys=[baseline_version_id],
        back_populates="experiments_as_baseline",
    )
    candidate_version = relationship(
        "EvalVersion",
        foreign_keys=[candidate_version_id],
        back_populates="experiments_as_candidate",
    )
    # Runs and the gate verdict cascade with the experiment
    # ("all, delete-orphan").
    runs = relationship(
        "EvalRun", back_populates="experiment", cascade="all, delete-orphan"
    )
    # ``uselist=False`` exposes the verdict as a scalar attribute rather than
    # a list.
    gate_verdict = relationship(
        "EvalGateVerdict",
        back_populates="experiment",
        uselist=False,
        cascade="all, delete-orphan",
    )
|
||||
|
||||
|
||||
class EvalRun(Base):
    """A single run: one experiment x one case x either the baseline or the candidate."""

    __tablename__ = "eval_runs"
    # At most one run per (experiment, case, side) combination.
    __table_args__ = (
        UniqueConstraint(
            "experiment_id",
            "case_id",
            "side",
            name="uq_eval_run_experiment_case_side",
        ),
    )

    id = Column(String, primary_key=True)
    experiment_id = Column(
        String,
        ForeignKey("eval_experiments.id"),
        nullable=False,
    )
    case_id = Column(
        String,
        ForeignKey("eval_cases.id"),
        nullable=False,
    )
    # Free-form string; presumably "baseline" or "candidate" per the class
    # docstring -- TODO confirm the exact values against the runner code.
    side = Column(String, nullable=False)
    status = Column(String, default="pending", nullable=False)
    error_message = Column(Text, nullable=True)
    memoir_markdown = Column(Text, nullable=True)
    conversation_score_total = Column(Float, nullable=True)
    memoir_score_total = Column(Float, nullable=True)
    composite_score = Column(Float, nullable=True)
    judge_bundle_json = Column(JSON, nullable=True)
    started_at = Column(DateTime(timezone=True), nullable=True)
    completed_at = Column(DateTime(timezone=True), nullable=True)

    experiment = relationship(
        "EvalExperiment",
        back_populates="runs",
    )
    case = relationship(
        "EvalCase",
        back_populates="runs",
    )
    # Turns cascade with the run and are loaded ordered by turn_index.
    turns = relationship(
        "EvalRunTurn",
        order_by="EvalRunTurn.turn_index",
        cascade="all, delete-orphan",
        back_populates="run",
    )
|
||||
|
||||
|
||||
class EvalRunTurn(Base):
    """One turn of a run: a user utterance plus the optional reply and judge output."""

    __tablename__ = "eval_run_turns"
    # A given turn index may appear only once within a run.
    __table_args__ = (
        UniqueConstraint(
            "run_id",
            "turn_index",
            name="uq_eval_run_turn_index",
        ),
    )

    id = Column(String, primary_key=True)
    run_id = Column(
        String,
        ForeignKey("eval_runs.id"),
        nullable=False,
    )
    turn_index = Column(Integer, nullable=False)
    user_utterance = Column(Text, nullable=False)
    assistant_reply = Column(Text, nullable=True)
    duration_ms = Column(Integer, nullable=True)
    judge_scores_json = Column(JSON, nullable=True)
    judge_rationale = Column(Text, nullable=True)

    run = relationship(
        "EvalRun",
        back_populates="turns",
    )
|
||||
|
||||
|
||||
class EvalGateVerdict(Base):
    """Gate verdict recorded for an experiment (pass/fail plus supporting details)."""

    __tablename__ = "eval_gate_verdicts"

    id = Column(String, primary_key=True)
    # ``unique=True`` limits each experiment to a single verdict row.
    experiment_id = Column(
        String,
        ForeignKey("eval_experiments.id"),
        unique=True,
        nullable=False,
    )
    passed = Column(Boolean, nullable=False)
    mean_composite_delta = Column(Float, nullable=True)
    protected_regressions_json = Column(JSON, nullable=True)
    details_json = Column(JSON, nullable=True)
    computed_at = Column(
        DateTime(timezone=True),
        nullable=False,
        default=utc_now,
    )

    experiment = relationship(
        "EvalExperiment",
        back_populates="gate_verdict",
    )
|
||||
Reference in New Issue
Block a user