- 访谈:新增 interview_state_hints,联动 orchestrator 与提示词 - 回忆录:story_pipeline_sync/state/memory/post_commit 与 Celery 任务调整 - 基建:开发用 celery broker、compose/development 脚本、依赖注入 - eval-web:移除数据集/实验/版本等页面与流式轮询,突出 Playground - 文档与单测同步
54 lines
2.4 KiB
Python
54 lines
2.4 KiB
Python
"""评测评审 LLM 装配:多供应商与上下文预算。"""
|
|
|
|
import pytest
|
|
|
|
from app.core.config import settings
|
|
from app.core.dependencies import build_eval_judge_llm_spec
|
|
from app.features.evaluation.judge_service import (
|
|
eval_judge_compare_transcript_each_max_chars_for_context,
|
|
eval_judge_conversation_transcript_max_chars_for_context,
|
|
)
|
|
|
|
|
|
def test_build_eval_judge_zhipu_uses_bigmodel_defaults(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check the Zhipu judge spec built with a blank judge key and a Zhipu key.

    The dedicated judge API key is patched to an empty string while the Zhipu
    key and judge model are set; the builder must still return a spec whose
    provider, resolved model, LLM handle and context window match the
    patched/configured settings.
    """
    patched_settings = {
        "eval_judge_api_key": "",
        "zhipu_api_key": "z-test",
        "eval_judge_model": "glm-5",
    }
    for attr_name, attr_value in patched_settings.items():
        monkeypatch.setattr(settings, attr_name, attr_value)

    judge_spec = build_eval_judge_llm_spec("zhipu", None)

    assert judge_spec is not None
    assert judge_spec.provider == "zhipu"
    assert judge_spec.resolved_model == "glm-5"
    assert judge_spec.llm is not None
    # No provider-specific window is patched here, so the spec is compared
    # against whatever the shared judge setting currently holds.
    assert judge_spec.context_window_tokens == settings.eval_judge_context_window_tokens
|
|
|
|
|
|
def test_build_eval_judge_zhipu_request_model_override(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check that an explicitly passed model name replaces the configured judge model.

    The configured model is ``glm-5``; passing a different model as the second
    argument must be reflected in ``resolved_model``.
    """
    for attr_name, attr_value in (
        ("eval_judge_api_key", "e-test"),
        ("eval_judge_model", "glm-5"),
    ):
        monkeypatch.setattr(settings, attr_name, attr_value)

    requested_model = "glm-4-plus"
    judge_spec = build_eval_judge_llm_spec("zhipu", requested_model)

    assert judge_spec is not None
    assert judge_spec.resolved_model == requested_model
|
|
|
|
|
|
def test_build_eval_judge_deepseek_requires_key(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check that no DeepSeek judge spec is built when both API key settings are empty."""
    # Blank out both key settings this test touches so no key is available.
    for key_setting in ("deepseek_api_key", "llm_api_key"):
        monkeypatch.setattr(settings, key_setting, "")

    judge_spec = build_eval_judge_llm_spec("deepseek", None)

    assert judge_spec is None
|
|
|
|
|
|
def test_build_eval_judge_deepseek_context_budget(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check the DeepSeek judge spec's context window and the derived transcript budgets.

    First verifies the spec built for ``deepseek`` carries the patched model
    and context window. Then verifies that both transcript character-budget
    helpers return a strictly smaller value for the 64k window than for a
    200k window.
    """
    deepseek_window = 64_000
    # Larger reference window (presumably the GLM judge's size -- confirm).
    larger_window = 200_000

    patched_settings = {
        "deepseek_api_key": "d-test",
        "eval_judge_deepseek_model": "deepseek-reasoner",
        "eval_judge_deepseek_context_window_tokens": deepseek_window,
    }
    for attr_name, attr_value in patched_settings.items():
        monkeypatch.setattr(settings, attr_name, attr_value)

    judge_spec = build_eval_judge_llm_spec("deepseek", None)

    assert judge_spec is not None
    assert judge_spec.provider == "deepseek"
    assert judge_spec.resolved_model == "deepseek-reasoner"
    assert judge_spec.context_window_tokens == deepseek_window

    # Conversation budget first, then per-transcript compare budget; each must
    # shrink when the context window shrinks.
    for max_chars_for_context in (
        eval_judge_conversation_transcript_max_chars_for_context,
        eval_judge_compare_transcript_each_max_chars_for_context,
    ):
        assert max_chars_for_context(deepseek_window) < max_chars_for_context(larger_window)
|