Files
life-echo/api/tests/test_eval_judge_llm_spec.py
Sully 53e0065e3e refactor(api): TOML 配置 SSOT、统一错误契约、Auth/事务加固与可观测性 (#33)
配置 SSOT(TOML + .env)
统一错误契约
Auth 与事务边界
Redis / Celery 可靠性:业务 Redis(DB/0)与 Celery broker/backend(DB/1)显式拆分;连接池、sync client
可观测性(OpenTelemetry + LGTM)
2026-05-22 13:44:50 +08:00

76 lines
3.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""评测评审 LLM 装配:多供应商与上下文预算。"""
import pytest
from app.core.config import settings
from app.core.dependencies import build_eval_judge_llm_spec
from app.features.evaluation.constants import eval_cfg
from app.features.evaluation.judge_service import (
eval_judge_compare_transcript_each_max_chars_for_context,
eval_judge_conversation_transcript_max_chars_for_context,
)
def test_build_eval_judge_zhipu_uses_bigmodel_defaults(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Build a Zhipu judge spec with no request-level model and check its defaults.

    With an API key present and no model passed in, the resolved model must be
    the configured ``eval_cfg.judge_model`` and the context window must match
    ``eval_cfg.judge_context_window_tokens``.
    """
    # Arrange: provide a key and pin the configured default judge model.
    monkeypatch.setattr(settings, "zhipu_api_key", "z-test")
    monkeypatch.setattr(eval_cfg, "judge_model", "glm-5")

    # Act: no request-level model is supplied (second argument is None).
    judge_spec = build_eval_judge_llm_spec("zhipu", None)

    # Assert: the spec exists and mirrors the configured values.
    assert judge_spec is not None
    assert judge_spec.provider == "zhipu"
    assert judge_spec.resolved_model == "glm-5"
    assert judge_spec.llm is not None
    assert judge_spec.context_window_tokens == eval_cfg.judge_context_window_tokens
def test_build_eval_judge_zhipu_request_model_override(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A model passed with the request wins over the configured judge model."""
    # Arrange: the configured default differs from the model requested below.
    monkeypatch.setattr(settings, "zhipu_api_key", "e-test")
    monkeypatch.setattr(eval_cfg, "judge_model", "glm-5")

    # Act: explicitly request a different model.
    judge_spec = build_eval_judge_llm_spec("zhipu", "glm-4-plus")

    # Assert: the requested model is the one that gets resolved.
    assert judge_spec is not None
    assert judge_spec.resolved_model == "glm-4-plus"
def test_build_eval_judge_deepseek_requires_key(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """With an empty DeepSeek API key, no judge spec is produced."""
    # Arrange: blank out the key.
    monkeypatch.setattr(settings, "deepseek_api_key", "")

    # Act / Assert: the builder returns None instead of a spec.
    judge_spec = build_eval_judge_llm_spec("deepseek", None)
    assert judge_spec is None
def test_build_eval_judge_deepseek_v4_flash_non_thinking_default_path(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Default deepseek-v4-flash with thinking off sends an explicit ``disabled``.

    Per the original note, the explicit value is passed so the API does not
    fall back to its own default of thinking being enabled.
    """
    # Arrange: key present, default model pinned, thinking switched off.
    monkeypatch.setattr(settings, "deepseek_api_key", "d-test")
    monkeypatch.setattr(eval_cfg, "judge_deepseek_model", "deepseek-v4-flash")
    monkeypatch.setattr(eval_cfg, "judge_deepseek_thinking_enabled", False)

    # Act: no request-level model is supplied.
    judge_spec = build_eval_judge_llm_spec("deepseek", None)

    # Assert: the model is unchanged and thinking is explicitly disabled.
    expected_extra_body = {"thinking": {"type": "disabled"}}
    assert judge_spec is not None
    assert judge_spec.resolved_model == "deepseek-v4-flash"
    assert judge_spec.llm.extra_body == expected_extra_body
    assert judge_spec.llm.reasoning_effort is None
def test_build_eval_judge_deepseek_context_budget(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """DeepSeek spec carries its own context window, and budgets shrink with it.

    Also checks that both transcript character-budget helpers return a strictly
    smaller value for a 64k-token window than for a 200k-token window.
    """
    # Arrange: key present, legacy model name, 64k-token DeepSeek window.
    monkeypatch.setattr(settings, "deepseek_api_key", "d-test")
    monkeypatch.setattr(eval_cfg, "judge_deepseek_model", "deepseek-reasoner")
    monkeypatch.setattr(eval_cfg, "judge_deepseek_context_window_tokens", 64_000)

    # Act: no request-level model is supplied.
    judge_spec = build_eval_judge_llm_spec("deepseek", None)

    assert judge_spec is not None
    assert judge_spec.provider == "deepseek"
    # The legacy name deepseek-reasoner is normalized to deepseek-v4-flash
    # (per the original note: in thinking mode).
    assert judge_spec.resolved_model == "deepseek-v4-flash"
    assert judge_spec.context_window_tokens == 64_000

    # Single-conversation transcript budget: smaller window, smaller budget.
    small_window_chars = eval_judge_conversation_transcript_max_chars_for_context(
        64_000
    )
    large_window_chars = eval_judge_conversation_transcript_max_chars_for_context(
        200_000
    )
    assert small_window_chars < large_window_chars

    # Per-transcript budget in compare mode follows the same ordering.
    small_window_each = eval_judge_compare_transcript_each_max_chars_for_context(
        64_000
    )
    large_window_each = eval_judge_compare_transcript_each_max_chars_for_context(
        200_000
    )
    assert small_window_each < large_window_each