Files
life-echo/api/tests/test_eval_judge_llm_spec.py
yangshilin e1341c6d18 feat:
1. 建立问题库大纲,对应每个人生阶段槽位
2. 鼓励使用更生活化的交流语言共情与总结
3. 降低评审模型可能发生截断的概率
4. 成稿质量维度强化情感表达和上下文连贯性
2026-04-09 15:32:35 +08:00

62 lines
2.4 KiB
Python

"""评测评审 LLM 装配:多供应商与上下文预算。"""
import pytest
from app.core.config import settings
from app.core.dependencies import build_eval_judge_llm_spec
from app.features.evaluation.judge_service import (
eval_judge_compare_transcript_each_max_chars_for_context,
eval_judge_conversation_transcript_max_chars_for_context,
)
def test_build_eval_judge_zhipu_uses_bigmodel_defaults(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Build a Zhipu judge spec with no request-level model override.

    The dedicated judge key is blanked and only ``zhipu_api_key`` is set; the
    spec must still be built, carry the configured model name, and report the
    context window taken from settings.
    """
    # Insertion order of the mapping keeps the patches in a fixed sequence.
    patched_settings = {
        "eval_judge_api_key": "",
        "zhipu_api_key": "z-test",
        "eval_judge_model": "glm-5",
    }
    for setting_name, setting_value in patched_settings.items():
        monkeypatch.setattr(settings, setting_name, setting_value)

    judge_spec = build_eval_judge_llm_spec("zhipu", None)

    assert judge_spec is not None
    assert judge_spec.provider == "zhipu"
    assert judge_spec.resolved_model == "glm-5"
    assert judge_spec.llm is not None
    # Compared against the live setting rather than a literal, so the check
    # follows whatever window is currently configured.
    assert (
        judge_spec.context_window_tokens
        == settings.eval_judge_context_window_tokens
    )
def test_build_eval_judge_zhipu_request_model_override(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A model passed to the builder wins over ``settings.eval_judge_model``."""
    configured_model = "glm-5"
    requested_model = "glm-4-plus"
    monkeypatch.setattr(settings, "eval_judge_api_key", "e-test")
    monkeypatch.setattr(settings, "eval_judge_model", configured_model)

    judge_spec = build_eval_judge_llm_spec("zhipu", requested_model)

    assert judge_spec is not None
    # The resolved model must be the requested one, not the configured one.
    assert judge_spec.resolved_model == requested_model
def test_build_eval_judge_deepseek_requires_key(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """With both patched key settings empty, no DeepSeek spec is returned."""
    for key_setting in ("deepseek_api_key", "llm_api_key"):
        monkeypatch.setattr(settings, key_setting, "")

    judge_spec = build_eval_judge_llm_spec("deepseek", None)

    assert judge_spec is None
def test_build_eval_judge_deepseek_context_budget(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """DeepSeek spec reports its own context window and gets a smaller budget.

    First checks that the built spec carries the patched DeepSeek model and
    context window, then that both transcript character-budget helpers return
    a strictly smaller limit for the smaller window.
    """
    small_window = 64_000
    # NOTE(review): 200_000 presumably mirrors the GLM judge window (the
    # original locals were named ``glm_*``) -- confirm against settings.
    large_window = 200_000

    monkeypatch.setattr(settings, "deepseek_api_key", "d-test")
    monkeypatch.setattr(settings, "eval_judge_deepseek_model", "deepseek-reasoner")
    monkeypatch.setattr(
        settings, "eval_judge_deepseek_context_window_tokens", small_window
    )

    judge_spec = build_eval_judge_llm_spec("deepseek", None)

    assert judge_spec is not None
    assert judge_spec.provider == "deepseek"
    assert judge_spec.resolved_model == "deepseek-reasoner"
    assert judge_spec.context_window_tokens == small_window

    # Single-transcript budget first, then the per-side compare budget.
    budget_helpers = (
        eval_judge_conversation_transcript_max_chars_for_context,
        eval_judge_compare_transcript_each_max_chars_for_context,
    )
    for max_chars_for_context in budget_helpers:
        assert max_chars_for_context(small_window) < max_chars_for_context(
            large_window
        )