Route all memory ingest/retrieve/enrichment/compaction through async MemoryService. Remove legacy sync memory implementations (ingest/retrieve/compaction); Celery and memoir Phase2 call asyncio.run into MemoryService-backed helpers. Memoir Phase1 batch ingest uses MemoryService.ingest_transcripts_batch; drop chapters. evidence_bundle_json mirror (Alembic 0015). Evaluation uses snapshot/link-only bundles; raise EvidenceClosureMissing instead of partial/fallback lineage tiers. Split memoir state into NarrativeCoverageState and InterviewControlState; delete the _interview_meta_store adapter layer. Remove rolling-query and recent-fact fallback settings from config and evidence assembly. Update judges, docs, tests, and PlaygroundPage alignment. Made-with: Cursor
77 lines
3.2 KiB
Python
77 lines
3.2 KiB
Python
"""评测评审 LLM 装配:多供应商与上下文预算。"""
|
||
|
||
import pytest
|
||
|
||
from app.core.config import settings
|
||
from app.core.dependencies import build_eval_judge_llm_spec
|
||
from app.features.evaluation.judge_service import (
|
||
eval_judge_compare_transcript_each_max_chars_for_context,
|
||
eval_judge_conversation_transcript_max_chars_for_context,
|
||
)
|
||
|
||
|
||
def test_build_eval_judge_zhipu_uses_bigmodel_defaults(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Build a zhipu judge spec with an empty eval-judge key and a Zhipu key.

    The spec must exist, report provider ``zhipu`` and model ``glm-5``,
    carry an LLM object, and expose the context window configured in
    ``settings.eval_judge_context_window_tokens``.
    """
    patched_settings = (
        ("eval_judge_api_key", ""),
        ("zhipu_api_key", "z-test"),
        ("eval_judge_model", "glm-5"),
    )
    for attr_name, attr_value in patched_settings:
        monkeypatch.setattr(settings, attr_name, attr_value)

    judge_spec = build_eval_judge_llm_spec("zhipu", None)

    assert judge_spec is not None
    assert judge_spec.provider == "zhipu"
    assert judge_spec.resolved_model == "glm-5"
    assert judge_spec.llm is not None
    expected_window = settings.eval_judge_context_window_tokens
    assert judge_spec.context_window_tokens == expected_window
|
||
|
||
|
||
def test_build_eval_judge_zhipu_request_model_override(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A model passed on the request wins over ``settings.eval_judge_model``."""
    requested_model = "glm-4-plus"
    for attr_name, attr_value in (
        ("eval_judge_api_key", "e-test"),
        ("eval_judge_model", "glm-5"),
    ):
        monkeypatch.setattr(settings, attr_name, attr_value)

    judge_spec = build_eval_judge_llm_spec("zhipu", requested_model)

    assert judge_spec is not None
    assert judge_spec.resolved_model == requested_model
|
||
|
||
|
||
def test_build_eval_judge_deepseek_requires_key(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """With both the DeepSeek key and the generic LLM key blank, no spec is built."""
    for key_attr in ("deepseek_api_key", "llm_api_key"):
        monkeypatch.setattr(settings, key_attr, "")

    judge_spec = build_eval_judge_llm_spec("deepseek", None)

    assert judge_spec is None
|
||
|
||
|
||
def test_build_eval_judge_deepseek_v4_flash_non_thinking_default_path(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Default deepseek-v4-flash with thinking off passes ``disabled`` explicitly.

    The explicit value avoids relying on the API default, which is enabled.
    """
    patched_settings = {
        "deepseek_api_key": "d-test",
        "eval_judge_deepseek_model": "deepseek-v4-flash",
        "eval_judge_deepseek_thinking_enabled": False,
    }
    # Dict iteration follows insertion order, so the patches apply in the order listed.
    for attr_name, attr_value in patched_settings.items():
        monkeypatch.setattr(settings, attr_name, attr_value)

    judge_spec = build_eval_judge_llm_spec("deepseek", None)

    assert judge_spec is not None
    assert judge_spec.resolved_model == "deepseek-v4-flash"
    judge_llm = judge_spec.llm
    assert judge_llm.extra_body == {"thinking": {"type": "disabled"}}
    assert judge_llm.reasoning_effort is None
|
||
|
||
|
||
def test_build_eval_judge_deepseek_context_budget(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """The deepseek spec carries its own context window, and budgets grow with it.

    Checks the built spec first, then verifies that both transcript
    character-budget helpers return a smaller limit for a 64k window than
    for a 200k window.
    """
    small_window = 64_000
    large_window = 200_000

    monkeypatch.setattr(settings, "deepseek_api_key", "d-test")
    monkeypatch.setattr(settings, "eval_judge_deepseek_model", "deepseek-reasoner")
    monkeypatch.setattr(
        settings, "eval_judge_deepseek_context_window_tokens", small_window
    )

    judge_spec = build_eval_judge_llm_spec("deepseek", None)

    assert judge_spec is not None
    assert judge_spec.provider == "deepseek"
    # The legacy name deepseek-reasoner is normalized to v4-flash (thinking mode).
    assert judge_spec.resolved_model == "deepseek-v4-flash"
    assert judge_spec.context_window_tokens == small_window

    # Conversation budget first, then the per-transcript compare budget.
    budget_helpers = (
        eval_judge_conversation_transcript_max_chars_for_context,
        eval_judge_compare_transcript_each_max_chars_for_context,
    )
    for max_chars_for in budget_helpers:
        assert max_chars_for(small_window) < max_chars_for(large_window)
|