api/app/adapters/llm/deepseek_eval_judge.py

"""DeepSeek 评测台评审：模型别名解析 + ChatOpenAI 装配。"""

from __future__ import annotations

from langchain_openai import ChatOpenAI

from app.adapters.llm.openai_base_url import normalize_openai_compatible_base_url
from app.core.config import settings
from app.core.eval_judge_spec import EvalJudgeLlmSpec
from app.features.evaluation.constants import eval_cfg
from app.core.runtime_constants import llm_defaults


def resolve_deepseek_eval_judge_model(
    requested: str | None,
) -> tuple[str, dict | None, str | None]:
    """Normalize a judge model name (legacy aliases included) to DeepSeek V4 settings.

    Vendor note carried by this module: ``deepseek-chat`` and
    ``deepseek-reasoner`` are slated for deprecation and correspond to
    ``deepseek-v4-flash`` in non-thinking and thinking mode respectively.

    Args:
        requested: Model name or legacy alias. Surrounding whitespace is
            ignored and a falsy value is treated as the empty string.

    Returns:
        A ``(model, extra_body, reasoning_effort)`` tuple:

        * ``deepseek-chat`` -> flash with thinking disabled, no effort.
        * ``deepseek-reasoner`` / ``deepseek-r1`` -> flash with thinking
          enabled and effort ``"high"``.
        * ``deepseek-v4-pro`` -> itself, no extra body, effort ``"high"``.
        * ``""`` / ``deepseek-v4-flash`` -> flash, with thinking chosen by
          ``eval_cfg.judge_deepseek_thinking_enabled``.
        * anything else -> the stripped name unchanged, with no extra body
          and no effort.

        A fresh ``extra_body`` dict is built on every call, so callers may
        mutate it freely.
    """
    name = (requested or "").strip()

    if name == "deepseek-v4-pro":
        return ("deepseek-v4-pro", None, "high")

    if name == "deepseek-chat":
        thinking = False
    elif name in ("deepseek-reasoner", "deepseek-r1"):
        thinking = True
    elif name in ("", "deepseek-v4-flash"):
        # Only the default / plain flash name consults the config toggle.
        thinking = bool(eval_cfg.judge_deepseek_thinking_enabled)
    else:
        # Unrecognized names (other flash / v4 variants included) pass
        # through untouched; the previous special case for them returned
        # exactly this value, so it was folded into the fall-through.
        return (name, None, None)

    if thinking:
        return ("deepseek-v4-flash", {"thinking": {"type": "enabled"}}, "high")
    return ("deepseek-v4-flash", {"thinking": {"type": "disabled"}}, None)


def build_deepseek_eval_judge_spec(
    judge_model: str | None,
) -> EvalJudgeLlmSpec | None:
    """Assemble the DeepSeek judge spec, or return None when no API key is set.

    Args:
        judge_model: Explicitly requested model name. When empty or None,
            ``eval_cfg.judge_deepseek_model`` is used, which itself falls
            back to ``"deepseek-v4-flash"``.

    Returns:
        An ``EvalJudgeLlmSpec`` wrapping a ``ChatOpenAI`` client with
        provider ``"deepseek"``, or ``None`` if ``settings.deepseek_api_key``
        is empty after stripping.
    """
    key = (settings.deepseek_api_key or "").strip()
    if key == "":
        return None

    requested = (judge_model or "").strip()
    endpoint = normalize_openai_compatible_base_url(
        llm_defaults.deepseek_base_url,
        fallback="https://api.deepseek.com",
    )
    configured = (eval_cfg.judge_deepseek_model or "deepseek-v4-flash").strip()
    # An explicit request wins over the configured default.
    resolved, extra_body, reasoning_effort = resolve_deepseek_eval_judge_model(
        requested if requested else configured
    )
    window = int(eval_cfg.judge_deepseek_context_window_tokens)

    # Optional knobs are forwarded only when the resolver supplied them.
    optional = {"extra_body": extra_body, "reasoning_effort": reasoning_effort}
    client_kwargs: dict = {
        "api_key": key,
        "base_url": endpoint,
        "model": resolved,
        "temperature": eval_cfg.judge_temperature,
        **{k: v for k, v in optional.items() if v is not None},
    }
    client = ChatOpenAI(**client_kwargs)
    return EvalJudgeLlmSpec(
        llm=client,
        provider="deepseek",
        resolved_model=resolved,
        context_window_tokens=window,
    )
-												feat(api): DeepSeek V4 Flash 默认、HTTP 错讯与多供应商分层

- 主链路默认 deepseek-v4-flash，DEEPSEEK_THINKING_ENABLED 对齐旧非思考 chat
- 评测台评审装配迁入 adapters/llm（deepseek_eval_judge、zhipu_eval_judge）与 eval_judge_spec
- 拆分 llm_http_openai_chat_errors 与 llm_errors（DeepSeek/智谱品牌与文档链），llm_call 支持 http_error_vendor
- EvalJudgeService 按 spec.provider 传入 allm_json_call；评测台前端文案改为 V4 Flash
- 更新 .env 示例与 staging/production 的 DEEPSEEK_MODEL；补充 openai/供应商错讯测试

Made-with: Cursor

											
										
										
											2026-04-27 14:34:30 +08:00
+								"""DeepSeek 评测台评审：模型别名解析 + ChatOpenAI 装配。"""
 								from __future__ import annotations
 								from langchain_openai import ChatOpenAI
 								from app.adapters.llm.openai_base_url import normalize_openai_compatible_base_url
 								from app.core.config import settings
 								from app.core.eval_judge_spec import EvalJudgeLlmSpec
-												refactor(api): TOML 配置 SSOT、统一错误契约、Auth/事务加固与可观测性 (#33)

配置 SSOT（TOML + .env）
统一错误契约
Auth 与事务边界
Redis / Celery 可靠性:业务 Redis（DB/0）与 Celery broker/backend（DB/1）显式拆分；连接池、sync client
可观测性（OpenTelemetry + LGTM）
											
										
										
											2026-05-22 13:44:50 +08:00
+								from app.features.evaluation.constants import eval_cfg
 								from app.core.runtime_constants import llm_defaults
-												feat(api): DeepSeek V4 Flash 默认、HTTP 错讯与多供应商分层

- 主链路默认 deepseek-v4-flash，DEEPSEEK_THINKING_ENABLED 对齐旧非思考 chat
- 评测台评审装配迁入 adapters/llm（deepseek_eval_judge、zhipu_eval_judge）与 eval_judge_spec
- 拆分 llm_http_openai_chat_errors 与 llm_errors（DeepSeek/智谱品牌与文档链），llm_call 支持 http_error_vendor
- EvalJudgeService 按 spec.provider 传入 allm_json_call；评测台前端文案改为 V4 Flash
- 更新 .env 示例与 staging/production 的 DEEPSEEK_MODEL；补充 openai/供应商错讯测试

Made-with: Cursor

											
										
										
											2026-04-27 14:34:30 +08:00
 								def resolve_deepseek_eval_judge_model(
 								    requested: str,
 								) -> tuple[str, dict | None, str | None]:
 								    """将模型名（含旧别名）规范为 V4 的 model id、extra_body 与 reasoning_effort。
 								    官方：deepseek-chat / deepseek-reasoner 将弃用，分别对应 v4-flash 非思考 / 思考。
 								    """
 								    m = (requested or "").strip()
 								    if m == "deepseek-chat":
 								        return (
 								            "deepseek-v4-flash",
 								            {"thinking": {"type": "disabled"}},
 								            None,
 								        )
 								    if m in (
 								        "deepseek-reasoner",
 								        "deepseek-r1",
 								    ):
 								        return (
 								            "deepseek-v4-flash",
 								            {"thinking": {"type": "enabled"}},
 								            "high",
 								        )
 								    if m == "deepseek-v4-pro":
 								        return ("deepseek-v4-pro", None, "high")
 								    if m in ("", "deepseek-v4-flash"):
-												refactor(api): TOML 配置 SSOT、统一错误契约、Auth/事务加固与可观测性 (#33)

配置 SSOT（TOML + .env）
统一错误契约
Auth 与事务边界
Redis / Celery 可靠性:业务 Redis（DB/0）与 Celery broker/backend（DB/1）显式拆分；连接池、sync client
可观测性（OpenTelemetry + LGTM）
											
										
										
											2026-05-22 13:44:50 +08:00
+								        if eval_cfg.judge_deepseek_thinking_enabled:
-												feat(api): DeepSeek V4 Flash 默认、HTTP 错讯与多供应商分层

- 主链路默认 deepseek-v4-flash，DEEPSEEK_THINKING_ENABLED 对齐旧非思考 chat
- 评测台评审装配迁入 adapters/llm（deepseek_eval_judge、zhipu_eval_judge）与 eval_judge_spec
- 拆分 llm_http_openai_chat_errors 与 llm_errors（DeepSeek/智谱品牌与文档链），llm_call 支持 http_error_vendor
- EvalJudgeService 按 spec.provider 传入 allm_json_call；评测台前端文案改为 V4 Flash
- 更新 .env 示例与 staging/production 的 DEEPSEEK_MODEL；补充 openai/供应商错讯测试

Made-with: Cursor

											
										
										
											2026-04-27 14:34:30 +08:00
+								            return (
 								                "deepseek-v4-flash",
 								                {"thinking": {"type": "enabled"}},
 								                "high",
 								            )
 								        return (
 								            "deepseek-v4-flash",
 								            {"thinking": {"type": "disabled"}},
 								            None,
 								        )
 								    if "flash" in m.lower() or m.startswith("deepseek-v4"):
 								        return (m, None, None)
 								    return (m, None, None)
 								def build_deepseek_eval_judge_spec(
 								    judge_model: str | None,
 								) -> EvalJudgeLlmSpec | None:
 								    """密钥缺失时返回 None。"""
-												refactor(api): TOML 配置 SSOT、统一错误契约、Auth/事务加固与可观测性 (#33)

配置 SSOT（TOML + .env）
统一错误契约
Auth 与事务边界
Redis / Celery 可靠性:业务 Redis（DB/0）与 Celery broker/backend（DB/1）显式拆分；连接池、sync client
可观测性（OpenTelemetry + LGTM）
											
										
										
											2026-05-22 13:44:50 +08:00
+								    api_key = (settings.deepseek_api_key or "").strip()
-												feat(api): DeepSeek V4 Flash 默认、HTTP 错讯与多供应商分层

- 主链路默认 deepseek-v4-flash，DEEPSEEK_THINKING_ENABLED 对齐旧非思考 chat
- 评测台评审装配迁入 adapters/llm（deepseek_eval_judge、zhipu_eval_judge）与 eval_judge_spec
- 拆分 llm_http_openai_chat_errors 与 llm_errors（DeepSeek/智谱品牌与文档链），llm_call 支持 http_error_vendor
- EvalJudgeService 按 spec.provider 传入 allm_json_call；评测台前端文案改为 V4 Flash
- 更新 .env 示例与 staging/production 的 DEEPSEEK_MODEL；补充 openai/供应商错讯测试

Made-with: Cursor

											
										
										
											2026-04-27 14:34:30 +08:00
+								    if not api_key:
 								        return None
 								    want = (judge_model or "").strip()
 								    base = normalize_openai_compatible_base_url(
-												refactor(api): TOML 配置 SSOT、统一错误契约、Auth/事务加固与可观测性 (#33)

配置 SSOT（TOML + .env）
统一错误契约
Auth 与事务边界
Redis / Celery 可靠性:业务 Redis（DB/0）与 Celery broker/backend（DB/1）显式拆分；连接池、sync client
可观测性（OpenTelemetry + LGTM）
											
										
										
											2026-05-22 13:44:50 +08:00
+								        llm_defaults.deepseek_base_url,
-												feat(api): DeepSeek V4 Flash 默认、HTTP 错讯与多供应商分层

- 主链路默认 deepseek-v4-flash，DEEPSEEK_THINKING_ENABLED 对齐旧非思考 chat
- 评测台评审装配迁入 adapters/llm（deepseek_eval_judge、zhipu_eval_judge）与 eval_judge_spec
- 拆分 llm_http_openai_chat_errors 与 llm_errors（DeepSeek/智谱品牌与文档链），llm_call 支持 http_error_vendor
- EvalJudgeService 按 spec.provider 传入 allm_json_call；评测台前端文案改为 V4 Flash
- 更新 .env 示例与 staging/production 的 DEEPSEEK_MODEL；补充 openai/供应商错讯测试

Made-with: Cursor

											
										
										
											2026-04-27 14:34:30 +08:00
+								        fallback="https://api.deepseek.com",
 								    )
-												refactor(api): TOML 配置 SSOT、统一错误契约、Auth/事务加固与可观测性 (#33)

配置 SSOT（TOML + .env）
统一错误契约
Auth 与事务边界
Redis / Celery 可靠性:业务 Redis（DB/0）与 Celery broker/backend（DB/1）显式拆分；连接池、sync client
可观测性（OpenTelemetry + LGTM）
											
										
										
											2026-05-22 13:44:50 +08:00
+								    default_m = (eval_cfg.judge_deepseek_model or "deepseek-v4-flash").strip()
-												feat(api): DeepSeek V4 Flash 默认、HTTP 错讯与多供应商分层

- 主链路默认 deepseek-v4-flash，DEEPSEEK_THINKING_ENABLED 对齐旧非思考 chat
- 评测台评审装配迁入 adapters/llm（deepseek_eval_judge、zhipu_eval_judge）与 eval_judge_spec
- 拆分 llm_http_openai_chat_errors 与 llm_errors（DeepSeek/智谱品牌与文档链），llm_call 支持 http_error_vendor
- EvalJudgeService 按 spec.provider 传入 allm_json_call；评测台前端文案改为 V4 Flash
- 更新 .env 示例与 staging/production 的 DEEPSEEK_MODEL；补充 openai/供应商错讯测试

Made-with: Cursor

											
										
										
											2026-04-27 14:34:30 +08:00
+								    combined = want or default_m
 								    model, extra, effort = resolve_deepseek_eval_judge_model(combined)
-												refactor(api): TOML 配置 SSOT、统一错误契约、Auth/事务加固与可观测性 (#33)

配置 SSOT（TOML + .env）
统一错误契约
Auth 与事务边界
Redis / Celery 可靠性:业务 Redis（DB/0）与 Celery broker/backend（DB/1）显式拆分；连接池、sync client
可观测性（OpenTelemetry + LGTM）
											
										
										
											2026-05-22 13:44:50 +08:00
+								    ctx = int(eval_cfg.judge_deepseek_context_window_tokens)
-												feat(api): DeepSeek V4 Flash 默认、HTTP 错讯与多供应商分层

- 主链路默认 deepseek-v4-flash，DEEPSEEK_THINKING_ENABLED 对齐旧非思考 chat
- 评测台评审装配迁入 adapters/llm（deepseek_eval_judge、zhipu_eval_judge）与 eval_judge_spec
- 拆分 llm_http_openai_chat_errors 与 llm_errors（DeepSeek/智谱品牌与文档链），llm_call 支持 http_error_vendor
- EvalJudgeService 按 spec.provider 传入 allm_json_call；评测台前端文案改为 V4 Flash
- 更新 .env 示例与 staging/production 的 DEEPSEEK_MODEL；补充 openai/供应商错讯测试

Made-with: Cursor

											
										
										
											2026-04-27 14:34:30 +08:00
+								    llm_kw: dict = {
 								        "api_key": api_key,
 								        "base_url": base,
 								        "model": model,
-												refactor(api): TOML 配置 SSOT、统一错误契约、Auth/事务加固与可观测性 (#33)

配置 SSOT（TOML + .env）
统一错误契约
Auth 与事务边界
Redis / Celery 可靠性:业务 Redis（DB/0）与 Celery broker/backend（DB/1）显式拆分；连接池、sync client
可观测性（OpenTelemetry + LGTM）
											
										
										
											2026-05-22 13:44:50 +08:00
+								        "temperature": eval_cfg.judge_temperature,
-												feat(api): DeepSeek V4 Flash 默认、HTTP 错讯与多供应商分层

- 主链路默认 deepseek-v4-flash，DEEPSEEK_THINKING_ENABLED 对齐旧非思考 chat
- 评测台评审装配迁入 adapters/llm（deepseek_eval_judge、zhipu_eval_judge）与 eval_judge_spec
- 拆分 llm_http_openai_chat_errors 与 llm_errors（DeepSeek/智谱品牌与文档链），llm_call 支持 http_error_vendor
- EvalJudgeService 按 spec.provider 传入 allm_json_call；评测台前端文案改为 V4 Flash
- 更新 .env 示例与 staging/production 的 DEEPSEEK_MODEL；补充 openai/供应商错讯测试

Made-with: Cursor

											
										
										
											2026-04-27 14:34:30 +08:00
+								    }
 								    if extra is not None:
 								        llm_kw["extra_body"] = extra
 								    if effort is not None:
 								        llm_kw["reasoning_effort"] = effort
 								    return EvalJudgeLlmSpec(
 								        llm=ChatOpenAI(**llm_kw),
 								        provider="deepseek",
 								        resolved_model=model,
 								        context_window_tokens=ctx,
 								    )