diff --git a/api/.env.example b/api/.env.example
index b1012eb..f5ba274 100644
--- a/api/.env.example
+++ b/api/.env.example
@@ -44,7 +44,10 @@ LOG_LEVEL=INFO
# =============================================================================
DEEPSEEK_API_KEY=your_deepseek_api_key
DEEPSEEK_BASE_URL=https://api.deepseek.com
-DEEPSEEK_MODEL=deepseek-chat
+# 官方新模型名见 https://api-docs.deepseek.com/zh-cn/quick_start/pricing
+DEEPSEEK_MODEL=deepseek-v4-flash
+# v4-flash 主链路非思考须显式关(对齐旧版 deepseek-chat;默认 false)
+# DEEPSEEK_THINKING_ENABLED=false
# =============================================================================
# Memory 向量(智谱 BigModel 国内 embedding-3;与 DeepSeek/OpenAI 用途分离)
diff --git a/api/.env.production b/api/.env.production
index 3022b1a..3ab9b2a 100644
--- a/api/.env.production
+++ b/api/.env.production
@@ -38,7 +38,7 @@ LOG_LEVEL=INFO
# =============================================================================
DEEPSEEK_API_KEY=your_deepseek_api_key
DEEPSEEK_BASE_URL=https://api.deepseek.com
-DEEPSEEK_MODEL=deepseek-chat
+DEEPSEEK_MODEL=deepseek-v4-flash
# =============================================================================
# Memory 向量(智谱 BigModel 国内 embedding-3;与 DeepSeek/OpenAI 用途分离)
diff --git a/api/.env.staging b/api/.env.staging
index 5a8ab42..81dc5d2 100644
--- a/api/.env.staging
+++ b/api/.env.staging
@@ -31,7 +31,7 @@ LOG_LEVEL=INFO
# =============================================================================
DEEPSEEK_API_KEY=your_deepseek_api_key
DEEPSEEK_BASE_URL=https://api.deepseek.com
-DEEPSEEK_MODEL=deepseek-chat
+DEEPSEEK_MODEL=deepseek-v4-flash
# =============================================================================
# Database
diff --git a/api/app/adapters/llm/deepseek.py b/api/app/adapters/llm/deepseek.py
index 4565611..2245b97 100644
--- a/api/app/adapters/llm/deepseek.py
+++ b/api/app/adapters/llm/deepseek.py
@@ -18,8 +18,10 @@ class DeepSeekLLMProvider:
self,
api_key: str,
base_url: str = "https://api.deepseek.com",
- model: str = "deepseek-chat",
+ model: str = "deepseek-v4-flash",
temperature: float = 0.7,
+ *,
+ extra_body: dict | None = None,
):
self._default_model = model
self._default_temperature = temperature
@@ -28,6 +30,8 @@ class DeepSeekLLMProvider:
"model": model,
"api_key": api_key,
}
+ if extra_body:
+ kwargs["extra_body"] = extra_body
if base_url:
cleaned = base_url.rstrip("/")
for suffix in ("/v1/chat/completions", "/v1"):
diff --git a/api/app/adapters/llm/deepseek_eval_judge.py b/api/app/adapters/llm/deepseek_eval_judge.py
new file mode 100644
index 0000000..f7ca992
--- /dev/null
+++ b/api/app/adapters/llm/deepseek_eval_judge.py
@@ -0,0 +1,85 @@
+"""DeepSeek 评测台评审:模型别名解析 + ChatOpenAI 装配。"""
+
+from __future__ import annotations
+
+from langchain_openai import ChatOpenAI
+
+from app.adapters.llm.openai_base_url import normalize_openai_compatible_base_url
+from app.core.config import settings
+from app.core.eval_judge_spec import EvalJudgeLlmSpec
+
+
+def resolve_deepseek_eval_judge_model(
+    requested: str,
+) -> tuple[str, dict | None, str | None]:
+    """Map a requested judge model name (legacy aliases included) onto a V4 request.
+
+    Returns ``(model_id, extra_body, reasoning_effort)``. ``extra_body`` and
+    ``reasoning_effort`` are ``None`` when nothing should be sent for them.
+
+    Mapping (the input is stripped first; matching is exact and case-sensitive):
+
+    * ``deepseek-chat`` -> ``deepseek-v4-flash`` with thinking disabled.
+    * ``deepseek-reasoner`` / ``deepseek-r1`` -> ``deepseek-v4-flash`` with
+      thinking enabled and ``"high"`` effort.
+    * ``deepseek-v4-pro`` -> same id, no ``extra_body``, ``"high"`` effort.
+    * empty string / ``deepseek-v4-flash`` -> thinking follows
+      ``settings.eval_judge_deepseek_thinking_enabled``.
+    * any other name is passed through unchanged with no extras.
+
+    The original author notes that ``deepseek-chat`` / ``deepseek-reasoner`` are
+    being deprecated upstream in favour of v4-flash non-thinking / thinking.
+    """
+    m = (requested or "").strip()
+    if m == "deepseek-v4-pro":
+        return ("deepseek-v4-pro", None, "high")
+
+    if m == "deepseek-chat":
+        thinking = False
+    elif m in ("deepseek-reasoner", "deepseek-r1"):
+        thinking = True
+    elif m in ("", "deepseek-v4-flash"):
+        # Bare v4-flash carries no alias hint, so the settings flag decides.
+        thinking = bool(settings.eval_judge_deepseek_thinking_enabled)
+    else:
+        # Unknown names (other v4 variants included) go to the API untouched;
+        # the former "flash"/"deepseek-v4" special case returned exactly this.
+        return (m, None, None)
+
+    # A fresh dict per call so callers can never share or mutate a common body.
+    if thinking:
+        return ("deepseek-v4-flash", {"thinking": {"type": "enabled"}}, "high")
+    return ("deepseek-v4-flash", {"thinking": {"type": "disabled"}}, None)
+
+
+def build_deepseek_eval_judge_spec(
+    judge_model: str | None,
+) -> EvalJudgeLlmSpec | None:
+    """Assemble the DeepSeek eval-judge spec; return ``None`` when no API key is set.
+
+    The key comes from ``settings.deepseek_api_key`` and falls back to
+    ``settings.llm_api_key``. An explicit ``judge_model`` wins over
+    ``settings.eval_judge_deepseek_model``; the chosen name is then run through
+    ``resolve_deepseek_eval_judge_model``.
+    """
+    key = (settings.deepseek_api_key or settings.llm_api_key or "").strip()
+    if key == "":
+        return None
+
+    requested = (judge_model or "").strip()
+    endpoint = normalize_openai_compatible_base_url(
+        settings.deepseek_base_url,
+        fallback="https://api.deepseek.com",
+    )
+    if not requested:
+        requested = (settings.eval_judge_deepseek_model or "deepseek-v4-flash").strip()
+    resolved, extra_body, reasoning_effort = resolve_deepseek_eval_judge_model(requested)
+    ctx_tokens = int(settings.eval_judge_deepseek_context_window_tokens)
+
+    chat_kwargs: dict = dict(
+        api_key=key,
+        base_url=endpoint,
+        model=resolved,
+        temperature=settings.eval_judge_temperature,
+    )
+    # Optional knobs are forwarded only when the resolver produced a value.
+    optional = {"extra_body": extra_body, "reasoning_effort": reasoning_effort}
+    chat_kwargs.update({k: v for k, v in optional.items() if v is not None})
+
+    return EvalJudgeLlmSpec(
+        llm=ChatOpenAI(**chat_kwargs),
+        provider="deepseek",
+        resolved_model=resolved,
+        context_window_tokens=ctx_tokens,
+    )
diff --git a/api/app/adapters/llm/openai_base_url.py b/api/app/adapters/llm/openai_base_url.py
new file mode 100644
index 0000000..878e072
--- /dev/null
+++ b/api/app/adapters/llm/openai_base_url.py
@@ -0,0 +1,9 @@
+"""OpenAI/Chat Completions 兼容基址规范化(多供应商共用)。"""
+
+
+def normalize_openai_compatible_base_url(raw: str, *, fallback: str) -> str:
+    """Return a base URL with no trailing slash and no OpenAI-style path suffix.
+
+    ``raw`` is stripped of surrounding whitespace and trailing ``/``; when that
+    leaves nothing (``None`` and ``""`` included), ``fallback`` is used and
+    receives the same cleanup. A trailing ``/v1/chat/completions`` or ``/v1`` is
+    then removed so the result can be handed to a client that appends its own
+    route.
+    """
+    base = (raw or "").strip().rstrip("/") or (fallback or "").strip().rstrip("/")
+    for suffix in ("/v1/chat/completions", "/v1"):
+        if base.endswith(suffix):
+            # Re-strip: removing the suffix can expose another trailing slash.
+            base = base[: -len(suffix)].rstrip("/")
+    return base
diff --git a/api/app/adapters/llm/zhipu_eval_judge.py b/api/app/adapters/llm/zhipu_eval_judge.py
new file mode 100644
index 0000000..756b164
--- /dev/null
+++ b/api/app/adapters/llm/zhipu_eval_judge.py
@@ -0,0 +1,37 @@
+"""智谱 GLM 评测台评审:OpenAI 兼容端点 + ChatOpenAI 装配。"""
+
+from __future__ import annotations
+
+from langchain_openai import ChatOpenAI
+
+from app.adapters.llm.openai_base_url import normalize_openai_compatible_base_url
+from app.core.config import settings
+from app.core.eval_judge_spec import EvalJudgeLlmSpec
+
+
+def build_zhipu_eval_judge_spec(
+    judge_model: str | None,
+) -> EvalJudgeLlmSpec | None:
+    """Assemble the Zhipu GLM eval-judge spec; return ``None`` when no API key is set.
+
+    The key comes from ``settings.eval_judge_api_key`` and falls back to
+    ``settings.zhipu_api_key``. An explicit ``judge_model`` wins over
+    ``settings.eval_judge_model`` (default ``"glm-5"``).
+    """
+    key = (settings.eval_judge_api_key or settings.zhipu_api_key or "").strip()
+    if key == "":
+        return None
+
+    requested = (judge_model or "").strip()
+    endpoint = normalize_openai_compatible_base_url(
+        settings.eval_judge_base_url,
+        fallback="https://open.bigmodel.cn/api/paas/v4",
+    )
+    resolved = requested if requested else (settings.eval_judge_model or "glm-5")
+    ctx_tokens = int(settings.eval_judge_context_window_tokens)
+
+    client = ChatOpenAI(
+        api_key=key,
+        base_url=endpoint,
+        model=resolved,
+        temperature=settings.eval_judge_temperature,
+    )
+    return EvalJudgeLlmSpec(
+        llm=client,
+        provider="zhipu",
+        resolved_model=resolved,
+        context_window_tokens=ctx_tokens,
+    )
diff --git a/api/app/core/config.py b/api/app/core/config.py
index ce3c7af..683970d 100644
--- a/api/app/core/config.py
+++ b/api/app/core/config.py
@@ -56,7 +56,10 @@ class Settings(BaseSettings):
# ── LLM / DeepSeek ───────────────────────────────────────
deepseek_api_key: str = ""
deepseek_base_url: str = "https://api.deepseek.com"
- deepseek_model: str = "deepseek-chat"
+ # 官方新模型名(V4-Flash);与弃用名 deepseek-chat 对齐为「非思考」需另设 deepseek_thinking_enabled
+ deepseek_model: str = "deepseek-v4-flash"
+ # V4-Flash 在官方 API 中 thinking 默认为 enabled;主链路为对齐旧版 deepseek-chat 默认关闭
+ deepseek_thinking_enabled: bool = False
llm_api_key: str = ""
llm_base_url: str = ""
llm_model: str = ""
@@ -399,8 +402,10 @@ class Settings(BaseSettings):
eval_judge_base_url: str = "https://open.bigmodel.cn/api/paas/v4"
eval_judge_model: str = "glm-5"
eval_judge_temperature: float = 0.3
- # 评测评审:DeepSeek(OpenAI 兼容);默认 deepseek-reasoner 即官网 R1
- eval_judge_deepseek_model: str = "deepseek-reasoner"
+ # 评测评审:DeepSeek(OpenAI 兼容);默认 V4-Flash 思考模式(对齐旧版 deepseek-reasoner)
+ eval_judge_deepseek_model: str = "deepseek-v4-flash"
+ # 当仅指定 deepseek-v4-flash、未用弃用名区分时,是否走思考模式(与 eval_judge_deepseek_model 联用)
+ eval_judge_deepseek_thinking_enabled: bool = True
eval_judge_deepseek_context_window_tokens: int = Field(
default=64_000,
ge=4096,
diff --git a/api/app/core/dependencies.py b/api/app/core/dependencies.py
index dac9e7c..adb4f27 100644
--- a/api/app/core/dependencies.py
+++ b/api/app/core/dependencies.py
@@ -5,11 +5,8 @@
- Port DI factory:get_sms_sender / get_llm_provider / get_tts_provider / ...
"""
-from dataclasses import dataclass
from functools import lru_cache
-from typing import Any, Literal, Optional
-
-EvalJudgeProvider = Literal["zhipu", "deepseek"]
+from typing import Optional
from fastapi import Depends, HTTPException, status
from fastapi.security import OAuth2PasswordBearer
@@ -17,6 +14,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
from app.core.config import settings
from app.core.db import get_async_db
+from app.core.eval_judge_spec import EvalJudgeLlmSpec, EvalJudgeProvider
from app.core.security import verify_token
from app.ports.asr import ASRProvider
from app.ports.embedding import EmbeddingProvider
@@ -52,12 +50,17 @@ def get_llm_provider() -> LLMProvider:
api_key = settings.deepseek_api_key or settings.llm_api_key
base_url = settings.deepseek_base_url or settings.llm_base_url
- model = settings.deepseek_model or settings.llm_model or "deepseek-chat"
+ model = settings.deepseek_model or settings.llm_model or "deepseek-v4-flash"
return DeepSeekLLMProvider(
api_key=api_key,
base_url=base_url,
model=model,
temperature=settings.llm_temperature,
+ extra_body={
+ "thinking": {
+ "type": "enabled" if settings.deepseek_thinking_enabled else "disabled"
+ }
+ },
)
@@ -76,6 +79,11 @@ def get_llm_provider_fast() -> LLMProvider:
base_url=base_url,
model=fast,
temperature=settings.llm_temperature,
+ extra_body={
+ "thinking": {
+ "type": "enabled" if settings.deepseek_thinking_enabled else "disabled"
+ }
+ },
)
@@ -188,74 +196,17 @@ async def get_current_user(
return user
-def _normalize_openai_compatible_base_url(raw: str, *, fallback: str) -> str:
- base = (raw or "").strip().rstrip("/") or fallback
- for suffix in ("/v1/chat/completions", "/v1"):
- if base.endswith(suffix):
- base = base[: -len(suffix)]
- return base
-
-
-@dataclass(slots=True, frozen=True)
-class EvalJudgeLlmSpec:
- """评测台评审:OpenAI 兼容 Chat Completions 的一条装配结果。"""
-
- llm: Any | None
- provider: EvalJudgeProvider
- resolved_model: str
- context_window_tokens: int
-
-
def build_eval_judge_llm_spec(
provider: EvalJudgeProvider = "zhipu",
judge_model: str | None = None,
) -> EvalJudgeLlmSpec | None:
"""按供应商装配 ChatOpenAI;密钥缺失时返回 None(llm 为 None)。"""
- from langchain_openai import ChatOpenAI
+ from app.adapters.llm.deepseek_eval_judge import build_deepseek_eval_judge_spec
+ from app.adapters.llm.zhipu_eval_judge import build_zhipu_eval_judge_spec
- want = (judge_model or "").strip()
if provider == "deepseek":
- api_key = (settings.deepseek_api_key or settings.llm_api_key or "").strip()
- if not api_key:
- return None
- base = _normalize_openai_compatible_base_url(
- settings.deepseek_base_url,
- fallback="https://api.deepseek.com",
- )
- model = want or (settings.eval_judge_deepseek_model or "deepseek-reasoner")
- ctx = int(settings.eval_judge_deepseek_context_window_tokens)
- return EvalJudgeLlmSpec(
- llm=ChatOpenAI(
- api_key=api_key,
- base_url=base,
- model=model,
- temperature=settings.eval_judge_temperature,
- ),
- provider="deepseek",
- resolved_model=model,
- context_window_tokens=ctx,
- )
-
- api_key = (settings.eval_judge_api_key or settings.zhipu_api_key or "").strip()
- if not api_key:
- return None
- base = _normalize_openai_compatible_base_url(
- settings.eval_judge_base_url,
- fallback="https://open.bigmodel.cn/api/paas/v4",
- )
- model = want or (settings.eval_judge_model or "glm-5")
- ctx = int(settings.eval_judge_context_window_tokens)
- return EvalJudgeLlmSpec(
- llm=ChatOpenAI(
- api_key=api_key,
- base_url=base,
- model=model,
- temperature=settings.eval_judge_temperature,
- ),
- provider="zhipu",
- resolved_model=model,
- context_window_tokens=ctx,
- )
+ return build_deepseek_eval_judge_spec(judge_model)
+ return build_zhipu_eval_judge_spec(judge_model)
def get_eval_judge_langchain_llm():
diff --git a/api/app/core/eval_judge_spec.py b/api/app/core/eval_judge_spec.py
new file mode 100644
index 0000000..45190bd
--- /dev/null
+++ b/api/app/core/eval_judge_spec.py
@@ -0,0 +1,18 @@
+"""评测台评审 LLM 装配契约(与 OpenAI/Chat Completions 兼容的供应商解耦于适配器层)。"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any, Literal
+
+EvalJudgeProvider = Literal["zhipu", "deepseek"]
+
+
+@dataclass(slots=True, frozen=True)
+class EvalJudgeLlmSpec:
+ """Immutable result of assembling an eval-judge LLM (Zhipu, DeepSeek, ...), used by dependencies and the evaluation service."""
+
+ # Chat model handle; typed loosely — presumably a LangChain chat model, verify against the builders.
+ llm: Any | None
+ # Vendor that produced this spec ("zhipu" or "deepseek").
+ provider: EvalJudgeProvider
+ # Model id the builder settled on for this judge.
+ resolved_model: str
+ # Context window budget for this judge, in tokens.
+ context_window_tokens: int
diff --git a/api/app/core/llm_call.py b/api/app/core/llm_call.py
index 8116a05..4b4ae7b 100644
--- a/api/app/core/llm_call.py
+++ b/api/app/core/llm_call.py
@@ -28,6 +28,8 @@ from app.core.langchain_llm import (
bind_json_object_mode,
ensure_json_object_prompt_has_json_keyword,
)
+from app.core.llm_errors import LlmHttpErrorVendor, format_llm_http_error_message
+from app.core.llm_http_openai_chat_errors import should_log_openai_error_as_warning
from app.core.logging import get_logger
logger = get_logger(__name__)
@@ -100,9 +102,14 @@ _LLM_MSG_CONTENT_FILTER = (
)
-def _format_llm_invoke_error_message(exc: BaseException) -> str:
+def _format_llm_invoke_error_message(
+ exc: BaseException, *, http_error_vendor: LlmHttpErrorVendor = "deepseek"
+) -> str:
+ """Choose the text for a failed invoke: content-filter message first, then the vendor HTTP message, else ``str(exc)``."""
if _is_content_filter_refusal(exc):
return _LLM_MSG_CONTENT_FILTER
+ friendly = format_llm_http_error_message(exc, http_error_vendor)
+ if friendly is not None:
+ return friendly
return str(exc)
@@ -116,6 +123,11 @@ def _log_invoke_failure(*, agent: str, exc: BaseException, sync: bool) -> None:
)
return
tag = "llm_json_call" if sync else "allm_json_call"
+ if should_log_openai_error_as_warning(exc):
+ logger.bind(agent=agent).warning(
+ "{} provider http error: {}", tag, str(exc)[:800]
+ )
+ return
logger.bind(agent=agent).exception("{} invoke error: {}", tag, exc)
@@ -273,6 +285,7 @@ def llm_json_call(
agent: str,
fallback_factory: Callable[[], T] | None = None,
retry_empty: bool = True,
+ http_error_vendor: LlmHttpErrorVendor = "deepseek",
) -> T:
"""同步:invoke → 解析 JSON → `schema.model_validate`;失败时 `fallback_factory` 或 `LLMCallError`。"""
t0 = time.perf_counter()
@@ -353,7 +366,7 @@ def llm_json_call(
return fallback_factory()
raise LLMCallError(
"invoke",
- _format_llm_invoke_error_message(e),
+ _format_llm_invoke_error_message(e, http_error_vendor=http_error_vendor),
raw_content=raw[:4096] if raw else None,
) from e
@@ -367,6 +380,7 @@ async def allm_json_call(
agent: str,
fallback_factory: Callable[[], T] | None = None,
retry_empty: bool = True,
+ http_error_vendor: LlmHttpErrorVendor = "deepseek",
) -> T:
"""异步版,语义与 `llm_json_call` 一致。"""
t0 = time.perf_counter()
@@ -447,7 +461,7 @@ async def allm_json_call(
return fallback_factory()
raise LLMCallError(
"invoke",
- _format_llm_invoke_error_message(e),
+ _format_llm_invoke_error_message(e, http_error_vendor=http_error_vendor),
raw_content=raw[:4096] if raw else None,
) from e
@@ -455,6 +469,7 @@ async def allm_json_call(
__all__ = [
"LLMCallError",
"LLMCallMeta",
+ "LlmHttpErrorVendor",
"allm_json_call",
"llm_json_call",
]
diff --git a/api/app/core/llm_errors/__init__.py b/api/app/core/llm_errors/__init__.py
new file mode 100644
index 0000000..4f53ad1
--- /dev/null
+++ b/api/app/core/llm_errors/__init__.py
@@ -0,0 +1,32 @@
+"""大模型各供应商的 HTTP 错讯文案与文档链接。"""
+
+from __future__ import annotations
+
+from app.core.eval_judge_spec import EvalJudgeProvider
+from app.core.llm_http_openai_chat_errors import format_openai_shape_http_error_message
+
+LlmHttpErrorVendor = EvalJudgeProvider | None
+
+__all__ = ["LlmHttpErrorVendor", "format_llm_http_error_message"]
+
+
+def format_llm_http_error_message(
+    exc: BaseException,
+    vendor: LlmHttpErrorVendor,
+) -> str | None:
+    """Render an HTTP error from an LLM call, optionally branded for a vendor.
+
+    The neutral transport-level text comes from
+    ``format_openai_shape_http_error_message``. ``None`` is returned when ``exc``
+    is not recognised as an HTTP error. With ``vendor is None`` the neutral text
+    is returned as-is; otherwise it is wrapped with the vendor brand and the
+    vendor's error-code documentation URL. Any vendor other than ``"deepseek"``
+    is rendered with the Zhipu metadata.
+    """
+    neutral = format_openai_shape_http_error_message(exc)
+    if neutral is None or vendor is None:
+        return neutral
+
+    # Vendor metadata modules are imported lazily, only for the branch taken.
+    if vendor == "deepseek":
+        from app.core.llm_errors import deepseek as meta
+    else:
+        from app.core.llm_errors import zhipu as meta
+    return f"【{meta.BRAND}】{neutral}({meta.ERROR_CODES_DOC_URL})"
diff --git a/api/app/core/llm_errors/deepseek.py b/api/app/core/llm_errors/deepseek.py
new file mode 100644
index 0000000..4e58953
--- /dev/null
+++ b/api/app/core/llm_errors/deepseek.py
@@ -0,0 +1,4 @@
+"""DeepSeek API 错讯元数据(与传输层解耦)。"""
+
+BRAND = "DeepSeek"
+ERROR_CODES_DOC_URL = "https://api-docs.deepseek.com/zh-cn/quick_start/error_codes"
diff --git a/api/app/core/llm_errors/zhipu.py b/api/app/core/llm_errors/zhipu.py
new file mode 100644
index 0000000..cd32d3c
--- /dev/null
+++ b/api/app/core/llm_errors/zhipu.py
@@ -0,0 +1,5 @@
+"""智谱 BigModel 开放平台错讯元数据(与传输层解耦)。"""
+
+BRAND = "智谱 GLM"
+# 开放平台「HTTP 状态码 / 鉴权与计费」等说明
+ERROR_CODES_DOC_URL = "https://docs.bigmodel.cn/cn/codinggude/notice"
diff --git a/api/app/core/llm_http_openai_chat_errors.py b/api/app/core/llm_http_openai_chat_errors.py
new file mode 100644
index 0000000..28a1fb9
--- /dev/null
+++ b/api/app/core/llm_http_openai_chat_errors.py
@@ -0,0 +1,108 @@
+"""
+OpenAI Chat Completions 兼容 API 的 HTTP 传输层错讯(不绑定单一大模型厂商)。
+
+解析 ``APIStatusError`` / ``httpx`` 状态、OpenAI 形 JSON body 中的 error.message。
+"""
+
+from __future__ import annotations
+
+import json
+from collections.abc import Iterator
+
+import httpx
+from openai import APIStatusError
+
+_CLIENT_RECOVERABLE_STATUSES = frozenset({400, 401, 402, 422, 429})
+
+
+def _iter_exception_chain(exc: BaseException) -> Iterator[BaseException]:
+    """Yield ``exc`` and then each linked exception, preferring ``__cause__`` over ``__context__``.
+
+    Iteration ends at the first missing link or at the first exception that was
+    already yielded, so cyclic chains terminate.
+    """
+    visited: set[int] = set()
+    node: BaseException | None = exc
+    while True:
+        if node is None or id(node) in visited:
+            return
+        visited.add(id(node))
+        yield node
+        node = node.__cause__ or node.__context__
+
+
+def _detail_from_error_body(body: object | None, *, max_len: int = 400) -> str:
+ """Pull a short human-readable message out of an error body; return "" when none is found.
+
+ Dict bodies are checked for ``body["error"]["message"]`` and ``body["message"]``;
+ string bodies are returned as text or re-parsed as JSON. Results are cut to
+ ``max_len`` characters.
+ NOTE(review): leading indentation is not visible in this diff, so confirm whether
+ the top-level ``message`` lookup also runs when ``error`` is not a dict.
+ """
+ if body is None:
+ return ""
+ if isinstance(body, dict):
+ err = body.get("error")
+ if isinstance(err, dict):
+ msg = err.get("message")
+ if isinstance(msg, str) and msg.strip():
+ return msg.strip()[:max_len]
+ msg2 = body.get("message")
+ if isinstance(msg2, str) and msg2.strip():
+ return msg2.strip()[:max_len]
+ if isinstance(body, str) and body.strip():
+ s = body.strip()
+ # Over-long strings are truncated without attempting a JSON parse.
+ if len(s) > max_len:
+ return s[:max_len]
+ try:
+ # A JSON-encoded string body is decoded and run through the same extraction.
+ return _detail_from_error_body(json.loads(s), max_len=max_len)
+ except json.JSONDecodeError:
+ # Not JSON: the text itself is the detail.
+ return s[:max_len]
+ return ""
+
+
+def _neutral_message_for_status(status: int) -> str:
+    """Return the vendor-neutral Chinese explanation for an HTTP status code.
+
+    Statuses with a dedicated entry get that text; other 4xx and 5xx codes get
+    a generic client-error or server-error sentence; anything else gets a
+    plain "error" sentence. Every message embeds the numeric status.
+    """
+    specific: dict[int, str] = {
+        400: "请求体格式或字段不符合接口要求,请按返回说明修改(HTTP 400)。",
+        401: "API 密钥错误或未通过认证,请检查密钥与 BASE_URL 配置(HTTP 401)。",
+        402: "账户余额不足,请确认余额后充值再试(HTTP 402)。",
+        403: "权限不足或密钥无权访问该资源(HTTP 403)。",
+        404: "请求路径或资源不存在,请检查 BASE_URL/路由(HTTP 404)。",
+        409: "与当前资源状态冲突,请重试或联系管理员(HTTP 409)。",
+        422: "请求参数不合法,请按错误信息调整参数(HTTP 422)。",
+        429: "请求速率达到上限(如 TPM/RPM),请降低频率或稍后再试(HTTP 429)。",
+        500: "大模型服务内部故障,请等待后重试;若持续出现请联系服务商(HTTP 500)。",
+        502: "网关在从上游大模型取数时失败,请稍后重试(HTTP 502)。",
+        503: "大模型服务繁忙或暂不可用,请稍后重试(HTTP 503)。",
+        504: "大模型或网关响应超时,请稍后重试(HTTP 504)。",
+    }
+    known = specific.get(status)
+    if known is not None:
+        return known
+    # Outside 4xx/5xx there is no client/server distinction to report.
+    if status < 400 or status >= 600:
+        return f"大模型服务返回错误(HTTP {status})。"
+    if status < 500:
+        return f"大模型服务返回客户端错误(HTTP {status}),请根据返回信息修正后重试。"
+    return f"大模型服务返回服务端错误(HTTP {status}),请稍后重试或联系管理员。"
+
+
+def extract_openai_http_status(exc: BaseException) -> int | None:
+    """Return the HTTP status of the first OpenAI SDK / httpx status error in the exception chain, else ``None``."""
+    for link in _iter_exception_chain(exc):
+        if isinstance(link, APIStatusError):
+            code = link.status_code
+        elif isinstance(link, httpx.HTTPStatusError) and link.response is not None:
+            code = link.response.status_code
+        else:
+            # Not an HTTP status error; keep walking the chain.
+            continue
+        return int(code)
+    return None
+
+
+def should_log_openai_error_as_warning(exc: BaseException) -> bool:
+    """Tell whether ``exc`` is an expected client-side HTTP failure to log as a warning.
+
+    True only when a status is found on the exception chain and it is listed in
+    ``_CLIENT_RECOVERABLE_STATUSES``; other statuses, and exceptions with no HTTP
+    status, return False so they keep the full error log with stack trace.
+    """
+    status = extract_openai_http_status(exc)
+    if status is None:
+        return False
+    return status in _CLIENT_RECOVERABLE_STATUSES
+
+
+def format_openai_shape_http_error_message(exc: BaseException) -> str | None:
+    """Return a vendor-neutral Chinese message for an OpenAI SDK / httpx HTTP error, else ``None``.
+
+    The first matching exception in the chain decides the result. For
+    ``APIStatusError`` the provider's own detail (from the body, else the
+    exception message) is appended when it is non-empty, at most 500 characters,
+    not already contained in the neutral text, and has no "HTTP" in its first
+    80 characters. For a plain ``httpx.HTTPStatusError`` only the neutral text
+    is returned.
+    """
+    for link in _iter_exception_chain(exc):
+        if isinstance(link, APIStatusError):
+            neutral = _neutral_message_for_status(int(link.status_code))
+            detail = _detail_from_error_body(link.body) or (link.message or "").strip()
+            usable = (
+                bool(detail)
+                and len(detail) <= 500
+                and detail not in neutral
+                and "HTTP" not in detail[:80]
+            )
+            return f"{neutral} 服务商说明:{detail}" if usable else neutral
+        if isinstance(link, httpx.HTTPStatusError) and link.response is not None:
+            return _neutral_message_for_status(int(link.response.status_code))
+    return None
diff --git a/api/app/core/openai_compatible_errors.py b/api/app/core/openai_compatible_errors.py
new file mode 100644
index 0000000..001a0f4
--- /dev/null
+++ b/api/app/core/openai_compatible_errors.py
@@ -0,0 +1,15 @@
+"""兼容旧 import:实现已迁至 `llm_http_openai_chat_errors` / `llm_errors`。"""
+
+from app.core.llm_http_openai_chat_errors import (
+ extract_openai_http_status,
+ should_log_openai_error_as_warning,
+)
+from app.core.llm_http_openai_chat_errors import (
+ format_openai_shape_http_error_message as format_openai_compatible_http_error_message,
+)
+
+__all__ = [
+ "extract_openai_http_status",
+ "format_openai_compatible_http_error_message",
+ "should_log_openai_error_as_warning",
+]
diff --git a/api/app/features/evaluation/judge_manual_service.py b/api/app/features/evaluation/judge_manual_service.py
index 01c0b43..9f02da9 100644
--- a/api/app/features/evaluation/judge_manual_service.py
+++ b/api/app/features/evaluation/judge_manual_service.py
@@ -71,6 +71,7 @@ def _make_eval_judge(
EvalJudgeService(
spec.llm,
context_window_tokens=spec.context_window_tokens,
+ http_error_vendor=spec.provider,
),
spec.resolved_model,
)
diff --git a/api/app/features/evaluation/judge_service.py b/api/app/features/evaluation/judge_service.py
index 2b93758..f8d7342 100644
--- a/api/app/features/evaluation/judge_service.py
+++ b/api/app/features/evaluation/judge_service.py
@@ -7,6 +7,7 @@ from dataclasses import dataclass
from typing import Any, Generic, TypeVar
from app.core.config import settings
+from app.core.eval_judge_spec import EvalJudgeProvider
from app.core.llm_call import LLMCallError, allm_json_call
from app.core.logging import get_logger
from app.features.evaluation.judge_schemas import (
@@ -284,8 +285,10 @@ class EvalJudgeService:
judge_llm: Any | None,
*,
context_window_tokens: int | None = None,
+ http_error_vendor: EvalJudgeProvider = "deepseek",
) -> None:
self._llm = judge_llm
+ self._http_error_vendor: EvalJudgeProvider = http_error_vendor
self._ctx_tokens = int(
context_window_tokens or settings.eval_judge_context_window_tokens
)
@@ -329,6 +332,7 @@ class EvalJudgeService:
TurnJudgeOutput,
max_tokens=_TURN_MAX,
agent="EvalJudgeService.judge_turn",
+ http_error_vendor=self._http_error_vendor,
)
except LLMCallError as e:
logger.warning("turn judge failed: {}", e)
@@ -354,6 +358,7 @@ class EvalJudgeService:
ConversationJudgeOutput,
max_tokens=_CONV_JUDGE_JSON_MAX,
agent="EvalJudgeService.judge_conversation",
+ http_error_vendor=self._http_error_vendor,
)
return JudgeCallResult(output=out)
except LLMCallError as e:
@@ -496,6 +501,7 @@ class EvalJudgeService:
512, int(settings.eval_judge_memoir_completion_max_tokens)
),
agent="EvalJudgeService.judge_memoir",
+ http_error_vendor=self._http_error_vendor,
)
return JudgeCallResult(output=out)
except LLMCallError as e:
diff --git a/api/tests/test_eval_judge_llm_spec.py b/api/tests/test_eval_judge_llm_spec.py
index 6b06daf..f48c4c9 100644
--- a/api/tests/test_eval_judge_llm_spec.py
+++ b/api/tests/test_eval_judge_llm_spec.py
@@ -51,7 +51,8 @@ def test_build_eval_judge_deepseek_context_budget(
spec = build_eval_judge_llm_spec("deepseek", None)
assert spec is not None
assert spec.provider == "deepseek"
- assert spec.resolved_model == "deepseek-reasoner"
+ # 旧名 deepseek-reasoner 规范为 v4-flash 思考模式
+ assert spec.resolved_model == "deepseek-v4-flash"
assert spec.context_window_tokens == 64_000
n = eval_judge_conversation_transcript_max_chars_for_context(64_000)
glm_n = eval_judge_conversation_transcript_max_chars_for_context(200_000)
diff --git a/api/tests/test_llm_json_call.py b/api/tests/test_llm_json_call.py
index ae8c729..59af178 100644
--- a/api/tests/test_llm_json_call.py
+++ b/api/tests/test_llm_json_call.py
@@ -2,8 +2,10 @@
from __future__ import annotations
+import httpx
import pytest
from langchain_core.messages import AIMessage
+from openai import APIStatusError
from pydantic import BaseModel, Field
from app.core.langchain_llm import ensure_json_object_prompt_has_json_keyword
@@ -128,6 +130,41 @@ def test_llm_json_call_no_fallback_raises() -> None:
assert ei.value.kind == "validation"
+def _api_status_402() -> APIStatusError:
+    """Build an ``APIStatusError`` wrapping a 402 response with an OpenAI-shaped error body."""
+    request = httpx.Request("POST", "https://api.deepseek.com/v1/chat/completions")
+    payload = {"error": {"message": "Insufficient balance"}}
+    response = httpx.Response(402, request=request, json=payload)
+    return APIStatusError("Payment required", response=response, body=response.json())
+
+
+class _LlmInvokeRaises:
+    """LLM stub whose sync and async invocations always raise the canned 402 error."""
+
+    def invoke(self, _prompt: str) -> object:
+        """Raise the 402 ``APIStatusError`` regardless of the prompt."""
+        raise _api_status_402()
+
+    async def ainvoke(self, _prompt: str) -> object:
+        """Async twin of ``invoke``; delegates so both paths fail identically."""
+        return self.invoke(_prompt)
+
+    def bind(self, **_kwargs: object):
+        """Ignore binding options and hand back the same stub."""
+        return self
+
+
+def test_llm_json_call_openai_status_maps_to_friendly_chinese() -> None:
+    """A 402 from the provider surfaces as an ``invoke`` error with the branded Chinese text."""
+    with pytest.raises(LLMCallError) as raised:
+        llm_json_call(_LlmInvokeRaises(), "p", _SmallOut, max_tokens=8, agent="t")
+    err = raised.value
+    assert err.kind == "invoke"
+    rendered = str(err)
+    # Status code, balance wording and the default vendor brand must all appear.
+    for fragment in ("402", "余额", "DeepSeek"):
+        assert fragment in rendered
+
+
@pytest.mark.asyncio
async def test_allm_json_call_parity_with_sync() -> None:
llm = _SyncFakeLlm(['{"answer": "async", "score": 7}'])
diff --git a/api/tests/test_llm_vendor_errors.py b/api/tests/test_llm_vendor_errors.py
new file mode 100644
index 0000000..3aec62b
--- /dev/null
+++ b/api/tests/test_llm_vendor_errors.py
@@ -0,0 +1,36 @@
+"""供应商层 HTTP 错讯:DeepSeek / 智谱 品牌与文档链。"""
+
+import httpx
+from openai import APIStatusError
+
+from app.core.llm_errors import format_llm_http_error_message
+
+
+def _status_402() -> APIStatusError:
+    """Build a minimal 402 ``APIStatusError`` with an OpenAI-shaped body."""
+    request = httpx.Request("POST", "https://x/v1/chat/completions")
+    response = httpx.Response(402, request=request, json={"error": {"message": "x"}})
+    return APIStatusError("u", response=response, body=response.json())
+
+
+def test_vendor_deepseek_includes_brand_and_doc() -> None:
+    """The DeepSeek rendering carries the brand and the DeepSeek docs host."""
+    rendered = format_llm_http_error_message(_status_402(), "deepseek")
+    assert rendered is not None
+    for fragment in ("DeepSeek", "api-docs.deepseek.com"):
+        assert fragment in rendered
+
+
+def test_vendor_zhipu_includes_brand_and_doc() -> None:
+    """The Zhipu rendering carries the brand and a bigmodel docs link."""
+    rendered = format_llm_http_error_message(_status_402(), "zhipu")
+    assert rendered is not None
+    for fragment in ("智谱", "bigmodel"):
+        assert fragment in rendered
+
+
+def test_vendor_none_is_transport_only() -> None:
+    """With no vendor the message is the neutral transport text, without a brand bracket."""
+    rendered = format_llm_http_error_message(_status_402(), None)
+    assert rendered is not None
+    assert "【" not in rendered
+    assert "402" in rendered
diff --git a/api/tests/test_openai_compatible_errors.py b/api/tests/test_openai_compatible_errors.py
new file mode 100644
index 0000000..ca4a323
--- /dev/null
+++ b/api/tests/test_openai_compatible_errors.py
@@ -0,0 +1,68 @@
+"""传输层 `llm_http_openai_chat_errors` 的中性错讯;兼容 re-export 仍经 openai_compatible_errors。"""
+
+import httpx
+import pytest
+from openai import APIStatusError
+
+from app.core.openai_compatible_errors import (
+ extract_openai_http_status,
+ format_openai_compatible_http_error_message,
+ should_log_openai_error_as_warning,
+)
+
+
+def _status_error(status: int, *, body: object | None = None) -> APIStatusError:
+    """Build an ``APIStatusError`` for ``status``; the response carries ``{}`` as JSON when ``body`` is ``None``."""
+    request = httpx.Request("POST", "https://api.deepseek.com/v1/chat/completions")
+    wire_json = {} if body is None else body
+    response = httpx.Response(status, request=request, json=wire_json)
+    return APIStatusError("upstream", response=response, body=body)
+
+
+def test_extract_status_from_api_status_error() -> None:
+    """The status code is read straight off an ``APIStatusError``."""
+    assert extract_openai_http_status(_status_error(429)) == 429
+
+
+def test_format_402_balance_chinese_message() -> None:
+    """A 402 renders the balance wording together with the status code."""
+    provider_body = {
+        "error": {"message": "Insufficient balance", "type": "insufficient_quota"}
+    }
+    rendered = format_openai_compatible_http_error_message(
+        _status_error(402, body=provider_body)
+    )
+    assert rendered is not None
+    for fragment in ("402", "余额"):
+        assert fragment in rendered
+
+
+def test_format_401_and_warning_flag() -> None:
+    """A 401 is flagged for warning-level logging and renders the API-key wording."""
+    err = _status_error(401, body={"error": {"message": "invalid api key"}})
+    assert should_log_openai_error_as_warning(err) is True
+    rendered = format_openai_compatible_http_error_message(err)
+    assert rendered is not None
+    for fragment in ("401", "密钥"):
+        assert fragment in rendered
+
+
+def test_format_503_server_busy() -> None:
+    """A 503 renders with its status code and is not downgraded to a warning."""
+    err = _status_error(503)
+    rendered = format_openai_compatible_http_error_message(err)
+    assert rendered is not None
+    assert "503" in rendered
+    assert should_log_openai_error_as_warning(err) is False
+
+
+def test_format_httpx_http_status_error() -> None:
+    """A bare ``httpx.HTTPStatusError`` (no OpenAI SDK wrapper) still maps to a message with its status."""
+    req = httpx.Request("GET", "https://api.deepseek.com/v1/models")
+    resp = httpx.Response(429, request=req)
+    # pytest.raises fails the test if raise_for_status() ever stops raising; the
+    # previous try/except form would have skipped every assertion and passed.
+    with pytest.raises(httpx.HTTPStatusError) as ei:
+        resp.raise_for_status()
+    m = format_openai_compatible_http_error_message(ei.value)
+    assert m is not None
+    assert "429" in m
+
+
+def test_unknown_status_418() -> None:
+    """A status without a dedicated entry still yields a message naming the code."""
+    rendered = format_openai_compatible_http_error_message(_status_error(418))
+    assert rendered is not None
+    assert "418" in rendered
diff --git a/app-eval-web/src/pages/MemoirPage.tsx b/app-eval-web/src/pages/MemoirPage.tsx
index 59d448f..00835af 100644
--- a/app-eval-web/src/pages/MemoirPage.tsx
+++ b/app-eval-web/src/pages/MemoirPage.tsx
@@ -358,8 +358,8 @@ export default function MemoirPage() {
style={{ minWidth: "min(100%, 220px)" }}
>
-
-
+
+
diff --git a/app-eval-web/src/pages/PlaygroundPage.tsx b/app-eval-web/src/pages/PlaygroundPage.tsx
index 011dc2b..6c8eca3 100644
--- a/app-eval-web/src/pages/PlaygroundPage.tsx
+++ b/app-eval-web/src/pages/PlaygroundPage.tsx
@@ -1147,7 +1147,7 @@ export default function PlaygroundPage() {
aria-label="评测评审模型供应商"
>
-
+