diff --git a/api/.env.example b/api/.env.example index b1012eb..f5ba274 100644 --- a/api/.env.example +++ b/api/.env.example @@ -44,7 +44,10 @@ LOG_LEVEL=INFO # ============================================================================= DEEPSEEK_API_KEY=your_deepseek_api_key DEEPSEEK_BASE_URL=https://api.deepseek.com -DEEPSEEK_MODEL=deepseek-chat +# 官方新模型名见 https://api-docs.deepseek.com/zh-cn/quick_start/pricing +DEEPSEEK_MODEL=deepseek-v4-flash +# v4-flash 主链路非思考须显式关(对齐旧版 deepseek-chat;默认 false) +# DEEPSEEK_THINKING_ENABLED=false # ============================================================================= # Memory 向量(智谱 BigModel 国内 embedding-3;与 DeepSeek/OpenAI 用途分离) diff --git a/api/.env.production b/api/.env.production index 3022b1a..3ab9b2a 100644 --- a/api/.env.production +++ b/api/.env.production @@ -38,7 +38,7 @@ LOG_LEVEL=INFO # ============================================================================= DEEPSEEK_API_KEY=sk-09f17fb61c5a4299a3afc2a01de7af75 DEEPSEEK_BASE_URL=https://api.deepseek.com -DEEPSEEK_MODEL=deepseek-chat +DEEPSEEK_MODEL=deepseek-v4-flash # ============================================================================= # Memory 向量(智谱 BigModel 国内 embedding-3;与 DeepSeek/OpenAI 用途分离) diff --git a/api/.env.staging b/api/.env.staging index 5a8ab42..81dc5d2 100644 --- a/api/.env.staging +++ b/api/.env.staging @@ -31,7 +31,7 @@ LOG_LEVEL=INFO # ============================================================================= DEEPSEEK_API_KEY=your_deepseek_api_key DEEPSEEK_BASE_URL=https://api.deepseek.com -DEEPSEEK_MODEL=deepseek-chat +DEEPSEEK_MODEL=deepseek-v4-flash # ============================================================================= # Database diff --git a/api/app/adapters/llm/deepseek.py b/api/app/adapters/llm/deepseek.py index 4565611..2245b97 100644 --- a/api/app/adapters/llm/deepseek.py +++ b/api/app/adapters/llm/deepseek.py @@ -18,8 +18,10 @@ class DeepSeekLLMProvider: self, api_key: str, base_url: str = "https://api.deepseek.com", - model: str = "deepseek-chat", + model: str = "deepseek-v4-flash", temperature: float = 0.7, + *, + extra_body: dict | None = None, ): self._default_model = model self._default_temperature = temperature @@ -28,6 +30,8 @@ class DeepSeekLLMProvider: "model": model, "api_key": api_key, } + if extra_body: + kwargs["extra_body"] = extra_body if base_url: cleaned = base_url.rstrip("/") for suffix in ("/v1/chat/completions", "/v1"): diff --git a/api/app/adapters/llm/deepseek_eval_judge.py b/api/app/adapters/llm/deepseek_eval_judge.py new file mode 100644 index 0000000..f7ca992 --- /dev/null +++ b/api/app/adapters/llm/deepseek_eval_judge.py @@ -0,0 +1,85 @@ +"""DeepSeek 评测台评审:模型别名解析 + ChatOpenAI 装配。""" + +from __future__ import annotations + +from langchain_openai import ChatOpenAI + +from app.adapters.llm.openai_base_url import normalize_openai_compatible_base_url +from app.core.config import settings +from app.core.eval_judge_spec import EvalJudgeLlmSpec + + +def resolve_deepseek_eval_judge_model( + requested: str, +) -> tuple[str, dict | None, str | None]: + """将模型名(含旧别名)规范为 V4 的 model id、extra_body 与 reasoning_effort。 + + 官方:deepseek-chat / deepseek-reasoner 将弃用,分别对应 v4-flash 非思考 / 思考。 + """ + m = (requested or "").strip() + if m == "deepseek-chat": + return ( + "deepseek-v4-flash", + {"thinking": {"type": "disabled"}}, + None, + ) + if m in ( + "deepseek-reasoner", + "deepseek-r1", + ): + return ( + "deepseek-v4-flash", + {"thinking": {"type": "enabled"}}, + "high", + ) + if m == "deepseek-v4-pro": + return ("deepseek-v4-pro", None, "high") + if m in ("", "deepseek-v4-flash"): + if settings.eval_judge_deepseek_thinking_enabled: + return ( + "deepseek-v4-flash", + {"thinking": {"type": "enabled"}}, + "high", + ) + return ( + "deepseek-v4-flash", + {"thinking": {"type": "disabled"}}, + None, + ) + if "flash" in m.lower() or m.startswith("deepseek-v4"): + return (m, None, None) + return (m, None, None) + + +def build_deepseek_eval_judge_spec( + judge_model: str | None, +) -> EvalJudgeLlmSpec | None: + """密钥缺失时返回 None。""" + api_key = (settings.deepseek_api_key or settings.llm_api_key or "").strip() + if not api_key: + return None + want = (judge_model or "").strip() + base = normalize_openai_compatible_base_url( + settings.deepseek_base_url, + fallback="https://api.deepseek.com", + ) + default_m = (settings.eval_judge_deepseek_model or "deepseek-v4-flash").strip() + combined = want or default_m + model, extra, effort = resolve_deepseek_eval_judge_model(combined) + ctx = int(settings.eval_judge_deepseek_context_window_tokens) + llm_kw: dict = { + "api_key": api_key, + "base_url": base, + "model": model, + "temperature": settings.eval_judge_temperature, + } + if extra is not None: + llm_kw["extra_body"] = extra + if effort is not None: + llm_kw["reasoning_effort"] = effort + return EvalJudgeLlmSpec( + llm=ChatOpenAI(**llm_kw), + provider="deepseek", + resolved_model=model, + context_window_tokens=ctx, + ) diff --git a/api/app/adapters/llm/openai_base_url.py b/api/app/adapters/llm/openai_base_url.py new file mode 100644 index 0000000..878e072 --- /dev/null +++ b/api/app/adapters/llm/openai_base_url.py @@ -0,0 +1,9 @@ +"""OpenAI/Chat Completions 兼容基址规范化(多供应商共用)。""" + + +def normalize_openai_compatible_base_url(raw: str, *, fallback: str) -> str: + base = (raw or "").strip().rstrip("/") or fallback + for suffix in ("/v1/chat/completions", "/v1"): + if base.endswith(suffix): + base = base[: -len(suffix)] + return base diff --git a/api/app/adapters/llm/zhipu_eval_judge.py b/api/app/adapters/llm/zhipu_eval_judge.py new file mode 100644 index 0000000..756b164 --- /dev/null +++ b/api/app/adapters/llm/zhipu_eval_judge.py @@ -0,0 +1,37 @@ +"""智谱 GLM 评测台评审:OpenAI 兼容端点 + ChatOpenAI 装配。""" + +from __future__ import annotations + +from langchain_openai import ChatOpenAI + +from app.adapters.llm.openai_base_url import normalize_openai_compatible_base_url +from app.core.config import settings +from app.core.eval_judge_spec import EvalJudgeLlmSpec + + +def build_zhipu_eval_judge_spec( + judge_model: str | None, +) -> EvalJudgeLlmSpec | None: + """密钥缺失时返回 None。""" + api_key = (settings.eval_judge_api_key or settings.zhipu_api_key or "").strip() + if not api_key: + return None + want = (judge_model or "").strip() + base = normalize_openai_compatible_base_url( + settings.eval_judge_base_url, + fallback="https://open.bigmodel.cn/api/paas/v4", + ) + model = want or (settings.eval_judge_model or "glm-5") + ctx = int(settings.eval_judge_context_window_tokens) + llm_kw: dict = { + "api_key": api_key, + "base_url": base, + "model": model, + "temperature": settings.eval_judge_temperature, + } + return EvalJudgeLlmSpec( + llm=ChatOpenAI(**llm_kw), + provider="zhipu", + resolved_model=model, + context_window_tokens=ctx, + ) diff --git a/api/app/core/config.py b/api/app/core/config.py index ce3c7af..683970d 100644 --- a/api/app/core/config.py +++ b/api/app/core/config.py @@ -56,7 +56,10 @@ class Settings(BaseSettings): # ── LLM / DeepSeek ─────────────────────────────────────── deepseek_api_key: str = "" deepseek_base_url: str = "https://api.deepseek.com" - deepseek_model: str = "deepseek-chat" + # 官方新模型名(V4-Flash);与弃用名 deepseek-chat 对齐为「非思考」需另设 deepseek_thinking_enabled + deepseek_model: str = "deepseek-v4-flash" + # V4-Flash 在官方 API 中 thinking 默认为 enabled;主链路为对齐旧版 deepseek-chat 默认关闭 + deepseek_thinking_enabled: bool = False llm_api_key: str = "" llm_base_url: str = "" llm_model: str = "" @@ -399,8 +402,10 @@ class Settings(BaseSettings): eval_judge_base_url: str = "https://open.bigmodel.cn/api/paas/v4" eval_judge_model: str = "glm-5" eval_judge_temperature: float = 0.3 - # 评测评审:DeepSeek(OpenAI 兼容);默认 deepseek-reasoner 即官网 R1 - eval_judge_deepseek_model: str = "deepseek-reasoner" + # 评测评审:DeepSeek(OpenAI 兼容);默认 V4-Flash 思考模式(对齐旧版 deepseek-reasoner) + eval_judge_deepseek_model: str = "deepseek-v4-flash" + # 当仅指定 deepseek-v4-flash、未用弃用名区分时,是否走思考模式(与 eval_judge_deepseek_model 联用) + eval_judge_deepseek_thinking_enabled: bool = True eval_judge_deepseek_context_window_tokens: int = Field( default=64_000, ge=4096, diff --git a/api/app/core/dependencies.py b/api/app/core/dependencies.py index dac9e7c..adb4f27 100644 --- a/api/app/core/dependencies.py +++ b/api/app/core/dependencies.py @@ -5,11 +5,8 @@ - Port DI factory:get_sms_sender / get_llm_provider / get_tts_provider / ... """ -from dataclasses import dataclass from functools import lru_cache -from typing import Any, Literal, Optional - -EvalJudgeProvider = Literal["zhipu", "deepseek"] +from typing import Optional from fastapi import Depends, HTTPException, status from fastapi.security import OAuth2PasswordBearer @@ -17,6 +14,7 @@ from sqlalchemy.ext.asyncio import AsyncSession from app.core.config import settings from app.core.db import get_async_db +from app.core.eval_judge_spec import EvalJudgeLlmSpec, EvalJudgeProvider from app.core.security import verify_token from app.ports.asr import ASRProvider from app.ports.embedding import EmbeddingProvider @@ -52,12 +50,17 @@ def get_llm_provider() -> LLMProvider: api_key = settings.deepseek_api_key or settings.llm_api_key base_url = settings.deepseek_base_url or settings.llm_base_url - model = settings.deepseek_model or settings.llm_model or "deepseek-chat" + model = settings.deepseek_model or settings.llm_model or "deepseek-v4-flash" return DeepSeekLLMProvider( api_key=api_key, base_url=base_url, model=model, temperature=settings.llm_temperature, + extra_body={ + "thinking": { + "type": "enabled" if settings.deepseek_thinking_enabled else "disabled" + } + }, ) @@ -76,6 +79,11 @@ def get_llm_provider_fast() -> LLMProvider: base_url=base_url, model=fast, temperature=settings.llm_temperature, + extra_body={ + "thinking": { + "type": "enabled" if settings.deepseek_thinking_enabled else "disabled" + } + }, ) @@ -188,74 +196,17 @@ async def get_current_user( return user -def _normalize_openai_compatible_base_url(raw: str, *, fallback: str) -> str: - base = (raw or "").strip().rstrip("/") or fallback - for suffix in ("/v1/chat/completions", "/v1"): - if base.endswith(suffix): - base = base[: -len(suffix)] - return base - - -@dataclass(slots=True, frozen=True) -class EvalJudgeLlmSpec: - """评测台评审:OpenAI 兼容 Chat Completions 的一条装配结果。""" - - llm: Any | None - provider: EvalJudgeProvider - resolved_model: str - context_window_tokens: int - - def build_eval_judge_llm_spec( provider: EvalJudgeProvider = "zhipu", judge_model: str | None = None, ) -> EvalJudgeLlmSpec | None: """按供应商装配 ChatOpenAI;密钥缺失时返回 None(llm 为 None)。""" - from langchain_openai import ChatOpenAI + from app.adapters.llm.deepseek_eval_judge import build_deepseek_eval_judge_spec + from app.adapters.llm.zhipu_eval_judge import build_zhipu_eval_judge_spec - want = (judge_model or "").strip() if provider == "deepseek": - api_key = (settings.deepseek_api_key or settings.llm_api_key or "").strip() - if not api_key: - return None - base = _normalize_openai_compatible_base_url( - settings.deepseek_base_url, - fallback="https://api.deepseek.com", - ) - model = want or (settings.eval_judge_deepseek_model or "deepseek-reasoner") - ctx = int(settings.eval_judge_deepseek_context_window_tokens) - return EvalJudgeLlmSpec( - llm=ChatOpenAI( - api_key=api_key, - base_url=base, - model=model, - temperature=settings.eval_judge_temperature, - ), - provider="deepseek", - resolved_model=model, - context_window_tokens=ctx, - ) - - api_key = (settings.eval_judge_api_key or settings.zhipu_api_key or "").strip() - if not api_key: - return None - base = _normalize_openai_compatible_base_url( - settings.eval_judge_base_url, - fallback="https://open.bigmodel.cn/api/paas/v4", - ) - model = want or (settings.eval_judge_model or "glm-5") - ctx = int(settings.eval_judge_context_window_tokens) - return EvalJudgeLlmSpec( - llm=ChatOpenAI( - api_key=api_key, - base_url=base, - model=model, - temperature=settings.eval_judge_temperature, - ), - provider="zhipu", - resolved_model=model, - context_window_tokens=ctx, - ) + return build_deepseek_eval_judge_spec(judge_model) + return build_zhipu_eval_judge_spec(judge_model) def get_eval_judge_langchain_llm(): diff --git a/api/app/core/eval_judge_spec.py b/api/app/core/eval_judge_spec.py new file mode 100644 index 0000000..45190bd --- /dev/null +++ b/api/app/core/eval_judge_spec.py @@ -0,0 +1,18 @@ +"""评测台评审 LLM 装配契约(与 OpenAI/Chat Completions 兼容的供应商解耦于适配器层)。""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any, Literal + +EvalJudgeProvider = Literal["zhipu", "deepseek"] + + +@dataclass(slots=True, frozen=True) +class EvalJudgeLlmSpec: + """智谱 / DeepSeek 等:装配结果供 dependencies 与评测服务使用。""" + + llm: Any | None + provider: EvalJudgeProvider + resolved_model: str + context_window_tokens: int diff --git a/api/app/core/llm_call.py b/api/app/core/llm_call.py index 8116a05..4b4ae7b 100644 --- a/api/app/core/llm_call.py +++ b/api/app/core/llm_call.py @@ -28,6 +28,8 @@ from app.core.langchain_llm import ( bind_json_object_mode, ensure_json_object_prompt_has_json_keyword, ) +from app.core.llm_errors import LlmHttpErrorVendor, format_llm_http_error_message +from app.core.llm_http_openai_chat_errors import should_log_openai_error_as_warning from app.core.logging import get_logger logger = get_logger(__name__) @@ -100,9 +102,14 @@ _LLM_MSG_CONTENT_FILTER = ( ) -def _format_llm_invoke_error_message(exc: BaseException) -> str: +def _format_llm_invoke_error_message( + exc: BaseException, *, http_error_vendor: LlmHttpErrorVendor = "deepseek" +) -> str: if _is_content_filter_refusal(exc): return _LLM_MSG_CONTENT_FILTER + friendly = format_llm_http_error_message(exc, http_error_vendor) + if friendly is not None: + return friendly return str(exc) @@ -116,6 +123,11 @@ def _log_invoke_failure(*, agent: str, exc: BaseException, sync: bool) -> None: ) return tag = "llm_json_call" if sync else "allm_json_call" + if should_log_openai_error_as_warning(exc): + logger.bind(agent=agent).warning( + "{} provider http error: {}", tag, str(exc)[:800] + ) + return logger.bind(agent=agent).exception("{} invoke error: {}", tag, exc) @@ -273,6 +285,7 @@ def llm_json_call( agent: str, fallback_factory: Callable[[], T] | None = None, retry_empty: bool = True, + http_error_vendor: LlmHttpErrorVendor = "deepseek", ) -> T: """同步:invoke → 解析 JSON → `schema.model_validate`;失败时 `fallback_factory` 或 `LLMCallError`。""" t0 = time.perf_counter() @@ -353,7 +366,7 @@ def llm_json_call( return fallback_factory() raise LLMCallError( "invoke", - _format_llm_invoke_error_message(e), + _format_llm_invoke_error_message(e, http_error_vendor=http_error_vendor), raw_content=raw[:4096] if raw else None, ) from e @@ -367,6 +380,7 @@ async def allm_json_call( agent: str, fallback_factory: Callable[[], T] | None = None, retry_empty: bool = True, + http_error_vendor: LlmHttpErrorVendor = "deepseek", ) -> T: """异步版,语义与 `llm_json_call` 一致。""" t0 = time.perf_counter() @@ -447,7 +461,7 @@ async def allm_json_call( return fallback_factory() raise LLMCallError( "invoke", - _format_llm_invoke_error_message(e), + _format_llm_invoke_error_message(e, http_error_vendor=http_error_vendor), raw_content=raw[:4096] if raw else None, ) from e @@ -455,6 +469,7 @@ async def allm_json_call( __all__ = [ "LLMCallError", "LLMCallMeta", + "LlmHttpErrorVendor", "allm_json_call", "llm_json_call", ] diff --git a/api/app/core/llm_errors/__init__.py b/api/app/core/llm_errors/__init__.py new file mode 100644 index 0000000..4f53ad1 --- /dev/null +++ b/api/app/core/llm_errors/__init__.py @@ -0,0 +1,32 @@ +"""大模型各供应商的 HTTP 错讯文案与文档链接。""" + +from __future__ import annotations + +from app.core.eval_judge_spec import EvalJudgeProvider +from app.core.llm_http_openai_chat_errors import format_openai_shape_http_error_message + +LlmHttpErrorVendor = EvalJudgeProvider | None + +__all__ = ["LlmHttpErrorVendor", "format_llm_http_error_message"] + + +def format_llm_http_error_message( + exc: BaseException, + vendor: LlmHttpErrorVendor, +) -> str | None: + """ + 先经 OpenAI-Chat-Completions 传输层解析,再按供应商加品牌与官方文档链。 + ``vendor is None`` 时仅返回中性传输层说明。 + """ + base = format_openai_shape_http_error_message(exc) + if base is None: + return None + if vendor is None: + return base + if vendor == "deepseek": + from app.core.llm_errors import deepseek as deepseek_meta + + return f"【{deepseek_meta.BRAND}】{base}({deepseek_meta.ERROR_CODES_DOC_URL})" + from app.core.llm_errors import zhipu as zhipu_meta + + return f"【{zhipu_meta.BRAND}】{base}({zhipu_meta.ERROR_CODES_DOC_URL})" diff --git a/api/app/core/llm_errors/deepseek.py b/api/app/core/llm_errors/deepseek.py new file mode 100644 index 0000000..4e58953 --- /dev/null +++ b/api/app/core/llm_errors/deepseek.py @@ -0,0 +1,4 @@ +"""DeepSeek API 错讯元数据(与传输层解耦)。""" + +BRAND = "DeepSeek" +ERROR_CODES_DOC_URL = "https://api-docs.deepseek.com/zh-cn/quick_start/error_codes" diff --git a/api/app/core/llm_errors/zhipu.py b/api/app/core/llm_errors/zhipu.py new file mode 100644 index 0000000..cd32d3c --- /dev/null +++ b/api/app/core/llm_errors/zhipu.py @@ -0,0 +1,5 @@ +"""智谱 BigModel 开放平台错讯元数据(与传输层解耦)。""" + +BRAND = "智谱 GLM" +# 开放平台「HTTP 状态码 / 鉴权与计费」等说明 +ERROR_CODES_DOC_URL = "https://docs.bigmodel.cn/cn/codinggude/notice" diff --git a/api/app/core/llm_http_openai_chat_errors.py b/api/app/core/llm_http_openai_chat_errors.py new file mode 100644 index 0000000..28a1fb9 --- /dev/null +++ b/api/app/core/llm_http_openai_chat_errors.py @@ -0,0 +1,108 @@ +""" +OpenAI Chat Completions 兼容 API 的 HTTP 传输层错讯(不绑定单一大模型厂商)。 + +解析 ``APIStatusError`` / ``httpx`` 状态、OpenAI 形 JSON body 中的 error.message。 +""" + +from __future__ import annotations + +import json +from collections.abc import Iterator + +import httpx +from openai import APIStatusError + +_CLIENT_RECOVERABLE_STATUSES = frozenset({400, 401, 402, 422, 429}) + + +def _iter_exception_chain(exc: BaseException) -> Iterator[BaseException]: + seen: set[int] = set() + cur: BaseException | None = exc + while cur is not None and id(cur) not in seen: + yield cur + seen.add(id(cur)) + cur = cur.__cause__ or cur.__context__ + + +def _detail_from_error_body(body: object | None, *, max_len: int = 400) -> str: + if body is None: + return "" + if isinstance(body, dict): + err = body.get("error") + if isinstance(err, dict): + msg = err.get("message") + if isinstance(msg, str) and msg.strip(): + return msg.strip()[:max_len] + msg2 = body.get("message") + if isinstance(msg2, str) and msg2.strip(): + return msg2.strip()[:max_len] + if isinstance(body, str) and body.strip(): + s = body.strip() + if len(s) > max_len: + return s[:max_len] + try: + return _detail_from_error_body(json.loads(s), max_len=max_len) + except json.JSONDecodeError: + return s[:max_len] + return "" + + +def _neutral_message_for_status(status: int) -> str: + m: dict[int, str] = { + 400: "请求体格式或字段不符合接口要求,请按返回说明修改(HTTP 400)。", + 401: "API 密钥错误或未通过认证,请检查密钥与 BASE_URL 配置(HTTP 401)。", + 402: "账户余额不足,请确认余额后充值再试(HTTP 402)。", + 403: "权限不足或密钥无权访问该资源(HTTP 403)。", + 404: "请求路径或资源不存在,请检查 BASE_URL/路由(HTTP 404)。", + 409: "与当前资源状态冲突,请重试或联系管理员(HTTP 409)。", + 422: "请求参数不合法,请按错误信息调整参数(HTTP 422)。", + 429: "请求速率达到上限(如 TPM/RPM),请降低频率或稍后再试(HTTP 429)。", + 500: "大模型服务内部故障,请等待后重试;若持续出现请联系服务商(HTTP 500)。", + 502: "网关在从上游大模型取数时失败,请稍后重试(HTTP 502)。", + 503: "大模型服务繁忙或暂不可用,请稍后重试(HTTP 503)。", + 504: "大模型或网关响应超时,请稍后重试(HTTP 504)。", + } + if status in m: + return m[status] + if 400 <= status < 500: + return f"大模型服务返回客户端错误(HTTP {status}),请根据返回信息修正后重试。" + if 500 <= status < 600: + return f"大模型服务返回服务端错误(HTTP {status}),请稍后重试或联系管理员。" + return f"大模型服务返回错误(HTTP {status})。" + + +def extract_openai_http_status(exc: BaseException) -> int | None: + """从异常链中取出 OpenAI SDK / httpx 的 HTTP 状态码,若无则 None。""" + for e in _iter_exception_chain(exc): + if isinstance(e, APIStatusError): + return int(e.status_code) + if isinstance(e, httpx.HTTPStatusError) and e.response is not None: + return int(e.response.status_code) + return None + + +def should_log_openai_error_as_warning(exc: BaseException) -> bool: + """4xx/429/402 等可预期客户端问题,用 warning 少打 ERROR 堆栈。""" + s = extract_openai_http_status(exc) + return s in _CLIENT_RECOVERABLE_STATUSES if s is not None else False + + +def format_openai_shape_http_error_message(exc: BaseException) -> str | None: + """可识别为 OpenAI/httpx 的 HTTP 错误时返回中性中文说明;否则 None。""" + for e in _iter_exception_chain(exc): + if isinstance(e, APIStatusError): + st = int(e.status_code) + base = _neutral_message_for_status(st) + detail = _detail_from_error_body(e.body) or (e.message or "").strip() + if ( + detail + and len(detail) <= 500 + and detail not in base + and "HTTP" not in detail[:80] + ): + return f"{base} 服务商说明:{detail}" + return base + if isinstance(e, httpx.HTTPStatusError) and e.response is not None: + st = int(e.response.status_code) + return _neutral_message_for_status(st) + return None diff --git a/api/app/core/openai_compatible_errors.py b/api/app/core/openai_compatible_errors.py new file mode 100644 index 0000000..001a0f4 --- /dev/null +++ b/api/app/core/openai_compatible_errors.py @@ -0,0 +1,15 @@ +"""兼容旧 import:实现已迁至 `llm_http_openai_chat_errors` / `llm_errors`。""" + +from app.core.llm_http_openai_chat_errors import ( + extract_openai_http_status, + should_log_openai_error_as_warning, +) +from app.core.llm_http_openai_chat_errors import ( + format_openai_shape_http_error_message as format_openai_compatible_http_error_message, +) + +__all__ = [ + "extract_openai_http_status", + "format_openai_compatible_http_error_message", + "should_log_openai_error_as_warning", +] diff --git a/api/app/features/evaluation/judge_manual_service.py b/api/app/features/evaluation/judge_manual_service.py index 01c0b43..9f02da9 100644 --- a/api/app/features/evaluation/judge_manual_service.py +++ b/api/app/features/evaluation/judge_manual_service.py @@ -71,6 +71,7 @@ def _make_eval_judge( EvalJudgeService( spec.llm, context_window_tokens=spec.context_window_tokens, + http_error_vendor=spec.provider, ), spec.resolved_model, ) diff --git a/api/app/features/evaluation/judge_service.py b/api/app/features/evaluation/judge_service.py index 2b93758..f8d7342 100644 --- a/api/app/features/evaluation/judge_service.py +++ b/api/app/features/evaluation/judge_service.py @@ -7,6 +7,7 @@ from dataclasses import dataclass from typing import Any, Generic, TypeVar from app.core.config import settings +from app.core.eval_judge_spec import EvalJudgeProvider from app.core.llm_call import LLMCallError, allm_json_call from app.core.logging import get_logger from app.features.evaluation.judge_schemas import ( @@ -284,8 +285,10 @@ class EvalJudgeService: judge_llm: Any | None, *, context_window_tokens: int | None = None, + http_error_vendor: EvalJudgeProvider = "deepseek", ) -> None: self._llm = judge_llm + self._http_error_vendor: EvalJudgeProvider = http_error_vendor self._ctx_tokens = int( context_window_tokens or settings.eval_judge_context_window_tokens ) @@ -329,6 +332,7 @@ class EvalJudgeService: TurnJudgeOutput, max_tokens=_TURN_MAX, agent="EvalJudgeService.judge_turn", + http_error_vendor=self._http_error_vendor, ) except LLMCallError as e: logger.warning("turn judge failed: {}", e) @@ -354,6 +358,7 @@ class EvalJudgeService: ConversationJudgeOutput, max_tokens=_CONV_JUDGE_JSON_MAX, agent="EvalJudgeService.judge_conversation", + http_error_vendor=self._http_error_vendor, ) return JudgeCallResult(output=out) except LLMCallError as e: @@ -496,6 +501,7 @@ class EvalJudgeService: 512, int(settings.eval_judge_memoir_completion_max_tokens) ), agent="EvalJudgeService.judge_memoir", + http_error_vendor=self._http_error_vendor, ) return JudgeCallResult(output=out) except LLMCallError as e: diff --git a/api/tests/test_eval_judge_llm_spec.py b/api/tests/test_eval_judge_llm_spec.py index 6b06daf..f48c4c9 100644 --- a/api/tests/test_eval_judge_llm_spec.py +++ b/api/tests/test_eval_judge_llm_spec.py @@ -51,7 +51,8 @@ def test_build_eval_judge_deepseek_context_budget( spec = build_eval_judge_llm_spec("deepseek", None) assert spec is not None assert spec.provider == "deepseek" - assert spec.resolved_model == "deepseek-reasoner" + # 旧名 deepseek-reasoner 规范为 v4-flash 思考模式 + assert spec.resolved_model == "deepseek-v4-flash" assert spec.context_window_tokens == 64_000 n = eval_judge_conversation_transcript_max_chars_for_context(64_000) glm_n = eval_judge_conversation_transcript_max_chars_for_context(200_000) diff --git a/api/tests/test_llm_json_call.py b/api/tests/test_llm_json_call.py index ae8c729..59af178 100644 --- a/api/tests/test_llm_json_call.py +++ b/api/tests/test_llm_json_call.py @@ -2,8 +2,10 @@ from __future__ import annotations +import httpx import pytest from langchain_core.messages import AIMessage +from openai import APIStatusError from pydantic import BaseModel, Field from app.core.langchain_llm import ensure_json_object_prompt_has_json_keyword @@ -128,6 +130,41 @@ def test_llm_json_call_no_fallback_raises() -> None: assert ei.value.kind == "validation" +def _api_status_402() -> APIStatusError: + req = httpx.Request("POST", "https://api.deepseek.com/v1/chat/completions") + resp = httpx.Response( + 402, request=req, json={"error": {"message": "Insufficient balance"}} + ) + return APIStatusError("Payment required", response=resp, body=resp.json()) + + +class _LlmInvokeRaises: + def bind(self, **_kwargs: object): + return self + + def invoke(self, _prompt: str) -> object: + raise _api_status_402() + + async def ainvoke(self, _prompt: str) -> object: + return self.invoke(_prompt) + + +def test_llm_json_call_openai_status_maps_to_friendly_chinese() -> None: + with pytest.raises(LLMCallError) as ei: + llm_json_call( + _LlmInvokeRaises(), + "p", + _SmallOut, + max_tokens=8, + agent="t", + ) + assert ei.value.kind == "invoke" + s = str(ei.value) + assert "402" in s + assert "余额" in s + assert "DeepSeek" in s + + @pytest.mark.asyncio async def test_allm_json_call_parity_with_sync() -> None: llm = _SyncFakeLlm(['{"answer": "async", "score": 7}']) diff --git a/api/tests/test_llm_vendor_errors.py b/api/tests/test_llm_vendor_errors.py new file mode 100644 index 0000000..3aec62b --- /dev/null +++ b/api/tests/test_llm_vendor_errors.py @@ -0,0 +1,36 @@ +"""供应商层 HTTP 错讯:DeepSeek / 智谱 品牌与文档链。""" + +import httpx +from openai import APIStatusError + +from app.core.llm_errors import format_llm_http_error_message + + +def _status_402() -> APIStatusError: + req = httpx.Request("POST", "https://x/v1/chat/completions") + resp = httpx.Response(402, request=req, json={"error": {"message": "x"}}) + return APIStatusError("u", response=resp, body=resp.json()) + + +def test_vendor_deepseek_includes_brand_and_doc() -> None: + e = _status_402() + m = format_llm_http_error_message(e, "deepseek") + assert m is not None + assert "DeepSeek" in m + assert "api-docs.deepseek.com" in m + + +def test_vendor_zhipu_includes_brand_and_doc() -> None: + e = _status_402() + m = format_llm_http_error_message(e, "zhipu") + assert m is not None + assert "智谱" in m + assert "bigmodel" in m + + +def test_vendor_none_is_transport_only() -> None: + e = _status_402() + m = format_llm_http_error_message(e, None) + assert m is not None + assert "【" not in m + assert "402" in m diff --git a/api/tests/test_openai_compatible_errors.py b/api/tests/test_openai_compatible_errors.py new file mode 100644 index 0000000..ca4a323 --- /dev/null +++ b/api/tests/test_openai_compatible_errors.py @@ -0,0 +1,68 @@ +"""传输层 `llm_http_openai_chat_errors` 的中性错讯;兼容 re-export 仍经 openai_compatible_errors。""" + +import httpx +import pytest +from openai import APIStatusError + +from app.core.openai_compatible_errors import ( + extract_openai_http_status, + format_openai_compatible_http_error_message, + should_log_openai_error_as_warning, +) + + +def _status_error(status: int, *, body: object | None = None) -> APIStatusError: + req = httpx.Request("POST", "https://api.deepseek.com/v1/chat/completions") + resp = httpx.Response(status, request=req, json=body if body is not None else {}) + return APIStatusError("upstream", response=resp, body=body) + + +def test_extract_status_from_api_status_error() -> None: + e = _status_error(429) + assert extract_openai_http_status(e) == 429 + + +def test_format_402_balance_chinese_message() -> None: + e = _status_error( + 402, + body={"error": {"message": "Insufficient balance", "type": "insufficient_quota"}}, + ) + msg = format_openai_compatible_http_error_message(e) + assert msg is not None + assert "402" in msg + assert "余额" in msg + + +def test_format_401_and_warning_flag() -> None: + e = _status_error(401, body={"error": {"message": "invalid api key"}}) + assert should_log_openai_error_as_warning(e) is True + m = format_openai_compatible_http_error_message(e) + assert m is not None + assert "401" in m + assert "密钥" in m + + +def test_format_503_server_busy() -> None: + e = _status_error(503) + m = format_openai_compatible_http_error_message(e) + assert m is not None + assert "503" in m + assert should_log_openai_error_as_warning(e) is False + + +def test_format_httpx_http_status_error() -> None: + req = httpx.Request("GET", "https://api.deepseek.com/v1/models") + resp = httpx.Response(429, request=req) + try: + resp.raise_for_status() + except httpx.HTTPStatusError as e: + m = format_openai_compatible_http_error_message(e) + assert m is not None + assert "429" in m + + +def test_unknown_status_418() -> None: + e = _status_error(418) + m = format_openai_compatible_http_error_message(e) + assert m is not None + assert "418" in m diff --git a/app-eval-web/src/pages/MemoirPage.tsx b/app-eval-web/src/pages/MemoirPage.tsx index 59d448f..00835af 100644 --- a/app-eval-web/src/pages/MemoirPage.tsx +++ b/app-eval-web/src/pages/MemoirPage.tsx @@ -358,8 +358,8 @@ export default function MemoirPage() { style={{ minWidth: "min(100%, 220px)" }} > - - + + diff --git a/app-eval-web/src/pages/PlaygroundPage.tsx b/app-eval-web/src/pages/PlaygroundPage.tsx index 011dc2b..6c8eca3 100644 --- a/app-eval-web/src/pages/PlaygroundPage.tsx +++ b/app-eval-web/src/pages/PlaygroundPage.tsx @@ -1147,7 +1147,7 @@ export default function PlaygroundPage() { aria-label="评测评审模型供应商" > - +