refactor(api): TOML 配置 SSOT、统一错误契约、Auth/事务加固与可观测性 (#33)

配置 SSOT(TOML + .env)
统一错误契约
Auth 与事务边界
Redis / Celery 可靠性:业务 Redis(DB/0)与 Celery broker/backend(DB/1)显式拆分;连接池、sync client
可观测性(OpenTelemetry + LGTM)
This commit is contained in:
Sully
2026-05-22 13:44:50 +08:00
committed by GitHub
parent f09ae248f9
commit 53e0065e3e
298 changed files with 15247 additions and 4344 deletions

View File

@@ -21,6 +21,8 @@ from app.features.evaluation.rubrics.conversation_v1 import (
TURN_JUDGE_INSTRUCTIONS,
)
from app.features.evaluation.rubrics.memoir_v1 import MEMOIR_JUDGE_INSTRUCTIONS
from app.features.evaluation.constants import eval_cfg
from app.features.memoir.constants import memoir
logger = get_logger(__name__)
@@ -38,23 +40,23 @@ def _eval_judge_prompt_char_pool_for_context(context_window_tokens: int) -> int:
"""整段请求的字符预算(由评审模型 context window 推导,保守)。"""
toks = (
int(context_window_tokens)
- settings.eval_judge_completion_reserve_tokens
- settings.eval_judge_prompt_budget_safety_tokens
- eval_cfg.judge_completion_reserve_tokens
- eval_cfg.judge_prompt_budget_safety_tokens
)
toks = max(1, toks)
return max(1, int(toks / settings.eval_judge_approx_tokens_per_char))
return max(1, int(toks / eval_cfg.judge_approx_tokens_per_char))
def _eval_judge_prompt_char_pool() -> int:
return _eval_judge_prompt_char_pool_for_context(
settings.eval_judge_context_window_tokens
eval_cfg.judge_context_window_tokens
)
def eval_judge_conversation_transcript_max_chars() -> int:
"""整段对话评审【完整对话】transcript 最大字符数(默认 GLM 上下文)。"""
if settings.eval_judge_max_transcript_chars > 0:
return settings.eval_judge_max_transcript_chars
if eval_cfg.judge_max_transcript_chars > 0:
return eval_cfg.judge_max_transcript_chars
overhead = len(CONV_JUDGE_INSTRUCTIONS) + len(_CONV_HEADER) + 32
return max(1, _eval_judge_prompt_char_pool() - overhead)
@@ -62,8 +64,8 @@ def eval_judge_conversation_transcript_max_chars() -> int:
def eval_judge_conversation_transcript_max_chars_for_context(
context_window_tokens: int,
) -> int:
if settings.eval_judge_max_transcript_chars > 0:
return settings.eval_judge_max_transcript_chars
if eval_cfg.judge_max_transcript_chars > 0:
return eval_cfg.judge_max_transcript_chars
overhead = len(CONV_JUDGE_INSTRUCTIONS) + len(_CONV_HEADER) + 32
pool = _eval_judge_prompt_char_pool_for_context(context_window_tokens)
return max(1, pool - overhead)
@@ -71,8 +73,8 @@ def eval_judge_conversation_transcript_max_chars_for_context(
def eval_judge_turn_prior_transcript_max_chars() -> int:
"""逐轮评审:截至上一轮的 transcript 节选上限(默认 GLM 上下文)。"""
if settings.eval_judge_max_transcript_chars > 0:
return settings.eval_judge_max_transcript_chars
if eval_cfg.judge_max_transcript_chars > 0:
return eval_cfg.judge_max_transcript_chars
static = len(TURN_JUDGE_INSTRUCTIONS) + 8800
return max(1, _eval_judge_prompt_char_pool() - static)
@@ -80,17 +82,17 @@ def eval_judge_turn_prior_transcript_max_chars() -> int:
def eval_judge_turn_prior_transcript_max_chars_for_context(
context_window_tokens: int,
) -> int:
if settings.eval_judge_max_transcript_chars > 0:
return settings.eval_judge_max_transcript_chars
if eval_cfg.judge_max_transcript_chars > 0:
return eval_cfg.judge_max_transcript_chars
static = len(TURN_JUDGE_INSTRUCTIONS) + 8800
pool = _eval_judge_prompt_char_pool_for_context(context_window_tokens)
return max(1, pool - static)
def eval_judge_compare_transcript_each_max_chars() -> int:
"""单侧对称参考上限(默认与 settings.eval_judge_context_window_tokens 一致)。"""
"""单侧对称参考上限(默认与 eval_cfg.judge_context_window_tokens 一致)。"""
return eval_judge_compare_transcript_each_max_chars_for_context(
settings.eval_judge_context_window_tokens
eval_cfg.judge_context_window_tokens
)
@@ -98,18 +100,18 @@ def eval_judge_compare_transcript_pair_total_budget_for_context(
context_window_tokens: int,
) -> int:
"""A/B 同 prompt 时,两份 transcript 合计最大字符数(已扣对比模板与双份 JSON 等开销)。"""
if settings.eval_judge_max_compare_transcript_chars_each > 0:
return max(1, 2 * int(settings.eval_judge_max_compare_transcript_chars_each))
if eval_cfg.judge_max_compare_transcript_chars_each > 0:
return max(1, 2 * int(eval_cfg.judge_max_compare_transcript_chars_each))
pool = _eval_judge_prompt_char_pool_for_context(context_window_tokens)
return max(1, pool - int(settings.eval_judge_compare_prompt_overhead_chars))
return max(1, pool - int(eval_cfg.judge_compare_prompt_overhead_chars))
def eval_judge_compare_transcript_each_max_chars_for_context(
context_window_tokens: int,
) -> int:
"""单侧对称上限的参考值auto 模式下约为合计预算的一半;供兼容与展示)。"""
if settings.eval_judge_max_compare_transcript_chars_each > 0:
return int(settings.eval_judge_max_compare_transcript_chars_each)
if eval_cfg.judge_max_compare_transcript_chars_each > 0:
return int(eval_cfg.judge_max_compare_transcript_chars_each)
total = eval_judge_compare_transcript_pair_total_budget_for_context(
context_window_tokens
)
@@ -120,7 +122,7 @@ def eval_judge_compare_bundle_caps(
context_window_tokens: int,
) -> tuple[int, int | None]:
"""返回 (compare_cap_total, per_side_cap|None),供 Playground 摘要与流式对比共用。"""
per = int(settings.eval_judge_max_compare_transcript_chars_each or 0)
per = int(eval_cfg.judge_max_compare_transcript_chars_each or 0)
if per > 0:
return max(1, 2 * per), per
return eval_judge_compare_transcript_pair_total_budget_for_context(
@@ -249,8 +251,8 @@ def _build_memoir_judge_prompt(
"若证据不足,须保守打分并写 `insufficient_evidence`。",
"",
]
ev_cap = max(1, int(settings.eval_judge_memoir_evidence_max_chars))
body_cap = max(1, int(settings.eval_judge_memoir_body_max_chars))
ev_cap = max(1, int(eval_cfg.judge_memoir_evidence_max_chars))
body_cap = max(1, int(eval_cfg.judge_memoir_body_max_chars))
if notes:
sections.extend(["【评审说明】", notes[:1200], ""])
if source:
@@ -290,7 +292,7 @@ class EvalJudgeService:
self._llm = judge_llm
self._http_error_vendor: EvalJudgeProvider = http_error_vendor
self._ctx_tokens = int(
context_window_tokens or settings.eval_judge_context_window_tokens
context_window_tokens or eval_cfg.judge_context_window_tokens
)
def _conv_transcript_cap(self) -> int:
@@ -382,7 +384,7 @@ class EvalJudgeService:
) -> AsyncIterator[str]:
"""流式输出中文对比与建议(非 JSON"""
if not self._llm:
yield "[错误] 未配置评审模型 API Key智谱eval_judge_api_key / zhipu_api_keyDeepSeekdeepseek_api_key"
yield "[错误] 未配置评审模型 API Key智谱ZHIPU_API_KEYDeepSeekDEEPSEEK_API_KEY"
return
cap_total, per_side = eval_judge_compare_bundle_caps(self._ctx_tokens)
cap_single = self._conv_transcript_cap()
@@ -507,7 +509,7 @@ class EvalJudgeService:
prompt,
MemoirJudgeOutput,
max_tokens=max(
512, int(settings.eval_judge_memoir_completion_max_tokens)
512, int(eval_cfg.judge_memoir_completion_max_tokens)
),
agent="EvalJudgeService.judge_memoir",
http_error_vendor=self._http_error_vendor,