配置 SSOT(TOML + .env) 统一错误契约 Auth 与事务边界 Redis / Celery 可靠性:业务 Redis(DB/0)与 Celery broker/backend(DB/1)显式拆分;连接池、sync client 可观测性(OpenTelemetry + LGTM)
41 lines
1.1 KiB
Bash
Executable File
41 lines
1.1 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# Verify that the local Prometheus already exposes the OTel-exported metrics
# (requires the observability compose stack to be running).
#
# Environment:
#   PROMETHEUS_URL  Base URL of Prometheus (default: http://127.0.0.1:49090).
set -euo pipefail

PROM_URL="${PROMETHEUS_URL:-http://127.0.0.1:49090}"
# Drop one trailing slash so the endpoint below never contains "//api".
PROM_URL="${PROM_URL%/}"
QUERY_ENDPOINT="${PROM_URL}/api/v1/query"

#######################################
# Run one Prometheus instant query and report whether it returned any series.
# Globals:   QUERY_ENDPOINT (read) - URL of the Prometheus query endpoint
# Arguments: $1 - metric name (any PromQL expression is accepted)
# Outputs:   "OK: <name>" or "MISSING: <name>" on stdout; a one-line
#            diagnostic on stderr when the HTTP request itself fails
# Returns:   0 when at least one series came back, 1 otherwise
#######################################
check_metric() {
  local name="$1"
  local response
  local result

  # -G with --data-urlencode makes curl percent-encode the expression, so
  # names containing braces, quotes or spaces are sent intact instead of
  # being spliced raw into the URL.
  if ! response="$(curl -sf -G --data-urlencode "query=${name}" "${QUERY_ENDPOINT}")"; then
    echo "check_metric: query for ${name} failed at ${QUERY_ENDPOINT}" >&2
    echo "MISSING: ${name}"
    return 1
  fi

  # A body that is not JSON, or has no data.result list, counts as missing
  # rather than surfacing a Python traceback.
  result="$(python3 -c '
import json, sys

try:
    payload = json.load(sys.stdin)
except ValueError:
    payload = {}
data = payload.get("data") if isinstance(payload, dict) else None
series = data.get("result") if isinstance(data, dict) else None
print("ok" if series else "missing")
' <<<"${response}")" || result="missing"

  if [[ "${result}" != "ok" ]]; then
    echo "MISSING: ${name}"
    return 1
  fi
  echo "OK: ${name}"
}

echo "Checking Prometheus at ${PROM_URL} ..."

# Metrics that must each return at least one series for the check to pass.
required_metrics=(
  "llm_call_duration_milliseconds_bucket"
  "llm_call_total"
  "business_operation_duration_milliseconds_bucket"
  "http_server_request_duration_seconds_bucket"
)

# Keep going after a miss so every missing metric is listed; the `||` also
# stops errexit from aborting the script on the first failing check.
fail=0
for m in "${required_metrics[@]}"; do
  check_metric "${m}" || fail=1
done

if [[ "${fail}" -ne 0 ]]; then
  echo ""
  echo "Some metrics missing. Ensure config deploy.otel_enabled=true, observability compose running, API/worker up, and traffic generated."
  exit 1
fi
echo "All required metrics present."