feat: OpenTelemetry LGTM observability, dev tooling, and memoir UX fixes (#31)
* add staging ios app build script * feat(api): add OpenTelemetry LGTM stack for local observability Wire OTel traces, metrics, and logs through a collector to Tempo, Prometheus, and Loki, with custom LLM instrumentation, dev compose overlay, Grafana provisioning, env templates, and development.sh auto-start. Co-authored-by: Cursor <cursoragent@cursor.com> * feat: expand observability, harden dev tooling, and fix expo staging UX Add business and LLM Prometheus metrics with Grafana dashboards, alerting, and a metrics verification script. Wire telemetry through adapters and core LLM paths, and document the local LGTM workflow. Fix development.sh for macOS bash 3.2, open Grafana and eval-web in Chrome, and repair eval-web auto-open (unbound EVAL_WEB_BROWSER_SCHEDULED). Merge internal-eval into the main dev script with improved compose handling. Require EXPO_PUBLIC_* at build time, improve iOS HTTP ATS for staging IPs, show memoir empty state instead of load errors when no chapters exist, and add jest env setup plus chapter list response normalization. Co-authored-by: Cursor <cursoragent@cursor.com> * chore: enable Grafana Assistant Cursor plugin Co-authored-by: Cursor <cursoragent@cursor.com> * fix: memoir empty state and repair withdrawn 0020_chapters_book_id stamp Show empty memoir UI when the chapter list succeeds with no items; treat auth/404 as non-fatal. Extend alembic revision repair so local dev DBs stamped with the removed 0020_chapters_book_id migration can roll back and upgrade to 0019. Co-authored-by: Cursor <cursoragent@cursor.com> --------- Co-authored-by: Kevin <kevin@brighteng.org> Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -25,10 +25,19 @@ API_PORT="${API_PORT:-8000}"
|
||||
CELERY_POOL="${CELERY_POOL:-solo}"
|
||||
SKIP_INSTALL="${SKIP_INSTALL:-0}"
|
||||
SKIP_INFRA="${SKIP_INFRA:-0}"
|
||||
# 可观测性:空=若 .env 中 OTEL_ENABLED=true 则启动 compose;0=不启;1=强制启动
|
||||
START_OBSERVABILITY="${START_OBSERVABILITY:-}"
|
||||
SHUTDOWN_TIMEOUT="${SHUTDOWN_TIMEOUT:-12}"
|
||||
|
||||
# 由 internal-eval.sh 开启:在 main:app + Celery 之外再启 internal_main(:8001) 与 app-eval-web
|
||||
LIFE_ECHO_WITH_INTERNAL_EVAL="${LIFE_ECHO_WITH_INTERNAL_EVAL:-0}"
|
||||
# 与 docker-compose.observability.yml / .env.example 默认宿主机端口一致
|
||||
OTEL_GRPC_HOST_PORT="${OTEL_GRPC_HOST_PORT:-48317}"
|
||||
GRAFANA_HOST_PORT="${GRAFANA_HOST_PORT:-48300}"
|
||||
PROMETHEUS_HOST_PORT="${PROMETHEUS_HOST_PORT:-49090}"
|
||||
|
||||
# 默认一并启动 internal_main + app-eval-web(设 0 可仅主站)
|
||||
LIFE_ECHO_WITH_INTERNAL_EVAL="${LIFE_ECHO_WITH_INTERNAL_EVAL:-1}"
|
||||
# 自动用 Google Chrome 打开 Grafana / 评测 Web(勿用 Vite --open,避免落到 Safari)
|
||||
OPEN_OBSERVABILITY_UI="${OPEN_OBSERVABILITY_UI:-1}"
|
||||
# 若 :8000 已由其他 development 实例占用,仅附加 :8001 + 前端(需自备同一份 Celery/主站)
|
||||
EVAL_ATTACH_ONLY="${EVAL_ATTACH_ONLY:-0}"
|
||||
INTERNAL_EVAL_HOST="${INTERNAL_EVAL_HOST:-0.0.0.0}"
|
||||
@@ -43,6 +52,9 @@ INTERNAL_EVAL_PID=""
|
||||
EVAL_WEB_PID=""
|
||||
CLEANED_UP=0
|
||||
INFRA_STARTED=0
|
||||
OBSERVABILITY_STARTED=0
|
||||
OBSERVABILITY_BROWSER_SCHEDULED=0
|
||||
EVAL_WEB_BROWSER_SCHEDULED=0
|
||||
|
||||
print_header() {
|
||||
echo -e "\n${BLUE}========================================${NC}"
|
||||
@@ -62,6 +74,64 @@ print_err() {
|
||||
echo -e "${RED}✗ $1${NC}"
|
||||
}
|
||||
|
||||
# Open a URL in Google Chrome / Chromium without blocking the caller.
#
# Arguments: $1 - URL to open
# Outputs:   a warning (via print_warn) when no suitable browser can be launched
# Returns:   0 when a browser launch was issued, 1 otherwise
open_browser_url() {
  local url="$1"
  local browser

  # macOS: ask `open` for Chrome explicitly instead of the system default browser.
  if command -v open >/dev/null 2>&1 && [[ "$(uname -s)" == "Darwin" ]]; then
    if open -a "Google Chrome" "${url}" >/dev/null 2>&1; then
      return 0
    fi
    print_warn "未找到 Google Chrome,请手动打开: ${url}"
    return 1
  fi

  # Elsewhere: the first Chrome-family binary found on PATH wins.
  # `google-chrome-stable` is included because some distributions install
  # Chrome only under that name.
  for browser in google-chrome google-chrome-stable chromium-browser chromium; do
    if command -v "${browser}" >/dev/null 2>&1; then
      # Background the browser so the dev script keeps running.
      "${browser}" "${url}" >/dev/null 2>&1 &
      return 0
    fi
  done

  print_warn "未找到 Chrome/Chromium,请手动打开: ${url}"
  return 1
}
|
||||
|
||||
# Schedule a one-shot, delayed opening of the Grafana UI in the browser.
#
# Globals:   OPEN_OBSERVABILITY_UI (read), GRAFANA_HOST_PORT (read),
#            OBSERVABILITY_BROWSER_SCHEDULED (read/written)
# Returns:   0 when disabled or already scheduled; otherwise print_ok's status
schedule_observability_browser() {
  # Feature switched off: nothing to do.
  [[ "${OPEN_OBSERVABILITY_UI}" == "1" ]] || return 0
  # Already scheduled once during this run: do not open a second tab.
  [[ "${OBSERVABILITY_BROWSER_SCHEDULED}" != "1" ]] || return 0

  OBSERVABILITY_BROWSER_SCHEDULED=1
  local target="http://127.0.0.1:${GRAFANA_HOST_PORT}"

  # Fire-and-forget job: wait a few seconds, then open the page.
  {
    sleep 4
    open_browser_url "${target}"
  } &

  print_ok "将自动打开 Grafana: ${target}"
}
|
||||
|
||||
# Schedule a one-shot opening of the eval web UI once its dev server port is up.
#
# Globals:   OPEN_EVAL_WEB (read), EVAL_WEB_PORT (read),
#            EVAL_WEB_BROWSER_SCHEDULED (read/written)
# Returns:   0 when disabled or already scheduled; otherwise print_ok's status
schedule_eval_web_browser() {
  [[ "${OPEN_EVAL_WEB}" == "1" ]] || return 0
  # The :-0 default keeps `set -u` happy if the flag was never initialised.
  [[ "${EVAL_WEB_BROWSER_SCHEDULED:-0}" != "1" ]] || return 0

  EVAL_WEB_BROWSER_SCHEDULED=1
  local target="http://127.0.0.1:${EVAL_WEB_PORT}/"

  (
    local attempt
    # Poll the port up to 30 times, one second apart; the URL is opened
    # afterwards whether or not the port ever started listening.
    for ((attempt = 0; attempt < 30; attempt++)); do
      if is_port_listening "${EVAL_WEB_PORT}"; then
        break
      fi
      sleep 1
    done
    open_browser_url "${target}"
  ) &

  print_ok "将自动打开评测 Web (Chrome): ${target}"
}
|
||||
|
||||
is_pid_alive() {
|
||||
local pid="$1"
|
||||
[[ -n "${pid}" ]] && kill -0 "${pid}" 2>/dev/null
|
||||
@@ -147,11 +217,9 @@ cleanup() {
|
||||
fi
|
||||
|
||||
if [[ "${INFRA_STARTED}" == "1" ]]; then
|
||||
print_warn "正在停止 PostgreSQL / Redis 容器..."
|
||||
(
|
||||
cd "${ROOT_DIR}" && docker compose -f docker-compose.dev.yml stop
|
||||
) >/dev/null 2>&1 || true
|
||||
print_ok "PostgreSQL/Redis 容器已停止"
|
||||
print_warn "正在停止 Docker 基础设施..."
|
||||
docker_compose_cmd stop >/dev/null 2>&1 || true
|
||||
print_ok "Docker 容器已停止"
|
||||
fi
|
||||
}
|
||||
|
||||
@@ -163,12 +231,107 @@ require_cmd() {
|
||||
fi
|
||||
}
|
||||
|
||||
# Resolve a boolean setting, preferring the process environment over
# ${ROOT_DIR}/.env.
#
# Globals:   ROOT_DIR (read) - directory that may contain the .env file
# Arguments: $1 - name of the variable to look up
#            $2 - fallback, "1" (true) or "0" (false); defaults to "0". Used
#                 when the variable is absent from both places, or when the
#                 .env value is neither a recognised true nor false word.
# Returns:   0 when the setting resolves to true, 1 otherwise
#
# Recognised words (case-insensitive): 1/true/yes/on and 0/false/no/off.
read_env_bool() {
  local key="$1"
  local default="${2:-0}"
  local raw=""

  if [[ -n "${!key:-}" ]]; then
    # A non-empty environment value is authoritative: anything that is not a
    # true word counts as false and the fallback is not consulted.
    raw="$(printf '%s' "${!key}" | tr '[:upper:]' '[:lower:]')"
    case "${raw}" in
      1 | true | yes | on) return 0 ;;
      *) return 1 ;;
    esac
  fi

  if [[ -f "${ROOT_DIR}/.env" ]]; then
    # Last assignment wins; strip CR (CRLF files) and surrounding double quotes.
    # `|| true`: grep exits non-zero when the key is missing, which must not
    # abort callers running under `set -e -o pipefail`.
    raw="$(grep -E "^${key}=" "${ROOT_DIR}/.env" \
      | tail -1 \
      | cut -d= -f2- \
      | tr -d '\r' \
      | sed 's/^"//;s/"$//' \
      | tr '[:upper:]' '[:lower:]')" || true
    case "${raw}" in
      1 | true | yes | on) return 0 ;;
      # An explicit false in .env must beat a fallback of "1".
      0 | false | no | off) return 1 ;;
    esac
  fi

  [[ "${default}" == "1" ]]
}
|
||||
|
||||
# Decide whether the observability compose overlay should be used.
#
# Globals:   START_OBSERVABILITY (read) - explicit off/on override; any other
#            value (including empty) defers to OTEL_ENABLED via read_env_bool
# Returns:   0 to start the observability stack, 1 otherwise
should_start_observability() {
  local override="${START_OBSERVABILITY}"

  # Explicit "off" spellings.
  if [[ "${override}" =~ ^(0|false|FALSE|no|NO|off|OFF)$ ]]; then
    return 1
  fi
  # Explicit "on" spellings.
  if [[ "${override}" =~ ^(1|true|TRUE|yes|YES|on|ON)$ ]]; then
    return 0
  fi

  # No recognised override: follow OTEL_ENABLED, defaulting to off.
  read_env_bool "OTEL_ENABLED" "0"
}
|
||||
|
||||
# Run `docker compose` from ROOT_DIR with the compose files that match the
# current observability setting.
#
# Kept free of namerefs and local arrays so it runs on the bash 3.2 that
# ships with macOS.
#
# Globals:   ROOT_DIR (read)
# Arguments: the compose sub-command and its arguments, e.g. `up -d`, `stop`
# Returns:   exit status of the compose invocation (or of the failed `cd`)
docker_compose_cmd() {
  if should_start_observability; then
    # Observability on: dev stack plus the observability overlay.
    (
      cd "${ROOT_DIR}" \
        && docker compose \
          -f docker-compose.dev.yml \
          -f docker-compose.observability.yml \
          "$@"
    )
    return
  fi

  # Dev stack only: `up` additionally gets --remove-orphans appended.
  if [[ "$1" == "up" ]]; then
    set -- "$@" --remove-orphans
  fi
  (cd "${ROOT_DIR}" && docker compose -f docker-compose.dev.yml "$@")
}
|
||||
|
||||
# Poll until the OTel Collector gRPC host port is listening.
#
# Globals:   OTEL_GRPC_HOST_PORT (read)
# Arguments: $1 - maximum number of checks, one second apart (default 30)
# Returns:   0 as soon as the port is listening, 1 once the checks run out
wait_otel_collector_ready() {
  local max_checks="${1:-30}"
  local attempt

  for ((attempt = 0; attempt < max_checks; attempt++)); do
    if is_port_listening "${OTEL_GRPC_HOST_PORT}"; then
      return 0
    fi
    sleep 1
  done

  return 1
}
|
||||
|
||||
# Verify that the OTel Collector port is reachable when OTEL_ENABLED is on,
# and print remediation hints when it is not.
#
# Globals:   OTEL_GRPC_HOST_PORT (read), OBSERVABILITY_STARTED (read)
# Returns:   0 when OTEL is disabled or the port is listening, 1 otherwise
check_otel_collector_ready() {
  local ready=0

  # Nothing to verify when telemetry is disabled.
  read_env_bool "OTEL_ENABLED" "0" || return 0

  if is_port_listening "${OTEL_GRPC_HOST_PORT}"; then
    ready=1
  elif [[ "${OBSERVABILITY_STARTED}" == "1" ]]; then
    # This run started the stack itself, so give the collector time to come up.
    print_warn "等待 OTel Collector 端口 :${OTEL_GRPC_HOST_PORT} …"
    if wait_otel_collector_ready 45; then
      ready=1
    fi
  fi

  if [[ "${ready}" == "1" ]]; then
    print_ok "OTel Collector 端口已监听 (:${OTEL_GRPC_HOST_PORT})"
    return 0
  fi

  print_warn "OTEL_ENABLED=true 但 :${OTEL_GRPC_HOST_PORT} 未监听"
  print_warn "请确认本次启动日志中有「启动可观测性栈」;或手动执行:"
  print_warn " docker compose -f docker-compose.dev.yml -f docker-compose.observability.yml up -d"
  print_warn "不需要可观测性时在 .env.development 设 OTEL_ENABLED=false"
  return 1
}
|
||||
|
||||
# Bring up the Docker infrastructure (PostgreSQL and Redis, plus the
# observability stack when enabled) and print the resulting endpoints.
#
# The stack is started exactly once, through docker_compose_cmd, which picks
# the compose files and runs in a subshell so the caller's working directory
# is left untouched.
#
# Globals:   INFRA_STARTED (written), OBSERVABILITY_STARTED (read/written),
#            GRAFANA_HOST_PORT, PROMETHEUS_HOST_PORT, OTEL_GRPC_HOST_PORT (read)
# Returns:   status of the last command (print_ok)
start_infra() {
  if should_start_observability; then
    print_header "启动 PostgreSQL、Redis 与可观测性栈 (OTel / Grafana LGTM)"
    OBSERVABILITY_STARTED=1
  else
    print_header "启动 PostgreSQL 和 Redis"
  fi

  docker_compose_cmd up -d
  INFRA_STARTED=1
  print_ok "PostgreSQL 127.0.0.1:48291,Redis 127.0.0.1:48307(见 docker-compose.dev.yml / .env.example)"

  if [[ "${OBSERVABILITY_STARTED}" == "1" ]]; then
    print_ok "Grafana http://127.0.0.1:${GRAFANA_HOST_PORT} (admin/admin)"
    print_ok "Prometheus http://127.0.0.1:${PROMETHEUS_HOST_PORT}"
    print_ok "OTLP gRPC 127.0.0.1:${OTEL_GRPC_HOST_PORT}(应用读 .env 中 OTEL_*,无需 export)"
    print_ok "详见 docs/observability.md"
    schedule_observability_browser
  fi

  print_ok "基础设施已就绪"
}
|
||||
|
||||
@@ -467,19 +630,15 @@ start_eval_web() {
|
||||
exit 1
|
||||
fi
|
||||
|
||||
local vite_extra=()
|
||||
if [[ "${OPEN_EVAL_WEB}" == "1" ]]; then
|
||||
vite_extra+=(--open)
|
||||
fi
|
||||
|
||||
(
|
||||
cd "${EVAL_WEB_DIR}"
|
||||
VITE_EVAL_API_KEY="${api_key}" \
|
||||
VITE_EVAL_PROXY_TARGET="http://127.0.0.1:${INTERNAL_EVAL_PORT}" \
|
||||
npm run dev -- --host 127.0.0.1 --port "${EVAL_WEB_PORT}" "${vite_extra[@]}"
|
||||
npm run dev -- --host 127.0.0.1 --port "${EVAL_WEB_PORT}"
|
||||
) &
|
||||
EVAL_WEB_PID=$!
|
||||
print_ok "eval-web 已启动 (PID: ${EVAL_WEB_PID}) → http://127.0.0.1:${EVAL_WEB_PORT}/"
|
||||
schedule_eval_web_browser
|
||||
}
|
||||
|
||||
start_internal_eval_http() {
|
||||
@@ -493,7 +652,8 @@ start_internal_eval_http() {
|
||||
exit 1
|
||||
fi
|
||||
|
||||
"${UVICORN_BIN}" app.internal_main:internal_app --reload \
|
||||
OTEL_SERVICE_NAME="${INTERNAL_EVAL_OTEL_SERVICE_NAME:-life-echo-internal-api}" \
|
||||
"${UVICORN_BIN}" app.internal_main:internal_app --reload \
|
||||
--reload-exclude 'alembic/**' \
|
||||
--reload-exclude 'alembic.ini' \
|
||||
--host "${INTERNAL_EVAL_HOST}" --port "${INTERNAL_EVAL_PORT}" &
|
||||
@@ -547,7 +707,7 @@ start_services() {
|
||||
fi
|
||||
|
||||
if [[ "${skip_main}" == "1" ]] && [[ "${LIFE_ECHO_WITH_INTERNAL_EVAL}" != "1" ]]; then
|
||||
print_err "EVAL_ATTACH_ONLY=1 仅用于在已有主站时附加内部评测;请使用 ./internal-eval.sh 或导出 LIFE_ECHO_WITH_INTERNAL_EVAL=1"
|
||||
print_err "EVAL_ATTACH_ONLY=1 仅用于在已有主站时附加内部评测;请设置 LIFE_ECHO_WITH_INTERNAL_EVAL=1"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
@@ -601,14 +761,27 @@ start_services() {
|
||||
echo "主站文档: http://localhost:${API_PORT}/docs"
|
||||
echo "健康检查: http://localhost:${API_PORT}/health"
|
||||
fi
|
||||
if [[ "${LIFE_ECHO_WITH_INTERNAL_EVAL}" == "1" ]]; then
|
||||
echo "评测 Web UI: http://127.0.0.1:${EVAL_WEB_PORT}/"
|
||||
echo "内部评测 API: http://127.0.0.1:${INTERNAL_EVAL_PORT}/health"
|
||||
fi
|
||||
if read_env_bool "OTEL_ENABLED" "0"; then
|
||||
echo "可观测性: Grafana http://127.0.0.1:${GRAFANA_HOST_PORT} | Prometheus http://127.0.0.1:${PROMETHEUS_HOST_PORT}"
|
||||
if is_port_listening "${GRAFANA_HOST_PORT}"; then
|
||||
schedule_observability_browser
|
||||
fi
|
||||
fi
|
||||
if [[ "${LIFE_ECHO_WITH_INTERNAL_EVAL}" == "1" ]] && is_pid_alive "${EVAL_WEB_PID}"; then
|
||||
schedule_eval_web_browser
|
||||
fi
|
||||
echo "按 Ctrl+C 停止所有进程"
|
||||
}
|
||||
|
||||
main() {
|
||||
if [[ "${LIFE_ECHO_WITH_INTERNAL_EVAL}" == "1" ]]; then
|
||||
print_header "Life Echo 开发环境 + 内部评测(主站 + :${INTERNAL_EVAL_PORT} + Eval Web)"
|
||||
print_header "Life Echo 开发环境(主站 + 内部评测 + 可观测性)"
|
||||
else
|
||||
print_header "Life Echo 开发环境一键启动"
|
||||
print_header "Life Echo 开发环境一键启动(无内部评测)"
|
||||
fi
|
||||
|
||||
require_cmd "uv"
|
||||
@@ -618,16 +791,22 @@ main() {
|
||||
|
||||
trap cleanup EXIT INT TERM
|
||||
|
||||
ensure_venv
|
||||
# 必须在 start_infra 之前同步,否则 should_start_observability 读不到 .env.development 里的 OTEL_ENABLED
|
||||
ensure_dotenv_from_development
|
||||
|
||||
if [[ "${SKIP_INFRA}" != "1" ]]; then
|
||||
start_infra
|
||||
wait_postgres_ready || true
|
||||
else
|
||||
print_warn "已跳过 docker 基础设施 (SKIP_INFRA=1)"
|
||||
if should_start_observability; then
|
||||
print_warn "SKIP_INFRA=1 未自动启动 observability;若需 LGTM 请手动 docker compose up observability overlay"
|
||||
fi
|
||||
fi
|
||||
|
||||
ensure_venv
|
||||
ensure_dotenv_from_development
|
||||
check_env_file
|
||||
check_otel_collector_ready || true
|
||||
wait_host_infra_ready
|
||||
run_migrations
|
||||
start_services
|
||||
|
||||
Reference in New Issue
Block a user