* add staging ios app build script * feat(api): add OpenTelemetry LGTM stack for local observability Wire OTel traces, metrics, and logs through a collector to Tempo, Prometheus, and Loki, with custom LLM instrumentation, dev compose overlay, Grafana provisioning, env templates, and development.sh auto-start. * feat: expand observability, harden dev tooling, and fix expo staging UX Add business and LLM Prometheus metrics with Grafana dashboards, alerting, and a metrics verification script. Wire telemetry through adapters and core LLM paths, and document the local LGTM workflow. Fix development.sh for macOS bash 3.2, open Grafana and eval-web in Chrome, and repair eval-web auto-open (unbound EVAL_WEB_BROWSER_SCHEDULED). Merge internal-eval into the main dev script with improved compose handling. Require EXPO_PUBLIC_* at build time, improve iOS HTTP ATS for staging IPs, show memoir empty state instead of load errors when no chapters exist, and add jest env setup plus chapter list response normalization. * chore: enable Grafana Assistant Cursor plugin * fix: memoir empty state and repair withdrawn 0020_chapters_book_id stamp Show empty memoir UI when the chapter list succeeds with no items; treat auth/404 as non-fatal. Extend alembic revision repair so local dev DBs stamped with the removed 0020_chapters_book_id migration can roll back and upgrade to 0019. --------- Co-authored-by: Kevin <kevin@brighteng.org> Co-authored-by: Cursor <cursoragent@cursor.com>
827 lines
24 KiB
Bash
Executable File
827 lines
24 KiB
Bash
Executable File
#!/usr/bin/env bash

set -euo pipefail

# ANSI colour sequences; the print_* helpers expand the backslash escapes.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Paths are derived from this script's own location, not the caller's cwd.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${ROOT_DIR}/.." && pwd)"
EVAL_WEB_DIR="${REPO_ROOT}/app-eval-web"
VENV_DIR="${ROOT_DIR}/.venv"
PYTHON_BIN="${VENV_DIR}/bin/python"
UVICORN_BIN="${VENV_DIR}/bin/uvicorn"
CELERY_BIN="${VENV_DIR}/bin/celery"

# Local full stack: by default the Celery queue may be purged when the API
# starts; do not use this script in production.
: "${APP_ENV:=development}"
: "${CELERY_PURGE_BROKER_ON_STARTUP:=1}"
export APP_ENV CELERY_PURGE_BROKER_ON_STARTUP

# Tunables: a non-empty value from the calling environment wins.
: "${API_HOST:=0.0.0.0}"
: "${API_PORT:=8000}"
: "${CELERY_POOL:=solo}"
: "${SKIP_INSTALL:=0}"
: "${SKIP_INFRA:=0}"
# Observability: empty = start the compose overlay when OTEL_ENABLED=true in
# .env; 0 = never start it; 1 = always start it.
: "${START_OBSERVABILITY:=}"
: "${SHUTDOWN_TIMEOUT:=12}"

# Same default host ports as docker-compose.observability.yml / .env.example.
: "${OTEL_GRPC_HOST_PORT:=48317}"
: "${GRAFANA_HOST_PORT:=48300}"
: "${PROMETHEUS_HOST_PORT:=49090}"

# By default internal_main + app-eval-web are started too (set 0 for the main
# site only).
: "${LIFE_ECHO_WITH_INTERNAL_EVAL:=1}"
# Open Grafana / the eval web UI in Google Chrome automatically (do not use
# Vite --open, which may end up in Safari).
: "${OPEN_OBSERVABILITY_UI:=1}"
# If the main API port is already held by another development instance, only
# attach the internal eval API and the frontend (the main site and Celery must
# already be running elsewhere).
: "${EVAL_ATTACH_ONLY:=0}"
: "${INTERNAL_EVAL_HOST:=0.0.0.0}"
: "${INTERNAL_EVAL_PORT:=7999}"
: "${START_EVAL_WEB:=1}"
: "${OPEN_EVAL_WEB:=1}"
: "${EVAL_WEB_PORT:=5174}"

# Runtime state: PIDs of the background services launched by this script.
API_PID=""
CELERY_PID=""
INTERNAL_EVAL_PID=""
EVAL_WEB_PID=""

# Runtime state: one-shot flags flipped by the functions below.
CLEANED_UP=0
INFRA_STARTED=0
OBSERVABILITY_STARTED=0
OBSERVABILITY_BROWSER_SCHEDULED=0
EVAL_WEB_BROWSER_SCHEDULED=0
|
||
|
||
# Print a blue section banner: a blank line, a rule, the title ($1), a rule.
print_header() {
  local rule='========================================'

  printf '%b\n' "\n${BLUE}${rule}${NC}"
  printf '%b\n' "${BLUE}$1${NC}"
  printf '%b\n' "${BLUE}${rule}${NC}"
}
|
||
|
||
# Print a green success line for message $1.
print_ok() {
  printf '%b\n' "${GREEN}✓ $1${NC}"
}
|
||
|
||
# Print a yellow warning line for message $1.
print_warn() {
  printf '%b\n' "${YELLOW}⚠ $1${NC}"
}
|
||
|
||
# Print a red error line for message $1 (written to stdout).
print_err() {
  printf '%b\n' "${RED}✗ $1${NC}"
}
|
||
|
||
# Open URL $1 in Chrome/Chromium.
# On macOS (an `open` command plus a Darwin kernel) only Google Chrome is
# tried. Elsewhere the first of google-chrome, chromium-browser or chromium
# found on PATH is launched in the background.
# Returns 0 when a browser was launched, 1 (after a warning) otherwise.
open_browser_url() {
  local url="$1"
  local candidate

  if command -v open >/dev/null 2>&1 && [[ "$(uname -s)" == "Darwin" ]]; then
    if ! open -a "Google Chrome" "${url}" >/dev/null 2>&1; then
      print_warn "未找到 Google Chrome,请手动打开: ${url}"
      return 1
    fi
    return 0
  fi

  for candidate in google-chrome chromium-browser chromium; do
    if command -v "${candidate}" >/dev/null 2>&1; then
      "${candidate}" "${url}" >/dev/null 2>&1 &
      return 0
    fi
  done

  print_warn "未找到 Chrome/Chromium,请手动打开: ${url}"
  return 1
}
|
||
|
||
# Schedule a one-time, delayed opening of Grafana in the browser.
# Does nothing unless OPEN_OBSERVABILITY_UI is 1, and never more than once per
# run (tracked through OBSERVABILITY_BROWSER_SCHEDULED).
schedule_observability_browser() {
  [[ "${OPEN_OBSERVABILITY_UI}" == "1" ]] || return 0
  [[ "${OBSERVABILITY_BROWSER_SCHEDULED}" != "1" ]] || return 0

  OBSERVABILITY_BROWSER_SCHEDULED=1
  local grafana_url="http://127.0.0.1:${GRAFANA_HOST_PORT}"

  # Background job: wait a few seconds before opening the page.
  {
    sleep 4
    open_browser_url "${grafana_url}"
  } &

  print_ok "将自动打开 Grafana: ${grafana_url}"
}
|
||
|
||
# Schedule a one-time opening of the eval web UI in the browser.
# Does nothing unless OPEN_EVAL_WEB is 1, and never more than once per run.
# The background job polls EVAL_WEB_PORT up to 30 times, one second apart,
# and then opens the URL whether or not the port came up.
schedule_eval_web_browser() {
  [[ "${OPEN_EVAL_WEB}" == "1" ]] || return 0
  [[ "${EVAL_WEB_BROWSER_SCHEDULED:-0}" != "1" ]] || return 0

  EVAL_WEB_BROWSER_SCHEDULED=1
  local eval_url="http://127.0.0.1:${EVAL_WEB_PORT}/"

  {
    local attempt
    for (( attempt = 0; attempt < 30; attempt++ )); do
      if is_port_listening "${EVAL_WEB_PORT}"; then
        break
      fi
      sleep 1
    done
    open_browser_url "${eval_url}"
  } &

  print_ok "将自动打开评测 Web (Chrome): ${eval_url}"
}
|
||
|
||
# Succeed when $1 is a non-empty PID that `kill -0` can signal.
is_pid_alive() {
  local pid="$1"

  if [[ -z "${pid}" ]]; then
    return 1
  fi
  kill -0 "${pid}" 2>/dev/null
}
|
||
|
||
# Wait for process $1 to exit, polling once per second.
# Returns 0 as soon as the process is gone, 1 once $2 seconds have passed
# while it is still alive.
wait_pid_exit() {
  local pid="$1"
  local timeout="$2"
  local elapsed

  for (( elapsed = 0; ; elapsed++ )); do
    is_pid_alive "${pid}" || return 0
    (( elapsed < timeout )) || return 1
    sleep 1
  done
}
|
||
|
||
# Send SIGTERM to every descendant of process $1, deepest first.
# The process $1 itself is not signalled; callers do that separately.
# Failures from pgrep/kill are ignored because descendants may exit on their
# own at any time.
kill_children_term() {
  local pid="$1"
  local children
  # Must be local: this function recurses, and a shared loop variable would be
  # emptied by the nested read loop, so the kill below would hit "" instead of
  # the child.
  local child_pid

  children="$(pgrep -P "${pid}" 2>/dev/null || true)"
  if [[ -z "${children}" ]]; then
    return 0
  fi

  # Handle grandchildren first so reloader/server processes are not left
  # behind.
  while IFS= read -r child_pid; do
    [[ -z "${child_pid}" ]] && continue
    kill_children_term "${child_pid}"
    kill -TERM "${child_pid}" 2>/dev/null || true
  done <<< "${children}"
}
|
||
|
||
# Stop a background service, escalating from SIGTERM to SIGKILL.
# Arguments: $1 - display name, $2 - PID, $3 - grace period in seconds
#            (default 10).
# SIGTERM goes to the descendants first, then to the process itself; if it is
# still alive after the grace period it gets SIGKILL and up to 3 more seconds.
stop_process_gracefully() {
  local name="$1"
  local pid="$2"
  local timeout="${3:-10}"

  if is_pid_alive "${pid}"; then
    print_warn "正在停止 ${name}(PID: ${pid})..."
    kill_children_term "${pid}"
    kill -TERM "${pid}" 2>/dev/null || true

    if ! wait_pid_exit "${pid}" "${timeout}"; then
      print_warn "${name} 在 ${timeout}s 内未退出,准备强制结束"
      kill -KILL "${pid}" 2>/dev/null || true
      wait_pid_exit "${pid}" 3 || true
      print_ok "${name} 已强制结束"
    else
      print_ok "${name} 已停止"
    fi
  else
    print_ok "${name} 已退出"
  fi
}
|
||
|
||
# Shut the development environment down; used as the trap handler.
# Runs its body only once (guarded by CLEANED_UP). Stops eval-web, the
# internal eval API, FastAPI and Celery in that order, then stops the Docker
# containers if this run started them.
cleanup() {
  [[ "${CLEANED_UP}" != "1" ]] || return 0
  CLEANED_UP=1

  print_header "正在关闭开发环境"

  # (label, pid) pairs in shutdown order, walked two arguments at a time.
  set -- \
    "eval-web (Vite)" "${EVAL_WEB_PID}" \
    "Internal Eval API (:${INTERNAL_EVAL_PORT})" "${INTERNAL_EVAL_PID}" \
    "FastAPI" "${API_PID}" \
    "Celery" "${CELERY_PID}"
  while (( $# >= 2 )); do
    if is_pid_alive "$2"; then
      stop_process_gracefully "$1" "$2" "${SHUTDOWN_TIMEOUT}"
    fi
    shift 2
  done

  if [[ "${INFRA_STARTED}" == "1" ]]; then
    print_warn "正在停止 Docker 基础设施..."
    docker_compose_cmd stop >/dev/null 2>&1 || true
    print_ok "Docker 容器已停止"
  fi
}
|
||
|
||
# Abort the script with an error unless command $1 is available.
require_cmd() {
  local cmd="$1"

  command -v "${cmd}" >/dev/null 2>&1 && return 0

  print_err "未找到命令: ${cmd}"
  exit 1
}
|
||
|
||
# Evaluate a boolean setting, preferring the process environment over .env.
# Arguments:
#   $1 - variable name
#   $2 - default ("1" = true, anything else = false; defaults to "0"), used
#        only when the setting is absent or empty everywhere
# Lookup order:
#   1. a non-empty environment variable named $1
#   2. the last `$1=` line in ${ROOT_DIR}/.env (CR and one pair of surrounding
#      double quotes removed)
#   3. the default
# Returns 0 for 1/true/TRUE/yes/YES/on/ON, 1 for any other explicit value.
read_env_bool() {
  local key="$1"
  local default="${2:-0}"
  local val=""

  if [[ -n "${!key:-}" ]]; then
    val="${!key}"
  elif [[ -f "${ROOT_DIR}/.env" ]]; then
    # `|| true`: grep exits non-zero when the key is missing, which must not
    # abort callers running under `set -e -o pipefail`.
    val="$(grep -E "^${key}=" "${ROOT_DIR}/.env" \
      | tail -1 | cut -d= -f2- | tr -d '\r' | sed 's/^"//;s/"$//' || true)"
  fi

  case "${val}" in
    1 | true | TRUE | yes | YES | on | ON) return 0 ;;
    "") [[ "${default}" == "1" ]] ;;
    # An explicit non-truthy value is false regardless of the default, the
    # same way the environment variable is treated.
    *) return 1 ;;
  esac
}
|
||
|
||
# Decide whether the observability compose overlay should be used.
# START_OBSERVABILITY forces the answer when it is an explicit off/on word;
# otherwise the result is OTEL_ENABLED as read by read_env_bool (default off).
should_start_observability() {
  local mode="${START_OBSERVABILITY}"
  local off_re='^(0|false|FALSE|no|NO|off|OFF)$'
  local on_re='^(1|true|TRUE|yes|YES|on|ON)$'

  if [[ "${mode}" =~ ${off_re} ]]; then
    return 1
  fi
  if [[ "${mode}" =~ ${on_re} ]]; then
    return 0
  fi

  read_env_bool "OTEL_ENABLED" "0"
}
|
||
|
||
# Run `docker compose` from ROOT_DIR with the right set of -f files.
# With observability enabled both compose files are passed and the arguments
# are forwarded untouched. Otherwise only docker-compose.dev.yml is used, and
# `up` additionally gets --remove-orphans.
# The -f flags are spelled out inline to stay compatible with the bash 3.2
# shipped on macOS (no `local -n`, no `local arr=(-f ...)`).
docker_compose_cmd() {
  if should_start_observability; then
    (
      cd "${ROOT_DIR}" || exit
      docker compose \
        -f docker-compose.dev.yml \
        -f docker-compose.observability.yml \
        "$@"
    )
    return
  fi

  case "$1" in
    up)
      (
        cd "${ROOT_DIR}" || exit
        docker compose -f docker-compose.dev.yml "$@" --remove-orphans
      )
      ;;
    *)
      (
        cd "${ROOT_DIR}" || exit
        docker compose -f docker-compose.dev.yml "$@"
      )
      ;;
  esac
}
|
||
|
||
# Poll OTEL_GRPC_HOST_PORT once per second, at most $1 times (default 30).
# Returns 0 as soon as the port is listening, 1 when the attempts run out.
wait_otel_collector_ready() {
  local retries="${1:-30}"
  local attempt

  for (( attempt = 0; attempt < retries; attempt++ )); do
    is_port_listening "${OTEL_GRPC_HOST_PORT}" && return 0
    sleep 1
  done
  return 1
}
|
||
|
||
# Verify that the OTel Collector gRPC port is reachable when OTEL_ENABLED is on.
# Returns 0 when OTel is disabled or the port is listening. If this run
# started the observability stack the port is given up to 45 polls to appear.
# Otherwise prints guidance and returns 1.
check_otel_collector_ready() {
  local ready_msg="OTel Collector 端口已监听 (:${OTEL_GRPC_HOST_PORT})"

  read_env_bool "OTEL_ENABLED" "0" || return 0

  if is_port_listening "${OTEL_GRPC_HOST_PORT}"; then
    print_ok "${ready_msg}"
    return 0
  fi

  if [[ "${OBSERVABILITY_STARTED}" == "1" ]]; then
    print_warn "等待 OTel Collector 端口 :${OTEL_GRPC_HOST_PORT} …"
    if wait_otel_collector_ready 45; then
      print_ok "${ready_msg}"
      return 0
    fi
  fi

  print_warn "OTEL_ENABLED=true 但 :${OTEL_GRPC_HOST_PORT} 未监听"
  print_warn "请确认本次启动日志中有「启动可观测性栈」;或手动执行:"
  print_warn "  docker compose -f docker-compose.dev.yml -f docker-compose.observability.yml up -d"
  print_warn "不需要可观测性时在 .env.development 设 OTEL_ENABLED=false"
  return 1
}
|
||
|
||
# Bring up the Docker infrastructure with `docker_compose_cmd up -d`.
# Sets INFRA_STARTED=1, and OBSERVABILITY_STARTED=1 when the observability
# overlay is included; in that case it also prints the Grafana / Prometheus /
# OTLP endpoints and schedules the Grafana browser tab.
start_infra() {
  local banner="启动 PostgreSQL 和 Redis"

  if should_start_observability; then
    banner="启动 PostgreSQL、Redis 与可观测性栈 (OTel / Grafana LGTM)"
    OBSERVABILITY_STARTED=1
  fi
  print_header "${banner}"

  docker_compose_cmd up -d
  INFRA_STARTED=1
  print_ok "PostgreSQL 127.0.0.1:48291,Redis 127.0.0.1:48307(见 docker-compose.dev.yml / .env.example)"

  if [[ "${OBSERVABILITY_STARTED}" == "1" ]]; then
    print_ok "Grafana http://127.0.0.1:${GRAFANA_HOST_PORT} (admin/admin)"
    print_ok "Prometheus http://127.0.0.1:${PROMETHEUS_HOST_PORT}"
    print_ok "OTLP gRPC 127.0.0.1:${OTEL_GRPC_HOST_PORT}(应用读 .env 中 OTEL_*,无需 export)"
    print_ok "详见 docs/observability.md"
    schedule_observability_browser
  fi

  print_ok "基础设施已就绪"
}
|
||
|
||
# Right after Docker starts, Postgres may not accept connections yet; running
# Alembic immediately would report a spurious failure.
#
# Changes into ROOT_DIR and polls `pg_isready` inside the postgres container
# up to 30 times, one second apart. Returns 0 when ready, 1 after a warning.
wait_postgres_ready() {
  local retries=30
  local attempt

  print_header "等待 PostgreSQL 就绪"
  cd "${ROOT_DIR}"

  for (( attempt = 0; attempt < retries; attempt++ )); do
    if docker compose -f docker-compose.dev.yml exec -T postgres \
      pg_isready -U postgres >/dev/null 2>&1; then
      print_ok "PostgreSQL 已就绪"
      return 0
    fi
    sleep 1
  done

  print_warn "PostgreSQL 在 ${retries}s 内未就绪,迁移可能失败"
  return 1
}
|
||
|
||
# Print the DATABASE_URL this run will use.
# A non-empty DATABASE_URL environment variable wins; otherwise the first
# `DATABASE_URL=` line of ${ROOT_DIR}/.env is used, with a trailing carriage
# return (CRLF files) and one pair of surrounding quotes removed.
# Returns 0 after printing a value, 1 when none could be determined.
get_effective_database_url() {
  local env_file="${ROOT_DIR}/.env"
  local value

  if [[ -n "${DATABASE_URL:-}" ]]; then
    printf '%s\n' "${DATABASE_URL}"
    return 0
  fi

  [[ -f "${env_file}" ]] || return 1

  value="$(sed -n 's/^DATABASE_URL=//p' "${env_file}" | sed -n '1p')"
  # Drop the CR first; otherwise it hides the closing quote and corrupts
  # later port parsing.
  value="${value%$'\r'}"
  value="${value%\"}"
  value="${value#\"}"
  value="${value%\'}"
  value="${value#\'}"

  [[ -n "${value}" ]] || return 1
  printf '%s\n' "${value}"
}
|
||
|
||
# Print the REDIS_URL this run will use.
# A non-empty REDIS_URL environment variable wins; otherwise the first
# `REDIS_URL=` line of ${ROOT_DIR}/.env is used, with a trailing carriage
# return (CRLF files) and one pair of surrounding quotes removed.
# Returns 0 after printing a value, 1 when none could be determined.
get_effective_redis_url() {
  local env_file="${ROOT_DIR}/.env"
  local value

  if [[ -n "${REDIS_URL:-}" ]]; then
    printf '%s\n' "${REDIS_URL}"
    return 0
  fi

  [[ -f "${env_file}" ]] || return 1

  value="$(sed -n 's/^REDIS_URL=//p' "${env_file}" | sed -n '1p')"
  # Drop the CR first; otherwise it hides the closing quote and corrupts
  # later port parsing.
  value="${value%$'\r'}"
  value="${value%\"}"
  value="${value#\"}"
  value="${value%\'}"
  value="${value#\'}"

  [[ -n "${value}" ]] || return 1
  printf '%s\n' "${value}"
}
|
||
|
||
# Print the TCP port found in URL $1, or the fallback $2 when there is none.
# A port is the first ":<digits>" that is followed by "/", "?" or the end of
# the string.
extract_url_port() {
  local url="$1"
  local default_port="$2"
  local port="${default_port}"
  local port_re=':([0-9]+)(/|\?|$)'

  if [[ "${url}" =~ ${port_re} ]]; then
    port="${BASH_REMATCH[1]}"
  fi
  printf '%s\n' "${port}"
}
|
||
|
||
# Check that the PostgreSQL and Redis ports named by the effective URLs are
# listening on the host, waiting up to 12 seconds for each.
# Only reports through print_ok / print_warn; a URL that cannot be resolved
# skips its check with a warning.
wait_host_infra_ready() {
  local database_url redis_url pg_port redis_port

  if database_url="$(get_effective_database_url)"; then
    pg_port="$(extract_url_port "${database_url}" "5432")"
    # This script's own PID is passed as the process to watch.
    if wait_for_tcp_listener "$$" "${pg_port}" 12; then
      print_ok "宿主机 PostgreSQL 端口已监听 (:${pg_port})"
    else
      print_warn "宿主机 PostgreSQL 端口未监听 (:${pg_port});请检查 .env 与 docker-compose.dev.yml 端口映射"
    fi
  else
    print_warn "无法解析 DATABASE_URL,跳过宿主机 PostgreSQL 端口检查"
  fi

  if redis_url="$(get_effective_redis_url)"; then
    redis_port="$(extract_url_port "${redis_url}" "6379")"
    if wait_for_tcp_listener "$$" "${redis_port}" 12; then
      print_ok "宿主机 Redis 端口已监听 (:${redis_port})"
    else
      print_warn "宿主机 Redis 端口未监听 (:${redis_port});请检查 .env 与 docker-compose.dev.yml 端口映射"
    fi
  else
    print_warn "无法解析 REDIS_URL,跳过宿主机 Redis 端口检查"
  fi
}
|
||
|
||
# Warn when DATABASE_URL points at a compose-internal hostname (postgres, db,
# postgres-dev, postgresql). Always returns 0; silent when the URL is missing
# or carries no "@host" part.
warn_database_url_host_pitfall() {
  local database_url host
  local host_re='@([^:/?#]+)'

  database_url="$(get_effective_database_url)" || return 0
  [[ "${database_url}" =~ ${host_re} ]] || return 0

  host="${BASH_REMATCH[1]}"
  if [[ "${host}" == "postgres" || "${host}" == "db" \
    || "${host}" == "postgres-dev" || "${host}" == "postgresql" ]]; then
    print_warn "检测到 DATABASE_URL 主机为 ${host};在宿主机执行 Alembic/uvicorn 时通常应使用 localhost"
  fi
}
|
||
|
||
# Print a targeted hint for a failed Alembic run.
# Reads the first 200 lines of log file $1 and reports the first known
# failure signature found; prints nothing when none matches.
print_alembic_failure_hint() {
  local log_file="$1"
  local log_output

  log_output="$(head -n 200 "${log_file}")"

  case "${log_output}" in
    *'could not translate host name "postgres"'* | *"Name or service not known"*)
      print_warn "看起来 DATABASE_URL 指向了容器内主机名;在宿主机运行时请改用 localhost:48291(见 docker-compose.dev.yml)"
      ;;
    *"Connection refused"* | *"could not connect to server"*)
      print_warn "PostgreSQL 连接被拒绝;请确认容器已启动且 DATABASE_URL 与 docker-compose.dev.yml 暴露端口一致"
      ;;
    *"password authentication failed"*)
      print_warn "PostgreSQL 用户名或密码不匹配;请核对 .env.development 中的 DATABASE_URL"
      ;;
    *"No such file or directory"* | *"can't open file"*)
      print_warn "Alembic 依赖的文件或工作目录可能不正确;请确认在 api/ 目录运行脚本"
      ;;
    *"Can't locate revision"*)
      print_warn "alembic_version 与当前迁移链不一致(常见于已撤回的 0020_*)"
      print_warn "将自动修复:重启 development.sh,或执行: uv run python scripts/repair_alembic_version_after_withdrawn_0020.py && uv run alembic upgrade head"
      ;;
  esac
}
|
||
|
||
# Succeed when something is listening on local TCP port $1.
# Uses `lsof` when it is installed; otherwise falls back to a short connect()
# to 127.0.0.1 through the virtualenv's Python. Returns 1 when neither tool
# is available.
is_port_listening() {
  local port="$1"

  if command -v lsof >/dev/null 2>&1; then
    lsof -nP -iTCP:"${port}" -sTCP:LISTEN >/dev/null 2>&1
    return
  fi

  [[ -x "${PYTHON_BIN}" ]] || return 1

  # The exit status of the Python probe is the function's result.
  "${PYTHON_BIN}" - "${port}" <<'PY' >/dev/null 2>&1
import socket
import sys

sock = socket.socket()
sock.settimeout(0.2)
try:
    sock.connect(("127.0.0.1", int(sys.argv[1])))
except OSError:
    raise SystemExit(1)
finally:
    sock.close()
raise SystemExit(0)
PY
}
|
||
|
||
# Wait for TCP port $2 to start listening while process $1 stays alive.
# Arguments: $1 - PID to watch, $2 - port, $3 - timeout in seconds (default 8).
# Returns: 0 - port is listening
#          1 - the watched process exited first
#          2 - timed out with the process still alive
wait_for_tcp_listener() {
  local pid="$1"
  local port="$2"
  local timeout="${3:-8}"
  local elapsed

  for (( elapsed = 0; elapsed < timeout; elapsed++ )); do
    is_port_listening "${port}" && return 0
    is_pid_alive "${pid}" || return 1
    sleep 1
  done

  return 2
}
|
||
|
||
# Give background process $2 (display name $1) one second, then abort the
# script with an error if it has already exited.
ensure_background_process_alive() {
  local name="$1"
  local pid="$2"

  sleep 1
  is_pid_alive "${pid}" && return 0

  print_err "${name} 启动后立即退出,请查看上方日志"
  exit 1
}
|
||
|
||
# Make sure the Python virtualenv exists and, unless SKIP_INSTALL=1, that the
# dependencies are synced.
# The uv commands run from ROOT_DIR in a subshell, so the script works no
# matter which directory it was launched from and the caller's working
# directory is left untouched.
ensure_venv() {
  print_header "检查 Python 虚拟环境"

  if [[ ! -d "${VENV_DIR}" ]]; then
    print_warn ".venv 不存在,正在创建"
    (cd "${ROOT_DIR}" && uv venv "${VENV_DIR}")
  fi

  if [[ "${SKIP_INSTALL}" != "1" ]]; then
    print_header "安装 Python 依赖"
    # uv resolves the project from the working directory, so it must be
    # ROOT_DIR rather than wherever the user invoked the script.
    (cd "${ROOT_DIR}" && uv sync)
    print_ok "依赖安装完成"
  else
    print_warn "已跳过依赖安装 (SKIP_INSTALL=1)"
  fi
}
|
||
|
||
# Local convention: .env.development is the source of truth. Every launch
# overwrites .env from .env.development so that pydantic
# Settings(env_file=".env") reads it.
# Do not change secrets only in .env without copying them back to
# .env.development, or the next launch will overwrite them.
ensure_dotenv_from_development() {
  local source_env="${ROOT_DIR}/.env.development"

  print_header "准备本地 .env"

  if [[ ! -f "${source_env}" ]]; then
    print_warn "未找到 .env.development,无法自动生成 .env"
    print_warn "请执行: cp api/.env.example api/.env.development 后按说明填写,再运行 ./development.sh"
  else
    cp "${source_env}" "${ROOT_DIR}/.env"
    print_ok "已从 .env.development 同步为 .env"
  fi
}
|
||
|
||
# Report whether ${ROOT_DIR}/.env exists; when it does, also run the
# DATABASE_URL hostname sanity check.
check_env_file() {
  print_header "检查环境变量文件"

  if [[ -f "${ROOT_DIR}/.env" ]]; then
    print_ok "检测到 .env"
    warn_database_url_host_pitfall
  else
    print_warn "未找到 .env,应用可能因缺少配置启动失败"
  fi
}
|
||
|
||
# Run the Alembic migrations from ROOT_DIR.
# First runs the best-effort repair script for the withdrawn 0020 revision
# (its failure is ignored), then `alembic upgrade head`. Output from both
# steps is collected in one temporary log, so a failure report can show what
# the repair step printed as well. A migration failure only warns; it does
# not abort the script.
run_migrations() {
  print_header "执行数据库迁移"
  cd "${ROOT_DIR}"

  local log_file
  log_file="$(mktemp -t life-echo-alembic.XXXXXX.log)"

  uv run python scripts/repair_alembic_version_after_withdrawn_0020.py >>"${log_file}" 2>&1 || true

  # Append (>>) rather than truncate, so the repair output above is kept.
  if uv run alembic upgrade head >>"${log_file}" 2>&1; then
    print_ok "Alembic 迁移已就绪"
  else
    print_warn "Alembic 迁移失败(可能数据库未启动或 DATABASE_URL 未配置),应用启动可能失败"
    print_alembic_failure_hint "${log_file}"
    print_warn "Alembic 输出(最近 40 行):"
    tail -n 40 "${log_file}"
  fi

  rm -f "${log_file}"
}
|
||
|
||
# Require INTERNAL_EVAL_API_KEY to be configured, either as a non-empty entry
# in ${ROOT_DIR}/.env or as an environment variable; otherwise explain how to
# set it and abort the script.
check_internal_eval_key() {
  local env_file="${ROOT_DIR}/.env"

  print_header "检查内部评测密钥"

  if [[ -f "${env_file}" ]] && grep -qE '^INTERNAL_EVAL_API_KEY=.+' "${env_file}" 2>/dev/null; then
    print_ok "已在 .env 中配置 INTERNAL_EVAL_API_KEY"
  elif [[ -n "${INTERNAL_EVAL_API_KEY:-}" ]]; then
    print_ok "已从环境变量传入 INTERNAL_EVAL_API_KEY"
  else
    print_err "未配置 INTERNAL_EVAL_API_KEY:内部评测接口将返回 503。"
    print_err "请在 api/.env.development(或 .env)中加入一行,例如:"
    print_err "  INTERNAL_EVAL_API_KEY=\"your-long-random-secret\""
    exit 1
  fi
}
|
||
|
||
# Launch the app-eval-web Vite dev server in the background.
# Aborts the script when the project directory, its node_modules, npm, or the
# internal eval API key is missing. The key comes from the environment or,
# failing that, from the first INTERNAL_EVAL_API_KEY line of .env, and is
# passed to Vite as VITE_EVAL_API_KEY. Records the PID in EVAL_WEB_PID and
# schedules the browser tab.
start_eval_web() {
  local env_file="${ROOT_DIR}/.env"
  local api_key

  print_header "启动 app-eval-web (Vite)"

  if [[ ! -d "${EVAL_WEB_DIR}" ]]; then
    print_err "未找到 ${EVAL_WEB_DIR}"
    exit 1
  fi
  if [[ ! -d "${EVAL_WEB_DIR}/node_modules" ]]; then
    print_err "请先执行: cd app-eval-web && npm install"
    exit 1
  fi
  require_cmd "npm"

  api_key="${INTERNAL_EVAL_API_KEY:-}"
  if [[ -z "${api_key}" && -f "${env_file}" ]]; then
    api_key="$(grep -E '^INTERNAL_EVAL_API_KEY=' "${env_file}" | head -1 | cut -d= -f2- | tr -d '\r' | sed 's/^"//;s/"$//')"
  fi
  if [[ -z "${api_key}" ]]; then
    print_err "无法解析 INTERNAL_EVAL_API_KEY,无法为 Vite 注入 VITE_EVAL_API_KEY"
    exit 1
  fi

  # The exports stay inside the background subshell; only npm sees them.
  (
    cd "${EVAL_WEB_DIR}"
    export VITE_EVAL_API_KEY="${api_key}"
    export VITE_EVAL_PROXY_TARGET="http://127.0.0.1:${INTERNAL_EVAL_PORT}"
    npm run dev -- --host 127.0.0.1 --port "${EVAL_WEB_PORT}"
  ) &
  EVAL_WEB_PID=$!

  print_ok "eval-web 已启动 (PID: ${EVAL_WEB_PID}) → http://127.0.0.1:${EVAL_WEB_PORT}/"
  schedule_eval_web_browser
}
|
||
|
||
# Start the internal evaluation API (uvicorn app.internal_main:internal_app)
# in the background and, unless START_EVAL_WEB is not 1, the eval web UI.
# Aborts the script when the API key is missing, the port is already taken,
# or the server does not start listening within 8 seconds. Records the PID in
# INTERNAL_EVAL_PID and prints the relevant URLs.
start_internal_eval_http() {
  local api_start_status=0

  check_internal_eval_key
  print_header "启动内部评测 API (internal_main :${INTERNAL_EVAL_PORT})"
  cd "${ROOT_DIR}"

  if is_port_listening "${INTERNAL_EVAL_PORT}"; then
    print_err "端口 ${INTERNAL_EVAL_PORT} 已被占用,无法启动内部评测 Uvicorn。"
    print_err "请先结束占用进程,或设置 INTERNAL_EVAL_PORT 为其他端口"
    exit 1
  fi

  # OTEL_SERVICE_NAME is set for this one process only.
  OTEL_SERVICE_NAME="${INTERNAL_EVAL_OTEL_SERVICE_NAME:-life-echo-internal-api}" \
    "${UVICORN_BIN}" app.internal_main:internal_app --reload \
    --reload-exclude 'alembic/**' \
    --reload-exclude 'alembic.ini' \
    --host "${INTERNAL_EVAL_HOST}" --port "${INTERNAL_EVAL_PORT}" &
  INTERNAL_EVAL_PID=$!

  # wait_for_tcp_listener: 0 = listening, 1 = process died, other = timeout.
  wait_for_tcp_listener "${INTERNAL_EVAL_PID}" "${INTERNAL_EVAL_PORT}" 8 || api_start_status=$?

  if (( api_start_status == 1 )); then
    print_err "Internal Eval API 启动失败,进程已退出;请查看上方 Uvicorn 日志"
    exit 1
  elif (( api_start_status != 0 )); then
    print_err "Internal Eval API 进程仍存活,但端口 ${INTERNAL_EVAL_PORT} 未在预期时间内开始监听"
    exit 1
  fi
  print_ok "Internal Eval API 已启动 (PID: ${INTERNAL_EVAL_PID})"

  if [[ "${START_EVAL_WEB}" != "1" ]]; then
    print_warn "已跳过 eval-web (START_EVAL_WEB=0)"
  else
    start_eval_web
  fi

  printf '\n'
  printf '%b\n' "${BLUE}── 内部评测${NC}"
  printf '%s\n' "评测 Web UI:   http://127.0.0.1:${EVAL_WEB_PORT}/  (Vite /internal → :${INTERNAL_EVAL_PORT})"
  printf '%s\n' "内部评测 API:  http://127.0.0.1:${INTERNAL_EVAL_PORT}/health"
  printf '%s\n' "评测 REST:     http://127.0.0.1:${INTERNAL_EVAL_PORT}/internal/api/evaluation"
  if [[ "${INTERNAL_EVAL_ENABLE_DOCS:-}" == "1" ]] \
    || grep -qE '^INTERNAL_EVAL_ENABLE_DOCS=true' "${ROOT_DIR}/.env" 2>/dev/null; then
    printf '%s\n' "内部评测文档:  http://127.0.0.1:${INTERNAL_EVAL_PORT}/docs"
  fi
  printf '%s\n' "说明: api/docs/internal-eval.md"
}
|
||
|
||
# Start the application processes and print the resulting endpoints.
# Unless running attach-only (EVAL_ATTACH_ONLY=1 with API_PORT already
# listening), launches uvicorn main:app and a Celery worker in the background
# and records API_PID / CELERY_PID. Then starts the internal eval stack when
# LIFE_ECHO_WITH_INTERNAL_EVAL=1 and schedules the browser tabs.
# Aborts the script when a port is taken or a process fails to come up.
start_services() {
  local skip_main=0
  local api_start_status=0

  print_header "启动 FastAPI 和 Celery"
  cd "${ROOT_DIR}"

  if [[ "${EVAL_ATTACH_ONLY}" == "1" ]] && is_port_listening "${API_PORT}"; then
    skip_main=1
    print_warn "EVAL_ATTACH_ONLY=1::${API_PORT} 已在监听,跳过本脚本内的主站与 Celery(请确保别处已有 Worker)"
    # Attach-only makes sense only when the internal eval stack will run.
    if [[ "${LIFE_ECHO_WITH_INTERNAL_EVAL}" != "1" ]]; then
      print_err "EVAL_ATTACH_ONLY=1 仅用于在已有主站时附加内部评测;请设置 LIFE_ECHO_WITH_INTERNAL_EVAL=1"
      exit 1
    fi
  fi

  if (( skip_main == 0 )); then
    if is_port_listening "${API_PORT}"; then
      print_err "端口 ${API_PORT} 已被占用,无法启动新的 Uvicorn。"
      print_err "请先结束占用进程,例如: lsof -nP -iTCP:${API_PORT} -sTCP:LISTEN"
      print_err "若主站已在其他终端由本脚本启动,可改用: EVAL_ATTACH_ONLY=1 ./development.sh"
      exit 1
    fi

    "${UVICORN_BIN}" main:app --reload \
      --reload-exclude 'alembic/**' \
      --reload-exclude 'alembic.ini' \
      --host "${API_HOST}" --port "${API_PORT}" &
    API_PID=$!

    # wait_for_tcp_listener: 0 = listening, 1 = process died, other = timeout.
    wait_for_tcp_listener "${API_PID}" "${API_PORT}" 8 || api_start_status=$?

    if (( api_start_status == 1 )); then
      print_err "FastAPI 启动失败,进程已退出;请查看上方 Uvicorn 日志"
      exit 1
    elif (( api_start_status != 0 )); then
      print_err "FastAPI 进程仍存活,但端口 ${API_PORT} 未在预期时间内开始监听"
      exit 1
    fi
    print_ok "FastAPI 已启动 (PID: ${API_PID})"

    "${CELERY_BIN}" -A app.tasks.celery_app worker --loglevel=info --pool="${CELERY_POOL}" -Q celery,memory_idle &
    CELERY_PID=$!
    ensure_background_process_alive "Celery" "${CELERY_PID}"
    print_ok "Celery 已启动 (PID: ${CELERY_PID})"
  fi

  if [[ "${LIFE_ECHO_WITH_INTERNAL_EVAL}" == "1" ]]; then
    start_internal_eval_http
  fi

  printf '\n'
  printf '%b\n' "${GREEN}开发环境启动完成${NC}"
  if [[ -n "${API_PID}" ]]; then
    printf '%s\n' "主站文档: http://localhost:${API_PORT}/docs"
    printf '%s\n' "健康检查: http://localhost:${API_PORT}/health"
  fi
  if [[ "${LIFE_ECHO_WITH_INTERNAL_EVAL}" == "1" ]]; then
    printf '%s\n' "评测 Web UI: http://127.0.0.1:${EVAL_WEB_PORT}/"
    printf '%s\n' "内部评测 API: http://127.0.0.1:${INTERNAL_EVAL_PORT}/health"
  fi

  if read_env_bool "OTEL_ENABLED" "0"; then
    printf '%s\n' "可观测性: Grafana http://127.0.0.1:${GRAFANA_HOST_PORT} | Prometheus http://127.0.0.1:${PROMETHEUS_HOST_PORT}"
    if is_port_listening "${GRAFANA_HOST_PORT}"; then
      schedule_observability_browser
    fi
  fi

  if [[ "${LIFE_ECHO_WITH_INTERNAL_EVAL}" == "1" ]] && is_pid_alive "${EVAL_WEB_PID}"; then
    schedule_eval_web_browser
  fi

  printf '%s\n' "按 Ctrl+C 停止所有进程"
}
|
||
|
||
# Entry point: prepare the environment, start infrastructure and services,
# then block until the background services exit.
# Signal handling: `cleanup` runs from the EXIT trap only. INT and TERM are
# turned into an explicit `exit`, because bash resumes the script after a
# trap handler returns; without the exit, Ctrl+C during a startup phase that
# runs with errexit suppressed (inside `if` or `|| true`) would tear
# everything down and then carry on starting services.
main() {
  if [[ "${LIFE_ECHO_WITH_INTERNAL_EVAL}" == "1" ]]; then
    print_header "Life Echo 开发环境(主站 + 内部评测 + 可观测性)"
  else
    print_header "Life Echo 开发环境一键启动(无内部评测)"
  fi

  require_cmd "uv"
  if [[ "${SKIP_INFRA}" != "1" ]]; then
    require_cmd "docker"
  fi

  trap cleanup EXIT
  # Once cleanup has begun (CLEANED_UP=1) further signals are no-ops, so a
  # second Ctrl+C cannot cut the shutdown short.
  trap '[[ "${CLEANED_UP}" == "1" ]] || exit 130' INT
  trap '[[ "${CLEANED_UP}" == "1" ]] || exit 143' TERM

  ensure_venv
  # Must sync before start_infra; otherwise should_start_observability cannot
  # see the OTEL_ENABLED value from .env.development.
  ensure_dotenv_from_development

  if [[ "${SKIP_INFRA}" != "1" ]]; then
    start_infra
    wait_postgres_ready || true
  else
    print_warn "已跳过 docker 基础设施 (SKIP_INFRA=1)"
    if should_start_observability; then
      print_warn "SKIP_INFRA=1 未自动启动 observability;若需 LGTM 请手动 docker compose up observability overlay"
    fi
  fi

  check_env_file
  check_otel_collector_ready || true
  wait_host_infra_ready
  run_migrations
  start_services

  # Wait only on the services that were actually started.
  local wait_pids=()
  [[ -n "${API_PID}" ]] && wait_pids+=("${API_PID}")
  [[ -n "${CELERY_PID}" ]] && wait_pids+=("${CELERY_PID}")
  [[ -n "${INTERNAL_EVAL_PID}" ]] && wait_pids+=("${INTERNAL_EVAL_PID}")
  [[ -n "${EVAL_WEB_PID}" ]] && wait_pids+=("${EVAL_WEB_PID}")
  if (( ${#wait_pids[@]} == 0 )); then
    print_err "没有可等待的进程,退出"
    exit 1
  fi
  wait "${wait_pids[@]}"
}
|
||
|
||
# Script entry point: forward all command-line arguments to main.
main "$@"
|