Files
life-echo/api/development.sh
Sully f09ae248f9 feat: OpenTelemetry LGTM observability, dev tooling, and memoir UX fixes (#31) (#32)
* add staging ios app build script

* feat(api): add OpenTelemetry LGTM stack for local observability

Wire OTel traces, metrics, and logs through a collector to Tempo,
Prometheus, and Loki, with custom LLM instrumentation, dev compose overlay,
Grafana provisioning, env templates, and development.sh auto-start.



* feat: expand observability, harden dev tooling, and fix expo staging UX

Add business and LLM Prometheus metrics with Grafana dashboards, alerting,
and a metrics verification script. Wire telemetry through adapters and core
LLM paths, and document the local LGTM workflow.

Fix development.sh for macOS bash 3.2, open Grafana and eval-web in Chrome,
and repair eval-web auto-open (unbound EVAL_WEB_BROWSER_SCHEDULED). Merge
internal-eval into the main dev script with improved compose handling.

Require EXPO_PUBLIC_* at build time, improve iOS HTTP ATS for staging IPs,
show memoir empty state instead of load errors when no chapters exist, and
add jest env setup plus chapter list response normalization.



* chore: enable Grafana Assistant Cursor plugin



* fix: memoir empty state and repair withdrawn 0020_chapters_book_id stamp

Show empty memoir UI when the chapter list succeeds with no items; treat auth/404 as non-fatal. Extend alembic revision repair so local dev DBs stamped with the removed 0020_chapters_book_id migration can roll back and upgrade to 0019.



---------

Co-authored-by: Kevin <kevin@brighteng.org>
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-20 15:14:13 +08:00

827 lines
24 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env bash
set -euo pipefail

# ANSI colour sequences consumed by the print_* helpers.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Paths derived from the location of this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${ROOT_DIR}/.." && pwd)"
EVAL_WEB_DIR="${REPO_ROOT}/app-eval-web"
VENV_DIR="${ROOT_DIR}/.venv"
PYTHON_BIN="${VENV_DIR}/bin/python"
UVICORN_BIN="${VENV_DIR}/bin/uvicorn"
CELERY_BIN="${VENV_DIR}/bin/celery"

# Local full stack: by default the Celery queue may be purged when the API
# starts; do not use this script in production.
: "${APP_ENV:=development}"
: "${CELERY_PURGE_BROKER_ON_STARTUP:=1}"
export APP_ENV CELERY_PURGE_BROKER_ON_STARTUP

# Every setting below keeps a non-empty value inherited from the environment
# and otherwise falls back to the default shown.
: "${API_HOST:=0.0.0.0}"
: "${API_PORT:=8000}"
: "${CELERY_POOL:=solo}"
: "${SKIP_INSTALL:=0}"
: "${SKIP_INFRA:=0}"
# Observability: empty = start the compose overlay when OTEL_ENABLED=true in
# .env; 0 = do not start; 1 = force start.
: "${START_OBSERVABILITY:=}"
: "${SHUTDOWN_TIMEOUT:=12}"
# Host ports; kept in line with the defaults in
# docker-compose.observability.yml / .env.example.
: "${OTEL_GRPC_HOST_PORT:=48317}"
: "${GRAFANA_HOST_PORT:=48300}"
: "${PROMETHEUS_HOST_PORT:=49090}"
# By default internal_main and app-eval-web are started too (set to 0 for the
# main site only).
: "${LIFE_ECHO_WITH_INTERNAL_EVAL:=1}"
# Open Grafana / the eval web UI in Google Chrome automatically (do not use
# Vite --open, which may land in Safari).
: "${OPEN_OBSERVABILITY_UI:=1}"
# When the main API port is already served by another development instance,
# only attach the internal eval API + frontend (the Celery worker / main site
# must then be provided elsewhere).
: "${EVAL_ATTACH_ONLY:=0}"
: "${INTERNAL_EVAL_HOST:=0.0.0.0}"
: "${INTERNAL_EVAL_PORT:=7999}"
: "${START_EVAL_WEB:=1}"
: "${OPEN_EVAL_WEB:=1}"
: "${EVAL_WEB_PORT:=5174}"

# Runtime state: PIDs of the background processes and one-shot flags.
API_PID=""
CELERY_PID=""
INTERNAL_EVAL_PID=""
EVAL_WEB_PID=""
CLEANED_UP=0
INFRA_STARTED=0
OBSERVABILITY_STARTED=0
OBSERVABILITY_BROWSER_SCHEDULED=0
EVAL_WEB_BROWSER_SCHEDULED=0
# Print a blue banner: a blank line, a rule, the title, and a closing rule.
# Arguments: $1 - title text (backslash escapes are interpreted)
print_header() {
  local rule="========================================"
  printf '%b\n' \
    "\n${BLUE}${rule}${NC}" \
    "${BLUE}$1${NC}" \
    "${BLUE}${rule}${NC}"
}
# Print a success message in green.
# Arguments: $1 - message text (backslash escapes are interpreted)
print_ok() {
  printf '%b\n' "${GREEN}$1${NC}"
}
# Print a warning message in yellow.
# Arguments: $1 - message text (backslash escapes are interpreted)
print_warn() {
  printf '%b\n' "${YELLOW}$1${NC}"
}
# Print an error message in red.
# Arguments: $1 - message text (backslash escapes are interpreted)
# Outputs:   writes to stderr, so error diagnostics stay out of any captured
#            or piped stdout
print_err() {
  echo -e "${RED}$1${NC}" >&2
}
# Open a URL in Google Chrome (macOS) or in the first Chrome/Chromium binary
# found on PATH.
# Arguments: $1 - URL to open
# Returns:   0 when a browser launch was issued, 1 (after a warning) otherwise
open_browser_url() {
  local url="$1"
  local candidate

  # macOS goes through `open -a` and never falls back to the other binaries.
  if command -v open >/dev/null 2>&1 && [[ "$(uname -s)" == "Darwin" ]]; then
    if ! open -a "Google Chrome" "${url}" >/dev/null 2>&1; then
      print_warn "未找到 Google Chrome请手动打开: ${url}"
      return 1
    fi
    return 0
  fi

  # Elsewhere: launch the first available binary in the background.
  for candidate in google-chrome chromium-browser chromium; do
    if command -v "${candidate}" >/dev/null 2>&1; then
      "${candidate}" "${url}" >/dev/null 2>&1 &
      return 0
    fi
  done

  print_warn "未找到 Chrome/Chromium请手动打开: ${url}"
  return 1
}
# Schedule a one-time background launch of the Grafana UI in the browser.
# Globals: OPEN_OBSERVABILITY_UI (read), GRAFANA_HOST_PORT (read),
#          OBSERVABILITY_BROWSER_SCHEDULED (read/written)
# Returns: 0; does nothing when auto-open is off or already scheduled
schedule_observability_browser() {
  [[ "${OPEN_OBSERVABILITY_UI}" == "1" ]] || return 0
  [[ "${OBSERVABILITY_BROWSER_SCHEDULED}" != "1" ]] || return 0

  OBSERVABILITY_BROWSER_SCHEDULED=1
  local grafana_url="http://127.0.0.1:${GRAFANA_HOST_PORT}"

  # Fixed 4-second delay before opening, run in a background subshell.
  (
    sleep 4
    open_browser_url "${grafana_url}"
  ) &

  print_ok "将自动打开 Grafana: ${grafana_url}"
}
# Schedule a one-time background launch of the eval web UI in the browser.
# Globals: OPEN_EVAL_WEB (read), EVAL_WEB_PORT (read),
#          EVAL_WEB_BROWSER_SCHEDULED (read/written)
# Returns: 0; does nothing when auto-open is off or already scheduled
schedule_eval_web_browser() {
  [[ "${OPEN_EVAL_WEB}" == "1" ]] || return 0
  [[ "${EVAL_WEB_BROWSER_SCHEDULED:-0}" != "1" ]] || return 0

  EVAL_WEB_BROWSER_SCHEDULED=1
  local eval_url="http://127.0.0.1:${EVAL_WEB_PORT}/"

  (
    # Poll once per second, at most 30 times, for the dev server port; the
    # URL is opened afterwards whether or not the port came up.
    local remaining=30
    until (( remaining == 0 )) || is_port_listening "${EVAL_WEB_PORT}"; do
      sleep 1
      remaining=$((remaining - 1))
    done
    open_browser_url "${eval_url}"
  ) &

  print_ok "将自动打开评测 Web (Chrome): ${eval_url}"
}
# Report whether a PID refers to a process that can be signalled.
# Arguments: $1 - PID (may be empty)
# Returns:   0 when `kill -0` succeeds, non-zero for an empty PID or failure
is_pid_alive() {
  local target="$1"
  if [[ -z "${target}" ]]; then
    return 1
  fi
  kill -0 "${target}" 2>/dev/null
}
# Wait, polling once per second, until a process has exited.
# Arguments: $1 - PID, $2 - timeout in seconds
# Returns:   0 once the process is gone, 1 if it is still alive at the timeout
wait_pid_exit() {
  local pid="$1"
  local timeout="$2"
  local elapsed=0

  while true; do
    if ! is_pid_alive "${pid}"; then
      return 0
    fi
    if (( elapsed >= timeout )); then
      return 1
    fi
    sleep 1
    elapsed=$((elapsed + 1))
  done
}
# Send SIGTERM to every descendant of a process, deepest descendants first.
# Arguments: $1 - PID whose descendants are signalled ($1 itself is not)
# Returns:   0; pgrep/kill failures are ignored (best effort)
kill_children_term() {
  local pid="$1"
  local children
  # Must be local: the recursive call below runs its own `read` loop, and the
  # final read at end of input leaves the variable empty. With a shared
  # global, a child that has children of its own would then get `kill ""`.
  local child_pid
  children="$(pgrep -P "${pid}" 2>/dev/null || true)"
  if [[ -n "${children}" ]]; then
    # Recurse into the child first so reloader/server grandchildren do not
    # linger.
    while IFS= read -r child_pid; do
      [[ -z "${child_pid}" ]] && continue
      kill_children_term "${child_pid}"
      kill -TERM "${child_pid}" 2>/dev/null || true
    done <<< "${children}"
  fi
}
# Stop a process: SIGTERM to its descendants and itself, then SIGKILL if it
# is still alive after the timeout.
# Arguments: $1 - display name, $2 - PID, $3 - timeout in seconds (default 10)
# Returns:   0
stop_process_gracefully() {
  local name="$1"
  local pid="$2"
  local timeout="${3:-10}"

  if is_pid_alive "${pid}"; then
    print_warn "正在停止 ${name}PID: ${pid}..."
  else
    print_ok "${name} 已退出"
    return 0
  fi

  kill_children_term "${pid}"
  kill -TERM "${pid}" 2>/dev/null || true

  if ! wait_pid_exit "${pid}" "${timeout}"; then
    # Still alive after the timeout: escalate to SIGKILL, wait up to 3s more.
    print_warn "${name}${timeout}s 内未退出,准备强制结束"
    kill -KILL "${pid}" 2>/dev/null || true
    wait_pid_exit "${pid}" 3 || true
    print_ok "${name} 已强制结束"
    return
  fi

  print_ok "${name} 已停止"
  return 0
}
# Shut down everything this script started; only the first call does work.
# Globals: CLEANED_UP (read/written), EVAL_WEB_PID, INTERNAL_EVAL_PID,
#          API_PID, CELERY_PID, INTERNAL_EVAL_PORT, SHUTDOWN_TIMEOUT,
#          INFRA_STARTED (read)
cleanup() {
  [[ "${CLEANED_UP}" != "1" ]] || return 0
  CLEANED_UP=1
  print_header "正在关闭开发环境"

  # (label, pid) pairs in shutdown order: frontend, internal API, main API,
  # worker.
  set -- \
    "eval-web (Vite)" "${EVAL_WEB_PID}" \
    "Internal Eval API (:${INTERNAL_EVAL_PORT})" "${INTERNAL_EVAL_PID}" \
    "FastAPI" "${API_PID}" \
    "Celery" "${CELERY_PID}"
  while (( $# >= 2 )); do
    if is_pid_alive "$2"; then
      stop_process_gracefully "$1" "$2" "${SHUTDOWN_TIMEOUT}"
    fi
    shift 2
  done

  # Containers are stopped only when this run brought them up.
  if [[ "${INFRA_STARTED}" == "1" ]]; then
    print_warn "正在停止 Docker 基础设施..."
    docker_compose_cmd stop >/dev/null 2>&1 || true
    print_ok "Docker 容器已停止"
  fi
}
# Abort the script unless a command is available on PATH.
# Arguments: $1 - command name
# Returns:   0 when found; otherwise prints an error and exits with status 1
require_cmd() {
  local cmd="$1"
  command -v "${cmd}" >/dev/null 2>&1 && return 0
  print_err "未找到命令: ${cmd}"
  exit 1
}
# Evaluate a boolean setting from the environment or from ${ROOT_DIR}/.env.
# A non-empty environment variable named $1 wins; otherwise the last
# `KEY=value` line in .env is used (surrounding quotes and CR stripped).
# Arguments: $1 - variable name
#            $2 - default ("1" = true) used only when no value is found
# Returns:   0 for 1/true/TRUE/yes/YES/on/ON, 1 for any other explicit value,
#            and the default when the value is missing or empty
read_env_bool() {
  local key="$1"
  local default="${2:-0}"
  local val=""
  if [[ -n "${!key:-}" ]]; then
    val="${!key}"
  elif [[ -f "${ROOT_DIR}/.env" ]]; then
    # `|| true`: a key that is absent makes grep fail, which under pipefail
    # would otherwise fail the assignment (and abort under `set -e`).
    val="$(grep -E "^${key}=" "${ROOT_DIR}/.env" \
      | tail -1 \
      | cut -d= -f2- \
      | tr -d '\r' \
      | sed "s/^[\"']//;s/[\"']\$//" || true)"
  fi
  case "${val}" in
    1 | true | TRUE | yes | YES | on | ON) return 0 ;;
    # Nothing configured anywhere: fall back to the caller's default.
    "") [[ "${default}" == "1" ]] ;;
    # An explicit non-truthy value is false regardless of the default, the
    # same way the environment-variable case treats it.
    *) return 1 ;;
  esac
}
# Decide whether the observability compose overlay should be used.
# START_OBSERVABILITY forces the answer when it holds a recognised on/off
# word; any other value (including empty) defers to OTEL_ENABLED.
# Returns: 0 to start observability, non-zero otherwise
should_start_observability() {
  local setting="${START_OBSERVABILITY}"
  local off_re='^(0|false|FALSE|no|NO|off|OFF)$'
  local on_re='^(1|true|TRUE|yes|YES|on|ON)$'

  if [[ "${setting}" =~ ${off_re} ]]; then
    return 1
  fi
  if [[ "${setting}" =~ ${on_re} ]]; then
    return 0
  fi
  read_env_bool "OTEL_ENABLED" "0"
}
# Run `docker compose` from ROOT_DIR with the compose files for this setup.
# Single place for the -f arguments; kept compatible with the bash 3.2 that
# ships with macOS (no `local -n`, no `local arr=(-f ...)`).
# Arguments: "$@" - docker compose sub-command and its options
# Returns:   exit status of docker compose
docker_compose_cmd() {
  if ! should_start_observability; then
    # Base stack only: `up` also gets --remove-orphans appended.
    if [[ "$1" == "up" ]]; then
      set -- "$@" --remove-orphans
    fi
    (cd "${ROOT_DIR}" && docker compose -f docker-compose.dev.yml "$@")
    return
  fi

  # Base stack plus the observability overlay; arguments pass through as-is.
  (
    cd "${ROOT_DIR}" \
      && docker compose \
        -f docker-compose.dev.yml \
        -f docker-compose.observability.yml \
        "$@"
  )
}
# Poll once per second until the OTel collector gRPC host port is listening.
# Arguments: $1 - maximum number of checks (default 30)
# Returns:   0 once the port is listening, 1 when all checks are used up
wait_otel_collector_ready() {
  local attempts_left="${1:-30}"

  while (( attempts_left > 0 )); do
    if is_port_listening "${OTEL_GRPC_HOST_PORT}"; then
      return 0
    fi
    sleep 1
    attempts_left=$((attempts_left - 1))
  done
  return 1
}
# Check that the OTel collector port is reachable when OTEL_ENABLED is true.
# If this run started the observability stack, wait for the port (up to 45
# one-second checks) before giving up.
# Returns: 0 when telemetry is disabled or the port is listening; 1 after
#          printing troubleshooting hints otherwise
check_otel_collector_ready() {
  read_env_bool "OTEL_ENABLED" "0" || return 0

  local ready=0
  if is_port_listening "${OTEL_GRPC_HOST_PORT}"; then
    ready=1
  elif [[ "${OBSERVABILITY_STARTED}" == "1" ]]; then
    print_warn "等待 OTel Collector 端口 :${OTEL_GRPC_HOST_PORT}"
    if wait_otel_collector_ready 45; then
      ready=1
    fi
  fi

  if (( ready )); then
    print_ok "OTel Collector 端口已监听 (:${OTEL_GRPC_HOST_PORT})"
    return 0
  fi

  print_warn "OTEL_ENABLED=true 但 :${OTEL_GRPC_HOST_PORT} 未监听"
  print_warn "请确认本次启动日志中有「启动可观测性栈」;或手动执行:"
  print_warn " docker compose -f docker-compose.dev.yml -f docker-compose.observability.yml up -d"
  print_warn "不需要可观测性时在 .env.development 设 OTEL_ENABLED=false"
  return 1
}
# Bring up the Docker infrastructure with `docker_compose_cmd up -d`, and
# print the observability endpoints when that stack is included.
# Globals: OBSERVABILITY_STARTED, INFRA_STARTED (written);
#          GRAFANA_HOST_PORT, PROMETHEUS_HOST_PORT, OTEL_GRPC_HOST_PORT (read)
start_infra() {
  local banner="启动 PostgreSQL 和 Redis"
  if should_start_observability; then
    banner="启动 PostgreSQL、Redis 与可观测性栈 (OTel / Grafana LGTM)"
    OBSERVABILITY_STARTED=1
  fi
  print_header "${banner}"

  docker_compose_cmd up -d
  INFRA_STARTED=1
  print_ok "PostgreSQL 127.0.0.1:48291Redis 127.0.0.1:48307见 docker-compose.dev.yml / .env.example"

  if [[ "${OBSERVABILITY_STARTED}" == "1" ]]; then
    print_ok "Grafana http://127.0.0.1:${GRAFANA_HOST_PORT} admin/admin"
    print_ok "Prometheus http://127.0.0.1:${PROMETHEUS_HOST_PORT}"
    print_ok "OTLP gRPC 127.0.0.1:${OTEL_GRPC_HOST_PORT}(应用读 .env 中 OTEL_*,无需 export"
    print_ok "详见 docs/observability.md"
    schedule_observability_browser
  fi

  print_ok "基础设施已就绪"
}
# Right after Docker starts, Postgres may not accept connections yet; running
# Alembic immediately would report a spurious failure.
#
# Poll `pg_isready` inside the postgres container once per second, up to 30
# times. Changes the working directory to ROOT_DIR.
# Returns: 0 once Postgres is ready, 1 (after a warning) otherwise
wait_postgres_ready() {
  local retries=30
  local attempts_left="${retries}"

  print_header "等待 PostgreSQL 就绪"
  cd "${ROOT_DIR}"

  until (( attempts_left == 0 )); do
    if docker compose -f docker-compose.dev.yml exec -T postgres \
      pg_isready -U postgres >/dev/null 2>&1; then
      print_ok "PostgreSQL 已就绪"
      return 0
    fi
    sleep 1
    attempts_left=$((attempts_left - 1))
  done

  print_warn "PostgreSQL 在 ${retries}s 内未就绪,迁移可能失败"
  return 1
}
# Private helper: print the value of KEY from ${ROOT_DIR}/.env.
# The last `KEY=` line wins (the same choice read_env_bool makes), a trailing
# CR from CRLF files is dropped, and one pair of surrounding double and/or
# single quotes is stripped.
# Arguments: $1 - variable name
# Outputs:   the value followed by a newline on stdout
# Returns:   0 when a non-empty value was found, 1 otherwise
_read_dotenv_value() {
  local key="$1"
  local env_file="${ROOT_DIR}/.env"
  local line
  local value=""

  [[ -f "${env_file}" ]] || return 1

  # `|| [[ -n ... ]]` keeps a final line that has no trailing newline.
  while IFS= read -r line || [[ -n "${line}" ]]; do
    # Drop the CR first; otherwise a closing quote is no longer the last
    # character and would survive the stripping below.
    line="${line%$'\r'}"
    case "${line}" in
      "${key}="*) value="${line#*=}" ;;
    esac
  done < "${env_file}"

  value="${value%\"}"
  value="${value#\"}"
  value="${value%\'}"
  value="${value#\'}"

  [[ -n "${value}" ]] || return 1
  printf '%s\n' "${value}"
}

# Print the database URL in effect: a non-empty DATABASE_URL environment
# variable, otherwise the value found in ${ROOT_DIR}/.env.
# Returns: 0 when a URL was printed, 1 when none could be determined
get_effective_database_url() {
  if [[ -n "${DATABASE_URL:-}" ]]; then
    printf '%s\n' "${DATABASE_URL}"
    return 0
  fi
  _read_dotenv_value "DATABASE_URL"
}

# Print the Redis URL in effect: a non-empty REDIS_URL environment variable,
# otherwise the value found in ${ROOT_DIR}/.env.
# Returns: 0 when a URL was printed, 1 when none could be determined
get_effective_redis_url() {
  if [[ -n "${REDIS_URL:-}" ]]; then
    printf '%s\n' "${REDIS_URL}"
    return 0
  fi
  _read_dotenv_value "REDIS_URL"
}
# Print the port of a URL: the first ":<digits>" that is followed by "/",
# "?" or the end of the string; otherwise the supplied default.
# Arguments: $1 - URL, $2 - default port
# Outputs:   the port followed by a newline on stdout
extract_url_port() {
  local url="$1"
  local default_port="$2"
  local port_re=':([0-9]+)(/|\?|$)'

  if ! [[ "${url}" =~ ${port_re} ]]; then
    printf '%s\n' "${default_port}"
    return
  fi
  printf '%s\n' "${BASH_REMATCH[1]}"
  return 0
}
# Check that the PostgreSQL and Redis ports named by the effective URLs are
# listening on the host, waiting up to 12 seconds for each. Only prints
# status lines; a missing port is a warning, not a failure.
wait_host_infra_ready() {
  local database_url redis_url pg_port redis_port

  if database_url="$(get_effective_database_url)"; then
    pg_port="$(extract_url_port "${database_url}" "5432")"
    # "$$" (this shell) is passed as the PID to watch.
    if wait_for_tcp_listener "$$" "${pg_port}" 12; then
      print_ok "宿主机 PostgreSQL 端口已监听 (:${pg_port})"
    else
      print_warn "宿主机 PostgreSQL 端口未监听 (:${pg_port});请检查 .env 与 docker-compose.dev.yml 端口映射"
    fi
  else
    print_warn "无法解析 DATABASE_URL跳过宿主机 PostgreSQL 端口检查"
  fi

  if redis_url="$(get_effective_redis_url)"; then
    redis_port="$(extract_url_port "${redis_url}" "6379")"
    if wait_for_tcp_listener "$$" "${redis_port}" 12; then
      print_ok "宿主机 Redis 端口已监听 (:${redis_port})"
    else
      print_warn "宿主机 Redis 端口未监听 (:${redis_port});请检查 .env 与 docker-compose.dev.yml 端口映射"
    fi
  else
    print_warn "无法解析 REDIS_URL跳过宿主机 Redis 端口检查"
  fi
}
# Warn when the host in DATABASE_URL is one of postgres, db, postgres-dev or
# postgresql; the warning text recommends localhost for host-side runs.
# Returns: 0; silent when no URL is available or the host is not in the list
warn_database_url_host_pitfall() {
  local database_url
  local host

  database_url="$(get_effective_database_url)" || return 0
  # Host = the characters after "@" up to the next ":", "/", "?" or "#".
  [[ "${database_url}" =~ @([^:/?#]+) ]] || return 0
  host="${BASH_REMATCH[1]}"

  case "${host}" in
    postgres | db | postgres-dev | postgresql)
      print_warn "检测到 DATABASE_URL 主机为 ${host};在宿主机执行 Alembic/uvicorn 时通常应使用 localhost"
      ;;
  esac
}
# Print a troubleshooting hint for the first known failure signature found
# in the first 200 lines of an Alembic log; print nothing when none match.
# Arguments: $1 - path to the captured Alembic output
print_alembic_failure_hint() {
  local log_file="$1"
  local excerpt
  excerpt="$(sed -n '1,200p' "${log_file}")"

  # Arms are tried top to bottom, so at most one hint group is printed.
  case "${excerpt}" in
    *'could not translate host name "postgres"'* | *"Name or service not known"*)
      print_warn "看起来 DATABASE_URL 指向了容器内主机名;在宿主机运行时请改用 localhost:48291见 docker-compose.dev.yml"
      ;;
    *"Connection refused"* | *"could not connect to server"*)
      print_warn "PostgreSQL 连接被拒绝;请确认容器已启动且 DATABASE_URL 与 docker-compose.dev.yml 暴露端口一致"
      ;;
    *"password authentication failed"*)
      print_warn "PostgreSQL 用户名或密码不匹配;请核对 .env.development 中的 DATABASE_URL"
      ;;
    *"No such file or directory"* | *"can't open file"*)
      print_warn "Alembic 依赖的文件或工作目录可能不正确;请确认在 api/ 目录运行脚本"
      ;;
    *"Can't locate revision"*)
      print_warn "alembic_version 与当前迁移链不一致(常见于已撤回的 0020_*"
      print_warn "将自动修复:重启 development.sh或执行: uv run python scripts/repair_alembic_version_after_withdrawn_0020.py && uv run alembic upgrade head"
      ;;
  esac
}
# Report whether something is listening on a local TCP port.
# Uses lsof when it is installed; otherwise falls back to a short connect()
# probe against 127.0.0.1 run with the virtualenv's Python.
# Arguments: $1 - TCP port number
# Returns:   0 when the port is listening, non-zero otherwise (including the
#            case where neither lsof nor the virtualenv Python is available)
is_port_listening() {
  local port="$1"
  if command -v lsof >/dev/null 2>&1; then
    lsof -nP -iTCP:"${port}" -sTCP:LISTEN >/dev/null 2>&1
    return $?
  fi
  if [[ -x "${PYTHON_BIN}" ]]; then
    # The heredoc body and its terminator stay at column 0, and the Python
    # block bodies must be indented or the probe dies with IndentationError.
    "${PYTHON_BIN}" - "${port}" <<'PY' >/dev/null 2>&1
import socket
import sys

sock = socket.socket()
sock.settimeout(0.2)
try:
    sock.connect(("127.0.0.1", int(sys.argv[1])))
except OSError:
    raise SystemExit(1)
finally:
    sock.close()
raise SystemExit(0)
PY
    return $?
  fi
  return 1
}
# Wait, polling once per second, for a process to start listening on a port.
# Arguments: $1 - PID expected to own the listener
#            $2 - TCP port
#            $3 - timeout in seconds (default 8)
# Returns:   0 port is listening; 1 the process exited first; 2 timed out
wait_for_tcp_listener() {
  local pid="$1"
  local port="$2"
  local timeout="${3:-8}"
  local elapsed

  for (( elapsed = 0; elapsed < timeout; elapsed += 1 )); do
    if is_port_listening "${port}"; then
      return 0
    fi
    is_pid_alive "${pid}" || return 1
    sleep 1
  done
  return 2
}
# Give a freshly started background process one second, then abort the
# script if it has already exited.
# Arguments: $1 - display name, $2 - PID
# Returns:   0 when still alive; otherwise prints an error and exits 1
ensure_background_process_alive() {
  local name="$1"
  local pid="$2"

  sleep 1
  is_pid_alive "${pid}" && return 0

  print_err "${name} 启动后立即退出,请查看上方日志"
  exit 1
}
# Make sure the virtualenv exists and, unless SKIP_INSTALL=1, install the
# Python dependencies with `uv sync`.
# Globals: VENV_DIR, SKIP_INSTALL, ROOT_DIR (read)
ensure_venv() {
  print_header "检查 Python 虚拟环境"
  if [[ ! -d "${VENV_DIR}" ]]; then
    print_warn ".venv 不存在,正在创建"
    uv venv "${VENV_DIR}"
  fi
  if [[ "${SKIP_INSTALL}" != "1" ]]; then
    print_header "安装 Python 依赖"
    # Run from ROOT_DIR: nothing has changed directory yet at this point, so
    # a plain `uv sync` would run wherever the script was launched from. The
    # subshell leaves the caller's working directory untouched.
    (cd "${ROOT_DIR}" && uv sync)
    print_ok "依赖安装完成"
  else
    print_warn "已跳过依赖安装 (SKIP_INSTALL=1)"
  fi
}
# Local convention: .env.development is the source of truth. Every launch
# overwrites .env with it, for pydantic Settings(env_file=".env") to read.
# Do not edit secrets only in .env without syncing them back to
# .env.development, or the next launch will overwrite them.
ensure_dotenv_from_development() {
  local source_file="${ROOT_DIR}/.env.development"
  local target_file="${ROOT_DIR}/.env"

  print_header "准备本地 .env"

  if [[ ! -f "${source_file}" ]]; then
    # No template: only warn; an existing .env is left as it is.
    print_warn "未找到 .env.development无法自动生成 .env"
    print_warn "请执行: cp api/.env.example api/.env.development 后按说明填写,再运行 ./development.sh"
    return
  fi

  cp "${source_file}" "${target_file}"
  print_ok "已从 .env.development 同步为 .env"
  return 0
}
# Report whether ${ROOT_DIR}/.env exists and, when it does, run the
# DATABASE_URL host check.
check_env_file() {
  print_header "检查环境变量文件"
  if [[ -f "${ROOT_DIR}/.env" ]]; then
    print_ok "检测到 .env"
    warn_database_url_host_pitfall
  else
    print_warn "未找到 .env应用可能因缺少配置启动失败"
  fi
}
# Run the alembic_version repair script and then `alembic upgrade head`.
# A failed migration does not abort the script: a hint and the last 40 lines
# of the captured output are printed instead. Changes the working directory
# to ROOT_DIR.
run_migrations() {
  print_header "执行数据库迁移"
  cd "${ROOT_DIR}"
  local log_file
  log_file="$(mktemp -t life-echo-alembic.XXXXXX.log)"
  # Best effort: a failing repair script must not block the migration.
  uv run python scripts/repair_alembic_version_after_withdrawn_0020.py >>"${log_file}" 2>&1 || true
  # Append (>>) rather than truncate (>), so the repair script's output
  # stays in the log that is shown when the migration fails.
  if uv run alembic upgrade head >>"${log_file}" 2>&1; then
    print_ok "Alembic 迁移已就绪"
    rm -f "${log_file}"
  else
    print_warn "Alembic 迁移失败(可能数据库未启动或 DATABASE_URL 未配置),应用启动可能失败"
    print_alembic_failure_hint "${log_file}"
    print_warn "Alembic 输出(最近 40 行):"
    tail -n 40 "${log_file}"
    rm -f "${log_file}"
  fi
}
# Require INTERNAL_EVAL_API_KEY to be configured, checking .env first and
# the environment second.
# Returns: 0 when configured; otherwise prints guidance and exits 1
check_internal_eval_key() {
  local env_file="${ROOT_DIR}/.env"

  print_header "检查内部评测密钥"

  if [[ -f "${env_file}" ]] \
    && grep -qE '^INTERNAL_EVAL_API_KEY=.+' "${env_file}" 2>/dev/null; then
    print_ok "已在 .env 中配置 INTERNAL_EVAL_API_KEY"
    return 0
  elif [[ -n "${INTERNAL_EVAL_API_KEY:-}" ]]; then
    print_ok "已从环境变量传入 INTERNAL_EVAL_API_KEY"
    return 0
  fi

  print_err "未配置 INTERNAL_EVAL_API_KEY内部评测接口将返回 503。"
  print_err "请在 api/.env.development或 .env中加入一行例如"
  print_err " INTERNAL_EVAL_API_KEY=\"your-long-random-secret\""
  exit 1
}
# Start the app-eval-web Vite dev server in the background and schedule the
# browser launch. The API key comes from the environment or, failing that,
# from the first INTERNAL_EVAL_API_KEY line in .env.
# Globals: EVAL_WEB_PID (written); EVAL_WEB_DIR, ROOT_DIR,
#          INTERNAL_EVAL_PORT, EVAL_WEB_PORT (read)
# Exits 1 when the project directory, node_modules, npm or the key is missing.
start_eval_web() {
  print_header "启动 app-eval-web (Vite)"

  [[ -d "${EVAL_WEB_DIR}" ]] || {
    print_err "未找到 ${EVAL_WEB_DIR}"
    exit 1
  }
  [[ -d "${EVAL_WEB_DIR}/node_modules" ]] || {
    print_err "请先执行: cd app-eval-web && npm install"
    exit 1
  }
  require_cmd "npm"

  local api_key="${INTERNAL_EVAL_API_KEY:-}"
  if [[ -z "${api_key}" && -f "${ROOT_DIR}/.env" ]]; then
    api_key="$(grep -E '^INTERNAL_EVAL_API_KEY=' "${ROOT_DIR}/.env" | head -1 | cut -d= -f2- | tr -d '\r' | sed 's/^"//;s/"$//')"
  fi
  [[ -n "${api_key}" ]] || {
    print_err "无法解析 INTERNAL_EVAL_API_KEY无法为 Vite 注入 VITE_EVAL_API_KEY"
    exit 1
  }

  # The exports live only inside this background subshell.
  (
    cd "${EVAL_WEB_DIR}"
    export VITE_EVAL_API_KEY="${api_key}"
    export VITE_EVAL_PROXY_TARGET="http://127.0.0.1:${INTERNAL_EVAL_PORT}"
    npm run dev -- --host 127.0.0.1 --port "${EVAL_WEB_PORT}"
  ) &
  EVAL_WEB_PID=$!

  print_ok "eval-web 已启动 (PID: ${EVAL_WEB_PID}) → http://127.0.0.1:${EVAL_WEB_PORT}/"
  schedule_eval_web_browser
}
# Start the internal evaluation API (app.internal_main:internal_app) with
# uvicorn --reload in the background, wait for its port, optionally start the
# eval web frontend, and print the relevant URLs.
# Globals: INTERNAL_EVAL_PID (written); ROOT_DIR, UVICORN_BIN,
#          INTERNAL_EVAL_HOST, INTERNAL_EVAL_PORT, START_EVAL_WEB,
#          EVAL_WEB_PORT (read)
# Exits 1 when the key check fails, the port is already in use, or the server
# does not start listening within 8 seconds.
start_internal_eval_http() {
check_internal_eval_key
print_header "启动内部评测 API (internal_main :${INTERNAL_EVAL_PORT})"
cd "${ROOT_DIR}"
# Refuse to start when something already listens on the target port.
if is_port_listening "${INTERNAL_EVAL_PORT}"; then
print_err "端口 ${INTERNAL_EVAL_PORT} 已被占用,无法启动内部评测 Uvicorn。"
print_err "请先结束占用进程,或设置 INTERNAL_EVAL_PORT 为其他端口"
exit 1
fi
# OTEL_SERVICE_NAME is set for this one process only; it can be overridden
# through INTERNAL_EVAL_OTEL_SERVICE_NAME.
OTEL_SERVICE_NAME="${INTERNAL_EVAL_OTEL_SERVICE_NAME:-life-echo-internal-api}" \
"${UVICORN_BIN}" app.internal_main:internal_app --reload \
--reload-exclude 'alembic/**' \
--reload-exclude 'alembic.ini' \
--host "${INTERNAL_EVAL_HOST}" --port "${INTERNAL_EVAL_PORT}" &
INTERNAL_EVAL_PID=$!
local api_start_status=0
# The call sits in an `if` so a non-zero status can be captured in the else
# branch instead of aborting the script under `set -e`.
if wait_for_tcp_listener "${INTERNAL_EVAL_PID}" "${INTERNAL_EVAL_PORT}" 8; then
api_start_status=0
else
api_start_status=$?
fi
# 0 = listening, 1 = process exited, anything else = timed out while alive.
case "${api_start_status}" in
0)
print_ok "Internal Eval API 已启动 (PID: ${INTERNAL_EVAL_PID})"
;;
1)
print_err "Internal Eval API 启动失败,进程已退出;请查看上方 Uvicorn 日志"
exit 1
;;
*)
print_err "Internal Eval API 进程仍存活,但端口 ${INTERNAL_EVAL_PORT} 未在预期时间内开始监听"
exit 1
;;
esac
if [[ "${START_EVAL_WEB}" == "1" ]]; then
start_eval_web
else
print_warn "已跳过 eval-web (START_EVAL_WEB=0)"
fi
# Summary of the internal-eval endpoints.
echo
echo -e "${BLUE}── 内部评测${NC}"
echo "评测 Web UI: http://127.0.0.1:${EVAL_WEB_PORT}/ Vite /internal → :${INTERNAL_EVAL_PORT}"
echo "内部评测 API: http://127.0.0.1:${INTERNAL_EVAL_PORT}/health"
echo "评测 REST: http://127.0.0.1:${INTERNAL_EVAL_PORT}/internal/api/evaluation"
# The docs URL is printed only when INTERNAL_EVAL_ENABLE_DOCS=1 in the
# environment or a line starting with INTERNAL_EVAL_ENABLE_DOCS=true is in
# .env.
if [[ "${INTERNAL_EVAL_ENABLE_DOCS:-}" == "1" ]] || grep -qE '^INTERNAL_EVAL_ENABLE_DOCS=true' "${ROOT_DIR}/.env" 2>/dev/null; then
echo "内部评测文档: http://127.0.0.1:${INTERNAL_EVAL_PORT}/docs"
fi
echo "说明: api/docs/internal-eval.md"
}
# Start the main FastAPI server and the Celery worker (unless attaching to an
# existing main site), then the internal eval stack when enabled, and print a
# summary of the URLs.
# Globals: API_PID, CELERY_PID (written); ROOT_DIR, EVAL_ATTACH_ONLY,
#          LIFE_ECHO_WITH_INTERNAL_EVAL, API_HOST, API_PORT, CELERY_POOL,
#          UVICORN_BIN, CELERY_BIN and the *_PORT settings (read)
# Exits 1 on an invalid option combination, an occupied API port, or a
# service that fails to start.
start_services() {
print_header "启动 FastAPI 和 Celery"
cd "${ROOT_DIR}"
local skip_main=0
# Attach mode takes effect only when the API port is really being served.
if [[ "${EVAL_ATTACH_ONLY}" == "1" ]] && is_port_listening "${API_PORT}"; then
skip_main=1
print_warn "EVAL_ATTACH_ONLY=1:${API_PORT} 已在监听,跳过本脚本内的主站与 Celery请确保别处已有 Worker"
fi
# Skipping the main site without the internal eval stack would start nothing.
if [[ "${skip_main}" == "1" ]] && [[ "${LIFE_ECHO_WITH_INTERNAL_EVAL}" != "1" ]]; then
print_err "EVAL_ATTACH_ONLY=1 仅用于在已有主站时附加内部评测;请设置 LIFE_ECHO_WITH_INTERNAL_EVAL=1"
exit 1
fi
if [[ "${skip_main}" == "0" ]]; then
if is_port_listening "${API_PORT}"; then
print_err "端口 ${API_PORT} 已被占用,无法启动新的 Uvicorn。"
print_err "请先结束占用进程,例如: lsof -nP -iTCP:${API_PORT} -sTCP:LISTEN"
print_err "若主站已在其他终端由本脚本启动,可改用: EVAL_ATTACH_ONLY=1 ./development.sh"
exit 1
fi
# Main API with auto-reload; alembic files are excluded from the reload
# watcher.
"${UVICORN_BIN}" main:app --reload \
--reload-exclude 'alembic/**' \
--reload-exclude 'alembic.ini' \
--host "${API_HOST}" --port "${API_PORT}" &
API_PID=$!
local api_start_status=0
# The call sits in an `if` so a non-zero status can be captured in the else
# branch instead of aborting the script under `set -e`.
if wait_for_tcp_listener "${API_PID}" "${API_PORT}" 8; then
api_start_status=0
else
api_start_status=$?
fi
# 0 = listening, 1 = process exited, anything else = timed out while alive.
case "${api_start_status}" in
0)
print_ok "FastAPI 已启动 (PID: ${API_PID})"
;;
1)
print_err "FastAPI 启动失败,进程已退出;请查看上方 Uvicorn 日志"
exit 1
;;
*)
print_err "FastAPI 进程仍存活,但端口 ${API_PORT} 未在预期时间内开始监听"
exit 1
;;
esac
# Celery worker consuming the `celery` and `memory_idle` queues.
"${CELERY_BIN}" -A app.tasks.celery_app worker --loglevel=info --pool="${CELERY_POOL}" -Q celery,memory_idle &
CELERY_PID=$!
ensure_background_process_alive "Celery" "${CELERY_PID}"
print_ok "Celery 已启动 (PID: ${CELERY_PID})"
fi
if [[ "${LIFE_ECHO_WITH_INTERNAL_EVAL}" == "1" ]]; then
start_internal_eval_http
fi
# Final summary; the main-site URLs are printed only when this run started
# the API.
echo
echo -e "${GREEN}开发环境启动完成${NC}"
if [[ -n "${API_PID}" ]]; then
echo "主站文档: http://localhost:${API_PORT}/docs"
echo "健康检查: http://localhost:${API_PORT}/health"
fi
if [[ "${LIFE_ECHO_WITH_INTERNAL_EVAL}" == "1" ]]; then
echo "评测 Web UI: http://127.0.0.1:${EVAL_WEB_PORT}/"
echo "内部评测 API: http://127.0.0.1:${INTERNAL_EVAL_PORT}/health"
fi
if read_env_bool "OTEL_ENABLED" "0"; then
echo "可观测性: Grafana http://127.0.0.1:${GRAFANA_HOST_PORT} | Prometheus http://127.0.0.1:${PROMETHEUS_HOST_PORT}"
# Reached e.g. with SKIP_INFRA=1 while Grafana is already up; the schedule
# helper itself makes sure the browser is opened only once.
if is_port_listening "${GRAFANA_HOST_PORT}"; then
schedule_observability_browser
fi
fi
if [[ "${LIFE_ECHO_WITH_INTERNAL_EVAL}" == "1" ]] && is_pid_alive "${EVAL_WEB_PID}"; then
schedule_eval_web_browser
fi
echo "按 Ctrl+C 停止所有进程"
}
# Entry point: check prerequisites, prepare the environment, start the
# infrastructure and services, then block until the background processes
# exit. `cleanup` is installed as the EXIT/INT/TERM trap before anything is
# started.
main() {
if [[ "${LIFE_ECHO_WITH_INTERNAL_EVAL}" == "1" ]]; then
print_header "Life Echo 开发环境(主站 + 内部评测 + 可观测性)"
else
print_header "Life Echo 开发环境一键启动(无内部评测)"
fi
require_cmd "uv"
# docker is required only when this script manages the infrastructure.
if [[ "${SKIP_INFRA}" != "1" ]]; then
require_cmd "docker"
fi
trap cleanup EXIT INT TERM
ensure_venv
# Must run before start_infra; otherwise should_start_observability cannot
# see the OTEL_ENABLED value that comes from .env.development.
ensure_dotenv_from_development
if [[ "${SKIP_INFRA}" != "1" ]]; then
start_infra
# A Postgres that is not ready yet is not fatal here.
wait_postgres_ready || true
else
print_warn "已跳过 docker 基础设施 (SKIP_INFRA=1)"
if should_start_observability; then
print_warn "SKIP_INFRA=1 未自动启动 observability若需 LGTM 请手动 docker compose up observability overlay"
fi
fi
check_env_file
# A collector that is not reachable only produces warnings.
check_otel_collector_ready || true
wait_host_infra_ready
run_migrations
start_services
# Collect the PIDs that were actually started and wait on all of them.
local wait_pids=()
[[ -n "${API_PID}" ]] && wait_pids+=("${API_PID}")
[[ -n "${CELERY_PID}" ]] && wait_pids+=("${CELERY_PID}")
[[ -n "${INTERNAL_EVAL_PID}" ]] && wait_pids+=("${INTERNAL_EVAL_PID}")
[[ -n "${EVAL_WEB_PID}" ]] && wait_pids+=("${EVAL_WEB_PID}")
if (( ${#wait_pids[@]} == 0 )); then
print_err "没有可等待的进程,退出"
exit 1
fi
wait "${wait_pids[@]}"
}
main "$@"