配置 SSOT(TOML + .env) 统一错误契约 Auth 与事务边界 Redis / Celery 可靠性:业务 Redis(DB/0)与 Celery broker/backend(DB/1)显式拆分;连接池、sync client 可观测性(OpenTelemetry + LGTM)
898 lines
26 KiB
Bash
Executable File
898 lines
26 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# Launcher for the local development stack. Every tunable below can be
# overridden from the caller's environment; the value shown is the default.

set -euo pipefail

# Colour escape sequences used by the print_* helpers.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Paths derived from the location of this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${ROOT_DIR}/.." && pwd)"
EVAL_WEB_DIR="${REPO_ROOT}/app-eval-web"
VENV_DIR="${ROOT_DIR}/.venv"
PYTHON_BIN="${VENV_DIR}/bin/python"
UVICORN_BIN="${VENV_DIR}/bin/uvicorn"
CELERY_BIN="${VENV_DIR}/bin/celery"

# Local full stack: by default the Celery queue may be purged when the API
# starts. Do not use this script in production.
: "${APP_ENV:=development}"
export APP_ENV
: "${CELERY_PURGE_BROKER_ON_STARTUP:=1}"
export CELERY_PURGE_BROKER_ON_STARTUP

: "${API_HOST:=0.0.0.0}"
: "${API_PORT:=8000}"
: "${CELERY_POOL:=solo}"
: "${SKIP_INSTALL:=0}"
: "${SKIP_INFRA:=0}"
# Observability: empty = read deploy.otel_enabled from config/*.toml (or the
# OTEL_ENABLED override in .env); 0 = do not start; 1 = force start.
: "${START_OBSERVABILITY:=}"
: "${SHUTDOWN_TIMEOUT:=12}"
: "${CELERY_SHUTDOWN_TIMEOUT:=25}"

# Keep in sync with the default host ports in
# docker-compose.observability.yml / .env.example.
: "${OTEL_GRPC_HOST_PORT:=48317}"
: "${GRAFANA_HOST_PORT:=48300}"
: "${PROMETHEUS_HOST_PORT:=49090}"

# By default internal_main + app-eval-web are started as well
# (set to 0 to run the main site only).
: "${LIFE_ECHO_WITH_INTERNAL_EVAL:=1}"
# Open Grafana / the evaluation web UI in Google Chrome automatically
# (do not use Vite --open, which may end up in Safari).
: "${OPEN_OBSERVABILITY_UI:=1}"
# When the main API port is already held by another development instance,
# only attach the internal evaluation API + frontend (the main site and
# Celery must then be provided elsewhere).
: "${EVAL_ATTACH_ONLY:=0}"
: "${INTERNAL_EVAL_HOST:=0.0.0.0}"
: "${INTERNAL_EVAL_PORT:=7999}"
: "${START_EVAL_WEB:=1}"
: "${OPEN_EVAL_WEB:=1}"
: "${EVAL_WEB_PORT:=5174}"

# Runtime state: PIDs of the background processes and one-shot flags.
API_PID=""
CELERY_PID=""
INTERNAL_EVAL_PID=""
EVAL_WEB_PID=""
CLEANED_UP=0
INFRA_STARTED=0
OBSERVABILITY_STARTED=0
OBSERVABILITY_BROWSER_SCHEDULED=0
EVAL_WEB_BROWSER_SCHEDULED=0
|
||
|
||
# Print a section banner: an empty line, then the title between two rules,
# all in blue.
# Arguments: $1 - banner title
print_header() {
  local rule="${BLUE}========================================${NC}"
  printf '%b\n' "\n${rule}" "${BLUE}$1${NC}" "${rule}"
}
|
||
|
||
# Print a success line (green, prefixed with a check mark) to stdout.
# Arguments: $1 - message
print_ok() {
  printf '%b\n' "${GREEN}✓ $1${NC}"
}
|
||
|
||
# Print a warning line (yellow, prefixed with a warning sign) to stdout.
# Arguments: $1 - message
print_warn() {
  printf '%b\n' "${YELLOW}⚠ $1${NC}"
}
|
||
|
||
# Print an error line (red, prefixed with a cross) to stdout.
# Arguments: $1 - message
print_err() {
  printf '%b\n' "${RED}✗ $1${NC}"
}
|
||
|
||
# Open a URL in Chrome/Chromium.
# On macOS (`open` available and uname reports Darwin) the URL is handed to
# "Google Chrome" via `open -a`; elsewhere the first of google-chrome,
# chromium-browser, chromium found on PATH is launched in the background.
# Arguments: $1 - URL to open
# Returns:   0 if a browser was launched, 1 (after a warning) otherwise
open_browser_url() {
  local url="$1"
  local candidate

  if command -v open >/dev/null 2>&1 && [[ "$(uname -s)" == "Darwin" ]]; then
    if ! open -a "Google Chrome" "${url}" >/dev/null 2>&1; then
      print_warn "未找到 Google Chrome,请手动打开: ${url}"
      return 1
    fi
    return 0
  fi

  for candidate in google-chrome chromium-browser chromium; do
    if command -v "${candidate}" >/dev/null 2>&1; then
      "${candidate}" "${url}" >/dev/null 2>&1 &
      return 0
    fi
  done

  print_warn "未找到 Chrome/Chromium,请手动打开: ${url}"
  return 1
}
|
||
|
||
# Schedule, at most once, a background job that opens Grafana in the browser
# four seconds from now. Does nothing unless OPEN_OBSERVABILITY_UI is "1".
# Globals: OPEN_OBSERVABILITY_UI, GRAFANA_HOST_PORT (read);
#          OBSERVABILITY_BROWSER_SCHEDULED (read/written)
schedule_observability_browser() {
  [[ "${OPEN_OBSERVABILITY_UI}" == "1" ]] || return 0
  [[ "${OBSERVABILITY_BROWSER_SCHEDULED}" != "1" ]] || return 0

  OBSERVABILITY_BROWSER_SCHEDULED=1
  local grafana_url="http://127.0.0.1:${GRAFANA_HOST_PORT}"

  (
    sleep 4
    open_browser_url "${grafana_url}"
  ) &

  print_ok "将自动打开 Grafana: ${grafana_url}"
}
|
||
|
||
# Schedule, at most once, a background job that waits (up to 30 one-second
# polls) for the evaluation web port to listen and then opens it in the
# browser. Does nothing unless OPEN_EVAL_WEB is "1".
# Globals: OPEN_EVAL_WEB, EVAL_WEB_PORT (read);
#          EVAL_WEB_BROWSER_SCHEDULED (read/written)
schedule_eval_web_browser() {
  [[ "${OPEN_EVAL_WEB}" == "1" ]] || return 0
  [[ "${EVAL_WEB_BROWSER_SCHEDULED:-0}" != "1" ]] || return 0

  EVAL_WEB_BROWSER_SCHEDULED=1
  local eval_url="http://127.0.0.1:${EVAL_WEB_PORT}/"

  (
    local attempt
    for (( attempt = 0; attempt < 30; attempt++ )); do
      if is_port_listening "${EVAL_WEB_PORT}"; then
        break
      fi
      sleep 1
    done
    # The URL is opened even if the port never came up within the window.
    open_browser_url "${eval_url}"
  ) &

  print_ok "将自动打开评测 Web (Chrome): ${eval_url}"
}
|
||
|
||
# Test whether a process exists and can be signalled.
# Arguments: $1 - PID (may be empty)
# Returns:   0 if `kill -0` succeeds; non-zero for an empty PID or a failed probe
is_pid_alive() {
  local pid="$1"
  if [[ -z "${pid}" ]]; then
    return 1
  fi
  kill -0 "${pid}" 2>/dev/null
}
|
||
|
||
# Poll once per second until a process has exited.
# Arguments: $1 - PID
#            $2 - maximum number of seconds to wait
# Returns:   0 once the process is gone, 1 if it is still alive after $2 seconds
wait_pid_exit() {
  local pid="$1"
  local timeout="$2"
  local elapsed

  for (( elapsed = 0; ; elapsed++ )); do
    is_pid_alive "${pid}" || return 0
    (( elapsed < timeout )) || return 1
    sleep 1
  done
}
|
||
|
||
# Send SIGTERM to every descendant of a process, deepest descendants first,
# so that reloader/server children are not left behind. The process itself
# is NOT signalled; that is the caller's job.
# Arguments: $1 - PID whose descendants should be terminated
# Returns:   0 (signalling failures are ignored)
kill_children_term() {
  local pid="$1"
  local children
  # Must be local: the recursive call runs its own `read` loop, and a
  # shared variable would be blanked when that loop hits end-of-input,
  # so the child we are about to signal would be lost.
  local child_pid

  children="$(pgrep -P "${pid}" 2>/dev/null || true)"
  if [[ -z "${children}" ]]; then
    return 0
  fi

  while IFS= read -r child_pid; do
    [[ -z "${child_pid}" ]] && continue
    # Handle grandchildren first, then the child itself.
    kill_children_term "${child_pid}"
    kill -TERM "${child_pid}" 2>/dev/null || true
  done <<< "${children}"
  return 0
}
|
||
|
||
# Stop a process with escalating signals.
# Sequence: TERM to its descendants, then $4 to the process; if it outlives
# the timeout and $4 was not TERM, TERM with a 5 s grace period; finally KILL.
# Arguments: $1 - display name
#            $2 - PID
#            $3 - seconds to wait after the first signal (default 10)
#            $4 - first signal to send (default TERM)
# Returns:   0
stop_process_gracefully() {
  local name="$1"
  local pid="$2"
  local timeout="${3:-10}"
  local signal="${4:-TERM}"
  local stopped=0

  if ! is_pid_alive "${pid}"; then
    print_ok "${name} 已退出"
    return 0
  fi

  print_warn "正在停止 ${name}(PID: ${pid})..."
  kill_children_term "${pid}"
  kill "-${signal}" "${pid}" 2>/dev/null || true

  if wait_pid_exit "${pid}" "${timeout}"; then
    stopped=1
  elif [[ "${signal}" != "TERM" ]]; then
    # The softer signal was ignored: retry with TERM before resorting to KILL.
    kill -TERM "${pid}" 2>/dev/null || true
    if wait_pid_exit "${pid}" 5; then
      stopped=1
    fi
  fi

  if (( stopped )); then
    print_ok "${name} 已停止"
    return 0
  fi

  print_warn "${name} 在 ${timeout}s 内未退出,准备强制结束"
  kill -KILL "${pid}" 2>/dev/null || true
  wait_pid_exit "${pid}" 3 || true
  print_ok "${name} 已强制结束"
}
|
||
|
||
# Shut the development environment down; safe to call more than once.
# Stops, in order: eval-web, FastAPI, the internal evaluation API, Celery,
# and finally the Docker services if this run started them.
# Globals: CLEANED_UP (read/written); *_PID, SHUTDOWN_TIMEOUT,
#          CELERY_SHUTDOWN_TIMEOUT, INTERNAL_EVAL_PORT, INFRA_STARTED (read)
cleanup() {
  [[ "${CLEANED_UP}" != "1" ]] || return 0
  CLEANED_UP=1

  print_header "正在关闭开发环境"

  ! is_pid_alive "${EVAL_WEB_PID}" \
    || stop_process_gracefully "eval-web (Vite)" "${EVAL_WEB_PID}" "${SHUTDOWN_TIMEOUT}" INT

  ! is_pid_alive "${API_PID}" \
    || stop_process_gracefully "FastAPI" "${API_PID}" "${SHUTDOWN_TIMEOUT}" INT

  ! is_pid_alive "${INTERNAL_EVAL_PID}" \
    || stop_process_gracefully "Internal Eval API (:${INTERNAL_EVAL_PORT})" "${INTERNAL_EVAL_PID}" "${SHUTDOWN_TIMEOUT}" INT

  ! is_pid_alive "${CELERY_PID}" \
    || stop_process_gracefully "Celery" "${CELERY_PID}" "${CELERY_SHUTDOWN_TIMEOUT}" INT

  [[ "${INFRA_STARTED}" == "1" ]] || return 0
  print_warn "正在停止 Docker 基础设施..."
  # Best effort: a failing `compose stop` must not abort the shutdown.
  docker_compose_cmd stop >/dev/null 2>&1 || true
  print_ok "Docker 容器已停止"
}
|
||
|
||
# Abort the script unless a command is available.
# Arguments: $1 - command name
# Returns:   0 if found; otherwise prints an error and exits with status 1
require_cmd() {
  local cmd="$1"
  command -v "${cmd}" >/dev/null 2>&1 && return 0
  print_err "未找到命令: ${cmd}"
  exit 1
}
|
||
|
||
# Resolve a boolean setting from the environment or ${ROOT_DIR}/.env.
# Precedence: a non-empty environment variable named $1, then the last
# `$1=` line in .env (CR and surrounding double quotes stripped).
# Any non-empty value decides the result: 1/true/TRUE/yes/YES/on/ON are
# true, everything else is false. The default applies only when the key is
# absent or empty, so an explicit `KEY=false` is never overridden by it.
# Arguments: $1 - variable name
#            $2 - default, "1" for true (default "0")
# Returns:   0 for true, 1 for false
read_env_bool() {
  local key="$1"
  local default="${2:-0}"
  local val=""

  if [[ -n "${!key:-}" ]]; then
    val="${!key}"
  elif [[ -f "${ROOT_DIR}/.env" ]]; then
    # `|| true`: a missing key makes grep fail, which must not abort the
    # script under `set -e -o pipefail`.
    val="$(
      grep -E "^${key}=" "${ROOT_DIR}/.env" \
        | tail -1 | cut -d= -f2- | tr -d '\r' | sed 's/^"//;s/"$//' || true
    )"
  fi

  if [[ -z "${val}" ]]; then
    [[ "${default}" == "1" ]]
    return
  fi

  case "${val}" in
    1 | true | TRUE | yes | YES | on | ON) return 0 ;;
    *) return 1 ;;
  esac
}
|
||
|
||
# Print the effective APP_ENV.
# A non-empty `APP_ENV=` line in ${ROOT_DIR}/.env (last occurrence, CR and
# surrounding double quotes stripped) wins; otherwise the APP_ENV
# environment variable is used, falling back to "development".
# Outputs: the environment name on stdout
read_app_env_from_dotenv() {
  local dotenv="${ROOT_DIR}/.env"
  local from_file=""

  if [[ -f "${dotenv}" ]]; then
    from_file="$(grep -E '^APP_ENV=' "${dotenv}" | tail -1 | cut -d= -f2- | tr -d '\r' | sed 's/^"//;s/"$//')"
  fi

  printf '%s\n' "${from_file:-${APP_ENV:-development}}"
}
|
||
|
||
# Read a boolean attribute from the application config through the
# project's own loader (`uv run python`, executed from ROOT_DIR).
# The environment name, section and field are passed to Python as argv
# rather than spliced into the source, so quotes or other special
# characters in them cannot break or alter the executed code.
# Arguments: $1 - config section (attribute of the loaded config object)
#            $2 - field within that section
#            $3 - default, "1" for true (default "0"); used only when the
#                 value cannot be determined (loader failed or printed
#                 nothing usable)
# Returns:   0 for true, 1 for false
read_toml_bool_field() {
  local section="$1"
  local field="$2"
  local default="${3:-0}"
  local app_env
  local enabled

  app_env="$(read_app_env_from_dotenv)"

  # Only the last stdout line is considered; a failing loader yields an
  # empty result instead of aborting the script.
  enabled="$(
    cd "${ROOT_DIR}" && uv run python -c '
import sys

from app.core.app_config_loader import load_app_config

app_env, section, field = sys.argv[1:4]
cfg = load_app_config(app_env)
print("1" if getattr(getattr(cfg, section), field) else "0")
' "${app_env}" "${section}" "${field}" 2>/dev/null | tail -1
  )" || enabled=""

  case "${enabled}" in
    1) return 0 ;;
    # An explicit false in the config is authoritative; the default must
    # not override it.
    0) return 1 ;;
    *) [[ "${default}" == "1" ]] ;;
  esac
}
|
||
|
||
# Decide whether OpenTelemetry is enabled.
# An OTEL_ENABLED environment variable or an `OTEL_ENABLED=` line in
# ${ROOT_DIR}/.env is resolved through read_env_bool; otherwise the
# deploy.otel_enabled config field is consulted.
# Arguments: $1 - default, "1" for true (default "0")
# Returns:   status of the delegated lookup (0 = enabled)
read_deploy_otel_enabled() {
  local default="${1:-0}"
  local dotenv="${ROOT_DIR}/.env"

  if [[ -n "${OTEL_ENABLED:-}" ]] \
    || { [[ -f "${dotenv}" ]] && grep -qE '^OTEL_ENABLED=' "${dotenv}" 2>/dev/null; }; then
    read_env_bool "OTEL_ENABLED" "${default}"
    return
  fi

  read_toml_bool_field "deploy" "otel_enabled" "${default}"
}
|
||
|
||
# Decide whether the internal evaluation API docs are enabled.
# Precedence: non-empty INTERNAL_EVAL_ENABLE_DOCS environment variable
# (1/true/TRUE/yes/YES/on/ON mean true, anything else false), then an
# `INTERNAL_EVAL_ENABLE_DOCS=` line in ${ROOT_DIR}/.env, then the
# eval.internal_enable_docs config field. The default is false.
# Returns: 0 when enabled, non-zero otherwise
read_eval_internal_enable_docs() {
  local from_env="${INTERNAL_EVAL_ENABLE_DOCS:-}"
  local dotenv="${ROOT_DIR}/.env"

  if [[ -n "${from_env}" ]]; then
    if [[ "${from_env}" =~ ^(1|true|TRUE|yes|YES|on|ON)$ ]]; then
      return 0
    fi
    return 1
  fi

  if [[ -f "${dotenv}" ]] && grep -qE '^INTERNAL_EVAL_ENABLE_DOCS=' "${dotenv}" 2>/dev/null; then
    read_env_bool "INTERNAL_EVAL_ENABLE_DOCS" "0"
    return
  fi

  read_toml_bool_field "eval" "internal_enable_docs" "0"
}
|
||
|
||
# Decide whether the observability stack should be started.
# START_OBSERVABILITY forces the answer when it holds a recognised
# false value (0/false/FALSE/no/NO/off/OFF) or true value
# (1/true/TRUE/yes/YES/on/ON); any other value, including empty, defers to
# read_deploy_otel_enabled with a default of false.
# Returns: 0 to start, non-zero otherwise
should_start_observability() {
  local flag="${START_OBSERVABILITY}"

  if [[ "${flag}" =~ ^(0|false|FALSE|no|NO|off|OFF)$ ]]; then
    return 1
  fi
  if [[ "${flag}" =~ ^(1|true|TRUE|yes|YES|on|ON)$ ]]; then
    return 0
  fi

  read_deploy_otel_enabled "0"
}
|
||
|
||
# Run `docker compose` from ROOT_DIR with the right set of -f files.
# With observability: the dev file plus the observability overlay.
# Without: the dev file only, and `up` additionally gets --remove-orphans.
# Kept free of arrays/namerefs for the bash 3.2 shipped with macOS.
# Arguments: "$@" - compose sub-command and its arguments
# Returns:   exit status of docker compose
docker_compose_cmd() {
  if should_start_observability; then
    (
      cd "${ROOT_DIR}" && docker compose \
        -f docker-compose.dev.yml \
        -f docker-compose.observability.yml \
        "$@"
    )
    return
  fi

  if [[ "$1" != "up" ]]; then
    (cd "${ROOT_DIR}" && docker compose -f docker-compose.dev.yml "$@")
    return
  fi

  (cd "${ROOT_DIR}" && docker compose -f docker-compose.dev.yml "$@" --remove-orphans)
}
|
||
|
||
# Poll once per second until the OTLP gRPC host port is listening.
# Arguments: $1 - maximum number of polls (default 30)
# Returns:   0 once the port listens, 1 if it never did
wait_otel_collector_ready() {
  local retries="${1:-30}"
  local attempt

  for (( attempt = 0; attempt < retries; attempt++ )); do
    if is_port_listening "${OTEL_GRPC_HOST_PORT}"; then
      return 0
    fi
    sleep 1
  done

  return 1
}
|
||
|
||
# Verify that the OTel Collector port is reachable when OTel is enabled.
# If this run started the observability stack, waits up to 45 polls for
# the port before giving up; on failure prints remediation hints.
# Returns: 0 if OTel is disabled or the port is listening, 1 otherwise
check_otel_collector_ready() {
  local ready=0

  read_deploy_otel_enabled "0" || return 0

  if is_port_listening "${OTEL_GRPC_HOST_PORT}"; then
    ready=1
  elif [[ "${OBSERVABILITY_STARTED}" == "1" ]]; then
    print_warn "等待 OTel Collector 端口 :${OTEL_GRPC_HOST_PORT} …"
    if wait_otel_collector_ready 45; then
      ready=1
    fi
  fi

  if (( ready )); then
    print_ok "OTel Collector 端口已监听 (:${OTEL_GRPC_HOST_PORT})"
    return 0
  fi

  print_warn "deploy.otel_enabled=true 但 :${OTEL_GRPC_HOST_PORT} 未监听"
  print_warn "请确认本次启动日志中有「启动可观测性栈」;或手动执行:"
  print_warn " docker compose -f docker-compose.dev.yml -f docker-compose.observability.yml up -d"
  print_warn "不需要可观测性时在 config/development.toml 设 otel_enabled=false"
  return 1
}
|
||
|
||
# Bring the Docker infrastructure up (`compose up -d`), including the
# observability stack when should_start_observability says so, and print
# the endpoints.
# Globals: INFRA_STARTED, OBSERVABILITY_STARTED (written);
#          GRAFANA_HOST_PORT, PROMETHEUS_HOST_PORT, OTEL_GRPC_HOST_PORT (read)
start_infra() {
  local banner="启动 PostgreSQL 和 Redis"

  if should_start_observability; then
    banner="启动 PostgreSQL、Redis 与可观测性栈 (OTel / Grafana LGTM)"
    OBSERVABILITY_STARTED=1
  fi
  print_header "${banner}"

  docker_compose_cmd up -d
  INFRA_STARTED=1
  print_ok "PostgreSQL 127.0.0.1:48291,Redis 127.0.0.1:48307(见 docker-compose.dev.yml / .env.example)"

  if [[ "${OBSERVABILITY_STARTED}" == "1" ]]; then
    print_ok "Grafana http://127.0.0.1:${GRAFANA_HOST_PORT} (admin/admin)"
    print_ok "Prometheus http://127.0.0.1:${PROMETHEUS_HOST_PORT}"
    print_ok "OTLP gRPC 127.0.0.1:${OTEL_GRPC_HOST_PORT}(应用读 config/*.toml deploy.otel_*)"
    print_ok "详见 docs/observability.md"
    schedule_observability_browser
  fi

  print_ok "基础设施已就绪"
}
|
||
|
||
# Right after Docker starts, Postgres may not accept connections yet;
# running Alembic immediately would report a spurious failure.
#
# Polls `pg_isready` inside the postgres container once per second, up to
# 30 times. Changes the working directory to ROOT_DIR.
# Returns: 0 once Postgres is ready, 1 (after a warning) otherwise
wait_postgres_ready() {
  local retries=30
  local attempt

  print_header "等待 PostgreSQL 就绪"
  cd "${ROOT_DIR}"

  for (( attempt = 0; attempt < retries; attempt++ )); do
    if docker compose -f docker-compose.dev.yml exec -T postgres \
      pg_isready -U postgres >/dev/null 2>&1; then
      print_ok "PostgreSQL 已就绪"
      return 0
    fi
    sleep 1
  done

  print_warn "PostgreSQL 在 ${retries}s 内未就绪,迁移可能失败"
  return 1
}
|
||
|
||
# Print the database URL the application will use.
# Precedence: non-empty DATABASE_URL environment variable, then the first
# `DATABASE_URL=` line of ${ROOT_DIR}/.env with a trailing carriage return
# (CRLF files) and one layer of surrounding double/single quotes removed.
# Outputs: the URL on stdout
# Returns: 0 if a non-empty URL was found, 1 otherwise
get_effective_database_url() {
  if [[ -n "${DATABASE_URL:-}" ]]; then
    printf '%s\n' "${DATABASE_URL}"
    return 0
  fi

  local dotenv="${ROOT_DIR}/.env"
  [[ -f "${dotenv}" ]] || return 1

  local line
  line="$(sed -n 's/^DATABASE_URL=//p' "${dotenv}" | sed -n '1p')"
  # Strip the CR first; otherwise the closing quote is not the last
  # character and the quote removal below silently does nothing.
  line="${line%$'\r'}"
  line="${line%\"}"
  line="${line#\"}"
  line="${line%\'}"
  line="${line#\'}"

  [[ -n "${line}" ]] || return 1
  printf '%s\n' "${line}"
}
|
||
|
||
# Print the Redis URL the application will use.
# Precedence: non-empty REDIS_URL environment variable, then the first
# `REDIS_URL=` line of ${ROOT_DIR}/.env with a trailing carriage return
# (CRLF files) and one layer of surrounding double/single quotes removed.
# Outputs: the URL on stdout
# Returns: 0 if a non-empty URL was found, 1 otherwise
get_effective_redis_url() {
  if [[ -n "${REDIS_URL:-}" ]]; then
    printf '%s\n' "${REDIS_URL}"
    return 0
  fi

  local dotenv="${ROOT_DIR}/.env"
  [[ -f "${dotenv}" ]] || return 1

  local line
  line="$(sed -n 's/^REDIS_URL=//p' "${dotenv}" | sed -n '1p')"
  # Strip the CR first; otherwise the closing quote is not the last
  # character and the quote removal below silently does nothing.
  line="${line%$'\r'}"
  line="${line%\"}"
  line="${line#\"}"
  line="${line%\'}"
  line="${line#\'}"

  [[ -n "${line}" ]] || return 1
  printf '%s\n' "${line}"
}
|
||
|
||
# Print the port of a URL: the first run of digits that follows a colon and
# is itself followed by "/", "?" or the end of the string. Falls back to
# the supplied default when no such port is present.
# Arguments: $1 - URL
#            $2 - default port
# Outputs:   the port on stdout
extract_url_port() {
  local url="$1"
  local default_port="$2"
  local port_re=':([0-9]+)(/|\?|$)'
  local port="${default_port}"

  if [[ "${url}" =~ ${port_re} ]]; then
    port="${BASH_REMATCH[1]}"
  fi

  printf '%s\n' "${port}"
}
|
||
|
||
# Check that the PostgreSQL and Redis ports named in the effective URLs
# are listening on the host, waiting up to 12 s for each. Only reports;
# a missing URL or a closed port produces a warning, never a failure.
wait_host_infra_ready() {
  local database_url redis_url pg_port redis_port

  if database_url="$(get_effective_database_url)"; then
    pg_port="$(extract_url_port "${database_url}" "5432")"
    # $$ (this script) serves as the "process to watch", so the wait only
    # ends on success or timeout.
    if wait_for_tcp_listener "$$" "${pg_port}" 12; then
      print_ok "宿主机 PostgreSQL 端口已监听 (:${pg_port})"
    else
      print_warn "宿主机 PostgreSQL 端口未监听 (:${pg_port});请检查 .env 与 docker-compose.dev.yml 端口映射"
    fi
  else
    print_warn "无法解析 DATABASE_URL,跳过宿主机 PostgreSQL 端口检查"
  fi

  if redis_url="$(get_effective_redis_url)"; then
    redis_port="$(extract_url_port "${redis_url}" "6379")"
    if wait_for_tcp_listener "$$" "${redis_port}" 12; then
      print_ok "宿主机 Redis 端口已监听 (:${redis_port})"
    else
      print_warn "宿主机 Redis 端口未监听 (:${redis_port});请检查 .env 与 docker-compose.dev.yml 端口映射"
    fi
  else
    print_warn "无法解析 REDIS_URL,跳过宿主机 Redis 端口检查"
  fi
}
|
||
|
||
# Warn when the database URL points at a host name that usually only
# resolves inside the Docker network (postgres, db, postgres-dev,
# postgresql), since this script runs Alembic/uvicorn on the host.
# Silent when no URL can be determined or the host is something else.
warn_database_url_host_pitfall() {
  local database_url
  local host

  database_url="$(get_effective_database_url)" || return 0

  # The host is whatever follows "@" up to the next ":", "/", "?" or "#".
  [[ "${database_url}" =~ @([^:/?#]+) ]] || return 0
  host="${BASH_REMATCH[1]}"

  if [[ "${host}" =~ ^(postgres|db|postgres-dev|postgresql)$ ]]; then
    print_warn "检测到 DATABASE_URL 主机为 ${host};在宿主机执行 Alembic/uvicorn 时通常应使用 localhost"
  fi
}
|
||
|
||
# Print a targeted hint for well-known Alembic failure causes.
# The whole log is scanned: the decisive error message sits at the end of
# a Python traceback, so looking only at the head of the file can miss it.
# Only the first matching cause is reported; nothing is printed when no
# known pattern is found.
# Arguments: $1 - path to the captured Alembic output
print_alembic_failure_hint() {
  local log_file="$1"
  local log_output

  log_output="$(cat -- "${log_file}")"

  if [[ "${log_output}" == *'could not translate host name "postgres"'* ]] \
    || [[ "${log_output}" == *"Name or service not known"* ]]; then
    print_warn "看起来 DATABASE_URL 指向了容器内主机名;在宿主机运行时请改用 localhost:48291(见 docker-compose.dev.yml)"
  elif [[ "${log_output}" == *"Connection refused"* ]] \
    || [[ "${log_output}" == *"could not connect to server"* ]]; then
    print_warn "PostgreSQL 连接被拒绝;请确认容器已启动且 DATABASE_URL 与 docker-compose.dev.yml 暴露端口一致"
  elif [[ "${log_output}" == *"password authentication failed"* ]]; then
    print_warn "PostgreSQL 用户名或密码不匹配;请核对 .env.development 中的 DATABASE_URL"
  elif [[ "${log_output}" == *"No such file or directory"* ]] \
    || [[ "${log_output}" == *"can't open file"* ]]; then
    print_warn "Alembic 依赖的文件或工作目录可能不正确;请确认在 api/ 目录运行脚本"
  elif [[ "${log_output}" == *"Can't locate revision"* ]]; then
    print_warn "alembic_version 与当前迁移链不一致(常见于已撤回的 0020_*)"
    print_warn "将自动修复:重启 development.sh,或执行: uv run python scripts/repair_alembic_version_after_withdrawn_0020.py && uv run alembic upgrade head"
  fi
}
|
||
|
||
# Test whether something is listening on a TCP port.
# Uses `lsof` when available; otherwise, if the virtualenv Python is
# executable, attempts a TCP connect to 127.0.0.1 with a 0.2 s timeout.
# Arguments: $1 - port number
# Returns:   0 if listening; non-zero otherwise (including when neither
#            probe is available)
is_port_listening() {
  local port="$1"

  if command -v lsof >/dev/null 2>&1; then
    lsof -nP -iTCP:"${port}" -sTCP:LISTEN >/dev/null 2>&1
    return
  fi

  [[ -x "${PYTHON_BIN}" ]] || return 1

  "${PYTHON_BIN}" - "${port}" <<'PY' >/dev/null 2>&1
import socket
import sys

sock = socket.socket()
sock.settimeout(0.2)
try:
    sock.connect(("127.0.0.1", int(sys.argv[1])))
except OSError:
    raise SystemExit(1)
finally:
    sock.close()
raise SystemExit(0)
PY
}
|
||
|
||
# Wait for a TCP port to start listening while a process stays alive.
# Polls once per second; on each poll the port is checked before the PID.
# Arguments: $1 - PID expected to stay alive
#            $2 - port to watch
#            $3 - timeout in seconds (default 8)
# Returns:   0 port is listening
#            1 the process exited first
#            2 timed out with the process still alive
wait_for_tcp_listener() {
  local pid="$1"
  local port="$2"
  local timeout="${3:-8}"
  local elapsed

  for (( elapsed = 0; elapsed < timeout; elapsed++ )); do
    is_port_listening "${port}" && return 0
    is_pid_alive "${pid}" || return 1
    sleep 1
  done

  return 2
}
|
||
|
||
# Give a freshly started background process one second, then abort the
# script if it has already exited.
# Arguments: $1 - display name
#            $2 - PID
# Returns:   0 if still alive; otherwise prints an error and exits with 1
ensure_background_process_alive() {
  local name="$1"
  local pid="$2"

  sleep 1
  is_pid_alive "${pid}" && return 0

  print_err "${name} 启动后立即退出,请查看上方日志"
  exit 1
}
|
||
|
||
# Make sure the virtualenv exists and, unless SKIP_INSTALL=1, that the
# project dependencies are installed with `uv sync`.
# `uv sync` is run from ROOT_DIR (in a subshell, so the caller's working
# directory is untouched): it resolves the project from the current
# directory, which would otherwise be wherever the script was invoked.
# Globals: VENV_DIR, ROOT_DIR, SKIP_INSTALL (read)
ensure_venv() {
  print_header "检查 Python 虚拟环境"

  if [[ ! -d "${VENV_DIR}" ]]; then
    print_warn ".venv 不存在,正在创建"
    uv venv "${VENV_DIR}"
  fi

  if [[ "${SKIP_INSTALL}" == "1" ]]; then
    print_warn "已跳过依赖安装 (SKIP_INSTALL=1)"
    return 0
  fi

  print_header "安装 Python 依赖"
  (cd "${ROOT_DIR}" && uv sync)
  print_ok "依赖安装完成"
}
|
||
|
||
# Local convention: .env.development is the source of truth. Every launch
# overwrites .env with it, so that pydantic Settings(env_file=".env") reads
# the current values.
# Do not edit secrets in .env alone without copying them back to
# .env.development, or the next launch will overwrite them.
#
# Returns: 0 in both cases; a missing .env.development only yields warnings
ensure_dotenv_from_development() {
  local template="${ROOT_DIR}/.env.development"

  print_header "准备本地 .env"

  if [[ ! -f "${template}" ]]; then
    print_warn "未找到 .env.development,无法自动生成 .env"
    print_warn "请执行: cp api/.env.example api/.env.development 后按说明填写,再运行 ./development.sh"
    return
  fi

  cp "${template}" "${ROOT_DIR}/.env"
  print_ok "已从 .env.development 同步为 .env"
  return 0
}
|
||
|
||
# Report whether ${ROOT_DIR}/.env exists; when it does, also run the
# database host-name sanity check.
check_env_file() {
  print_header "检查环境变量文件"

  if [[ -f "${ROOT_DIR}/.env" ]]; then
    print_ok "检测到 .env"
    warn_database_url_host_pitfall
  else
    print_warn "未找到 .env,应用可能因缺少配置启动失败"
  fi
}
|
||
|
||
# Run the Alembic migrations from ROOT_DIR (changes the working directory).
# A best-effort repair of alembic_version runs first. Both steps append to
# the same temporary log, so on failure the repair output is shown along
# with the Alembic error instead of being truncated away.
# A failed migration is reported with hints and the last 40 log lines but
# does not abort the script.
run_migrations() {
  print_header "执行数据库迁移"
  cd "${ROOT_DIR}"

  local log_file
  log_file="$(mktemp -t life-echo-alembic.XXXXXX.log)"

  # Best effort: the repair script may legitimately have nothing to do.
  uv run python scripts/repair_alembic_version_after_withdrawn_0020.py >>"${log_file}" 2>&1 || true

  if uv run alembic upgrade head >>"${log_file}" 2>&1; then
    print_ok "Alembic 迁移已就绪"
  else
    print_warn "Alembic 迁移失败(可能数据库未启动或 DATABASE_URL 未配置),应用启动可能失败"
    print_alembic_failure_hint "${log_file}"
    print_warn "Alembic 输出(最近 40 行):"
    tail -n 40 "${log_file}"
  fi

  rm -f "${log_file}"
}
|
||
|
||
# Require INTERNAL_EVAL_API_KEY to be configured, either as a non-empty
# line in ${ROOT_DIR}/.env or as an environment variable.
# Returns: 0 when configured; otherwise prints instructions and exits 1
check_internal_eval_key() {
  local dotenv="${ROOT_DIR}/.env"

  print_header "检查内部评测密钥"

  if [[ -f "${dotenv}" ]] && grep -qE '^INTERNAL_EVAL_API_KEY=.+' "${dotenv}" 2>/dev/null; then
    print_ok "已在 .env 中配置 INTERNAL_EVAL_API_KEY"
    return 0
  elif [[ -n "${INTERNAL_EVAL_API_KEY:-}" ]]; then
    print_ok "已从环境变量传入 INTERNAL_EVAL_API_KEY"
    return 0
  fi

  print_err "未配置 INTERNAL_EVAL_API_KEY:内部评测接口将返回 503。"
  print_err "请在 api/.env.development(或 .env)中加入一行,例如:"
  print_err " INTERNAL_EVAL_API_KEY=\"your-long-random-secret\""
  exit 1
}
|
||
|
||
# Start the app-eval-web Vite dev server in the background.
# Requires the project directory, its node_modules and npm. The API key is
# taken from the INTERNAL_EVAL_API_KEY environment variable or, failing
# that, from the first matching line of ${ROOT_DIR}/.env, and handed to
# Vite as VITE_EVAL_API_KEY together with the proxy target.
# Globals: EVAL_WEB_PID (written); EVAL_WEB_DIR, EVAL_WEB_PORT,
#          INTERNAL_EVAL_PORT, ROOT_DIR (read)
# Exits with status 1 when a prerequisite is missing.
start_eval_web() {
  print_header "启动 app-eval-web (Vite)"

  [[ -d "${EVAL_WEB_DIR}" ]] || {
    print_err "未找到 ${EVAL_WEB_DIR}"
    exit 1
  }
  [[ -d "${EVAL_WEB_DIR}/node_modules" ]] || {
    print_err "请先执行: cd app-eval-web && npm install"
    exit 1
  }
  require_cmd "npm"

  local api_key="${INTERNAL_EVAL_API_KEY:-}"
  if [[ -z "${api_key}" && -f "${ROOT_DIR}/.env" ]]; then
    api_key="$(grep -E '^INTERNAL_EVAL_API_KEY=' "${ROOT_DIR}/.env" | head -1 | cut -d= -f2- | tr -d '\r' | sed 's/^"//;s/"$//')"
  fi
  [[ -n "${api_key}" ]] || {
    print_err "无法解析 INTERNAL_EVAL_API_KEY,无法为 Vite 注入 VITE_EVAL_API_KEY"
    exit 1
  }

  # The subshell keeps the `cd` local; $! below is the subshell's PID.
  (
    cd "${EVAL_WEB_DIR}"
    VITE_EVAL_API_KEY="${api_key}" \
      VITE_EVAL_PROXY_TARGET="http://127.0.0.1:${INTERNAL_EVAL_PORT}" \
      npm run dev -- --host 127.0.0.1 --port "${EVAL_WEB_PORT}"
  ) &
  EVAL_WEB_PID=$!

  print_ok "eval-web 已启动 (PID: ${EVAL_WEB_PID}) → http://127.0.0.1:${EVAL_WEB_PORT}/"
  schedule_eval_web_browser
}
|
||
|
||
# Start the internal evaluation API (uvicorn with --reload) in the
# background from ROOT_DIR, wait up to 8 s for its port, optionally start
# the evaluation web frontend, and print the relevant URLs.
# Globals: INTERNAL_EVAL_PID (written); INTERNAL_EVAL_HOST,
#          INTERNAL_EVAL_PORT, START_EVAL_WEB, EVAL_WEB_PORT (read)
# Exits with status 1 if the key is missing, the port is taken, or the
# server does not come up.
start_internal_eval_http() {
  local api_start_status=0

  check_internal_eval_key
  print_header "启动内部评测 API (internal_main :${INTERNAL_EVAL_PORT})"
  cd "${ROOT_DIR}"

  if is_port_listening "${INTERNAL_EVAL_PORT}"; then
    print_err "端口 ${INTERNAL_EVAL_PORT} 已被占用,无法启动内部评测 Uvicorn。"
    print_err "请先结束占用进程,或设置 INTERNAL_EVAL_PORT 为其他端口"
    exit 1
  fi

  OTEL_SERVICE_NAME="${INTERNAL_EVAL_OTEL_SERVICE_NAME:-life-echo-internal-api}" \
    "${UVICORN_BIN}" app.internal_main:internal_app --reload \
    --reload-exclude 'alembic/**' \
    --reload-exclude 'alembic.ini' \
    --host "${INTERNAL_EVAL_HOST}" --port "${INTERNAL_EVAL_PORT}" &
  INTERNAL_EVAL_PID=$!

  # 0 = listening, 1 = process exited, anything else = timed out.
  wait_for_tcp_listener "${INTERNAL_EVAL_PID}" "${INTERNAL_EVAL_PORT}" 8 || api_start_status=$?

  if (( api_start_status == 0 )); then
    print_ok "Internal Eval API 已启动 (PID: ${INTERNAL_EVAL_PID})"
  elif (( api_start_status == 1 )); then
    print_err "Internal Eval API 启动失败,进程已退出;请查看上方 Uvicorn 日志"
    exit 1
  else
    print_err "Internal Eval API 进程仍存活,但端口 ${INTERNAL_EVAL_PORT} 未在预期时间内开始监听"
    exit 1
  fi

  if [[ "${START_EVAL_WEB}" != "1" ]]; then
    print_warn "已跳过 eval-web (START_EVAL_WEB=0)"
  else
    start_eval_web
  fi

  echo
  printf '%b\n' "${BLUE}── 内部评测${NC}"
  printf '%s\n' \
    "评测 Web UI: http://127.0.0.1:${EVAL_WEB_PORT}/ (Vite /internal → :${INTERNAL_EVAL_PORT})" \
    "内部评测 API: http://127.0.0.1:${INTERNAL_EVAL_PORT}/health" \
    "评测 REST: http://127.0.0.1:${INTERNAL_EVAL_PORT}/internal/api/evaluation"
  if read_eval_internal_enable_docs; then
    printf '%s\n' "内部评测文档: http://127.0.0.1:${INTERNAL_EVAL_PORT}/docs"
  fi
  printf '%s\n' "说明: api/docs/internal-eval.md"
}
|
||
|
||
# Start the application processes from ROOT_DIR and print a summary.
# Unless attaching to an already running main site (EVAL_ATTACH_ONLY=1
# with the API port in use), starts uvicorn for main:app and a Celery
# worker; then, when LIFE_ECHO_WITH_INTERNAL_EVAL=1, the internal
# evaluation API.
# Globals: API_PID, CELERY_PID (written); API_HOST, API_PORT, CELERY_POOL,
#          EVAL_ATTACH_ONLY, LIFE_ECHO_WITH_INTERNAL_EVAL and the port
#          settings used in the summary (read)
# Exits with status 1 on a port conflict or a failed start.
start_services() {
  local skip_main=0
  local api_start_status=0

  print_header "启动 FastAPI 和 Celery"
  cd "${ROOT_DIR}"

  if [[ "${EVAL_ATTACH_ONLY}" == "1" ]] && is_port_listening "${API_PORT}"; then
    skip_main=1
    print_warn "EVAL_ATTACH_ONLY=1::${API_PORT} 已在监听,跳过本脚本内的主站与 Celery(请确保别处已有 Worker)"
    # Attaching without the internal evaluation API would start nothing.
    if [[ "${LIFE_ECHO_WITH_INTERNAL_EVAL}" != "1" ]]; then
      print_err "EVAL_ATTACH_ONLY=1 仅用于在已有主站时附加内部评测;请设置 LIFE_ECHO_WITH_INTERNAL_EVAL=1"
      exit 1
    fi
  fi

  if (( skip_main == 0 )); then
    if is_port_listening "${API_PORT}"; then
      print_err "端口 ${API_PORT} 已被占用,无法启动新的 Uvicorn。"
      print_err "请先结束占用进程,例如: lsof -nP -iTCP:${API_PORT} -sTCP:LISTEN"
      print_err "若主站已在其他终端由本脚本启动,可改用: EVAL_ATTACH_ONLY=1 ./development.sh"
      exit 1
    fi

    "${UVICORN_BIN}" main:app --reload \
      --reload-exclude 'alembic/**' \
      --reload-exclude 'alembic.ini' \
      --host "${API_HOST}" --port "${API_PORT}" &
    API_PID=$!

    # 0 = listening, 1 = process exited, anything else = timed out.
    wait_for_tcp_listener "${API_PID}" "${API_PORT}" 8 || api_start_status=$?

    if (( api_start_status == 0 )); then
      print_ok "FastAPI 已启动 (PID: ${API_PID})"
    elif (( api_start_status == 1 )); then
      print_err "FastAPI 启动失败,进程已退出;请查看上方 Uvicorn 日志"
      exit 1
    else
      print_err "FastAPI 进程仍存活,但端口 ${API_PORT} 未在预期时间内开始监听"
      exit 1
    fi

    "${CELERY_BIN}" -A app.tasks.celery_app worker --loglevel=info --pool="${CELERY_POOL}" -Q celery,memory_idle &
    CELERY_PID=$!
    ensure_background_process_alive "Celery" "${CELERY_PID}"
    print_ok "Celery 已启动 (PID: ${CELERY_PID})"
  fi

  if [[ "${LIFE_ECHO_WITH_INTERNAL_EVAL}" == "1" ]]; then
    start_internal_eval_http
  fi

  echo
  printf '%b\n' "${GREEN}开发环境启动完成${NC}"
  if [[ -n "${API_PID}" ]]; then
    printf '%s\n' \
      "主站文档: http://localhost:${API_PORT}/docs" \
      "健康检查: http://localhost:${API_PORT}/health"
  fi
  if [[ "${LIFE_ECHO_WITH_INTERNAL_EVAL}" == "1" ]]; then
    printf '%s\n' \
      "评测 Web UI: http://127.0.0.1:${EVAL_WEB_PORT}/" \
      "内部评测 API: http://127.0.0.1:${INTERNAL_EVAL_PORT}/health"
  fi
  if read_deploy_otel_enabled "0"; then
    printf '%s\n' "可观测性: Grafana http://127.0.0.1:${GRAFANA_HOST_PORT} | Prometheus http://127.0.0.1:${PROMETHEUS_HOST_PORT}"
    if is_port_listening "${GRAFANA_HOST_PORT}"; then
      schedule_observability_browser
    fi
  fi
  if [[ "${LIFE_ECHO_WITH_INTERNAL_EVAL}" == "1" ]] && is_pid_alive "${EVAL_WEB_PID}"; then
    schedule_eval_web_browser
  fi
  printf '%s\n' "按 Ctrl+C 停止所有进程"
}
|
||
|
||
# Entry point: verify prerequisites, prepare the virtualenv and .env, start
# the Docker infrastructure (unless SKIP_INFRA=1), run the migrations,
# start the services and then block until the background processes exit.
# `cleanup` is installed as the EXIT/INT/TERM handler before anything is
# started.
main() {
  local title="Life Echo 开发环境一键启动(无内部评测)"
  if [[ "${LIFE_ECHO_WITH_INTERNAL_EVAL}" == "1" ]]; then
    title="Life Echo 开发环境(主站 + 内部评测 + 可观测性)"
  fi
  print_header "${title}"

  require_cmd "uv"
  [[ "${SKIP_INFRA}" == "1" ]] || require_cmd "docker"

  trap cleanup EXIT INT TERM

  ensure_venv
  # .env must be synced before start_infra so that
  # read_deploy_otel_enabled can see APP_ENV.
  ensure_dotenv_from_development

  if [[ "${SKIP_INFRA}" == "1" ]]; then
    print_warn "已跳过 docker 基础设施 (SKIP_INFRA=1)"
    if should_start_observability; then
      print_warn "SKIP_INFRA=1 未自动启动 observability;若需 LGTM 请手动 docker compose up observability overlay"
    fi
  else
    start_infra
    wait_postgres_ready || true
  fi

  check_env_file
  check_otel_collector_ready || true
  wait_host_infra_ready
  run_migrations
  start_services

  # Collect the PIDs that were actually started.
  local wait_pids=()
  local pid
  for pid in "${API_PID}" "${CELERY_PID}" "${INTERNAL_EVAL_PID}" "${EVAL_WEB_PID}"; do
    if [[ -n "${pid}" ]]; then
      wait_pids+=("${pid}")
    fi
  done

  if (( ${#wait_pids[@]} == 0 )); then
    print_err "没有可等待的进程,退出"
    exit 1
  fi

  wait "${wait_pids[@]}"
}
|
||
|
||
# Run the launcher, forwarding all command-line arguments to main.
main "$@"
|