#!/usr/bin/env bash
#
# One-command launcher for the local development stack.
#
# Starts (in order): the docker-compose infrastructure (PostgreSQL / Redis),
# the Python virtualenv + dependencies, Alembic migrations, the main FastAPI
# app (uvicorn main:app) and a Celery worker. When
# LIFE_ECHO_WITH_INTERNAL_EVAL=1 it additionally starts the internal
# evaluation API (app.internal_main:internal_app) and the app-eval-web Vite
# dev server. All started processes are stopped again when the script exits.
#
# Environment variables (all optional, defaults in parentheses):
#   APP_ENV (development), CELERY_PURGE_BROKER_ON_STARTUP (1)
#   API_HOST (0.0.0.0), API_PORT (8000), CELERY_POOL (solo)
#   SKIP_INSTALL (0)      - 1 skips `uv sync`
#   SKIP_INFRA (0)        - 1 skips docker compose
#   SHUTDOWN_TIMEOUT (12) - seconds to wait after SIGTERM before SIGKILL
#   LIFE_ECHO_WITH_INTERNAL_EVAL (0), EVAL_ATTACH_ONLY (0)
#   INTERNAL_EVAL_HOST (0.0.0.0), INTERNAL_EVAL_PORT (8001)
#   START_EVAL_WEB (1), OPEN_EVAL_WEB (1), EVAL_WEB_PORT (5174)
#   DATABASE_URL / REDIS_URL / INTERNAL_EVAL_API_KEY - override values in .env

set -euo pipefail

RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${ROOT_DIR}/.." && pwd)"
EVAL_WEB_DIR="${REPO_ROOT}/app-eval-web"
VENV_DIR="${ROOT_DIR}/.venv"
PYTHON_BIN="${VENV_DIR}/bin/python"
UVICORN_BIN="${VENV_DIR}/bin/uvicorn"
CELERY_BIN="${VENV_DIR}/bin/celery"

# Local full stack: by default the Celery queue may be purged when the API
# starts. Do NOT use this script in production.
export APP_ENV="${APP_ENV:-development}"
export CELERY_PURGE_BROKER_ON_STARTUP="${CELERY_PURGE_BROKER_ON_STARTUP:-1}"

API_HOST="${API_HOST:-0.0.0.0}"
API_PORT="${API_PORT:-8000}"
CELERY_POOL="${CELERY_POOL:-solo}"
SKIP_INSTALL="${SKIP_INSTALL:-0}"
SKIP_INFRA="${SKIP_INFRA:-0}"
SHUTDOWN_TIMEOUT="${SHUTDOWN_TIMEOUT:-12}"

# Enabled by internal-eval.sh: besides main:app + Celery, also start
# internal_main (:8001) and app-eval-web.
LIFE_ECHO_WITH_INTERNAL_EVAL="${LIFE_ECHO_WITH_INTERNAL_EVAL:-0}"
# If :8000 is already served by another development instance, only attach
# :8001 + the frontend (the same Celery/main site must be provided elsewhere).
EVAL_ATTACH_ONLY="${EVAL_ATTACH_ONLY:-0}"
INTERNAL_EVAL_HOST="${INTERNAL_EVAL_HOST:-0.0.0.0}"
# Default matches the :8001 port referred to in the notes above.
INTERNAL_EVAL_PORT="${INTERNAL_EVAL_PORT:-8001}"
START_EVAL_WEB="${START_EVAL_WEB:-1}"
OPEN_EVAL_WEB="${OPEN_EVAL_WEB:-1}"
EVAL_WEB_PORT="${EVAL_WEB_PORT:-5174}"

# PIDs of the background processes started by this script ("" = not started).
API_PID=""
CELERY_PID=""
INTERNAL_EVAL_PID=""
EVAL_WEB_PID=""
# Guards cleanup() against running twice.
CLEANED_UP=0
# Set to 1 once docker compose has been brought up by this script.
INFRA_STARTED=0

# Print a blue banner with the title $1.
print_header() {
  printf '\n%b========================================%b\n' "${BLUE}" "${NC}"
  printf '%b%s%b\n' "${BLUE}" "$1" "${NC}"
  printf '%b========================================%b\n' "${BLUE}" "${NC}"
}

# Print a green success line with message $1.
print_ok() {
  printf '%b✓ %s%b\n' "${GREEN}" "$1" "${NC}"
}

# Print a yellow warning line with message $1.
print_warn() {
  printf '%b⚠ %s%b\n' "${YELLOW}" "$1" "${NC}"
}

# Print a red error line with message $1.
print_err() {
  printf '%b✗ %s%b\n' "${RED}" "$1" "${NC}"
}

# Return 0 when $1 is a non-empty PID that can be signalled.
is_pid_alive() {
  local pid="$1"
  [[ -n "${pid}" ]] && kill -0 "${pid}" 2>/dev/null
}

# Poll once per second until PID $1 is gone.
# Arguments: $1 - pid, $2 - timeout in seconds
# Returns:   0 when the process exited, 1 when the timeout elapsed first
wait_pid_exit() {
  local pid="$1"
  local timeout="$2"
  local waited=0

  while is_pid_alive "${pid}"; do
    if (( waited >= timeout )); then
      return 1
    fi
    sleep 1
    waited=$((waited + 1))
  done
  return 0
}

# Send SIGTERM to every descendant of PID $1 (depth first).
kill_children_term() {
  local pid="$1"
  local children
  local child_pid

  # pgrep exits non-zero when there are no children; that is not an error.
  children="$(pgrep -P "${pid}" 2>/dev/null || true)"
  if [[ -n "${children}" ]]; then
    # Recurse into children first so reloader/server processes are not left
    # behind.
    while IFS= read -r child_pid; do
      [[ -z "${child_pid}" ]] && continue
      kill_children_term "${child_pid}"
      kill -TERM "${child_pid}" 2>/dev/null || true
    done <<< "${children}"
  fi
}

# Stop a process tree: SIGTERM first, SIGKILL if it outlives the timeout.
# Arguments: $1 - display name, $2 - pid, $3 - timeout in seconds (default 10)
stop_process_gracefully() {
  local name="$1"
  local pid="$2"
  local timeout="${3:-10}"

  if ! is_pid_alive "${pid}"; then
    print_ok "${name} 已退出"
    return 0
  fi

  print_warn "正在停止 ${name}(PID: ${pid})..."
  kill_children_term "${pid}"
  kill -TERM "${pid}" 2>/dev/null || true

  if wait_pid_exit "${pid}" "${timeout}"; then
    print_ok "${name} 已停止"
    return 0
  fi

  print_warn "${name} 在 ${timeout}s 内未退出,准备强制结束"
  kill -KILL "${pid}" 2>/dev/null || true
  wait_pid_exit "${pid}" 3 || true
  print_ok "${name} 已强制结束"
}

# EXIT handler: stop every process started by this script, then stop the
# docker compose services if this script started them. Safe to call twice.
cleanup() {
  if [[ "${CLEANED_UP}" == "1" ]]; then
    return 0
  fi
  CLEANED_UP=1

  # The INT/TERM handlers installed by main() call `exit`; ignore further
  # signals so a second Ctrl+C cannot abort the shutdown half-way.
  trap '' INT TERM

  print_header "正在关闭开发环境"

  if is_pid_alive "${EVAL_WEB_PID}"; then
    stop_process_gracefully "eval-web (Vite)" "${EVAL_WEB_PID}" "${SHUTDOWN_TIMEOUT}"
  fi
  if is_pid_alive "${INTERNAL_EVAL_PID}"; then
    stop_process_gracefully "Internal Eval API (:${INTERNAL_EVAL_PORT})" "${INTERNAL_EVAL_PID}" "${SHUTDOWN_TIMEOUT}"
  fi
  if is_pid_alive "${API_PID}"; then
    stop_process_gracefully "FastAPI" "${API_PID}" "${SHUTDOWN_TIMEOUT}"
  fi
  if is_pid_alive "${CELERY_PID}"; then
    stop_process_gracefully "Celery" "${CELERY_PID}" "${SHUTDOWN_TIMEOUT}"
  fi

  if [[ "${INFRA_STARTED}" == "1" ]]; then
    print_warn "正在停止 PostgreSQL / Redis 容器..."
    # Best effort: a failing `docker compose stop` must not mask the exit code.
    (
      cd "${ROOT_DIR}" && docker compose -f docker-compose.dev.yml stop
    ) >/dev/null 2>&1 || true
    print_ok "PostgreSQL/Redis 容器已停止"
  fi
}

# Exit with an error unless command $1 is available on PATH.
require_cmd() {
  local cmd="$1"
  if ! command -v "${cmd}" >/dev/null 2>&1; then
    print_err "未找到命令: ${cmd}"
    exit 1
  fi
}

# Bring up the services defined in docker-compose.dev.yml and record that
# cleanup() has to stop them again.
start_infra() {
  print_header "启动 PostgreSQL 和 Redis"
  cd "${ROOT_DIR}"
  docker compose -f docker-compose.dev.yml up -d
  INFRA_STARTED=1
  print_ok "PostgreSQL 127.0.0.1:48291,Redis 127.0.0.1:48307(见 docker-compose.dev.yml / .env.example)"
  print_ok "基础设施已就绪"
}

# Right after Docker starts, Postgres may not accept connections yet; running
# Alembic immediately would report a spurious failure. Polls pg_isready inside
# the `postgres` compose service once per second.
# Returns: 0 when ready, 1 after 30 unsuccessful attempts
wait_postgres_ready() {
  local retries=30
  local i=0

  print_header "等待 PostgreSQL 就绪"
  cd "${ROOT_DIR}"
  while (( i < retries )); do
    if docker compose -f docker-compose.dev.yml exec -T postgres \
      pg_isready -U postgres >/dev/null 2>&1; then
      print_ok "PostgreSQL 已就绪"
      return 0
    fi
    sleep 1
    i=$((i + 1))
  done
  print_warn "PostgreSQL 在 ${retries}s 内未就绪,迁移可能失败"
  return 1
}

# Print the value of the first `KEY=` line of ${ROOT_DIR}/.env.
# A trailing CR and one pair of surrounding double/single quotes are removed.
# Arguments: $1 - variable name
# Returns:   0 and the value on stdout; 1 when the file, the key or the value
#            is missing
read_dotenv_value() {
  local key="$1"
  local env_file="${ROOT_DIR}/.env"
  local line
  local value=""

  [[ -f "${env_file}" ]] || return 1

  # `|| [[ -n ... ]]` keeps a last line that lacks a trailing newline.
  while IFS= read -r line || [[ -n "${line}" ]]; do
    [[ "${line}" == "${key}="* ]] || continue
    value="${line#"${key}="}"
    value="${value%$'\r'}"
    value="${value%\"}"
    value="${value#\"}"
    value="${value%\'}"
    value="${value#\'}"
    break
  done < "${env_file}"

  [[ -n "${value}" ]] || return 1
  printf '%s\n' "${value}"
}

# Print the DATABASE_URL in effect: the environment variable wins, otherwise
# the value from .env. Returns 1 when neither is available.
get_effective_database_url() {
  if [[ -n "${DATABASE_URL:-}" ]]; then
    printf '%s\n' "${DATABASE_URL}"
    return 0
  fi
  read_dotenv_value "DATABASE_URL"
}

# Print the REDIS_URL in effect: the environment variable wins, otherwise the
# value from .env. Returns 1 when neither is available.
get_effective_redis_url() {
  if [[ -n "${REDIS_URL:-}" ]]; then
    printf '%s\n' "${REDIS_URL}"
    return 0
  fi
  read_dotenv_value "REDIS_URL"
}

# Print the port of URL $1, i.e. the first `:<digits>` that is followed by
# `/`, `?` or the end of the string; print $2 when the URL has no such port.
extract_url_port() {
  local url="$1"
  local default_port="$2"

  if [[ "${url}" =~ :([0-9]+)(/|\?|$) ]]; then
    printf '%s\n' "${BASH_REMATCH[1]}"
    return 0
  fi
  printf '%s\n' "${default_port}"
}

# Check (warn only) that the PostgreSQL and Redis ports taken from the
# effective URLs are listening on this host.
wait_host_infra_ready() {
  local database_url redis_url pg_port redis_port

  if ! database_url="$(get_effective_database_url)"; then
    print_warn "无法解析 DATABASE_URL,跳过宿主机 PostgreSQL 端口检查"
  else
    pg_port="$(extract_url_port "${database_url}" "5432")"
    # "$$" (this script) is passed as the watched PID: it is always alive, so
    # the helper simply waits for the port until the timeout.
    if wait_for_tcp_listener "$$" "${pg_port}" 12; then
      print_ok "宿主机 PostgreSQL 端口已监听 (:${pg_port})"
    else
      print_warn "宿主机 PostgreSQL 端口未监听 (:${pg_port});请检查 .env 与 docker-compose.dev.yml 端口映射"
    fi
  fi

  if ! redis_url="$(get_effective_redis_url)"; then
    print_warn "无法解析 REDIS_URL,跳过宿主机 Redis 端口检查"
  else
    redis_port="$(extract_url_port "${redis_url}" "6379")"
    if wait_for_tcp_listener "$$" "${redis_port}" 12; then
      print_ok "宿主机 Redis 端口已监听 (:${redis_port})"
    else
      print_warn "宿主机 Redis 端口未监听 (:${redis_port});请检查 .env 与 docker-compose.dev.yml 端口映射"
    fi
  fi
}

# Warn when DATABASE_URL points at a host name that looks like a compose
# service name (postgres, db, ...).
warn_database_url_host_pitfall() {
  local database_url
  local host

  if ! database_url="$(get_effective_database_url)"; then
    return 0
  fi

  if [[ "${database_url}" =~ @([^:/?#]+) ]]; then
    host="${BASH_REMATCH[1]}"
    case "${host}" in
      postgres | db | postgres-dev | postgresql)
        print_warn "检测到 DATABASE_URL 主机为 ${host};在宿主机执行 Alembic/uvicorn 时通常应使用 localhost"
        ;;
    esac
  fi
}

# Inspect the first 200 lines of the Alembic log file $1 and print a hint for
# the failure patterns recognised below.
print_alembic_failure_hint() {
  local log_file="$1"
  local log_output

  log_output="$(sed -n '1,200p' "${log_file}")"

  if [[ "${log_output}" == *'could not translate host name "postgres"'* ]] ||
    [[ "${log_output}" == *"Name or service not known"* ]]; then
    print_warn "看起来 DATABASE_URL 指向了容器内主机名;在宿主机运行时请改用 localhost:48291(见 docker-compose.dev.yml)"
  elif [[ "${log_output}" == *"Connection refused"* ]] ||
    [[ "${log_output}" == *"could not connect to server"* ]]; then
    print_warn "PostgreSQL 连接被拒绝;请确认容器已启动且 DATABASE_URL 与 docker-compose.dev.yml 暴露端口一致"
  elif [[ "${log_output}" == *"password authentication failed"* ]]; then
    print_warn "PostgreSQL 用户名或密码不匹配;请核对 .env.development 中的 DATABASE_URL"
  elif [[ "${log_output}" == *"No such file or directory"* ]] ||
    [[ "${log_output}" == *"can't open file"* ]]; then
    print_warn "Alembic 依赖的文件或工作目录可能不正确;请确认在 api/ 目录运行脚本"
  fi
}

# Return 0 when something listens on TCP port $1.
# Uses lsof when available; otherwise tries a 0.2 s connect to 127.0.0.1 with
# the virtualenv's Python. Returns 1 when neither tool is available.
is_port_listening() {
  local port="$1"

  if command -v lsof >/dev/null 2>&1; then
    lsof -nP -iTCP:"${port}" -sTCP:LISTEN >/dev/null 2>&1
    return $?
  fi

  if [[ -x "${PYTHON_BIN}" ]]; then
    "${PYTHON_BIN}" - "${port}" <<'PY' >/dev/null 2>&1
import socket
import sys

sock = socket.socket()
sock.settimeout(0.2)
try:
    sock.connect(("127.0.0.1", int(sys.argv[1])))
except OSError:
    raise SystemExit(1)
finally:
    sock.close()
raise SystemExit(0)
PY
    return $?
  fi

  return 1
}

# Wait until TCP port $2 is listening while PID $1 stays alive.
# Arguments: $1 - pid to watch, $2 - port, $3 - timeout in seconds (default 8)
# Returns:   0 port is listening, 1 the process died, 2 timeout
wait_for_tcp_listener() {
  local pid="$1"
  local port="$2"
  local timeout="${3:-8}"
  local waited=0

  while (( waited < timeout )); do
    if is_port_listening "${port}"; then
      return 0
    fi
    if ! is_pid_alive "${pid}"; then
      return 1
    fi
    sleep 1
    waited=$((waited + 1))
  done

  return 2
}

# Give background process $2 (display name $1) one second, then abort the
# script if it has already exited.
ensure_background_process_alive() {
  local name="$1"
  local pid="$2"

  sleep 1
  if ! is_pid_alive "${pid}"; then
    print_err "${name} 启动后立即退出,请查看上方日志"
    exit 1
  fi
}

# Create .venv when missing and, unless SKIP_INSTALL=1, run `uv sync`.
ensure_venv() {
  print_header "检查 Python 虚拟环境"
  # start_infra is skipped with SKIP_INFRA=1, so the working directory is not
  # guaranteed to be the project yet; `uv sync` must run inside it.
  cd "${ROOT_DIR}"

  if [[ ! -d "${VENV_DIR}" ]]; then
    print_warn ".venv 不存在,正在创建"
    uv venv "${VENV_DIR}"
  fi

  if [[ "${SKIP_INSTALL}" != "1" ]]; then
    print_header "安装 Python 依赖"
    uv sync
    print_ok "依赖安装完成"
  else
    print_warn "已跳过依赖安装 (SKIP_INSTALL=1)"
  fi
}

# Local convention: .env.development is the source of truth. Every launch
# overwrites .env with it so that pydantic Settings(env_file=".env") reads it.
# Do not edit secrets only in .env without syncing them back to
# .env.development, or they are overwritten on the next launch.
ensure_dotenv_from_development() {
  print_header "准备本地 .env"
  if [[ -f "${ROOT_DIR}/.env.development" ]]; then
    cp "${ROOT_DIR}/.env.development" "${ROOT_DIR}/.env"
    print_ok "已从 .env.development 同步为 .env"
    return 0
  fi
  print_warn "未找到 .env.development,无法自动生成 .env"
  print_warn "请执行: cp api/.env.example api/.env.development 后按说明填写,再运行 ./development.sh"
}

# Report whether .env exists and run the DATABASE_URL host sanity check.
check_env_file() {
  print_header "检查环境变量文件"
  if [[ ! -f "${ROOT_DIR}/.env" ]]; then
    print_warn "未找到 .env,应用可能因缺少配置启动失败"
  else
    print_ok "检测到 .env"
    warn_database_url_host_pitfall
  fi
}

# Run `alembic upgrade head`. A failure only produces warnings (with a hint
# and the last 40 log lines); the script continues.
run_migrations() {
  local log_file

  print_header "执行数据库迁移"
  cd "${ROOT_DIR}"
  log_file="$(mktemp -t life-echo-alembic.XXXXXX.log)"

  if uv run alembic upgrade head >"${log_file}" 2>&1; then
    print_ok "Alembic 迁移已就绪"
    rm -f "${log_file}"
  else
    print_warn "Alembic 迁移失败(可能数据库未启动或 DATABASE_URL 未配置),应用启动可能失败"
    print_alembic_failure_hint "${log_file}"
    print_warn "Alembic 输出(最近 40 行):"
    tail -n 40 "${log_file}"
    rm -f "${log_file}"
  fi
}

# Abort unless INTERNAL_EVAL_API_KEY is configured in .env or the environment.
check_internal_eval_key() {
  print_header "检查内部评测密钥"
  if [[ -f "${ROOT_DIR}/.env" ]] &&
    grep -qE '^INTERNAL_EVAL_API_KEY=.+' "${ROOT_DIR}/.env" 2>/dev/null; then
    print_ok "已在 .env 中配置 INTERNAL_EVAL_API_KEY"
    return 0
  fi
  if [[ -n "${INTERNAL_EVAL_API_KEY:-}" ]]; then
    print_ok "已从环境变量传入 INTERNAL_EVAL_API_KEY"
    return 0
  fi
  print_err "未配置 INTERNAL_EVAL_API_KEY:内部评测接口将返回 503。"
  print_err "请在 api/.env.development(或 .env)中加入一行,例如:"
  print_err " INTERNAL_EVAL_API_KEY=\"your-long-random-secret\""
  exit 1
}

# Start the app-eval-web Vite dev server in the background and store its PID
# in EVAL_WEB_PID. The API key and proxy target are passed via VITE_* env vars.
start_eval_web() {
  local api_key
  local vite_extra=()

  print_header "启动 app-eval-web (Vite)"
  if [[ ! -d "${EVAL_WEB_DIR}" ]]; then
    print_err "未找到 ${EVAL_WEB_DIR}"
    exit 1
  fi
  if [[ ! -d "${EVAL_WEB_DIR}/node_modules" ]]; then
    print_err "请先执行: cd app-eval-web && npm install"
    exit 1
  fi
  require_cmd "npm"

  api_key="${INTERNAL_EVAL_API_KEY:-}"
  if [[ -z "${api_key}" ]]; then
    # `|| true`: a missing key is reported below instead of tripping set -e.
    api_key="$(read_dotenv_value "INTERNAL_EVAL_API_KEY" || true)"
  fi
  if [[ -z "${api_key}" ]]; then
    print_err "无法解析 INTERNAL_EVAL_API_KEY,无法为 Vite 注入 VITE_EVAL_API_KEY"
    exit 1
  fi

  if [[ "${OPEN_EVAL_WEB}" == "1" ]]; then
    vite_extra+=(--open)
  fi

  (
    cd "${EVAL_WEB_DIR}"
    # ${arr[@]+...}: expanding an empty array under `set -u` is an error on
    # bash < 4.4, so only expand it when it has elements.
    VITE_EVAL_API_KEY="${api_key}" \
      VITE_EVAL_PROXY_TARGET="http://127.0.0.1:${INTERNAL_EVAL_PORT}" \
      npm run dev -- --host 127.0.0.1 --port "${EVAL_WEB_PORT}" \
      ${vite_extra[@]+"${vite_extra[@]}"}
  ) &
  EVAL_WEB_PID=$!
  print_ok "eval-web 已启动 (PID: ${EVAL_WEB_PID}) → http://127.0.0.1:${EVAL_WEB_PORT}/"
}

# Start the internal evaluation API (uvicorn, --reload) on INTERNAL_EVAL_PORT,
# wait for it to listen, optionally start eval-web, then print the URLs.
start_internal_eval_http() {
  local api_start_status=0

  check_internal_eval_key
  print_header "启动内部评测 API (internal_main :${INTERNAL_EVAL_PORT})"
  cd "${ROOT_DIR}"

  if is_port_listening "${INTERNAL_EVAL_PORT}"; then
    print_err "端口 ${INTERNAL_EVAL_PORT} 已被占用,无法启动内部评测 Uvicorn。"
    print_err "请先结束占用进程,或设置 INTERNAL_EVAL_PORT 为其他端口"
    exit 1
  fi

  "${UVICORN_BIN}" app.internal_main:internal_app --reload \
    --reload-exclude 'alembic/**' \
    --reload-exclude 'alembic.ini' \
    --host "${INTERNAL_EVAL_HOST}" --port "${INTERNAL_EVAL_PORT}" &
  INTERNAL_EVAL_PID=$!

  # Capture the status inside `if` so a non-zero result does not trip set -e.
  if wait_for_tcp_listener "${INTERNAL_EVAL_PID}" "${INTERNAL_EVAL_PORT}" 8; then
    api_start_status=0
  else
    api_start_status=$?
  fi
  case "${api_start_status}" in
    0)
      print_ok "Internal Eval API 已启动 (PID: ${INTERNAL_EVAL_PID})"
      ;;
    1)
      print_err "Internal Eval API 启动失败,进程已退出;请查看上方 Uvicorn 日志"
      exit 1
      ;;
    *)
      print_err "Internal Eval API 进程仍存活,但端口 ${INTERNAL_EVAL_PORT} 未在预期时间内开始监听"
      exit 1
      ;;
  esac

  if [[ "${START_EVAL_WEB}" == "1" ]]; then
    start_eval_web
  else
    print_warn "已跳过 eval-web (START_EVAL_WEB=0)"
  fi

  echo
  echo -e "${BLUE}── 内部评测${NC}"
  echo "评测 Web UI: http://127.0.0.1:${EVAL_WEB_PORT}/ (Vite /internal → :${INTERNAL_EVAL_PORT})"
  echo "内部评测 API: http://127.0.0.1:${INTERNAL_EVAL_PORT}/health"
  echo "评测 REST: http://127.0.0.1:${INTERNAL_EVAL_PORT}/internal/api/evaluation"
  if [[ "${INTERNAL_EVAL_ENABLE_DOCS:-}" == "1" ]] ||
    grep -qE '^INTERNAL_EVAL_ENABLE_DOCS=true' "${ROOT_DIR}/.env" 2>/dev/null; then
    echo "内部评测文档: http://127.0.0.1:${INTERNAL_EVAL_PORT}/docs"
  fi
  echo "说明: api/docs/internal-eval.md"
}

# Start the main FastAPI app and the Celery worker (unless EVAL_ATTACH_ONLY=1
# and API_PORT is already listening), then the internal eval stack when
# LIFE_ECHO_WITH_INTERNAL_EVAL=1.
start_services() {
  local skip_main=0
  local api_start_status=0

  print_header "启动 FastAPI 和 Celery"
  cd "${ROOT_DIR}"

  if [[ "${EVAL_ATTACH_ONLY}" == "1" ]] && is_port_listening "${API_PORT}"; then
    skip_main=1
    print_warn "EVAL_ATTACH_ONLY=1::${API_PORT} 已在监听,跳过本脚本内的主站与 Celery(请确保别处已有 Worker)"
  fi

  if [[ "${skip_main}" == "1" ]] && [[ "${LIFE_ECHO_WITH_INTERNAL_EVAL}" != "1" ]]; then
    print_err "EVAL_ATTACH_ONLY=1 仅用于在已有主站时附加内部评测;请使用 ./internal-eval.sh 或导出 LIFE_ECHO_WITH_INTERNAL_EVAL=1"
    exit 1
  fi

  if [[ "${skip_main}" == "0" ]]; then
    if is_port_listening "${API_PORT}"; then
      print_err "端口 ${API_PORT} 已被占用,无法启动新的 Uvicorn。"
      print_err "请先结束占用进程,例如: lsof -nP -iTCP:${API_PORT} -sTCP:LISTEN"
      print_err "若主站已在其他终端由本脚本启动,可改用: EVAL_ATTACH_ONLY=1 ./development.sh"
      exit 1
    fi

    "${UVICORN_BIN}" main:app --reload \
      --reload-exclude 'alembic/**' \
      --reload-exclude 'alembic.ini' \
      --host "${API_HOST}" --port "${API_PORT}" &
    API_PID=$!

    # Capture the status inside `if` so a non-zero result does not trip set -e.
    if wait_for_tcp_listener "${API_PID}" "${API_PORT}" 8; then
      api_start_status=0
    else
      api_start_status=$?
    fi
    case "${api_start_status}" in
      0)
        print_ok "FastAPI 已启动 (PID: ${API_PID})"
        ;;
      1)
        print_err "FastAPI 启动失败,进程已退出;请查看上方 Uvicorn 日志"
        exit 1
        ;;
      *)
        print_err "FastAPI 进程仍存活,但端口 ${API_PORT} 未在预期时间内开始监听"
        exit 1
        ;;
    esac

    "${CELERY_BIN}" -A app.tasks.celery_app worker --loglevel=info \
      --pool="${CELERY_POOL}" -Q celery,memory_idle &
    CELERY_PID=$!
    ensure_background_process_alive "Celery" "${CELERY_PID}"
    print_ok "Celery 已启动 (PID: ${CELERY_PID})"
  fi

  if [[ "${LIFE_ECHO_WITH_INTERNAL_EVAL}" == "1" ]]; then
    start_internal_eval_http
  fi

  echo
  echo -e "${GREEN}开发环境启动完成${NC}"
  if [[ -n "${API_PID}" ]]; then
    echo "主站文档: http://localhost:${API_PORT}/docs"
    echo "健康检查: http://localhost:${API_PORT}/health"
  fi
  echo "按 Ctrl+C 停止所有进程"
}

# Entry point: prepare infrastructure and environment, start all services and
# block until they exit (or the script is interrupted).
main() {
  local wait_pids=()

  if [[ "${LIFE_ECHO_WITH_INTERNAL_EVAL}" == "1" ]]; then
    print_header "Life Echo 开发环境 + 内部评测(主站 + :${INTERNAL_EVAL_PORT} + Eval Web)"
  else
    print_header "Life Echo 开发环境一键启动"
  fi

  require_cmd "uv"
  if [[ "${SKIP_INFRA}" != "1" ]]; then
    require_cmd "docker"
  fi

  # cleanup runs exactly once via EXIT. INT/TERM must terminate the script
  # (which then fires EXIT); a handler that merely returned would let the
  # start-up sequence continue after everything had been torn down.
  trap cleanup EXIT
  trap 'exit 130' INT
  trap 'exit 143' TERM

  if [[ "${SKIP_INFRA}" != "1" ]]; then
    start_infra
    wait_postgres_ready || true
  else
    print_warn "已跳过 docker 基础设施 (SKIP_INFRA=1)"
  fi

  ensure_venv
  ensure_dotenv_from_development
  check_env_file
  wait_host_infra_ready
  run_migrations
  start_services

  if [[ -n "${API_PID}" ]]; then
    wait_pids+=("${API_PID}")
  fi
  if [[ -n "${CELERY_PID}" ]]; then
    wait_pids+=("${CELERY_PID}")
  fi
  if [[ -n "${INTERNAL_EVAL_PID}" ]]; then
    wait_pids+=("${INTERNAL_EVAL_PID}")
  fi
  if [[ -n "${EVAL_WEB_PID}" ]]; then
    wait_pids+=("${EVAL_WEB_PID}")
  fi

  if (( ${#wait_pids[@]} == 0 )); then
    print_err "没有可等待的进程,退出"
    exit 1
  fi
  wait "${wait_pids[@]}"
}

main "$@"