#!/usr/bin/env bash
# Start only the "internal regression evaluation" stack (app/internal_main.py);
# the main-site consumer API is NOT started.
#
# Differences from development.sh:
#   - development.sh:   main:app + Celery (usually :8000), for the App / main business.
#   - internal-eval.sh: internal_app + Celery (:8001), evaluation / replay /
#                       GLM scoring / gating only.
#   Both share the database and Redis; a second main:app is never launched.
#
# If ./development.sh is already running on this machine and you only want the
# extra evaluation HTTP server (recommended; avoids a second worker/docker set):
#   SKIP_INFRA=1 SKIP_INSTALL=1 SKIP_CELERY=1 ./internal-eval.sh
#
# Usage: cd api && ./internal-eval.sh
# Optional environment variables:
#   SKIP_INFRA=1        skip docker compose when Postgres/Redis are already up
#   SKIP_INSTALL=1      skip uv sync
#   SKIP_CELERY=1       start only the internal API (a Celery worker runs elsewhere)
#   START_EVAL_WEB=0    do not start the evaluation frontend (by default
#                       app-eval-web is started; requires a prior npm install)
#   OPEN_EVAL_WEB=0     start the frontend without opening a browser
#                       (default passes --open to Vite)
#   EVAL_WEB_PORT       port passed to Vite (--port) and shown in the hints;
#                       default 5174 (matches app-eval-web/vite.config.ts)
#   INTERNAL_EVAL_HOST  bind host for Uvicorn, default 0.0.0.0
#   INTERNAL_EVAL_PORT  default 8001
#   CELERY_POOL         default solo (same as development.sh)
#   SHUTDOWN_TIMEOUT    seconds to wait after SIGTERM before SIGKILL, default 12

set -euo pipefail

# ANSI colour escapes, interpreted by `echo -e` in the print_* helpers.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# ROOT_DIR is the directory containing this script; REPO_ROOT is its parent.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${ROOT_DIR}/.." && pwd)"
EVAL_WEB_DIR="${REPO_ROOT}/app-eval-web"
VENV_DIR="${ROOT_DIR}/.venv"
UVICORN_BIN="${VENV_DIR}/bin/uvicorn"
CELERY_BIN="${VENV_DIR}/bin/celery"

INTERNAL_EVAL_HOST="${INTERNAL_EVAL_HOST:-0.0.0.0}"
INTERNAL_EVAL_PORT="${INTERNAL_EVAL_PORT:-8001}"
CELERY_POOL="${CELERY_POOL:-solo}"
SKIP_INSTALL="${SKIP_INSTALL:-0}"
SKIP_INFRA="${SKIP_INFRA:-0}"
SKIP_CELERY="${SKIP_CELERY:-0}"
START_EVAL_WEB="${START_EVAL_WEB:-1}"
OPEN_EVAL_WEB="${OPEN_EVAL_WEB:-1}"
EVAL_WEB_PORT="${EVAL_WEB_PORT:-5174}"
SHUTDOWN_TIMEOUT="${SHUTDOWN_TIMEOUT:-12}"

# Runtime state shared between the start_* functions and cleanup().
API_PID=""
CELERY_PID=""
EVAL_WEB_PID=""
CLEANED_UP=0
INFRA_STARTED=0

# Print a blue banner with $1 as the title.
print_header() {
  echo -e "\n${BLUE}========================================${NC}"
  echo -e "${BLUE}$1${NC}"
  echo -e "${BLUE}========================================${NC}"
}

# Print a green success line.
print_ok() {
  echo -e "${GREEN}✓ $1${NC}"
}

# Print a yellow warning line.
print_warn() {
  echo -e "${YELLOW}⚠ $1${NC}"
}

# Print a red error line.
print_err() {
  echo -e "${RED}✗ $1${NC}"
}

# Return 0 when $1 is a non-empty PID that accepts signal 0, non-zero otherwise.
is_pid_alive() {
  local pid="$1"
  [[ -n "${pid}" ]] && kill -0 "${pid}" 2>/dev/null
}

# Poll once per second until PID $1 is gone.
# Arguments: $1 - pid, $2 - timeout in seconds
# Returns:   0 when the process exited, 1 when the timeout was reached first
wait_pid_exit() {
  local pid="$1"
  local timeout="$2"
  local waited=0

  while is_pid_alive "${pid}"; do
    if (( waited >= timeout )); then
      return 1
    fi
    sleep 1
    waited=$((waited + 1))
  done
  return 0
}

# Send SIGTERM to every descendant of PID $1, deepest descendants first.
# Failures of pgrep/kill are ignored (the process may already be gone).
kill_children_term() {
  local pid="$1"
  local children
  children="$(pgrep -P "${pid}" 2>/dev/null || true)"

  if [[ -n "${children}" ]]; then
    while IFS= read -r child_pid; do
      [[ -z "${child_pid}" ]] && continue
      kill_children_term "${child_pid}"
      kill -TERM "${child_pid}" 2>/dev/null || true
    done <<< "${children}"
  fi
}

# Stop a process tree: SIGTERM first, SIGKILL if it outlives the timeout.
# Arguments: $1 - display name, $2 - pid, $3 - timeout seconds (default 10)
stop_process_gracefully() {
  local name="$1"
  local pid="$2"
  local timeout="${3:-10}"

  if ! is_pid_alive "${pid}"; then
    print_ok "${name} 已退出"
    return 0
  fi

  print_warn "正在停止 ${name}(PID: ${pid})..."
  kill_children_term "${pid}"
  kill -TERM "${pid}" 2>/dev/null || true

  if wait_pid_exit "${pid}" "${timeout}"; then
    print_ok "${name} 已停止"
    return 0
  fi

  print_warn "${name} 在 ${timeout}s 内未退出,准备强制结束"
  kill -KILL "${pid}" 2>/dev/null || true
  wait_pid_exit "${pid}" 3 || true
  print_ok "${name} 已强制结束"
}

# Trap handler (EXIT/INT/TERM): stop every process this script started and,
# when this run brought up the containers, stop them too.
# CLEANED_UP guards against running twice (e.g. INT followed by EXIT).
cleanup() {
  if [[ "${CLEANED_UP}" == "1" ]]; then
    return 0
  fi
  CLEANED_UP=1

  print_header "正在关闭内部评测环境"

  if is_pid_alive "${EVAL_WEB_PID}"; then
    stop_process_gracefully "eval-web (Vite)" "${EVAL_WEB_PID}" "${SHUTDOWN_TIMEOUT}"
  fi

  if is_pid_alive "${API_PID}"; then
    stop_process_gracefully "Internal Eval API" "${API_PID}" "${SHUTDOWN_TIMEOUT}"
  fi

  if is_pid_alive "${CELERY_PID}"; then
    stop_process_gracefully "Celery" "${CELERY_PID}" "${SHUTDOWN_TIMEOUT}"
  fi

  if [[ "${INFRA_STARTED}" == "1" ]]; then
    print_warn "正在停止 PostgreSQL / Redis 容器..."
    # Best effort: errors from docker compose are deliberately ignored here.
    (
      cd "${ROOT_DIR}" && docker compose -f docker-compose.dev.yml stop
    ) >/dev/null 2>&1 || true
    print_ok "PostgreSQL/Redis 容器已停止"
  fi
}

# Exit with status 1 unless command $1 is available on PATH.
require_cmd() {
  local cmd="$1"
  if ! command -v "${cmd}" >/dev/null 2>&1; then
    print_err "未找到命令: ${cmd}"
    exit 1
  fi
}

# Bring up the docker-compose.dev.yml services and record that this run
# started them, so cleanup() stops them again.
start_infra() {
  print_header "启动 PostgreSQL 和 Redis"
  cd "${ROOT_DIR}"
  docker compose -f docker-compose.dev.yml up -d
  INFRA_STARTED=1
  print_ok "基础设施已就绪"
}

# Poll pg_isready inside the postgres container, once per second, up to 30 times.
# Returns: 0 when ready, 1 when the retries are exhausted
wait_postgres_ready() {
  local retries=30
  local i=0

  print_header "等待 PostgreSQL 就绪"
  cd "${ROOT_DIR}"

  while (( i < retries )); do
    if docker compose -f docker-compose.dev.yml exec -T postgres \
      pg_isready -U postgres >/dev/null 2>&1; then
      print_ok "PostgreSQL 已就绪"
      return 0
    fi
    sleep 1
    i=$((i + 1))
  done

  print_warn "PostgreSQL 在 ${retries}s 内未就绪,迁移可能失败"
  return 1
}

# Print the DATABASE_URL in effect: the environment variable when set,
# otherwise the first DATABASE_URL= line of ${ROOT_DIR}/.env with one layer
# of surrounding quotes removed.
# Returns: 0 when a value was printed, 1 when none was found
get_effective_database_url() {
  if [[ -n "${DATABASE_URL:-}" ]]; then
    printf '%s\n' "${DATABASE_URL}"
    return 0
  fi

  if [[ -f "${ROOT_DIR}/.env" ]]; then
    local line
    line="$(sed -n 's/^DATABASE_URL=//p' "${ROOT_DIR}/.env" | sed -n '1p')"
    line="${line%\"}"
    line="${line#\"}"
    line="${line%\'}"
    line="${line#\'}"
    if [[ -n "${line}" ]]; then
      printf '%s\n' "${line}"
      return 0
    fi
  fi

  return 1
}

# Warn when the DATABASE_URL host is one of a few names that typically only
# resolve inside the compose network (postgres, db, postgres-dev, postgresql).
warn_database_url_host_pitfall() {
  local database_url
  local host

  if ! database_url="$(get_effective_database_url)"; then
    return 0
  fi

  # Capture the text after '@' up to the next ':', '/', '?' or '#'.
  if [[ "${database_url}" =~ @([^:/?#]+) ]]; then
    host="${BASH_REMATCH[1]}"
    case "${host}" in
      postgres|db|postgres-dev|postgresql)
        print_warn "检测到 DATABASE_URL 主机为 ${host};在宿主机执行 Alembic/uvicorn 时通常应使用 localhost"
        ;;
    esac
  fi
}

# Look at the first 200 lines of the Alembic log $1 and print a targeted hint
# for the first recognised failure pattern (hostname, connection, auth, path).
print_alembic_failure_hint() {
  local log_file="$1"
  local log_output
  log_output="$(sed -n '1,200p' "${log_file}")"

  if [[ "${log_output}" == *'could not translate host name "postgres"'* ]] \
    || [[ "${log_output}" == *"Name or service not known"* ]]; then
    print_warn "看起来 DATABASE_URL 指向了容器内主机名;在宿主机运行时请改用 localhost:5432"
  elif [[ "${log_output}" == *"Connection refused"* ]] \
    || [[ "${log_output}" == *"could not connect to server"* ]]; then
    print_warn "PostgreSQL 连接被拒绝;请确认容器已启动且 DATABASE_URL 与 docker-compose.dev.yml 暴露端口一致"
  elif [[ "${log_output}" == *"password authentication failed"* ]]; then
    print_warn "PostgreSQL 用户名或密码不匹配;请核对 .env.development 中的 DATABASE_URL"
  elif [[ "${log_output}" == *"No such file or directory"* ]] \
    || [[ "${log_output}" == *"can't open file"* ]]; then
    print_warn "Alembic 依赖的文件或工作目录可能不正确;请确认在 api/ 目录运行脚本"
  fi
}

# Return 0 when something is listening on TCP port $1.
# Uses lsof when available, otherwise a connect() probe to 127.0.0.1 through
# the venv's Python; returns 1 when neither tool is available.
is_port_listening() {
  local port="$1"

  if command -v lsof >/dev/null 2>&1; then
    lsof -nP -iTCP:"${port}" -sTCP:LISTEN >/dev/null 2>&1
    return $?
  fi

  if [[ -x "${VENV_DIR}/bin/python" ]]; then
    "${VENV_DIR}/bin/python" - "${port}" <<'PY' >/dev/null 2>&1
import socket
import sys

sock = socket.socket()
sock.settimeout(0.2)
try:
    sock.connect(("127.0.0.1", int(sys.argv[1])))
except OSError:
    raise SystemExit(1)
finally:
    sock.close()
raise SystemExit(0)
PY
    return $?
  fi

  return 1
}

# Wait until port $2 is listening or process $1 dies.
# Arguments: $1 - pid, $2 - port, $3 - timeout seconds (default 8)
# Returns:   0 listening, 1 process exited, 2 timed out with process alive
wait_for_tcp_listener() {
  local pid="$1"
  local port="$2"
  local timeout="${3:-8}"
  local waited=0

  while (( waited < timeout )); do
    if is_port_listening "${port}"; then
      return 0
    fi
    if ! is_pid_alive "${pid}"; then
      return 1
    fi
    sleep 1
    waited=$((waited + 1))
  done

  return 2
}

# Sleep one second, then exit the script if background process $2 (display
# name $1) has already died.
ensure_background_process_alive() {
  local name="$1"
  local pid="$2"

  sleep 1
  if ! is_pid_alive "${pid}"; then
    print_err "${name} 启动后立即退出,请查看上方日志"
    exit 1
  fi
}

# Create .venv when missing and, unless SKIP_INSTALL=1, run `uv sync`.
ensure_venv() {
  print_header "检查 Python 虚拟环境"
  # `uv sync` acts on the current directory. With SKIP_INFRA=1 nothing has
  # changed directory yet, so pin the working directory here.
  cd "${ROOT_DIR}"

  if [[ ! -d "${VENV_DIR}" ]]; then
    print_warn ".venv 不存在,正在创建"
    uv venv "${VENV_DIR}"
  fi

  if [[ "${SKIP_INSTALL}" != "1" ]]; then
    print_header "安装 Python 依赖"
    uv sync
    print_ok "依赖安装完成"
  else
    print_warn "已跳过依赖安装 (SKIP_INSTALL=1)"
  fi
}

# Overwrite .env with .env.development when the latter exists; otherwise keep
# whatever .env is already there.
ensure_dotenv_from_development() {
  print_header "准备本地 .env"
  if [[ -f "${ROOT_DIR}/.env.development" ]]; then
    cp "${ROOT_DIR}/.env.development" "${ROOT_DIR}/.env"
    print_ok "已从 .env.development 同步为 .env"
    return 0
  fi
  print_warn "未找到 .env.development,将使用现有 .env(若存在)"
}

# Exit with status 1 unless INTERNAL_EVAL_API_KEY has a value, either on a
# line of .env or in the environment.
check_internal_eval_key() {
  print_header "检查内部评测密钥"

  if [[ -f "${ROOT_DIR}/.env" ]] \
    && grep -qE '^INTERNAL_EVAL_API_KEY=.+' "${ROOT_DIR}/.env" 2>/dev/null; then
    print_ok "已在 .env 中配置 INTERNAL_EVAL_API_KEY"
    return 0
  fi

  if [[ -n "${INTERNAL_EVAL_API_KEY:-}" ]]; then
    print_ok "已从环境变量传入 INTERNAL_EVAL_API_KEY"
    return 0
  fi

  print_err "未配置 INTERNAL_EVAL_API_KEY:内部评测接口将返回 503。"
  print_err "请在 api/.env.development(或 .env)中加入一行,例如:"
  print_err " INTERNAL_EVAL_API_KEY=\"your-long-random-secret\""
  exit 1
}

# Warn when .env is missing; otherwise check DATABASE_URL for the
# container-hostname pitfall.
check_env_file() {
  print_header "检查环境变量文件"
  if [[ ! -f "${ROOT_DIR}/.env" ]]; then
    print_warn "未找到 .env,应用可能因缺少配置启动失败"
  else
    print_ok "检测到 .env"
    warn_database_url_host_pitfall
  fi
}

# Run `alembic upgrade head`. A failure is reported (hint plus the last 40
# log lines) but does not abort the script.
run_migrations() {
  print_header "执行数据库迁移"
  cd "${ROOT_DIR}"

  local log_file
  log_file="$(mktemp -t life-echo-alembic.XXXXXX.log)"

  if uv run alembic upgrade head >"${log_file}" 2>&1; then
    print_ok "Alembic 迁移已就绪"
    rm -f "${log_file}"
  else
    print_warn "Alembic 迁移失败(可能数据库未启动或 DATABASE_URL 未配置),应用启动可能失败"
    print_alembic_failure_hint "${log_file}"
    print_warn "Alembic 输出(最近 40 行):"
    tail -n 40 "${log_file}"
    rm -f "${log_file}"
  fi
}

# Start the app-eval-web Vite dev server in the background and store its PID
# in EVAL_WEB_PID. Exits when the directory, node_modules, npm or the API key
# is missing.
start_eval_web() {
  print_header "启动 app-eval-web (Vite)"

  if [[ ! -d "${EVAL_WEB_DIR}" ]]; then
    print_err "未找到 ${EVAL_WEB_DIR}"
    exit 1
  fi

  if [[ ! -d "${EVAL_WEB_DIR}/node_modules" ]]; then
    print_err "请先执行: cd app-eval-web && npm install"
    exit 1
  fi

  require_cmd "npm"

  # Prefer the environment variable; fall back to the first matching line of
  # .env. sed (unlike grep) exits 0 on no match, so `pipefail` cannot abort
  # the script before the explicit error below is printed. Quote stripping
  # mirrors get_effective_database_url (both " and ').
  local api_key="${INTERNAL_EVAL_API_KEY:-}"
  if [[ -z "${api_key}" ]] && [[ -f "${ROOT_DIR}/.env" ]]; then
    api_key="$(sed -n 's/^INTERNAL_EVAL_API_KEY=//p' "${ROOT_DIR}/.env" \
      | sed -n '1p' | tr -d '\r')"
    api_key="${api_key%\"}"
    api_key="${api_key#\"}"
    api_key="${api_key%\'}"
    api_key="${api_key#\'}"
  fi

  if [[ -z "${api_key}" ]]; then
    print_err "无法解析 INTERNAL_EVAL_API_KEY,无法为 Vite 注入 VITE_EVAL_API_KEY"
    exit 1
  fi

  local vite_extra=()
  if [[ "${OPEN_EVAL_WEB}" == "1" ]]; then
    vite_extra+=(--open)
  fi

  # VITE_EVAL_API_BASE is intentionally left unset: the frontend goes through
  # the Vite proxy (app-eval-web/vite.config.ts) to :${INTERNAL_EVAL_PORT},
  # which avoids direct-connection / CORS / wrong-backend problems.
  # To talk to a remote API directly: export VITE_EVAL_API_BASE=https://...
  # and then run `npm run dev` by hand.
  (
    cd "${EVAL_WEB_DIR}"
    # ${arr[@]+...} guards the empty-array case: under `set -u`, bash older
    # than 4.4 treats "${vite_extra[@]}" on an empty array as unbound.
    VITE_EVAL_API_KEY="${api_key}" \
      npm run dev -- --host 127.0.0.1 --port "${EVAL_WEB_PORT}" \
      ${vite_extra[@]+"${vite_extra[@]}"}
  ) &
  EVAL_WEB_PID=$!

  print_ok "eval-web 已启动 (PID: ${EVAL_WEB_PID}) → http://127.0.0.1:${EVAL_WEB_PORT}/"
}

# Start Uvicorn (internal_app), optionally Celery and the eval web UI, then
# print the access URLs. Exits when the port is taken or a process fails to
# come up.
start_services() {
  print_header "启动 Internal Eval API 与 Celery"
  cd "${ROOT_DIR}"

  if is_port_listening "${INTERNAL_EVAL_PORT}"; then
    print_err "端口 ${INTERNAL_EVAL_PORT} 已被占用,无法启动内部评测 Uvicorn。"
    print_err "请先结束占用进程,或设置 INTERNAL_EVAL_PORT 为其他端口"
    exit 1
  fi

  # Same as the main development script: judge / production LLM settings are
  # read from .env; API docs are off by default and can be enabled locally
  # with `export INTERNAL_EVAL_ENABLE_DOCS=1`.
  "${UVICORN_BIN}" app.internal_main:internal_app --reload \
    --reload-exclude 'alembic/**' \
    --reload-exclude 'alembic.ini' \
    --host "${INTERNAL_EVAL_HOST}" --port "${INTERNAL_EVAL_PORT}" &
  API_PID=$!

  # Capture the status inside an `if` so a non-zero result does not trip `set -e`.
  local api_start_status=0
  if wait_for_tcp_listener "${API_PID}" "${INTERNAL_EVAL_PORT}" 8; then
    api_start_status=0
  else
    api_start_status=$?
  fi

  case "${api_start_status}" in
    0)
      print_ok "Internal Eval API 已启动 (PID: ${API_PID})"
      ;;
    1)
      print_err "Internal Eval API 启动失败,进程已退出;请查看上方 Uvicorn 日志"
      exit 1
      ;;
    *)
      print_err "Internal Eval API 进程仍存活,但端口 ${INTERNAL_EVAL_PORT} 未在预期时间内开始监听"
      exit 1
      ;;
  esac

  if [[ "${SKIP_CELERY}" != "1" ]]; then
    "${CELERY_BIN}" -A app.tasks.celery_app worker --loglevel=info --pool="${CELERY_POOL}" &
    CELERY_PID=$!
    ensure_background_process_alive "Celery" "${CELERY_PID}"
    print_ok "Celery 已启动 (PID: ${CELERY_PID})"
  else
    print_warn "已跳过 Celery (SKIP_CELERY=1);实验 run 接口需要 worker 才能执行"
  fi

  if [[ "${START_EVAL_WEB}" == "1" ]]; then
    start_eval_web
  fi

  echo
  echo -e "${GREEN}内部评测环境启动完成${NC}"
  echo "【请用浏览器打开】评测 Web UI: http://127.0.0.1:${EVAL_WEB_PORT}/ (/internal 会代理到 API :${INTERNAL_EVAL_PORT})"
  echo "内部评测 API: http://127.0.0.1:${INTERNAL_EVAL_PORT}/health"
  echo "评测 REST 前缀: http://127.0.0.1:${INTERNAL_EVAL_PORT}/internal/api/evaluation"
  if [[ "${INTERNAL_EVAL_ENABLE_DOCS:-}" == "1" ]] \
    || grep -qE '^INTERNAL_EVAL_ENABLE_DOCS=true' "${ROOT_DIR}/.env" 2>/dev/null; then
    echo "API 文档: http://127.0.0.1:${INTERNAL_EVAL_PORT}/docs"
  fi
  echo "说明文档: api/docs/internal-eval.md"
  echo "按 Ctrl+C 停止所有进程"
}

# Entry point: prepare infra/venv/.env, migrate, start services, then block
# until the started processes exit. cleanup() runs on EXIT/INT/TERM.
main() {
  print_header "Life Echo 内部回归评测 — 一键启动"
  echo -e "${BLUE}说明:${NC} 不启动主站 API(main:app / 默认 8000);仅启动 internal_main(:${INTERNAL_EVAL_PORT})。"
  echo ""

  require_cmd "uv"

  trap cleanup EXIT INT TERM

  if [[ "${SKIP_INFRA}" != "1" ]]; then
    require_cmd "docker"
    start_infra
    # A not-yet-ready database is only a warning; migrations report their own failure.
    wait_postgres_ready || true
  else
    print_warn "已跳过 docker 基础设施 (SKIP_INFRA=1)"
  fi

  ensure_venv
  ensure_dotenv_from_development
  check_env_file
  check_internal_eval_key
  run_migrations
  start_services

  local wait_pids=("${API_PID}")
  if [[ "${SKIP_CELERY}" != "1" ]]; then
    wait_pids+=("${CELERY_PID}")
  fi
  if [[ "${START_EVAL_WEB}" == "1" ]] && [[ -n "${EVAL_WEB_PID}" ]]; then
    wait_pids+=("${EVAL_WEB_PID}")
  fi
  wait "${wait_pids[@]}"
}

main "$@"