Files
life-echo/api/internal-eval.sh
Kevin ea97427767 fix(dev): idempotent Alembic chain for squashed 0001 + clearer dev scripts
- Make migrations 0002–0008 skip schema changes already applied when
  0001_initial creates current ORM (rename segments column, timeline FK,
  memoir phase flags, drop content_tsv, eval_* tables).
- development.sh / internal-eval.sh: surface Alembic stderr, warn on
  docker-style DB hosts, TCP port checks without lsof, verify Uvicorn
  listens before claiming started.
2026-04-07 10:34:18 +08:00

518 lines
14 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env bash
# 仅启动「内部回归评测」栈（app/internal_main.py），不启动主站 consumer API。
#
# 与 development.sh 的区别：
# - development.sh：main:app + Celery（通常 :8000），面向 App/主业务。
# - internal-eval.sh：internal_app + Celery（:8001），仅评测/回放/GLM 打分/门禁。
# 二者共用数据库与 Redis，不会拉起第二份 main:app。
#
# 若本机已在跑 ./development.sh，只想多开评测 HTTP（推荐，避免第二套 worker/docker）：
#   SKIP_INFRA=1 SKIP_INSTALL=1 SKIP_CELERY=1 ./internal-eval.sh
#
# 用法：cd api && ./internal-eval.sh
# 可选环境变量：
#   SKIP_INFRA=1        已起好 Postgres/Redis 时跳过 docker compose
#   SKIP_INSTALL=1      跳过 uv sync
#   SKIP_CELERY=1       仅起内部 API（别处已有 Celery worker 时）
#   START_EVAL_WEB=0    不起评测前端（默认会起 app-eval-web，需已 npm install）
#   OPEN_EVAL_WEB=0     起前端但不自动打开浏览器（默认 Vite --open）
#   EVAL_WEB_PORT       打印提示用，默认 5174（与 app-eval-web/vite.config.ts 一致）
#   INTERNAL_EVAL_PORT  默认 8001
#   CELERY_POOL         默认 solo（与 development.sh 一致）
# Strict mode: stop on errors, on unset variables, and on failed pipeline stages.
set -euo pipefail

# ANSI colour sequences. They are stored as literal backslash escapes and are
# only turned into real escape characters by `echo -e` in the print helpers.
NC='\033[0m'
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'

# Paths, all derived from the directory that contains this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${ROOT_DIR}/.." && pwd)"
EVAL_WEB_DIR="${REPO_ROOT}/app-eval-web"
VENV_DIR="${ROOT_DIR}/.venv"
UVICORN_BIN="${VENV_DIR}/bin/uvicorn"
CELERY_BIN="${VENV_DIR}/bin/celery"

# Tunables: a non-empty value from the environment wins, otherwise the default
# on the right-hand side is assigned.
: "${INTERNAL_EVAL_HOST:=0.0.0.0}"
: "${INTERNAL_EVAL_PORT:=8001}"
: "${CELERY_POOL:=solo}"
: "${SKIP_INSTALL:=0}"
: "${SKIP_INFRA:=0}"
: "${SKIP_CELERY:=0}"
: "${START_EVAL_WEB:=1}"
: "${OPEN_EVAL_WEB:=1}"
: "${EVAL_WEB_PORT:=5174}"
: "${SHUTDOWN_TIMEOUT:=12}"

# Runtime state: PIDs of the background jobs started by this script, plus the
# flags that cleanup() consults.
API_PID=""
CELERY_PID=""
EVAL_WEB_PID=""
CLEANED_UP=0
INFRA_STARTED=0
# Print a blue section banner: a blank line, a rule, the title ($1), a rule.
print_header() {
  local rule="${BLUE}========================================${NC}"
  echo -e "\n${rule}"
  echo -e "${BLUE}$1${NC}"
  echo -e "${rule}"
}
# Print a success message ($1) in green on stdout.
print_ok() {
  local message="$1"
  echo -e "${GREEN}${message}${NC}"
}
# Print a warning message ($1) in yellow on stdout.
print_warn() {
  local message="$1"
  echo -e "${YELLOW}${message}${NC}"
}
# Print an error message ($1) in red.
# The message goes to stderr so that it stays visible (and separable) when the
# script's stdout is redirected or piped.
print_err() {
  echo -e "${RED}$1${NC}" >&2
}
# Succeed when $1 is a non-empty PID that this shell is able to signal.
# `kill -0` delivers no signal; it only performs the existence/permission check.
is_pid_alive() {
  local target="$1"
  if [[ -z "${target}" ]]; then
    return 1
  fi
  kill -0 "${target}" 2>/dev/null
}
# Poll once per second until process $1 is gone.
# Arguments: $1 - PID to watch, $2 - maximum number of seconds to wait.
# Returns:   0 once the process has exited, 1 if it is still alive after $2 s.
wait_pid_exit() {
  local pid="$1"
  local timeout="$2"
  local elapsed
  for (( elapsed = 0; ; elapsed += 1 )); do
    is_pid_alive "${pid}" || return 0
    (( elapsed < timeout )) || return 1
    sleep 1
  done
}
# Send SIGTERM to every descendant of PID $1, deepest processes first.
# The process $1 itself is not signalled; the caller does that.
# Descendants are discovered with `pgrep -P`; a failing or missing pgrep simply
# yields no children.
kill_children_term() {
  local pid="$1"
  local children
  # Must be local: the function recurses, and a shared loop variable would be
  # overwritten by the inner call (ending up empty), so the `kill` below would
  # no longer target the child that was just read.
  local child_pid
  children="$(pgrep -P "${pid}" 2>/dev/null || true)"
  if [[ -z "${children}" ]]; then
    return 0
  fi
  while IFS= read -r child_pid; do
    [[ -z "${child_pid}" ]] && continue
    kill_children_term "${child_pid}"
    # Best effort: the child may already have exited.
    kill -TERM "${child_pid}" 2>/dev/null || true
  done <<< "${children}"
}
# Stop a background process: SIGTERM to its descendants and to itself, then
# SIGKILL if it is still alive after the timeout.
# Arguments: $1 - display name, $2 - PID, $3 - seconds to wait (default 10).
stop_process_gracefully() {
  local name="$1"
  local pid="$2"
  local timeout="${3:-10}"

  if is_pid_alive "${pid}"; then
    print_warn "正在停止 ${name}PID: ${pid}..."
    kill_children_term "${pid}"
    kill -TERM "${pid}" 2>/dev/null || true

    if ! wait_pid_exit "${pid}" "${timeout}"; then
      # Still running after the grace period: escalate to SIGKILL.
      print_warn "${name}${timeout}s 内未退出，准备强制结束"
      kill -KILL "${pid}" 2>/dev/null || true
      wait_pid_exit "${pid}" 3 || true
      print_ok "${name} 已强制结束"
      return
    fi

    print_ok "${name} 已停止"
    return 0
  fi

  print_ok "${name} 已退出"
  return 0
}
# Trap handler: stop everything this script started.
# Guarded by CLEANED_UP so that it runs its body only once even though it is
# registered for several signals. Containers are stopped only when this run
# started them (INFRA_STARTED=1).
cleanup() {
  [[ "${CLEANED_UP}" != "1" ]] || return 0
  CLEANED_UP=1

  print_header "正在关闭内部评测环境"

  # "display name|pid" pairs, in shutdown order: front end, API, worker.
  local entry service_name service_pid
  for entry in \
    "eval-web (Vite)|${EVAL_WEB_PID}" \
    "Internal Eval API|${API_PID}" \
    "Celery|${CELERY_PID}"; do
    service_name="${entry%|*}"
    service_pid="${entry##*|}"
    if is_pid_alive "${service_pid}"; then
      stop_process_gracefully "${service_name}" "${service_pid}" "${SHUTDOWN_TIMEOUT}"
    fi
  done

  if [[ "${INFRA_STARTED}" == "1" ]]; then
    print_warn "正在停止 PostgreSQL / Redis 容器..."
    # Best effort and silent: a failure to stop containers must not abort cleanup.
    (
      cd "${ROOT_DIR}" && docker compose -f docker-compose.dev.yml stop
    ) >/dev/null 2>&1 || true
    print_ok "PostgreSQL/Redis 容器已停止"
  fi
}
# Exit the script with status 1 unless command $1 can be found.
require_cmd() {
  local cmd="$1"
  command -v "${cmd}" >/dev/null 2>&1 && return 0
  print_err "未找到命令: ${cmd}"
  exit 1
}
# Bring up the services defined in docker-compose.dev.yml (detached) and record
# in INFRA_STARTED that cleanup() has to stop them again.
# Side effect: changes the working directory to ROOT_DIR.
start_infra() {
  local compose_file="docker-compose.dev.yml"
  print_header "启动 PostgreSQL 和 Redis"
  cd "${ROOT_DIR}"
  docker compose -f "${compose_file}" up -d
  INFRA_STARTED=1
  print_ok "基础设施已就绪"
}
# Poll `pg_isready` inside the compose `postgres` service, once per second, for
# at most 30 attempts.
# Returns: 0 as soon as PostgreSQL answers, 1 (after a warning) otherwise.
# Side effect: changes the working directory to ROOT_DIR.
wait_postgres_ready() {
  local retries=30
  local attempt

  print_header "等待 PostgreSQL 就绪"
  cd "${ROOT_DIR}"

  for (( attempt = 0; attempt < retries; attempt += 1 )); do
    if docker compose -f docker-compose.dev.yml exec -T postgres \
      pg_isready -U postgres >/dev/null 2>&1; then
      print_ok "PostgreSQL 已就绪"
      return 0
    fi
    sleep 1
  done

  print_warn "PostgreSQL 在 ${retries}s 内未就绪，迁移可能失败"
  return 1
}
# Print the DATABASE_URL that applies to this run.
# Lookup order: a non-empty DATABASE_URL environment variable, then the first
# `DATABASE_URL=` line of ${ROOT_DIR}/.env.
# A trailing carriage return (CRLF file) and one surrounding pair of double or
# single quotes are removed from the .env value.
# Outputs: the URL on stdout.
# Returns: 0 when a non-empty URL was found, 1 otherwise.
get_effective_database_url() {
  if [[ -n "${DATABASE_URL:-}" ]]; then
    printf '%s\n' "${DATABASE_URL}"
    return 0
  fi

  local env_file="${ROOT_DIR}/.env"
  [[ -f "${env_file}" ]] || return 1

  local line
  local value=""
  # `|| [[ -n ... ]]` keeps a final line that has no trailing newline.
  while IFS= read -r line || [[ -n "${line}" ]]; do
    if [[ "${line}" == DATABASE_URL=* ]]; then
      value="${line#DATABASE_URL=}"
      break
    fi
  done < "${env_file}"

  # Drop the CR first; otherwise the closing quote is not the last character
  # and would survive the quote stripping below.
  value="${value%$'\r'}"
  value="${value%\"}"
  value="${value#\"}"
  value="${value%\'}"
  value="${value#\'}"

  [[ -n "${value}" ]] || return 1
  printf '%s\n' "${value}"
}
# Warn when the effective DATABASE_URL names a host that looks like a compose
# service name (postgres, db, postgres-dev, postgresql). The host is taken as
# the text after the first `@`, up to the next `:`, `/`, `?` or `#`.
# Always returns 0; stays silent when no URL is configured.
warn_database_url_host_pitfall() {
  local database_url
  local host
  local host_pattern='@([^:/?#]+)'

  database_url="$(get_effective_database_url)" || return 0
  [[ "${database_url}" =~ ${host_pattern} ]] || return 0
  host="${BASH_REMATCH[1]}"

  if [[ "${host}" == "postgres" || "${host}" == "db" \
    || "${host}" == "postgres-dev" || "${host}" == "postgresql" ]]; then
    print_warn "检测到 DATABASE_URL 主机为 ${host}；在宿主机执行 Alembic/uvicorn 时通常应使用 localhost"
  fi
}
# Print at most one targeted hint for a failed Alembic run, chosen by looking
# for well-known error fragments in the captured output.
# Arguments: $1 - path of the file holding Alembic's combined stdout/stderr.
# The whole file is inspected: the decisive message of a traceback is at its
# end, so looking only at the head of a long log would miss it.
print_alembic_failure_hint() {
  local log_file="$1"
  local log_output
  log_output="$(<"${log_file}")"
  if [[ "${log_output}" == *'could not translate host name "postgres"'* ]] || [[ "${log_output}" == *"Name or service not known"* ]]; then
    print_warn "看起来 DATABASE_URL 指向了容器内主机名；在宿主机运行时请改用 localhost:5432"
  elif [[ "${log_output}" == *"Connection refused"* ]] || [[ "${log_output}" == *"could not connect to server"* ]]; then
    print_warn "PostgreSQL 连接被拒绝；请确认容器已启动且 DATABASE_URL 与 docker-compose.dev.yml 暴露端口一致"
  elif [[ "${log_output}" == *"password authentication failed"* ]]; then
    print_warn "PostgreSQL 用户名或密码不匹配；请核对 .env.development 中的 DATABASE_URL"
  elif [[ "${log_output}" == *"No such file or directory"* ]] || [[ "${log_output}" == *"can't open file"* ]]; then
    print_warn "Alembic 依赖的文件或工作目录可能不正确；请确认在 api/ 目录运行脚本"
  fi
}
# Succeed when something accepts TCP connections on port $1.
# Uses `lsof` when it is installed; otherwise falls back to a short connect()
# attempt against 127.0.0.1 with the virtualenv's Python interpreter.
# Returns 1 when neither tool is available, i.e. "not listening" is then only a
# best guess.
is_port_listening() {
  local port="$1"

  if command -v lsof >/dev/null 2>&1; then
    lsof -nP -iTCP:"${port}" -sTCP:LISTEN >/dev/null 2>&1
    return $?
  fi

  if [[ -x "${VENV_DIR}/bin/python" ]]; then
    # The here-document is Python: its indentation is significant and the
    # terminator must stay in column 0.
    "${VENV_DIR}/bin/python" - "${port}" <<'PY' >/dev/null 2>&1
import socket
import sys

sock = socket.socket()
sock.settimeout(0.2)
try:
    sock.connect(("127.0.0.1", int(sys.argv[1])))
except OSError:
    raise SystemExit(1)
finally:
    sock.close()
raise SystemExit(0)
PY
    return $?
  fi

  return 1
}
# Wait until a port is being listened on, checking once per second.
# Arguments: $1 - PID of the process expected to open the port,
#            $2 - TCP port, $3 - seconds to wait (default 8).
# Returns:   0 - the port is listening,
#            1 - the process died before the port opened,
#            2 - the process is alive but the port did not open in time.
wait_for_tcp_listener() {
  local pid="$1"
  local port="$2"
  local timeout="${3:-8}"
  local elapsed

  for (( elapsed = 0; elapsed < timeout; elapsed += 1 )); do
    if is_port_listening "${port}"; then
      return 0
    fi
    is_pid_alive "${pid}" || return 1
    sleep 1
  done

  return 2
}
# Give a freshly started background process one second, then exit the script
# with status 1 if it has already died.
# Arguments: $1 - display name, $2 - PID.
ensure_background_process_alive() {
  local name="$1"
  local pid="$2"
  sleep 1
  is_pid_alive "${pid}" && return 0
  print_err "${name} 启动后立即退出，请查看上方日志"
  exit 1
}
# Make sure the virtualenv exists and, unless SKIP_INSTALL=1, install the
# project's dependencies with `uv sync`.
# Side effect: changes the working directory to ROOT_DIR, like the other
# steps do. `uv sync` operates on the project in the current directory, and
# with SKIP_INFRA=1 no earlier step has moved there yet.
ensure_venv() {
  print_header "检查 Python 虚拟环境"
  cd "${ROOT_DIR}"
  if [[ ! -d "${VENV_DIR}" ]]; then
    print_warn ".venv 不存在，正在创建"
    uv venv "${VENV_DIR}"
  fi
  if [[ "${SKIP_INSTALL}" != "1" ]]; then
    print_header "安装 Python 依赖"
    uv sync
    print_ok "依赖安装完成"
  else
    print_warn "已跳过依赖安装 (SKIP_INSTALL=1)"
  fi
}
# Copy ${ROOT_DIR}/.env.development over ${ROOT_DIR}/.env when the former
# exists (an existing .env is overwritten); otherwise only warn and leave
# whatever .env is there untouched.
ensure_dotenv_from_development() {
  local template="${ROOT_DIR}/.env.development"
  print_header "准备本地 .env"
  if [[ ! -f "${template}" ]]; then
    print_warn "未找到 .env.development将使用现有 .env若存在"
    return
  fi
  cp "${template}" "${ROOT_DIR}/.env"
  print_ok "已从 .env.development 同步为 .env"
  return 0
}
# Make sure an INTERNAL_EVAL_API_KEY is available, either as a line in
# ${ROOT_DIR}/.env or as an environment variable; otherwise print instructions
# and exit the script with status 1.
# A .env line only counts when its value contains at least one character that
# is not a quote or whitespace, so `INTERNAL_EVAL_API_KEY=""` (or a value made
# only of a CRLF carriage return) is treated as not configured.
check_internal_eval_key() {
  print_header "检查内部评测密钥"
  local env_file="${ROOT_DIR}/.env"
  if [[ -f "${env_file}" ]] \
    && grep -qE "^INTERNAL_EVAL_API_KEY=.*[^\"'[:space:]]" "${env_file}" 2>/dev/null; then
    print_ok "已在 .env 中配置 INTERNAL_EVAL_API_KEY"
    return 0
  fi
  if [[ -n "${INTERNAL_EVAL_API_KEY:-}" ]]; then
    print_ok "已从环境变量传入 INTERNAL_EVAL_API_KEY"
    return 0
  fi
  print_err "未配置 INTERNAL_EVAL_API_KEY内部评测接口将返回 503。"
  print_err "请在 api/.env.development或 .env中加入一行例如"
  print_err " INTERNAL_EVAL_API_KEY=\"your-long-random-secret\""
  exit 1
}
# Report whether ${ROOT_DIR}/.env exists; when it does, also run the
# DATABASE_URL host sanity check.
check_env_file() {
  print_header "检查环境变量文件"
  if [[ -f "${ROOT_DIR}/.env" ]]; then
    print_ok "检测到 .env"
    warn_database_url_host_pitfall
  else
    print_warn "未找到 .env应用可能因缺少配置启动失败"
  fi
}
# Run `alembic upgrade head` through uv from ROOT_DIR.
# Alembic's output is captured in a temporary file; on failure a hint and the
# last 40 lines of that output are printed. A failed migration is reported as a
# warning only: the function still returns 0 so that startup continues.
run_migrations() {
  print_header "执行数据库迁移"
  cd "${ROOT_DIR}"

  local alembic_log
  alembic_log="$(mktemp -t life-echo-alembic.XXXXXX.log)"

  local alembic_status=0
  uv run alembic upgrade head >"${alembic_log}" 2>&1 || alembic_status=$?

  if (( alembic_status == 0 )); then
    print_ok "Alembic 迁移已就绪"
    rm -f "${alembic_log}"
    return
  fi

  print_warn "Alembic 迁移失败（可能数据库未启动或 DATABASE_URL 未配置），应用启动可能失败"
  print_alembic_failure_hint "${alembic_log}"
  print_warn "Alembic 输出（最近 40 行）："
  tail -n 40 "${alembic_log}"
  rm -f "${alembic_log}"
}
# Start the Vite dev server of app-eval-web in the background and store the PID
# of the wrapping subshell in EVAL_WEB_PID.
# The API key is taken from the INTERNAL_EVAL_API_KEY environment variable or,
# failing that, from the first matching line of ${ROOT_DIR}/.env; it is handed
# to Vite as VITE_EVAL_API_KEY.
# Exits the script with status 1 when the front-end directory, its
# node_modules, npm, or the key is missing.
start_eval_web() {
  print_header "启动 app-eval-web (Vite)"
  if [[ ! -d "${EVAL_WEB_DIR}" ]]; then
    print_err "未找到 ${EVAL_WEB_DIR}"
    exit 1
  fi
  if [[ ! -d "${EVAL_WEB_DIR}/node_modules" ]]; then
    print_err "请先执行: cd app-eval-web && npm install"
    exit 1
  fi
  require_cmd "npm"

  local api_key="${INTERNAL_EVAL_API_KEY:-}"
  local env_file="${ROOT_DIR}/.env"
  if [[ -z "${api_key}" ]] && [[ -f "${env_file}" ]]; then
    # Parsed with builtins on purpose: a grep pipeline that finds nothing would
    # abort the script under `set -e` + `pipefail` before the error message
    # below could be shown.
    local line
    while IFS= read -r line || [[ -n "${line}" ]]; do
      if [[ "${line}" == INTERNAL_EVAL_API_KEY=* ]]; then
        api_key="${line#INTERNAL_EVAL_API_KEY=}"
        break
      fi
    done < "${env_file}"
    # Remove carriage returns (CRLF files), then one pair of surrounding
    # double or single quotes.
    api_key="${api_key//$'\r'/}"
    api_key="${api_key#\"}"
    api_key="${api_key%\"}"
    api_key="${api_key#\'}"
    api_key="${api_key%\'}"
  fi
  if [[ -z "${api_key}" ]]; then
    print_err "无法解析 INTERNAL_EVAL_API_KEY无法为 Vite 注入 VITE_EVAL_API_KEY"
    exit 1
  fi

  local vite_extra=()
  if [[ "${OPEN_EVAL_WEB}" == "1" ]]; then
    vite_extra+=(--open)
  fi

  # VITE_EVAL_API_BASE is deliberately left unset: the front end then goes
  # through the Vite proxy (app-eval-web/vite.config.ts) to
  # :${INTERNAL_EVAL_PORT}, which avoids direct-connection/CORS problems and
  # pointing at the main site by mistake.
  # To talk to a remote API instead, export VITE_EVAL_API_BASE=https://... and
  # run `npm run dev` by hand.
  (
    cd "${EVAL_WEB_DIR}"
    # ${arr[@]+...} expands to nothing for an empty array; a plain
    # "${vite_extra[@]}" is an "unbound variable" error under `set -u` on
    # bash older than 4.4.
    VITE_EVAL_API_KEY="${api_key}" \
      npm run dev -- --host 127.0.0.1 --port "${EVAL_WEB_PORT}" \
      ${vite_extra[@]+"${vite_extra[@]}"}
  ) &
  EVAL_WEB_PID=$!
  print_ok "eval-web 已启动 (PID: ${EVAL_WEB_PID}) → http://127.0.0.1:${EVAL_WEB_PORT}/"
}
# Start the internal-eval Uvicorn server, then (unless SKIP_CELERY=1) a Celery
# worker, then (when START_EVAL_WEB=1) the eval front end, and finally print the
# URLs of what is running.
# Sets API_PID and CELERY_PID. Exits the script with status 1 when the API port
# is already taken or the API does not start listening within 8 seconds.
# Side effect: changes the working directory to ROOT_DIR.
start_services() {
  print_header "启动 Internal Eval API 与 Celery"
  cd "${ROOT_DIR}"

  if is_port_listening "${INTERNAL_EVAL_PORT}"; then
    print_err "端口 ${INTERNAL_EVAL_PORT} 已被占用，无法启动内部评测 Uvicorn。"
    print_err "请先结束占用进程，或设置 INTERNAL_EVAL_PORT 为其他端口"
    exit 1
  fi

  # Same as the main development script: review/production LLM settings are read
  # from .env. The API docs are off by default; enable them locally with
  # `export INTERNAL_EVAL_ENABLE_DOCS=1`.
  local uvicorn_args=(
    app.internal_main:internal_app --reload
    --reload-exclude 'alembic/**'
    --reload-exclude 'alembic.ini'
    --host "${INTERNAL_EVAL_HOST}" --port "${INTERNAL_EVAL_PORT}"
  )
  "${UVICORN_BIN}" "${uvicorn_args[@]}" &
  API_PID=$!

  # 0 = listening, 1 = process exited, anything else = alive but not listening.
  local api_start_status=0
  wait_for_tcp_listener "${API_PID}" "${INTERNAL_EVAL_PORT}" 8 || api_start_status=$?
  if (( api_start_status == 1 )); then
    print_err "Internal Eval API 启动失败，进程已退出；请查看上方 Uvicorn 日志"
    exit 1
  elif (( api_start_status != 0 )); then
    print_err "Internal Eval API 进程仍存活，但端口 ${INTERNAL_EVAL_PORT} 未在预期时间内开始监听"
    exit 1
  fi
  print_ok "Internal Eval API 已启动 (PID: ${API_PID})"

  if [[ "${SKIP_CELERY}" == "1" ]]; then
    print_warn "已跳过 Celery (SKIP_CELERY=1)；实验 run 接口需要 worker 才能执行"
  else
    "${CELERY_BIN}" -A app.tasks.celery_app worker --loglevel=info --pool="${CELERY_POOL}" &
    CELERY_PID=$!
    ensure_background_process_alive "Celery" "${CELERY_PID}"
    print_ok "Celery 已启动 (PID: ${CELERY_PID})"
  fi

  if [[ "${START_EVAL_WEB}" == "1" ]]; then
    start_eval_web
  fi

  local api_base="http://127.0.0.1:${INTERNAL_EVAL_PORT}"
  echo
  echo -e "${GREEN}内部评测环境启动完成${NC}"
  echo "【请用浏览器打开】评测 Web UI: http://127.0.0.1:${EVAL_WEB_PORT}/ /internal 会代理到 API :${INTERNAL_EVAL_PORT}"
  echo "内部评测 API: ${api_base}/health"
  echo "评测 REST 前缀: ${api_base}/internal/api/evaluation"
  if [[ "${INTERNAL_EVAL_ENABLE_DOCS:-}" == "1" ]] || grep -qE '^INTERNAL_EVAL_ENABLE_DOCS=true' "${ROOT_DIR}/.env" 2>/dev/null; then
    echo "API 文档: ${api_base}/docs"
  fi
  echo "说明文档: api/docs/internal-eval.md"
  echo "按 Ctrl+C 停止所有进程"
}
# Entry point: check prerequisites, optionally start the docker
# infrastructure, prepare the environment, run migrations, start the services
# and then block until the started background jobs finish.
# cleanup() is registered for EXIT, INT and TERM before anything is started.
main() {
  print_header "Life Echo 内部回归评测 — 一键启动"
  echo -e "${BLUE}说明：${NC} 不启动主站 APImain:app / 默认 8000仅启动 internal_main:${INTERNAL_EVAL_PORT}）。"
  echo ""

  require_cmd "uv"
  trap cleanup EXIT INT TERM

  if [[ "${SKIP_INFRA}" == "1" ]]; then
    print_warn "已跳过 docker 基础设施 (SKIP_INFRA=1)"
  else
    require_cmd "docker"
    start_infra
    # A database that is slow to come up is only warned about, not fatal.
    wait_postgres_ready || true
  fi

  # Remaining start-up steps, strictly in this order.
  local step
  for step in \
    ensure_venv \
    ensure_dotenv_from_development \
    check_env_file \
    check_internal_eval_key \
    run_migrations \
    start_services; do
    "${step}"
  done

  # Wait on every background job that was actually started.
  local wait_pids=("${API_PID}")
  if [[ "${SKIP_CELERY}" != "1" ]]; then
    wait_pids+=("${CELERY_PID}")
  fi
  if [[ "${START_EVAL_WEB}" == "1" ]] && [[ -n "${EVAL_WEB_PID}" ]]; then
    wait_pids+=("${EVAL_WEB_PID}")
  fi
  wait "${wait_pids[@]}"
}
main "$@"