From 1b5b6ee36d348f3aa5bc2c4ec441e5c955dbb64a Mon Sep 17 00:00:00 2001
From: Kevin
Date: Sun, 22 Mar 2026 19:53:23 +0800
Subject: [PATCH] =?UTF-8?q?=E7=A7=BB=E9=99=A4docker=20workflow=E9=87=8C?=
 =?UTF-8?q?=E7=9A=84=E6=89=8B=E5=8A=A8sql=20=E8=BF=81=E7=A7=BB?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Reviewer notes (this area is not part of the commit message):
* Subject decodes to "Remove the manual SQL migration from the docker workflow".
* Removes the "Backup and run database migrations safely" step, the upload of
  api/migrations/*.sql to migrations.candidate, and the DB_USER / DB_PASSWORD /
  DB_NAME exports fed by *_MIGRATION_DB_* secrets. The promote step's new log
  line says Alembic runs automatically when the container starts.
* Replaces the single "Set up SSH" step (prod-key-or-dev-key expression) with
  separate production / development steps, each preceded by a check that fails
  the job when the corresponding private-key secret is empty.
* "Verify deployment" now polls `docker inspect` up to 24 times, 5 s apart,
  and fails unless the api container reports 'healthy'.
* NOTE(review): when a container has no healthcheck the inspect template prints
  .State.Status (e.g. "running"), which never equals 'healthy', so verification
  would fail after the retries. Confirm the api service defines a healthcheck.
* NOTE(review): the removed step was the only pg_dump backup / automatic
  restore visible in this patch. Confirm backups are handled elsewhere.
* Layout: one diff record per line, reconciled against every hunk header and
  the diffstat. Leading whitespace is reconstructed; if context does not match
  the target file, try `git apply --ignore-whitespace`.

 .github/workflows/docker-build-deploy.yml | 274 +++++----------------
 1 file changed, 65 insertions(+), 209 deletions(-)

diff --git a/.github/workflows/docker-build-deploy.yml b/.github/workflows/docker-build-deploy.yml
index ea0b5dc..e7703ed 100644
--- a/.github/workflows/docker-build-deploy.yml
+++ b/.github/workflows/docker-build-deploy.yml
@@ -1,7 +1,7 @@
 # API Docker:main → Dev 机(Repository secrets: DEV_*),Tag v*.*.* → Prod 机(PROD_*)
 # 在 Repo → Settings → Secrets and variables → Actions 中配置,无需 GitHub Environments。
-# 命名:DEV_SSH_HOST / DEV_SSH_USER / DEV_SSH_PRIVATE_KEY / DEV_SSH_PORT / DEV_DEPLOY_PATH / DEV_MIGRATION_DB_*
-#       PROD_SSH_HOST / PROD_SSH_USER / PROD_SSH_PRIVATE_KEY / PROD_SSH_PORT / PROD_DEPLOY_PATH / PROD_MIGRATION_DB_*
+# 命名:DEV_SSH_HOST / DEV_SSH_USER / DEV_SSH_PRIVATE_KEY / DEV_SSH_PORT / DEV_DEPLOY_PATH
+#       PROD_SSH_HOST / PROD_SSH_USER / PROD_SSH_PRIVATE_KEY / PROD_SSH_PORT / PROD_DEPLOY_PATH
 # 阿里云镜像仍为仓库级:ALIYUN_CR_USERNAME / ALIYUN_CR_PASSWORD
 #
 # 从旧版迁移:若仓库里仍是 SSH_HOST、SSH_PRIVATE_KEY、DEPLOY_PATH 等无前缀名称,
@@ -124,10 +124,38 @@ jobs:
             echo "target=dev" >> "$GITHUB_OUTPUT"
           fi
 
-      - name: Set up SSH
+      - name: Ensure production SSH secret is set
+        if: steps.deploy_target.outputs.target == 'prod'
+        env:
+          PROD_SSH_PRIVATE_KEY: ${{ secrets.PROD_SSH_PRIVATE_KEY }}
+        run: |
+          if [ -z "$PROD_SSH_PRIVATE_KEY" ]; then
+            echo "::error::PROD_SSH_PRIVATE_KEY 未配置或为空,无法部署生产。请在 Repository secrets 中设置 PROD_SSH_*。"
+            exit 1
+          fi
+
+      - name: Ensure development SSH secret is set
+        if: steps.deploy_target.outputs.target != 'prod'
+        env:
+          DEV_SSH_PRIVATE_KEY: ${{ secrets.DEV_SSH_PRIVATE_KEY }}
+        run: |
+          if [ -z "$DEV_SSH_PRIVATE_KEY" ]; then
+            echo "::error::DEV_SSH_PRIVATE_KEY 未配置或为空,无法部署开发机。请在 Repository secrets 中设置 DEV_SSH_*。"
+            exit 1
+          fi
+
+      # 勿用 `prod && PROD_KEY || DEV_KEY`:PROD 为空时会错误回退到 DEV 密钥,导致连生产机报 Permission denied。
+      - name: Set up SSH (production)
+        if: steps.deploy_target.outputs.target == 'prod'
         uses: webfactory/ssh-agent@v0.9.0
         with:
-          ssh-private-key: ${{ steps.deploy_target.outputs.target == 'prod' && secrets.PROD_SSH_PRIVATE_KEY || secrets.DEV_SSH_PRIVATE_KEY }}
+          ssh-private-key: ${{ secrets.PROD_SSH_PRIVATE_KEY }}
+
+      - name: Set up SSH (development)
+        if: steps.deploy_target.outputs.target != 'prod'
+        uses: webfactory/ssh-agent@v0.9.0
+        with:
+          ssh-private-key: ${{ secrets.DEV_SSH_PRIVATE_KEY }}
 
       - name: Export deploy connection env
         run: |
@@ -137,9 +165,6 @@ jobs:
               echo "SSH_USER=${{ secrets.PROD_SSH_USER }}"
               echo "SSH_PORT=${{ secrets.PROD_SSH_PORT || '22' }}"
               echo "COMPOSE_DIR=${{ secrets.PROD_DEPLOY_PATH || '/opt/life-echo' }}"
-              echo "DB_USER=${{ secrets.PROD_MIGRATION_DB_USER || '' }}"
-              echo "DB_PASSWORD=${{ secrets.PROD_MIGRATION_DB_PASSWORD || '' }}"
-              echo "DB_NAME=${{ secrets.PROD_MIGRATION_DB_NAME || '' }}"
             } >> "$GITHUB_ENV"
           else
             {
@@ -147,9 +172,6 @@ jobs:
               echo "SSH_USER=${{ secrets.DEV_SSH_USER }}"
               echo "SSH_PORT=${{ secrets.DEV_SSH_PORT || '22' }}"
               echo "COMPOSE_DIR=${{ secrets.DEV_DEPLOY_PATH || '/opt/life-echo' }}"
-              echo "DB_USER=${{ secrets.DEV_MIGRATION_DB_USER || '' }}"
-              echo "DB_PASSWORD=${{ secrets.DEV_MIGRATION_DB_PASSWORD || '' }}"
-              echo "DB_NAME=${{ secrets.DEV_MIGRATION_DB_NAME || '' }}"
             } >> "$GITHUB_ENV"
           fi
 
@@ -199,15 +221,9 @@ jobs:
             docker network inspect api_life-echo-network >/dev/null 2>&1 || docker network create api_life-echo-network
           "
 
-          echo "上传候选 compose、环境变量与迁移文件..."
+          echo "上传候选 compose 与环境变量..."
           scp -P "$SSH_PORT" ./api/docker-compose.yml "$SSH_USER@$SSH_HOST:$COMPOSE_DIR/api/docker-compose.candidate.yml"
           scp -P "$SSH_PORT" ./api/.env.production "$SSH_USER@$SSH_HOST:$COMPOSE_DIR/api/.env.production.candidate"
-          ssh -p "$SSH_PORT" "$SSH_USER@$SSH_HOST" "
-            set -euo pipefail
-            rm -rf '$COMPOSE_DIR/api/migrations.candidate'
-            mkdir -p '$COMPOSE_DIR/api/migrations.candidate'
-          "
-          scp -P "$SSH_PORT" ./api/migrations/*.sql "$SSH_USER@$SSH_HOST:$COMPOSE_DIR/api/migrations.candidate/"
 
           ssh -p "$SSH_PORT" "$SSH_USER@$SSH_HOST" "
             set -euo pipefail
@@ -219,198 +235,12 @@ jobs:
             rm -f docker-compose.candidate.yml.tmp 2>/dev/null || true
           "
 
-      - name: Backup and run database migrations safely
-        env:
-          IMAGE_TAG: ${{ env.REGISTRY }}/${{ env.REGISTRY_NAMESPACE }}/${{ env.IMAGE_NAME }}:${{ steps.image_tag.outputs.tag }}
-          COMPOSE_FILE: docker-compose.yml
-        run: |
-          set -euo pipefail
-          ssh -p "$SSH_PORT" "$SSH_USER@$SSH_HOST" \
-            COMPOSE_DIR="$COMPOSE_DIR" \
-            COMPOSE_FILE="$COMPOSE_FILE" \
-            IMAGE_TAG="$IMAGE_TAG" \
-            DB_USER="$DB_USER" \
-            DB_PASSWORD="$DB_PASSWORD" \
-            DB_NAME="$DB_NAME" \
-            "bash -s" <<'REMOTE'
-          set -euo pipefail
-
-          CURRENT_COMPOSE="$COMPOSE_DIR/api/$COMPOSE_FILE"
-          CANDIDATE_ENV="$COMPOSE_DIR/api/.env.production.candidate"
-          CANDIDATE_MIGRATIONS="$COMPOSE_DIR/api/migrations.candidate"
-          BACKUP_DIR="$COMPOSE_DIR/api/backups"
-          DB_CONTAINER="life-echo-postgres"
-          API_CONTAINER="life-echo-api-prod"
-          WORKER_CONTAINER="life-echo-celery-worker"
-          NETWORK_NAME="api_life-echo-network"
-          BACKUP_FILE="$BACKUP_DIR/life_echo_$(date +%Y%m%d_%H%M%S).dump"
-          ROLLBACK_REQUIRED=0
-          CURRENT_API_RUNNING=0
-          CURRENT_WORKER_RUNNING=0
-          EFFECTIVE_DB_USER=""
-          EFFECTIVE_DB_PASSWORD=""
-          EFFECTIVE_DB_NAME=""
-          EFFECTIVE_MIGRATION_DATABASE_URL=""
-
-          resolve_db_config() {
-            local database_url=""
-
-            database_url="$(sed -n 's/^DATABASE_URL=//p' "$CANDIDATE_ENV" | head -n 1)"
-            if [ -z "$database_url" ]; then
-              echo "candidate env 中未找到 DATABASE_URL"
-              exit 1
-            fi
-
-            case "$database_url" in
-              \"*\") database_url="${database_url:1:${#database_url}-2}" ;;
-              \'*\') database_url="${database_url:1:${#database_url}-2}" ;;
-            esac
-
-            mapfile -t parsed_db_parts < <(
-              python3 -c 'import sys; from urllib.parse import unquote, urlsplit; parts = urlsplit(sys.argv[1]); print(unquote(parts.username or "")); print(unquote(parts.password or "")); print((parts.path or "/").lstrip("/"))' "$database_url"
-            )
-
-            EFFECTIVE_DB_USER="${DB_USER:-${parsed_db_parts[0]}}"
-            EFFECTIVE_DB_PASSWORD="${DB_PASSWORD:-${parsed_db_parts[1]}}"
-            EFFECTIVE_DB_NAME="${DB_NAME:-${parsed_db_parts[2]}}"
-
-            if [ -z "$EFFECTIVE_DB_USER" ] || [ -z "$EFFECTIVE_DB_NAME" ]; then
-              echo "无法解析有效的数据库用户名或数据库名"
-              exit 1
-            fi
-
-            EFFECTIVE_MIGRATION_DATABASE_URL="$(
-              EFFECTIVE_DB_USER="$EFFECTIVE_DB_USER" \
-              EFFECTIVE_DB_PASSWORD="$EFFECTIVE_DB_PASSWORD" \
-              EFFECTIVE_DB_NAME="$EFFECTIVE_DB_NAME" \
-              DB_CONTAINER="$DB_CONTAINER" \
-              python3 -c 'import os; from urllib.parse import quote; user = quote(os.environ["EFFECTIVE_DB_USER"], safe=""); password = os.environ.get("EFFECTIVE_DB_PASSWORD", ""); database = quote(os.environ["EFFECTIVE_DB_NAME"], safe=""); host = os.environ["DB_CONTAINER"]; auth = (user + ":" + quote(password, safe="") + "@") if password else (user + "@"); print("postgresql://%s%s:5432/%s" % (auth, host, database))'
-            )"
-          }
-
-          wait_for_db() {
-            until docker exec "$DB_CONTAINER" pg_isready -U "$EFFECTIVE_DB_USER" -d "$EFFECTIVE_DB_NAME" >/dev/null 2>&1; do
-              echo "等待数据库就绪..."
-              sleep 2
-            done
-          }
-
-          start_current_data_services() {
-            if [ ! -f "$CURRENT_COMPOSE" ]; then
-              echo "未找到当前线上 compose 文件:$CURRENT_COMPOSE"
-              exit 1
-            fi
-            cd "$COMPOSE_DIR/api"
-            docker-compose -f "$CURRENT_COMPOSE" up -d postgres redis
-          }
-
-          restore_backup() {
-            if [ ! -f "$BACKUP_FILE" ]; then
-              echo "未找到数据库备份文件,无法自动恢复"
-              return 1
-            fi
-
-            if ! docker ps --format '{{.Names}}' | grep -qx "$DB_CONTAINER"; then
-              start_current_data_services
-            fi
-
-            wait_for_db
-            docker exec "$DB_CONTAINER" psql -U "$EFFECTIVE_DB_USER" -d postgres -c \
-              "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = '$EFFECTIVE_DB_NAME' AND pid <> pg_backend_pid();" \
-              >/dev/null 2>&1 || true
-            docker exec "$DB_CONTAINER" dropdb -U "$EFFECTIVE_DB_USER" --if-exists "$EFFECTIVE_DB_NAME" || true
-            docker exec "$DB_CONTAINER" createdb -U "$EFFECTIVE_DB_USER" "$EFFECTIVE_DB_NAME"
-            docker exec -i "$DB_CONTAINER" \
-              pg_restore -U "$EFFECTIVE_DB_USER" -d "$EFFECTIVE_DB_NAME" --clean --if-exists --no-owner --no-privileges < "$BACKUP_FILE"
-          }
-
-          rollback() {
-            exit_code=$?
-            if [ "$ROLLBACK_REQUIRED" -eq 1 ]; then
-              echo "迁移失败,开始恢复数据库并重新拉起旧线上服务..."
-              restore_backup
-              if [ -f "$CURRENT_COMPOSE" ]; then
-                cd "$COMPOSE_DIR/api"
-                docker-compose -f "$CURRENT_COMPOSE" up -d postgres redis
-                docker-compose -f "$CURRENT_COMPOSE" up -d api celery-worker || true
-              fi
-            fi
-            exit "$exit_code"
-          }
-
-          trap rollback ERR
-
-          mkdir -p "$BACKUP_DIR"
-          resolve_db_config
-
-          if docker ps --format '{{.Names}}' | grep -qx "$API_CONTAINER"; then
-            CURRENT_API_RUNNING=1
-          fi
-          if docker ps --format '{{.Names}}' | grep -qx "$WORKER_CONTAINER"; then
-            CURRENT_WORKER_RUNNING=1
-          fi
-
-          if ! docker ps --format '{{.Names}}' | grep -qx "$DB_CONTAINER"; then
-            echo "当前数据库容器未运行,先拉起线上 postgres/redis..."
-            start_current_data_services
-          fi
-
-          wait_for_db
-
-          echo "备份生产数据库到 $BACKUP_FILE"
-          docker exec "$DB_CONTAINER" pg_dump -U "$EFFECTIVE_DB_USER" -d "$EFFECTIVE_DB_NAME" -F c > "$BACKUP_FILE"
-
-          echo "停止线上 API 写入,准备执行迁移..."
-          ROLLBACK_REQUIRED=1
-          cd "$COMPOSE_DIR/api"
-          if [ "$CURRENT_API_RUNNING" -eq 1 ] || [ "$CURRENT_WORKER_RUNNING" -eq 1 ]; then
-            docker-compose -f "$CURRENT_COMPOSE" stop api celery-worker || true
-          fi
-          docker rm -f "$API_CONTAINER" "$WORKER_CONTAINER" 2>/dev/null || true
-
-          wait_for_db
-
-          echo "执行幂等 SQL 迁移..."
-          docker exec -i "$DB_CONTAINER" psql -v ON_ERROR_STOP=1 -U "$EFFECTIVE_DB_USER" -d "$EFFECTIVE_DB_NAME" < "$CANDIDATE_MIGRATIONS/sync_schema_to_models.sql"
-          docker exec -i "$DB_CONTAINER" psql -v ON_ERROR_STOP=1 -U "$EFFECTIVE_DB_USER" -d "$EFFECTIVE_DB_NAME" < "$CANDIDATE_MIGRATIONS/fix_chapter_order_index.sql"
-          docker exec -i "$DB_CONTAINER" psql -v ON_ERROR_STOP=1 -U "$EFFECTIVE_DB_USER" -d "$EFFECTIVE_DB_NAME" < "$CANDIDATE_MIGRATIONS/add_chapter_is_active.sql"
-          docker exec -i "$DB_CONTAINER" psql -v ON_ERROR_STOP=1 -U "$EFFECTIVE_DB_USER" -d "$EFFECTIVE_DB_NAME" < "$CANDIDATE_MIGRATIONS/add_user_profile_fields.sql"
-          docker exec -i "$DB_CONTAINER" psql -v ON_ERROR_STOP=1 -U "$EFFECTIVE_DB_USER" -d "$EFFECTIVE_DB_NAME" < "$CANDIDATE_MIGRATIONS/fix_chapter_order_index_v2.sql"
-
-          echo "执行 chapter_sections 数据迁移..."
-          docker run --rm \
-            --network "$NETWORK_NAME" \
-            --env-file "$CANDIDATE_ENV" \
-            -e MIGRATION_DATABASE_URL="$EFFECTIVE_MIGRATION_DATABASE_URL" \
-            --entrypoint python \
-            "$IMAGE_TAG" -m scripts.run_chapter_sections_migration
-
-          echo "执行 memoir_images 数据迁移..."
-          docker run --rm \
-            --network "$NETWORK_NAME" \
-            --env-file "$CANDIDATE_ENV" \
-            -e MIGRATION_DATABASE_URL="$EFFECTIVE_MIGRATION_DATABASE_URL" \
-            --entrypoint python \
-            "$IMAGE_TAG" -m scripts.run_memoir_images_migration
-
-          echo "执行 chapter_sections.image_id 外键迁移..."
-          docker exec -i "$DB_CONTAINER" psql -v ON_ERROR_STOP=1 -U "$EFFECTIVE_DB_USER" -d "$EFFECTIVE_DB_NAME" < "$CANDIDATE_MIGRATIONS/add_section_image_id_fk.sql"
-
-          echo "验证关键表结构..."
-          docker exec "$DB_CONTAINER" psql -v ON_ERROR_STOP=1 -U "$EFFECTIVE_DB_USER" -d "$EFFECTIVE_DB_NAME" -c \
-            "SELECT to_regclass('public.chapter_sections') AS chapter_sections, to_regclass('public.memoir_images') AS memoir_images;"
-
-          trap - ERR
-          ROLLBACK_REQUIRED=0
-          echo "数据库迁移全部完成"
-          REMOTE
-
       - name: Promote candidate release
         env:
           COMPOSE_FILE: docker-compose.yml
         run: |
           set -euo pipefail
-          echo "迁移成功,切换线上版本..."
+          echo "切换线上版本,容器启动时将自动执行 Alembic..."
           ssh -p "$SSH_PORT" "$SSH_USER@$SSH_HOST" "
             set -euo pipefail
             cd '$COMPOSE_DIR/api'
@@ -422,11 +252,9 @@ jobs:
             fi
             mv 'docker-compose.candidate.yml' '$COMPOSE_FILE'
             mv '.env.production.candidate' '.env.production'
-            rm -rf 'migrations'
-            mv 'migrations.candidate' 'migrations'
             docker-compose -f '$COMPOSE_FILE' up -d --remove-orphans
             echo '等待服务启动...'
-            sleep 15
+            sleep 20
             docker image prune -f || true
             docker-compose -f '$COMPOSE_FILE' ps
           "
@@ -434,5 +262,33 @@ jobs:
       - name: Verify deployment
         run: |
           echo "验证部署状态..."
-          ssh -p "$SSH_PORT" "$SSH_USER@$SSH_HOST" \
-            "cd $COMPOSE_DIR/api && docker-compose ps && docker-compose logs --tail=50 api"
\ No newline at end of file
+          ssh -p "$SSH_PORT" "$SSH_USER@$SSH_HOST" "
+            set -euo pipefail
+            cd '$COMPOSE_DIR/api'
+            docker-compose ps
+
+            API_CID=\$(docker-compose ps -q api)
+            if [ -z \"\$API_CID\" ]; then
+              echo '未找到 api 容器'
+              docker-compose logs --tail=80 api || true
+              exit 1
+            fi
+
+            API_HEALTH=''
+            for _ in \$(seq 1 24); do
+              API_HEALTH=\$(docker inspect -f '{{if .State.Health}}{{.State.Health.Status}}{{else}}{{.State.Status}}{{end}}' \"\$API_CID\")
+              echo \"api health: \$API_HEALTH\"
+              if [ \"\$API_HEALTH\" = 'healthy' ]; then
+                break
+              fi
+              sleep 5
+            done
+
+            if [ \"\$API_HEALTH\" != 'healthy' ]; then
+              echo 'api 容器未在预期时间内变为 healthy'
+              docker-compose logs --tail=80 api || true
+              exit 1
+            fi
+
+            docker-compose logs --tail=50 api
+          "