diff --git a/.github/workflows/docker-build-deploy.yml b/.github/workflows/docker-build-deploy.yml
index bbb044f..58cf92d 100644
--- a/.github/workflows/docker-build-deploy.yml
+++ b/.github/workflows/docker-build-deploy.yml
@@ -96,136 +96,309 @@ jobs:
       - name: Add server to known hosts
         run: |
           mkdir -p ~/.ssh
-          ssh-keyscan -H -p ${{ secrets.SSH_PORT || 22 }} ${{ secrets.SSH_HOST }} >> ~/.ssh/known_hosts
+          ssh-keyscan -H -p "${{ secrets.SSH_PORT || 22 }}" "${{ secrets.SSH_HOST }}" >> ~/.ssh/known_hosts
 
       - name: Determine image tag
         id: image_tag
         run: |
           DEPLOY_BRANCH="${{ github.event.inputs.branch || github.ref_name }}"
-          echo "deploy_branch=$DEPLOY_BRANCH" >> $GITHUB_OUTPUT
+          echo "deploy_branch=$DEPLOY_BRANCH" >> "$GITHUB_OUTPUT"
           if [ "$DEPLOY_BRANCH" == "main" ] || [ "$DEPLOY_BRANCH" == "master" ]; then
-            echo "tag=latest" >> $GITHUB_OUTPUT
+            echo "tag=latest" >> "$GITHUB_OUTPUT"
           else
             BRANCH_TAG=$(echo "$DEPLOY_BRANCH" | sed 's/\//-/g')
-            echo "tag=$BRANCH_TAG" >> $GITHUB_OUTPUT
+            echo "tag=$BRANCH_TAG" >> "$GITHUB_OUTPUT"
           fi
 
-      - name: Deploy to remote server
+      - name: Prepare remote candidate release
         env:
           SSH_USER: ${{ secrets.SSH_USER }}
           SSH_HOST: ${{ secrets.SSH_HOST }}
           SSH_PORT: ${{ secrets.SSH_PORT || 22 }}
           IMAGE_TAG: ${{ env.REGISTRY }}/${{ env.REGISTRY_NAMESPACE }}/${{ env.IMAGE_NAME }}:${{ steps.image_tag.outputs.tag }}
-          COMPOSE_FILE: docker-compose.yml
           COMPOSE_DIR: ${{ secrets.DEPLOY_PATH || '/opt/life-echo' }}
           REGISTRY: ${{ env.REGISTRY }}
           ALIYUN_CR_USERNAME: ${{ secrets.ALIYUN_CR_USERNAME }}
           ALIYUN_CR_PASSWORD: ${{ secrets.ALIYUN_CR_PASSWORD }}
         run: |
-          echo "开始部署到远程服务器..."
+          set -euo pipefail
+          echo "准备候选版本..."
           echo "镜像标签: $IMAGE_TAG"
           echo "部署目录: $COMPOSE_DIR/api"
-
-          # 登录到阿里云容器仓库
-          echo "$ALIYUN_CR_PASSWORD" | ssh -p $SSH_PORT $SSH_USER@$SSH_HOST \
+
+          echo "$ALIYUN_CR_PASSWORD" | ssh -p "$SSH_PORT" "$SSH_USER@$SSH_HOST" \
             "docker login $REGISTRY --username=$ALIYUN_CR_USERNAME --password-stdin"
-
-          # 创建部署目录(如果不存在)
-          ssh -p $SSH_PORT $SSH_USER@$SSH_HOST \
-            "mkdir -p $COMPOSE_DIR/api"
-
-          # 第一步:强制停止并删除所有旧容器
-          echo "停止并删除旧容器..."
-          ssh -p $SSH_PORT $SSH_USER@$SSH_HOST "
-            # 先尝试使用 docker-compose down
-            cd $COMPOSE_DIR/api 2>/dev/null && docker-compose -f '$COMPOSE_FILE' down --remove-orphans 2>/dev/null || true
-
-            # 强制停止并删除所有 life-echo 相关容器(按名称匹配)
-            echo '强制清理所有 life-echo 容器...'
-            docker ps -a --filter 'name=life-echo' --format '{{.ID}}' | xargs -r docker rm -f 2>/dev/null || true
-
-            # 再次确保指定容器被删除
-            echo '确保指定容器被删除...'
-            docker rm -f life-echo-api-prod life-echo-celery-worker life-echo-postgres life-echo-redis life-echo-celery-beat life-echo-flower 2>/dev/null || true
-
-            # 等待容器完全停止
-            sleep 3
-
-            # 验证容器已删除
-            echo '验证容器状态...'
-            docker ps -a --filter 'name=life-echo' || true
-          "
-
-          # 第二步:先删除远程旧配置,再复制仓库中的 docker-compose.yml(强制覆盖)
-          echo "删除远程旧 docker-compose 配置以确保使用仓库版本..."
-          ssh -p $SSH_PORT $SSH_USER@$SSH_HOST \
-            "rm -f $COMPOSE_DIR/api/$COMPOSE_FILE $COMPOSE_DIR/api/${COMPOSE_FILE}.bak 2>/dev/null || true"
-          echo "复制配置文件(覆盖远程 docker-compose.yml)..."
-          scp -P $SSH_PORT ./api/$COMPOSE_FILE $SSH_USER@$SSH_HOST:$COMPOSE_DIR/api/
-
-          # 复制 .env.production 到远程服务器(重命名为 .env.prod)
-          echo "复制 .env.production 文件..."
-          scp -P $SSH_PORT ./api/.env.production $SSH_USER@$SSH_HOST:$COMPOSE_DIR/api/.env.prod
-          scp -P $SSH_PORT ./api/.env.production $SSH_USER@$SSH_HOST:$COMPOSE_DIR/api/.env.production
-
-          # 第三步:准备镜像和配置
-          ssh -p $SSH_PORT $SSH_USER@$SSH_HOST "
-            set -e
-            cd $COMPOSE_DIR/api
-
-            echo '拉取最新镜像: $IMAGE_TAG'
-            docker pull '$IMAGE_TAG' || true
-
-            echo '备份并更新 docker-compose.yml 中的镜像标签...'
-            cp '$COMPOSE_FILE' '${COMPOSE_FILE}.bak'
-
-            sed -i.tmp 's|image:.*lifecho-api.*|image: $IMAGE_TAG|g' '$COMPOSE_FILE'
-            sed -i.tmp 's|image:.*life-echo-api.*|image: $IMAGE_TAG|g' '$COMPOSE_FILE'
-            rm -f '${COMPOSE_FILE}.tmp' 2>/dev/null || true
-
-            echo '先只启动数据库(确保迁移不受 API 连接干扰)...'
-            docker-compose -f '$COMPOSE_FILE' up -d postgres redis
-
-            echo '等待数据库就绪...'
-            sleep 10
+
+          ssh -p "$SSH_PORT" "$SSH_USER@$SSH_HOST" "
+            set -euo pipefail
+            mkdir -p '$COMPOSE_DIR/api'
+            mkdir -p '$COMPOSE_DIR/api/backups'
+            docker network inspect api_life-echo-network >/dev/null 2>&1 || docker network create api_life-echo-network
           "
 
-      - name: Run database migration
+          echo "上传候选 compose、环境变量与迁移文件..."
+          scp -P "$SSH_PORT" ./api/docker-compose.yml "$SSH_USER@$SSH_HOST:$COMPOSE_DIR/api/docker-compose.candidate.yml"
+          scp -P "$SSH_PORT" ./api/.env.production "$SSH_USER@$SSH_HOST:$COMPOSE_DIR/api/.env.production.candidate"
+          ssh -p "$SSH_PORT" "$SSH_USER@$SSH_HOST" "
+            set -euo pipefail
+            rm -rf '$COMPOSE_DIR/api/migrations.candidate'
+            mkdir -p '$COMPOSE_DIR/api/migrations.candidate'
+          "
+          scp -P "$SSH_PORT" ./api/migrations/*.sql "$SSH_USER@$SSH_HOST:$COMPOSE_DIR/api/migrations.candidate/"
+
+          ssh -p "$SSH_PORT" "$SSH_USER@$SSH_HOST" "
+            set -euo pipefail
+            cd '$COMPOSE_DIR/api'
+            echo '拉取候选镜像: $IMAGE_TAG'
+            docker pull '$IMAGE_TAG'
+            sed -i.tmp 's|image:.*lifecho-api.*|image: $IMAGE_TAG|g' docker-compose.candidate.yml
+            sed -i.tmp 's|image:.*life-echo-api.*|image: $IMAGE_TAG|g' docker-compose.candidate.yml
+            rm -f docker-compose.candidate.yml.tmp 2>/dev/null || true
+          "
+
+      - name: Backup and run database migrations safely
         env:
           SSH_USER: ${{ secrets.SSH_USER }}
           SSH_HOST: ${{ secrets.SSH_HOST }}
           SSH_PORT: ${{ secrets.SSH_PORT || 22 }}
-          DB_USER: ${{ secrets.MIGRATION_DB_USER || 'postgres' }}
-          DB_NAME: ${{ secrets.MIGRATION_DB_NAME || 'life_echo' }}
+          IMAGE_TAG: ${{ env.REGISTRY }}/${{ env.REGISTRY_NAMESPACE }}/${{ env.IMAGE_NAME }}:${{ steps.image_tag.outputs.tag }}
+          COMPOSE_DIR: ${{ secrets.DEPLOY_PATH || '/opt/life-echo' }}
+          COMPOSE_FILE: docker-compose.yml
+          DB_USER: ${{ secrets.MIGRATION_DB_USER || '' }}
+          DB_PASSWORD: ${{ secrets.MIGRATION_DB_PASSWORD || '' }}
+          DB_NAME: ${{ secrets.MIGRATION_DB_NAME || '' }}
         run: |
-          echo "执行数据库结构同步迁移(幂等)..."
-          ssh -p $SSH_PORT $SSH_USER@$SSH_HOST \
-            "docker exec -i life-echo-postgres psql -U $DB_USER -d $DB_NAME" \
-            < api/migrations/sync_schema_to_models.sql
-
-          echo "修复章节 order_index..."
-          ssh -p $SSH_PORT $SSH_USER@$SSH_HOST \
-            "docker exec -i life-echo-postgres psql -U $DB_USER -d $DB_NAME" \
-            < api/migrations/fix_chapter_order_index.sql
-
-          echo "添加章节 is_active 字段..."
-          ssh -p $SSH_PORT $SSH_USER@$SSH_HOST \
-            "docker exec -i life-echo-postgres psql -U $DB_USER -d $DB_NAME" \
-            < api/migrations/add_chapter_is_active.sql
-
-          echo "添加用户基础资料字段..."
-          ssh -p $SSH_PORT $SSH_USER@$SSH_HOST \
-            "docker exec -i life-echo-postgres psql -U $DB_USER -d $DB_NAME" \
-            < api/migrations/add_user_profile_fields.sql
-
-          echo "修正章节排序索引 v2..."
-          ssh -p $SSH_PORT $SSH_USER@$SSH_HOST \
-            "docker exec -i life-echo-postgres psql -U $DB_USER -d $DB_NAME" \
-            < api/migrations/fix_chapter_order_index_v2.sql
-
-          echo "数据库迁移完成"
+          set -euo pipefail
+          # ssh joins its arguments into one command string that the remote shell
+          # parses again, so each value is shell-quoted with printf %q; otherwise
+          # spaces or metacharacters (e.g. in DB_PASSWORD) would break the command.
+          ssh -p "$SSH_PORT" "$SSH_USER@$SSH_HOST" \
+            "COMPOSE_DIR=$(printf '%q' "$COMPOSE_DIR")" \
+            "COMPOSE_FILE=$(printf '%q' "$COMPOSE_FILE")" \
+            "IMAGE_TAG=$(printf '%q' "$IMAGE_TAG")" \
+            "DB_USER=$(printf '%q' "$DB_USER")" \
+            "DB_PASSWORD=$(printf '%q' "$DB_PASSWORD")" \
+            "DB_NAME=$(printf '%q' "$DB_NAME")" \
+            "bash -s" <<'REMOTE'
+          set -euo pipefail
 
-      - name: Start all services
+          CURRENT_COMPOSE="$COMPOSE_DIR/api/$COMPOSE_FILE"
+          CANDIDATE_ENV="$COMPOSE_DIR/api/.env.production.candidate"
+          CANDIDATE_MIGRATIONS="$COMPOSE_DIR/api/migrations.candidate"
+          BACKUP_DIR="$COMPOSE_DIR/api/backups"
+          DB_CONTAINER="life-echo-postgres"
+          API_CONTAINER="life-echo-api-prod"
+          WORKER_CONTAINER="life-echo-celery-worker"
+          NETWORK_NAME="api_life-echo-network"
+          BACKUP_FILE="$BACKUP_DIR/life_echo_$(date +%Y%m%d_%H%M%S).dump"
+          ROLLBACK_REQUIRED=0
+          CURRENT_API_RUNNING=0
+          CURRENT_WORKER_RUNNING=0
+          EFFECTIVE_DB_USER=""
+          EFFECTIVE_DB_PASSWORD=""
+          EFFECTIVE_DB_NAME=""
+          EFFECTIVE_MIGRATION_DATABASE_URL=""
+
+          # Resolve the effective DB user/password/name: explicit DB_* values win,
+          # otherwise use the parts of DATABASE_URL parsed from the candidate env
+          # file. Also builds the URL handed to the data-migration containers.
+          resolve_db_config() {
+            mapfile -t parsed_db_parts < <(
+              CANDIDATE_ENV_PATH="$CANDIDATE_ENV" python3 - <<'PY'
+          import os
+          from pathlib import Path
+          from urllib.parse import unquote, urlsplit
+
+          env_path = Path(os.environ["CANDIDATE_ENV_PATH"])
+          database_url = None
+          for raw_line in env_path.read_text(encoding="utf-8").splitlines():
+              line = raw_line.strip()
+              if not line or line.startswith("#"):
+                  continue
+              if line.startswith("DATABASE_URL="):
+                  value = line.split("=", 1)[1].strip()
+                  if value[:1] == value[-1:] and value[:1] in {'"', "'"}:
+                      value = value[1:-1]
+                  database_url = value
+                  break
+
+          if not database_url:
+              raise SystemExit("DATABASE_URL not found in candidate env")
+
+          parts = urlsplit(database_url)
+          print(unquote(parts.username or ""))
+          print(unquote(parts.password or ""))
+          print((parts.path or "/").lstrip("/"))
+          PY
+            )
+
+            EFFECTIVE_DB_USER="${DB_USER:-${parsed_db_parts[0]}}"
+            EFFECTIVE_DB_PASSWORD="${DB_PASSWORD:-${parsed_db_parts[1]}}"
+            EFFECTIVE_DB_NAME="${DB_NAME:-${parsed_db_parts[2]}}"
+
+            if [ -z "$EFFECTIVE_DB_USER" ] || [ -z "$EFFECTIVE_DB_NAME" ]; then
+              echo "无法解析有效的数据库用户名或数据库名"
+              exit 1
+            fi
+
+            EFFECTIVE_MIGRATION_DATABASE_URL="$(
+              EFFECTIVE_DB_USER="$EFFECTIVE_DB_USER" \
+              EFFECTIVE_DB_PASSWORD="$EFFECTIVE_DB_PASSWORD" \
+              EFFECTIVE_DB_NAME="$EFFECTIVE_DB_NAME" \
+              DB_CONTAINER="$DB_CONTAINER" \
+              python3 - <<'PY'
+          import os
+          from urllib.parse import quote
+
+          user = quote(os.environ["EFFECTIVE_DB_USER"], safe="")
+          password = os.environ.get("EFFECTIVE_DB_PASSWORD", "")
+          database = quote(os.environ["EFFECTIVE_DB_NAME"], safe="")
+          host = os.environ["DB_CONTAINER"]
+
+          if password:
+              auth = f"{user}:{quote(password, safe='')}@"
+          else:
+              auth = f"{user}@"
+
+          print(f"postgresql://{auth}{host}:5432/{database}")
+          PY
+            )"
+          }
+
+          # Poll pg_isready every 2 seconds; give up after 60 attempts (about two
+          # minutes) so an unreachable database cannot hang the job indefinitely.
+          wait_for_db() {
+            local attempts=0
+            until docker exec "$DB_CONTAINER" pg_isready -U "$EFFECTIVE_DB_USER" -d "$EFFECTIVE_DB_NAME" >/dev/null 2>&1; do
+              attempts=$((attempts + 1))
+              if [ "$attempts" -ge 60 ]; then
+                echo "数据库在约 120 秒内仍未就绪" >&2
+                return 1
+              fi
+              echo "等待数据库就绪..."
+              sleep 2
+            done
+          }
+
+          # Bring up postgres and redis using the compose file that is currently live.
+          start_current_data_services() {
+            if [ ! -f "$CURRENT_COMPOSE" ]; then
+              echo "未找到当前线上 compose 文件:$CURRENT_COMPOSE"
+              exit 1
+            fi
+            cd "$COMPOSE_DIR/api"
+            docker-compose -f "$CURRENT_COMPOSE" up -d postgres redis
+          }
+
+          # Rebuild the database from BACKUP_FILE: terminate open sessions, drop and
+          # recreate the database, then load the dump with pg_restore.
+          restore_backup() {
+            if [ ! -f "$BACKUP_FILE" ]; then
+              echo "未找到数据库备份文件,无法自动恢复"
+              return 1
+            fi
+
+            if ! docker ps --format '{{.Names}}' | grep -qx "$DB_CONTAINER"; then
+              start_current_data_services
+            fi
+
+            wait_for_db
+            docker exec "$DB_CONTAINER" psql -U "$EFFECTIVE_DB_USER" -d postgres -c \
+              "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = '$EFFECTIVE_DB_NAME' AND pid <> pg_backend_pid();" \
+              >/dev/null 2>&1 || true
+            docker exec "$DB_CONTAINER" dropdb -U "$EFFECTIVE_DB_USER" --if-exists "$EFFECTIVE_DB_NAME" || true
+            docker exec "$DB_CONTAINER" createdb -U "$EFFECTIVE_DB_USER" "$EFFECTIVE_DB_NAME"
+            docker exec -i "$DB_CONTAINER" \
+              pg_restore -U "$EFFECTIVE_DB_USER" -d "$EFFECTIVE_DB_NAME" --clean --if-exists --no-owner --no-privileges < "$BACKUP_FILE"
+          }
+
+          # ERR trap handler. Once ROLLBACK_REQUIRED is 1 it restores the backup and
+          # restarts the services from the current compose file, then exits with the
+          # status of the command that failed.
+          rollback() {
+            exit_code=$?
+            if [ "$ROLLBACK_REQUIRED" -eq 1 ]; then
+              echo "迁移失败,开始恢复数据库并重新拉起旧线上服务..."
+              restore_backup
+              if [ -f "$CURRENT_COMPOSE" ]; then
+                cd "$COMPOSE_DIR/api"
+                docker-compose -f "$CURRENT_COMPOSE" up -d postgres redis
+                docker-compose -f "$CURRENT_COMPOSE" up -d api celery-worker || true
+              fi
+            fi
+            exit "$exit_code"
+          }
+
+          trap rollback ERR
+
+          mkdir -p "$BACKUP_DIR"
+          resolve_db_config
+
+          if docker ps --format '{{.Names}}' | grep -qx "$API_CONTAINER"; then
+            CURRENT_API_RUNNING=1
+          fi
+          if docker ps --format '{{.Names}}' | grep -qx "$WORKER_CONTAINER"; then
+            CURRENT_WORKER_RUNNING=1
+          fi
+
+          if ! docker ps --format '{{.Names}}' | grep -qx "$DB_CONTAINER"; then
+            echo "当前数据库容器未运行,先拉起线上 postgres/redis..."
+            start_current_data_services
+          fi
+
+          wait_for_db
+
+          echo "备份生产数据库到 $BACKUP_FILE"
+          docker exec "$DB_CONTAINER" pg_dump -U "$EFFECTIVE_DB_USER" -d "$EFFECTIVE_DB_NAME" -F c > "$BACKUP_FILE"
+
+          echo "停止线上 API 写入,准备执行迁移..."
+          ROLLBACK_REQUIRED=1
+          cd "$COMPOSE_DIR/api"
+          if [ "$CURRENT_API_RUNNING" -eq 1 ] || [ "$CURRENT_WORKER_RUNNING" -eq 1 ]; then
+            docker-compose -f "$CURRENT_COMPOSE" stop api celery-worker || true
+          fi
+          docker rm -f "$API_CONTAINER" "$WORKER_CONTAINER" 2>/dev/null || true
+
+          wait_for_db
+
+          echo "执行幂等 SQL 迁移..."
+          docker exec -i "$DB_CONTAINER" psql -v ON_ERROR_STOP=1 -U "$EFFECTIVE_DB_USER" -d "$EFFECTIVE_DB_NAME" < "$CANDIDATE_MIGRATIONS/sync_schema_to_models.sql"
+          docker exec -i "$DB_CONTAINER" psql -v ON_ERROR_STOP=1 -U "$EFFECTIVE_DB_USER" -d "$EFFECTIVE_DB_NAME" < "$CANDIDATE_MIGRATIONS/fix_chapter_order_index.sql"
+          docker exec -i "$DB_CONTAINER" psql -v ON_ERROR_STOP=1 -U "$EFFECTIVE_DB_USER" -d "$EFFECTIVE_DB_NAME" < "$CANDIDATE_MIGRATIONS/add_chapter_is_active.sql"
+          docker exec -i "$DB_CONTAINER" psql -v ON_ERROR_STOP=1 -U "$EFFECTIVE_DB_USER" -d "$EFFECTIVE_DB_NAME" < "$CANDIDATE_MIGRATIONS/add_user_profile_fields.sql"
+          docker exec -i "$DB_CONTAINER" psql -v ON_ERROR_STOP=1 -U "$EFFECTIVE_DB_USER" -d "$EFFECTIVE_DB_NAME" < "$CANDIDATE_MIGRATIONS/fix_chapter_order_index_v2.sql"
+
+          echo "执行 chapter_sections 数据迁移..."
+          docker run --rm \
+            --network "$NETWORK_NAME" \
+            --env-file "$CANDIDATE_ENV" \
+            -e MIGRATION_DATABASE_URL="$EFFECTIVE_MIGRATION_DATABASE_URL" \
+            --entrypoint python \
+            "$IMAGE_TAG" -m scripts.run_chapter_sections_migration
+
+          echo "执行 memoir_images 数据迁移..."
+          docker run --rm \
+            --network "$NETWORK_NAME" \
+            --env-file "$CANDIDATE_ENV" \
+            -e MIGRATION_DATABASE_URL="$EFFECTIVE_MIGRATION_DATABASE_URL" \
+            --entrypoint python \
+            "$IMAGE_TAG" -m scripts.run_memoir_images_migration
+
+          echo "执行 chapter_sections.image_id 外键迁移..."
+          docker exec -i "$DB_CONTAINER" psql -v ON_ERROR_STOP=1 -U "$EFFECTIVE_DB_USER" -d "$EFFECTIVE_DB_NAME" < "$CANDIDATE_MIGRATIONS/add_section_image_id_fk.sql"
+
+          echo "验证关键表结构..."
+          # to_regclass() yields NULL for a missing table without raising, so a plain
+          # SELECT always succeeds; raise explicitly so a missing table fails the step.
+          docker exec "$DB_CONTAINER" psql -v ON_ERROR_STOP=1 -U "$EFFECTIVE_DB_USER" -d "$EFFECTIVE_DB_NAME" -c \
+            "DO \$\$ BEGIN IF to_regclass('public.chapter_sections') IS NULL OR to_regclass('public.memoir_images') IS NULL THEN RAISE EXCEPTION 'required table missing after migration'; END IF; END \$\$;"
+
+          trap - ERR
+          ROLLBACK_REQUIRED=0
+          echo "数据库迁移全部完成"
+          REMOTE
+
+      - name: Promote candidate release
         env:
           SSH_USER: ${{ secrets.SSH_USER }}
           SSH_HOST: ${{ secrets.SSH_HOST }}
@@ -233,19 +406,25 @@ jobs:
           COMPOSE_DIR: ${{ secrets.DEPLOY_PATH || '/opt/life-echo' }}
           COMPOSE_FILE: docker-compose.yml
         run: |
-          echo "迁移完成,启动全部服务..."
-          ssh -p $SSH_PORT $SSH_USER@$SSH_HOST "
-            set -e
-            cd $COMPOSE_DIR/api
-            docker-compose -f '$COMPOSE_FILE' up -d
-
+          set -euo pipefail
+          echo "迁移成功,切换线上版本..."
+          ssh -p "$SSH_PORT" "$SSH_USER@$SSH_HOST" "
+            set -euo pipefail
+            cd '$COMPOSE_DIR/api'
+            if [ -f '$COMPOSE_FILE' ]; then
+              cp '$COMPOSE_FILE' '${COMPOSE_FILE}.predeploy'
+            fi
+            if [ -f '.env.production' ]; then
+              cp '.env.production' '.env.production.predeploy'
+            fi
+            mv 'docker-compose.candidate.yml' '$COMPOSE_FILE'
+            mv '.env.production.candidate' '.env.production'
+            rm -rf 'migrations'
+            mv 'migrations.candidate' 'migrations'
+            docker-compose -f '$COMPOSE_FILE' up -d --remove-orphans
             echo '等待服务启动...'
             sleep 15
-
-            echo '清理旧镜像...'
             docker image prune -f || true
-
-            echo '部署完成!'
             docker-compose -f '$COMPOSE_FILE' ps
           "
 
@@ -257,5 +436,5 @@ jobs:
           COMPOSE_DIR: ${{ secrets.DEPLOY_PATH || '/opt/life-echo' }}
         run: |
           echo "验证部署状态..."
-          ssh -p $SSH_PORT $SSH_USER@$SSH_HOST \
-            "cd $COMPOSE_DIR/api && docker-compose ps && docker-compose logs --tail=50 api"
\ No newline at end of file
+          ssh -p "$SSH_PORT" "$SSH_USER@$SSH_HOST" \
+            "cd '$COMPOSE_DIR/api' && docker-compose ps && docker-compose logs --tail=50 api"
\ No newline at end of file