From 8bbe6367ae5f2a608efa48e6c91e35c1cb508a47 Mon Sep 17 00:00:00 2001 From: penghanyuan Date: Thu, 14 May 2026 17:23:31 +0200 Subject: [PATCH] fix(ci): retry SSH setup steps in remote candidate preparation Retry remote docker login, bootstrap SSH commands, and scp uploads to handle transient connection timeout and banner exchange failures in GitHub runner environments. Co-authored-by: Cursor --- .github/workflows/docker-build-deploy.yml | 56 +++++++++++++++++++---- 1 file changed, 46 insertions(+), 10 deletions(-) diff --git a/.github/workflows/docker-build-deploy.yml b/.github/workflows/docker-build-deploy.yml index 2135efd..16c2628 100644 --- a/.github/workflows/docker-build-deploy.yml +++ b/.github/workflows/docker-build-deploy.yml @@ -264,15 +264,39 @@ jobs: echo "镜像标签: $IMAGE_TAG" echo "部署目录: $COMPOSE_DIR/api" - echo "$ALIYUN_CR_PASSWORD" | ssh ${SSH_COMMON_OPTS:-} -p "$SSH_PORT" "$SSH_USER@$SSH_HOST" \ - "docker login $REGISTRY --username=$ALIYUN_CR_USERNAME --password-stdin" + LOGIN_OK=0 + for i in 1 2 3; do + echo "远端 docker login 尝试 ${i}/3..." + if echo "$ALIYUN_CR_PASSWORD" | ssh ${SSH_COMMON_OPTS:-} -p "$SSH_PORT" "$SSH_USER@$SSH_HOST" \ + "docker login $REGISTRY --username=$ALIYUN_CR_USERNAME --password-stdin"; then + LOGIN_OK=1 + break + fi + sleep 3 + done + if [ "$LOGIN_OK" -ne 1 ]; then + echo "::error::远端 docker login 连续 3 次失败。" + exit 1 + fi - ssh ${SSH_COMMON_OPTS:-} -p "$SSH_PORT" "$SSH_USER@$SSH_HOST" " - set -euo pipefail - mkdir -p '$COMPOSE_DIR/api' - mkdir -p '$COMPOSE_DIR/api/backups' - docker network inspect api_life-echo-network >/dev/null 2>&1 || docker network create api_life-echo-network - " + BOOTSTRAP_OK=0 + for i in 1 2 3; do + echo "远端目录与网络初始化尝试 ${i}/3..." + if ssh ${SSH_COMMON_OPTS:-} -p "$SSH_PORT" "$SSH_USER@$SSH_HOST" " + set -euo pipefail + mkdir -p '$COMPOSE_DIR/api' + mkdir -p '$COMPOSE_DIR/api/backups' + docker network inspect api_life-echo-network >/dev/null 2>&1 || docker network create api_life-echo-network + "; then + BOOTSTRAP_OK=1 + break + fi + sleep 3 + done + if [ "$BOOTSTRAP_OK" -ne 1 ]; then + echo "::error::远端目录与网络初始化连续 3 次失败。" + exit 1 + fi if [ "${{ needs.resolve-deploy-target.outputs.target }}" = "prod" ]; then ENV_SRC="api/.env.production" @@ -298,8 +322,20 @@ jobs: fi echo "上传候选 compose 与环境文件..." - scp ${SSH_COMMON_OPTS:-} -P "$SSH_PORT" ./api/docker-compose.yml "$SSH_USER@$SSH_HOST:$COMPOSE_DIR/api/docker-compose.candidate.yml" - scp ${SSH_COMMON_OPTS:-} -P "$SSH_PORT" "$ENV_SRC" "$SSH_USER@$SSH_HOST:$COMPOSE_DIR/api/.env.candidate" + SCP_OK=0 + for i in 1 2 3; do + echo "上传候选文件尝试 ${i}/3..." + if scp ${SSH_COMMON_OPTS:-} -P "$SSH_PORT" ./api/docker-compose.yml "$SSH_USER@$SSH_HOST:$COMPOSE_DIR/api/docker-compose.candidate.yml" \ + && scp ${SSH_COMMON_OPTS:-} -P "$SSH_PORT" "$ENV_SRC" "$SSH_USER@$SSH_HOST:$COMPOSE_DIR/api/.env.candidate"; then + SCP_OK=1 + break + fi + sleep 3 + done + if [ "$SCP_OK" -ne 1 ]; then + echo "::error::上传候选文件连续 3 次失败。" + exit 1 + fi PULL_OK=0 for i in 1 2 3; do