Files
life-echo/.github/workflows/docker-build-deploy.yml
Kevin 309a051038 feat: 回忆录证据血缘与内部评测可追溯,顺带对齐本地评测台与 CI
数据库与模型:新增多版迁移(章节证据快照、对话血缘、记忆事实/时间线 lineage 等),把「成稿 ↔ 对话/记忆」的溯源信息落到表结构里。
业务链路:会话与 WS、回忆录/故事流水线、记忆写入与 enrichment 等跟着接上线索与快照;新增章节证据快照与评测侧 EvalTraceService 等模块,方便组评审用的证据包。
内部评测:自动化 run 与手工 memoir 评审共用可追溯证据;rubric/ judge 相关脚本与文档有配套调整。
app-eval-web:Memoir/实验详情里能展开看证据摘要与 evidence_trace(含对话轮次 id);Vite 代理与 development.sh 注入的 API 端口与当前默认内部评测端口一致,避免改端口后页面连错服务。
工程杂项:GitHub Actions / 仓库说明有更新;各适配器与支付/配额/plan 等多处为小改动或跟随主改动的收尾;新增/扩充了?
2026-04-08 15:37:09 +08:00

354 lines
14 KiB
YAML
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# API Dockermain → Staging 机Repository secrets: STAGING_*Tag v*.*.* → Prod 机PROD_*
# 在 Repo → Settings → Secrets and variables → Actions 中配置,无需 GitHub Environments。
# 命名STAGING_SSH_HOST / STAGING_SSH_USER / STAGING_SSH_PRIVATE_KEY / STAGING_SSH_PORT / STAGING_DEPLOY_PATH
# PROD_SSH_HOST / PROD_SSH_USER / PROD_SSH_PRIVATE_KEY / PROD_SSH_PORT / PROD_DEPLOY_PATH
# 阿里云镜像仍为仓库级ALIYUN_CR_USERNAME / ALIYUN_CR_PASSWORD
#
# 从旧版迁移:若仓库里仍是 SSH_HOST、SSH_PRIVATE_KEY、DEPLOY_PATH 等无前缀名称,
# 请把「预发机」对应值迁移为 STAGING_*,「新生产机」填 PROD_*,并删除旧的无前缀 Secret。
#
# 旧库 pg_dump 一次性迁入当前 schema见 workflow「Legacy DB migrate (one-shot)」(手动运行,非每次构建)。
#
# 发布策略:
# - merge / push 到 main构建并部署到 Staging 机;使用仓库中的 api/.env.staging上传后切换为运行时 .env
# - 手动创建并推送 tag vMAJOR.MINOR.PATCH构建并部署到 Production使用仓库中的 api/.env.production上传后切换为运行时 .env
#
# 注意paths 过滤在 tag push 时按「被指向的 commit」判断若该 commit 未改 api/ 与本 workflow不会触发。
# 此时可用 workflow_dispatch 选择对应 tag/ref 手动部署。
name: Docker Build and Deploy
on:
push:
branches:
- main
tags:
- 'v*.*.*'
paths:
- 'api/**'
- '.github/workflows/**'
workflow_dispatch:
inputs:
branch:
description: '部署 ref分支名或 tag如 main / v1.0.0);留空则使用当前运行所选 ref'
required: false
type: string
default: ''
concurrency:
group: docker-api-${{ github.ref }}
cancel-in-progress: false
env:
IMAGE_NAME: lifecho-api
REGISTRY: crpi-u2903xccyzd6nqnc.cn-shanghai.personal.cr.aliyuncs.com
REGISTRY_NAMESPACE: huaga
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
jobs:
test:
name: API tests
runs-on: ubuntu-latest
permissions:
contents: read
steps:
- uses: actions/checkout@v5
- name: Install uv
uses: astral-sh/setup-uv@v5
with:
version: "0.7.3"
- name: Sync deps and run pytest
working-directory: api
run: |
uv sync --dev
uv run pytest --tb=short -q
build-and-push:
name: Build and Push Docker Image
needs: test
runs-on: ubuntu-latest
permissions:
contents: read
steps:
- name: Checkout code
uses: actions/checkout@v5
with:
ref: ${{ github.event.inputs.branch || github.ref }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Log in to Alibaba Cloud Container Registry
env:
REGISTRY: ${{ env.REGISTRY }}
USERNAME: ${{ secrets.ALIYUN_CR_USERNAME }}
PASSWORD: ${{ secrets.ALIYUN_CR_PASSWORD }}
run: |
echo "正在登录到阿里云容器镜像服务..."
echo "Registry: $REGISTRY"
echo "Username: $USERNAME"
echo "Password length: ${#PASSWORD}"
# 使用 printf 确保密码正确传递(包括特殊字符)
printf '%s\n' "$PASSWORD" | docker login "$REGISTRY" --username="$USERNAME" --password-stdin
echo "✅ 登录成功!"
- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY }}/${{ env.REGISTRY_NAMESPACE }}/${{ env.IMAGE_NAME }}
tags: |
type=ref,event=branch
type=ref,event=pr
type=semver,pattern={{version}}
type=semver,pattern={{major}}.{{minor}}
type=sha,prefix=sha-
type=raw,value=latest,enable={{is_default_branch}}
- name: Build and push Docker image
uses: docker/build-push-action@v6
with:
context: ./api
file: ./api/Dockerfile
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=gha
cache-to: type=gha,mode=max
deploy:
name: Deploy to Remote Server
runs-on: ubuntu-latest
needs: build-and-push
if: github.event_name != 'pull_request'
steps:
- name: Checkout code
uses: actions/checkout@v5
with:
ref: ${{ github.event.inputs.branch || github.ref }}
- name: Determine deploy target
id: deploy_target
run: |
if [ -n "${{ github.event.inputs.branch }}" ]; then
REF_NAME="${{ github.event.inputs.branch }}"
else
REF_NAME="${{ github.ref_name }}"
fi
if [[ "$REF_NAME" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
echo "target=prod" >> "$GITHUB_OUTPUT"
else
echo "target=staging" >> "$GITHUB_OUTPUT"
fi
- name: Ensure production SSH secret is set
if: steps.deploy_target.outputs.target == 'prod'
env:
PROD_SSH_PRIVATE_KEY: ${{ secrets.PROD_SSH_PRIVATE_KEY }}
run: |
if [ -z "$PROD_SSH_PRIVATE_KEY" ]; then
echo "::error::PROD_SSH_PRIVATE_KEY 未配置或为空,无法部署生产。请在 Repository secrets 中设置 PROD_SSH_*。"
exit 1
fi
- name: Ensure staging SSH secret is set
if: steps.deploy_target.outputs.target != 'prod'
env:
STAGING_SSH_PRIVATE_KEY: ${{ secrets.STAGING_SSH_PRIVATE_KEY }}
run: |
if [ -z "$STAGING_SSH_PRIVATE_KEY" ]; then
echo "::error::STAGING_SSH_PRIVATE_KEY 未配置或为空,无法部署 staging。请在 Repository secrets 中设置 STAGING_SSH_*。"
exit 1
fi
# 勿用 `prod && PROD_KEY || STAGING_KEY`PROD 为空时会错误回退到 staging 密钥,导致连生产机报 Permission denied。
- name: Set up SSH (production)
if: steps.deploy_target.outputs.target == 'prod'
uses: webfactory/ssh-agent@v0.9.1
with:
ssh-private-key: ${{ secrets.PROD_SSH_PRIVATE_KEY }}
- name: Set up SSH (staging)
if: steps.deploy_target.outputs.target != 'prod'
uses: webfactory/ssh-agent@v0.9.1
with:
ssh-private-key: ${{ secrets.STAGING_SSH_PRIVATE_KEY }}
- name: Export deploy connection env
run: |
if [ "${{ steps.deploy_target.outputs.target }}" = "prod" ]; then
{
echo "SSH_HOST=${{ secrets.PROD_SSH_HOST }}"
echo "SSH_USER=${{ secrets.PROD_SSH_USER }}"
echo "SSH_PORT=${{ secrets.PROD_SSH_PORT || '22' }}"
echo "COMPOSE_DIR=${{ secrets.PROD_DEPLOY_PATH || '/opt/life-echo' }}"
} >> "$GITHUB_ENV"
else
{
echo "SSH_HOST=${{ secrets.STAGING_SSH_HOST }}"
echo "SSH_USER=${{ secrets.STAGING_SSH_USER }}"
echo "SSH_PORT=${{ secrets.STAGING_SSH_PORT || '22' }}"
echo "COMPOSE_DIR=${{ secrets.STAGING_DEPLOY_PATH || '/opt/life-echo' }}"
} >> "$GITHUB_ENV"
fi
- name: Add server to known hosts
run: |
mkdir -p ~/.ssh
ssh-keyscan -H -p "${SSH_PORT:-22}" "${SSH_HOST}" >> ~/.ssh/known_hosts
- name: Determine image tag
id: image_tag
run: |
# 与 docker/metadata-action 的 semver 标签一致v1.2.3 → 镜像 :1.2.3
if [ -n "${{ github.event.inputs.branch }}" ]; then
REF_NAME="${{ github.event.inputs.branch }}"
else
REF_NAME="${{ github.ref_name }}"
fi
echo "deploy_ref=$REF_NAME" >> "$GITHUB_OUTPUT"
if [[ "$REF_NAME" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
echo "tag=${REF_NAME#v}" >> "$GITHUB_OUTPUT"
elif [ "$REF_NAME" == "main" ] || [ "$REF_NAME" == "master" ]; then
echo "tag=latest" >> "$GITHUB_OUTPUT"
else
BRANCH_TAG=$(echo "$REF_NAME" | sed 's/\//-/g')
echo "tag=$BRANCH_TAG" >> "$GITHUB_OUTPUT"
fi
- name: Prepare remote candidate release
env:
IMAGE_TAG: ${{ env.REGISTRY }}/${{ env.REGISTRY_NAMESPACE }}/${{ env.IMAGE_NAME }}:${{ steps.image_tag.outputs.tag }}
REGISTRY: ${{ env.REGISTRY }}
ALIYUN_CR_USERNAME: ${{ secrets.ALIYUN_CR_USERNAME }}
ALIYUN_CR_PASSWORD: ${{ secrets.ALIYUN_CR_PASSWORD }}
run: |
set -euo pipefail
echo "准备候选版本..."
echo "镜像标签: $IMAGE_TAG"
echo "部署目录: $COMPOSE_DIR/api"
echo "$ALIYUN_CR_PASSWORD" | ssh -p "$SSH_PORT" "$SSH_USER@$SSH_HOST" \
"docker login $REGISTRY --username=$ALIYUN_CR_USERNAME --password-stdin"
ssh -p "$SSH_PORT" "$SSH_USER@$SSH_HOST" "
set -euo pipefail
mkdir -p '$COMPOSE_DIR/api'
mkdir -p '$COMPOSE_DIR/api/backups'
docker network inspect api_life-echo-network >/dev/null 2>&1 || docker network create api_life-echo-network
"
if [ "${{ steps.deploy_target.outputs.target }}" = "prod" ]; then
ENV_SRC="api/.env.production"
else
ENV_SRC="api/.env.staging"
fi
if [ ! -f "$ENV_SRC" ]; then
echo "::error::缺少 $ENV_SRC无法部署。"
exit 1
fi
if grep -Eq '=(your_|replace_with_|\\.{3}$)' "$ENV_SRC"; then
echo "::error::$ENV_SRC 仍包含占位符值,请先完善环境文件。"
exit 1
fi
if grep -Eq '^DATABASE_URL=.*@localhost:' "$ENV_SRC" || grep -Eq '^REDIS_URL=redis://localhost' "$ENV_SRC"; then
echo "::error::$ENV_SRC 包含 localhost 数据库或 Redis 地址,容器内将无法连接。"
exit 1
fi
if grep -Eq '^DATABASE_URL=.*@postgresql:' "$ENV_SRC"; then
echo "::error::$ENV_SRC 仍引用过期主机名 postgresql当前 compose 服务名应为 postgres。"
exit 1
fi
echo "上传候选 compose 与环境文件..."
scp -P "$SSH_PORT" ./api/docker-compose.yml "$SSH_USER@$SSH_HOST:$COMPOSE_DIR/api/docker-compose.candidate.yml"
scp -P "$SSH_PORT" "$ENV_SRC" "$SSH_USER@$SSH_HOST:$COMPOSE_DIR/api/.env.candidate"
ssh -p "$SSH_PORT" "$SSH_USER@$SSH_HOST" "
set -euo pipefail
cd '$COMPOSE_DIR/api'
echo '拉取候选镜像: $IMAGE_TAG'
docker pull '$IMAGE_TAG'
sed -i.tmp 's|image:.*lifecho-api.*|image: $IMAGE_TAG|g' docker-compose.candidate.yml
sed -i.tmp 's|image:.*life-echo-api.*|image: $IMAGE_TAG|g' docker-compose.candidate.yml
rm -f docker-compose.candidate.yml.tmp 2>/dev/null || true
"
- name: Promote candidate release
env:
COMPOSE_FILE: docker-compose.yml
run: |
set -euo pipefail
echo "切换线上版本,容器启动时将自动执行 Alembic..."
ssh -p "$SSH_PORT" "$SSH_USER@$SSH_HOST" "
set -euo pipefail
cd '$COMPOSE_DIR/api'
if [ -f '$COMPOSE_FILE' ]; then
cp '$COMPOSE_FILE' '${COMPOSE_FILE}.predeploy'
fi
if [ -f '.env.production' ]; then
cp '.env.production' '.env.production.predeploy'
fi
if [ -f '.env' ]; then
cp '.env' '.env.predeploy'
fi
mv 'docker-compose.candidate.yml' '$COMPOSE_FILE'
mv '.env.candidate' '.env'
if ! docker compose -f '$COMPOSE_FILE' up -d --remove-orphans; then
echo 'docker compose up 失败,输出 api 状态与最近日志...'
docker compose -f '$COMPOSE_FILE' ps || true
API_CID=\$(docker compose -f '$COMPOSE_FILE' ps -q api || true)
if [ -n \"\$API_CID\" ]; then
docker inspect -f '{{json .State}}' \"\$API_CID\" || true
fi
docker compose -f '$COMPOSE_FILE' logs --tail=120 api || true
docker compose -f '$COMPOSE_FILE' logs --tail=80 celery-worker || true
exit 1
fi
echo '等待服务启动...'
sleep 20
docker image prune -f || true
docker compose -f '$COMPOSE_FILE' ps
"
- name: Verify deployment
run: |
echo "验证部署状态..."
ssh -p "$SSH_PORT" "$SSH_USER@$SSH_HOST" "
set -euo pipefail
cd '$COMPOSE_DIR/api'
docker compose ps
API_CID=\$(docker compose ps -q api)
if [ -z \"\$API_CID\" ]; then
echo '未找到 api 容器'
docker compose logs --tail=80 api || true
exit 1
fi
API_HEALTH=''
for _ in \$(seq 1 24); do
API_HEALTH=\$(docker inspect -f '{{if .State.Health}}{{.State.Health.Status}}{{else}}{{.State.Status}}{{end}}' \"\$API_CID\")
echo \"api health: \$API_HEALTH\"
if [ \"\$API_HEALTH\" = 'healthy' ]; then
break
fi
sleep 5
done
if [ \"\$API_HEALTH\" != 'healthy' ]; then
echo 'api 容器未在预期时间内变为 healthy'
docker inspect -f '{{json .State}}' \"\$API_CID\" || true
docker compose logs --tail=80 api || true
exit 1
fi
docker compose logs --tail=50 api
"