diff --git a/.github/workflows/docker-build-deploy.yml b/.github/workflows/docker-build-deploy.yml
index 06b1b92..f2182b0 100644
--- a/.github/workflows/docker-build-deploy.yml
+++ b/.github/workflows/docker-build-deploy.yml
@@ -7,6 +7,7 @@
 # 勿把 PROD 私钥与 Staging 混用:staging 只读 SSH_PRIVATE_KEY,prod 只读 PROD_SSH_PRIVATE_KEY。
 #
 # 旧库 pg_dump 一次性迁入当前 schema:见 workflow「Legacy DB migrate (one-shot)」(手动运行,非每次构建)。
+# Alembic 规范与 staging 上错误 0020 revision 修复:api/docs/alembic-migrations.md
 #
 # 发布策略:
 # - merge / push 到 main:构建并部署到 Staging 机;使用仓库中的 api/.env.staging,上传后切换为运行时 .env
diff --git a/api/README.md b/api/README.md
index 4e572f7..fa26cd1 100644
--- a/api/README.md
+++ b/api/README.md
@@ -118,13 +118,18 @@ DEEPSEEK_MODEL=deepseek-chat
 
 ### 3. 数据库迁移
 
-数据库 schema 由 Alembic 管理。**`app/main.py` 启动时会在线程中执行 `alembic upgrade head`**(见 `app/core/alembic_startup.py`):对连接类错误自动重试;生产环境建议设置 `ALEMBIC_STARTUP_FAIL_FAST=true`,迁移失败则进程退出。仍可手动执行:
+数据库 schema 由 Alembic 管理。**`app/main.py` 启动时会在线程中执行 `alembic upgrade head`**(见 `app/core/alembic_startup.py`):对连接类错误自动重试;生产环境建议设置 `ALEMBIC_STARTUP_FAIL_FAST=true`,迁移失败则进程退出。
+
+规范与跨环境排障见 **[docs/alembic-migrations.md](docs/alembic-migrations.md)**(禁止改已部署 revision id、老库用显式 `0019` 补列等)。
 
 ```bash
 cd api
 uv run alembic upgrade head
+uv run pytest tests/test_alembic_migration_policy.py -q
 ```
 
+若库中仍为已撤回的 `0020_*` revision,部署前先执行 `uv run python scripts/repair_alembic_version_after_withdrawn_0020.py`(见上文文档)。
+
 ## 快速启动
 
 ### 本地开发
diff --git a/api/alembic/script_helpers.py b/api/alembic/script_helpers.py
new file mode 100644
index 0000000..15f791b
--- /dev/null
+++ b/api/alembic/script_helpers.py
@@ -0,0 +1,41 @@
+"""Shared helpers for Alembic migrations (only for scripts under versions/)."""
+
+from __future__ import annotations
+
+import sqlalchemy as sa
+from alembic import op
+
+
+def table_exists(table_name: str) -> bool:
+    """Return True when ``table_name`` exists on the migration connection."""
+    inspector = sa.inspect(op.get_bind())
+    return table_name in inspector.get_table_names()
+
+
+def has_column(table_name: str, column_name: str) -> bool:
+    """Return True when the table exists and already has ``column_name``."""
+    inspector = sa.inspect(op.get_bind())
+    if table_name not in inspector.get_table_names():
+        return False
+    return any(col["name"] == column_name for col in inspector.get_columns(table_name))
+
+
+def add_column_if_missing(table_name: str, column: sa.Column) -> bool:
+    """Add ``column`` unless it is already there; return whether it was added.
+
+    A missing table is skipped as well: ``has_column`` reports False for it,
+    and issuing ADD COLUMN against a table that does not exist would abort
+    the migration.
+    """
+    if not table_exists(table_name) or has_column(table_name, column.name):
+        return False
+    op.add_column(table_name, column)
+    return True
+
+
+def drop_column_if_exists(table_name: str, column_name: str) -> bool:
+    """Drop ``column_name`` when present; return whether a drop was issued."""
+    if not has_column(table_name, column_name):
+        return False
+    op.drop_column(table_name, column_name)
+    return True
diff --git a/api/alembic/versions/0001_initial_schema.py b/api/alembic/versions/0001_initial_schema.py
index c3a032d..eb3e258 100644
--- a/api/alembic/versions/0001_initial_schema.py
+++ b/api/alembic/versions/0001_initial_schema.py
@@ -6,9 +6,10 @@ chapters 含 story 物化字段:markdown_compose_dirty、markdown_composed_at
 (阅读片段快照,随 ORM 一并 create_all)。
 
 已并入原 0002(stories-first:无 chapter_sections / memoir_images.section_id)与原 0003(segments.tts_audio_urls)
-的语义:新库仅由当前 ORM 建表即可,无需后续 ALTER。
-conversation_messages(会话轮次 durable log)由 app.features.conversation.models.ConversationMessage 一并 create_all。
-segments.audio_duration_seconds(语音条时长秒数,历史 API / Redis 回填)由 ORM 一并 create_all,无独立迁移。
+的语义:仅对「全新库」由 create_all 建出;**已有库不会 ALTER**。
+老库缺列见 0019_align_legacy_schema(segments.audio_duration_seconds、tts_audio_urls,
+conversations.deleted_at,conversation_messages.tts_audio_urls 等)。
+conversation_messages 表由 ORM 在 0001 create_all 中创建(新库);老库若缺表须单独处理。
 story_image_intents 无 source_span(主图回填在正文末尾,意图仅存 caption / prompt_brief 等)。
 
 新库 / 删库重来:`alembic upgrade head`。
diff --git a/api/alembic/versions/0019_align_legacy_schema.py b/api/alembic/versions/0019_align_legacy_schema.py
new file mode 100644
index 0000000..13dcdef
--- /dev/null
+++ b/api/alembic/versions/0019_align_legacy_schema.py
@@ -0,0 +1,71 @@
+"""Add legacy-database columns that 0001's create_all never backfills.
+
+0001 does not ALTER tables that already exist. The columns below are treated
+as defaults for new databases, but databases that predate the squash
+(staging / production) need this migration to add them explicitly. The list
+is explicit on purpose: never introspect the whole ORM here.
+
+When adding ORM fields: never change a deployed revision id; append the
+column here (before this revision is deployed) or create a new 0020_* migration.
+
+Revision ID: 0019_align_legacy_schema
+Revises: 0018_users_language_preference
+"""
+
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+
+# versions/ -> alembic/
+_ALEMBIC_DIR = Path(__file__).resolve().parents[1]
+if str(_ALEMBIC_DIR) not in sys.path:
+    sys.path.insert(0, str(_ALEMBIC_DIR))
+
+from script_helpers import add_column_if_missing, table_exists
+
+revision: str = "0019_align_legacy_schema"
+down_revision: Union[str, None] = "0018_users_language_preference"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+# (table, column): only columns that 0002-0018 never added and legacy databases often lack.
+_LEGACY_COLUMNS: tuple[tuple[str, sa.Column], ...] = (
+    (
+        "segments",
+        sa.Column("audio_duration_seconds", sa.Integer(), nullable=True),
+    ),
+    (
+        "conversations",
+        sa.Column("deleted_at", sa.DateTime(timezone=True), nullable=True),
+    ),
+    (
+        "segments",
+        sa.Column("tts_audio_urls", sa.JSON(), nullable=True),
+    ),
+    (
+        "conversation_messages",
+        sa.Column("tts_audio_urls", sa.JSON(), nullable=True),
+    ),
+)
+
+
+def upgrade() -> None:
+    """Add every listed column that is missing; tables that do not exist are skipped."""
+    for table_name, column in _LEGACY_COLUMNS:
+        if not table_exists(table_name):
+            continue
+        add_column_if_missing(table_name, column)
+
+
+def downgrade() -> None:
+    """Intentionally a no-op.
+
+    On databases created by 0001's create_all these columns predate this
+    revision, so the migration cannot tell which ones it added. Dropping them
+    would destroy data (for example soft-delete timestamps) that it never
+    created; leaving nullable columns in place is harmless.
+    """
diff --git a/api/app/core/alembic_revision_repair.py b/api/app/core/alembic_revision_repair.py
new file mode 100644
index 0000000..69a3e87
--- /dev/null
+++ b/api/app/core/alembic_revision_repair.py
@@ -0,0 +1,33 @@
+"""Repair alembic_version rows written by withdrawn migrations (one-off cross-environment fix)."""
+
+from __future__ import annotations
+
+from sqlalchemy import Connection, text
+
+_WITHDRAWN_0020_REVISIONS = frozenset(
+    {
+        "0020_add_tts_audio_urls_column",
+        "0020_backfill_missing_schema",
+        "0020_backfill_all_missing_columns",
+    }
+)
+_REPAIR_TARGET_REVISION = "0018_users_language_preference"
+
+
+def try_repair_withdrawn_0020_revision(conn: Connection) -> bool:
+    """Re-stamp a withdrawn 0020_* revision to 0018 so 0019 runs again.
+
+    Returns True when the UPDATE was issued (the caller must commit) and
+    False when nothing needs repair, including a fresh database that has
+    no alembic_version table yet.
+    """
+    from sqlalchemy import inspect  # function-scope import keeps this fix self-contained
+
+    if not inspect(conn).has_table("alembic_version"):
+        return False
+    row = conn.execute(text("SELECT version_num FROM alembic_version")).fetchone()
+    if row is None or row[0] not in _WITHDRAWN_0020_REVISIONS:
+        return False
+    stamp = text("UPDATE alembic_version SET version_num = :target")
+    conn.execute(stamp, {"target": _REPAIR_TARGET_REVISION})
+    return True
diff --git a/api/app/core/alembic_startup.py b/api/app/core/alembic_startup.py
index 08fbe94..ee0fd3c 100644
--- a/api/app/core/alembic_startup.py
+++ b/api/app/core/alembic_startup.py
@@ -21,10 +21,35 @@ logger = get_logger(__name__)
 _API_DIR: Final[Path] = Path(__file__).resolve().parent.parent.parent
 
 
+def _repair_withdrawn_revision_stamp_if_needed() -> None:
+    """Re-stamp a withdrawn 0020_* alembic_version before running upgrade.
+
+    Uses a short-lived engine that is disposed afterwards, so the check does
+    not leave pooled connections behind each time the upgrade is attempted.
+    """
+    from sqlalchemy import create_engine
+
+    from app.core.alembic_revision_repair import try_repair_withdrawn_0020_revision
+    from app.core.db import _database_url
+
+    engine = create_engine(_database_url())
+    try:
+        with engine.connect() as conn:
+            if try_repair_withdrawn_0020_revision(conn):
+                conn.commit()
+                logger.warning(
+                    "alembic_version 曾为已撤回的 0020_*,已回退到 0018;"
+                    "将重新执行 0019_align_legacy_schema"
+                )
+    finally:
+        engine.dispose()
+
+
 def _run_alembic_upgrade_once() -> None:
     from alembic.command import upgrade
     from alembic.config import Config
 
+    _repair_withdrawn_revision_stamp_if_needed()
     cfg = Config(str(_API_DIR / "alembic.ini"))
     upgrade(cfg, "head")
 
diff --git a/api/development.sh b/api/development.sh
index e99bee7..d64c9c9 100755
--- a/api/development.sh
+++ b/api/development.sh
@@ -304,6 +304,9 @@ print_alembic_failure_hint() {
     print_warn "PostgreSQL 用户名或密码不匹配;请核对 .env.development 中的 DATABASE_URL"
   elif [[ "${log_output}" == *"No such file or directory"* ]] || [[ "${log_output}" == *"can't open file"* ]]; then
     print_warn "Alembic 依赖的文件或工作目录可能不正确;请确认在 api/ 目录运行脚本"
+  elif [[ "${log_output}" == *"Can't locate revision"* ]]; then
+    print_warn "alembic_version 与当前迁移链不一致(常见于已撤回的 0020_*)"
+    print_warn "将自动修复:重启 development.sh,或执行: uv run python scripts/repair_alembic_version_after_withdrawn_0020.py && uv run alembic upgrade head"
   fi
 }
 
@@ -413,6 +416,8 @@ run_migrations() {
   local log_file
   log_file="$(mktemp -t life-echo-alembic.XXXXXX.log)"
 
+  uv run python scripts/repair_alembic_version_after_withdrawn_0020.py >>"${log_file}" 2>&1 || true
+
-  if uv run alembic upgrade head >"${log_file}" 2>&1; then
+  if uv run alembic upgrade head >>"${log_file}" 2>&1; then
     print_ok "Alembic 迁移已就绪"
     rm -f "${log_file}"
diff --git a/api/docs/alembic-migrations.md b/api/docs/alembic-migrations.md
new file mode 100644
index 0000000..8ad452c
--- /dev/null
+++ b/api/docs/alembic-migrations.md
@@ -0,0 +1,61 @@
+# Alembic 迁移规范
+
+## 设计原则
+
+1. **`0001` 仅服务全新库**:`create_all` 不会对已有表 `ALTER`。老库缺列必须用 **`0019` 及之后的显式迁移** 补齐。
+2. **一条迁移 = 明确 DDL**:禁止用「全库 ORM 内省」替代清单;可审查、可测、各环境行为一致。
+3. **已部署的 `revision` 永不改名**:文件名可描述用途,但 `revision = "..."` 一旦上线不得修改。
+4. **新 ORM 字段同 PR 必须带迁移**:改 `models.py` 须同时 `alembic/versions/` 新增或扩展迁移(见 CI 测试)。
+
+## 当前链
+
+```
+0001_initial → … → 0018_users_language_preference → 0019_align_legacy_schema (head)
+```
+
+`0019_align_legacy_schema` 维护「老库相对 0001 squash 常缺」的列清单,见 `alembic/versions/0019_align_legacy_schema.py` 内 `_LEGACY_COLUMNS`。
+
+## 新增字段流程
+
+1. 修改 ORM `models.py`
+2. 若表在 squash 前就存在且 0002–0018 未覆盖该列 → 在 `0019` 的 `_LEGACY_COLUMNS` 追加一行(未部署前),或新建 `0020_<描述>.py`(并同步更新 `tests/test_alembic_migration_policy.py` 中的 head 断言)
+3. 本地:`uv run alembic upgrade head`
+4. 提交前:`uv run pytest tests/test_alembic_migration_policy.py`
+
+## Staging / Production 发布
+
+| 环境 | 代码来源 | 说明 |
+|------|----------|------|
+| Staging | `main` push | 迁移在容器启动时 `upgrade head` |
+| Production | tag `v*.*.*` | 发 tag 前确认 staging 已成功跑过同一迁移链 |
+
+生产推荐:`ALEMBIC_STARTUP_FAIL_FAST=true`,迁移失败则进程退出。
+
+## 撤回错误迁移 0019/0020 后的库修复
+
+若数据库 `alembic_version` 仍为已撤回的 revision(`0020_add_tts_audio_urls_column`、`0020_backfill_missing_schema` 或 `0020_backfill_all_missing_columns`),应用启动时会自动回退到 0018(见 `app/core/alembic_startup.py`);也可在**部署新代码前**手动执行:
+
+```bash
+cd api
+# 仅当 version_num 为已撤回的 0020_* 时
+uv run python scripts/repair_alembic_version_after_withdrawn_0020.py
+```
+
+注意:脚本与启动时的自动修复只处理上述三个 `0020_*`;若库停在已撤回的 `0019_backfill_missing_columns`,请按下方 SQL 手动回退。
+
+或手动 SQL(回退到 0018 即可,缺列由 0019 幂等补齐,无需预先确认列是否存在):
+
+```sql
+SELECT version_num FROM alembic_version;
+-- 若为上述任一已撤回的 revision:
+UPDATE alembic_version SET version_num = '0018_users_language_preference';
+```
+
+然后部署;`0019_align_legacy_schema` 会幂等补齐缺列。
+
+## 禁止事项
+
+- 修改已上线 migration 的 `revision` / `down_revision`
+- 用 `0020_backfill_*` 式全表内省替代显式列清单
+- 只在 ORM 加字段、指望 `create_all` 更新线上老库
+- Staging 与 Production 长期使用不同迁移链(应同一 commit / 同一 tag 构建)
diff --git a/api/scripts/repair_alembic_version_after_withdrawn_0020.py b/api/scripts/repair_alembic_version_after_withdrawn_0020.py
new file mode 100644
index 0000000..bcc9d45
--- /dev/null
+++ b/api/scripts/repair_alembic_version_after_withdrawn_0020.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python3
+"""将误写入的 0020_* revision 回退到 0018,以便重新执行 0019_align_legacy_schema。
+
+用法(在 api 目录,DATABASE_URL 已配置):
+
+    uv run python scripts/repair_alembic_version_after_withdrawn_0020.py
+    uv run python scripts/repair_alembic_version_after_withdrawn_0020.py --dry-run
+
+启动时也会自动执行同等逻辑(见 app.core.alembic_startup)。
+"""
+
+from __future__ import annotations
+
+import argparse
+import sys
+from pathlib import Path
+
+_API_DIR = Path(__file__).resolve().parents[1]
+if str(_API_DIR) not in sys.path:
+    sys.path.insert(0, str(_API_DIR))
+
+from sqlalchemy import create_engine, inspect, text
+
+from app.core.alembic_revision_repair import (
+    _WITHDRAWN_0020_REVISIONS,
+    try_repair_withdrawn_0020_revision,
+)
+from app.core.db import _database_url
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "--dry-run",
+        action="store_true",
+        help="只检查,不写库",
+    )
+    args = parser.parse_args()
+
+    engine = create_engine(_database_url())
+    with engine.connect() as conn:
+        # A fresh database has no alembic_version table; querying it would raise.
+        if not inspect(conn).has_table("alembic_version"):
+            print("alembic_version 表不存在(全新库),无需修复")
+            return 0
+
+        row = conn.execute(text("SELECT version_num FROM alembic_version")).fetchone()
+        current = row[0] if row else None
+        print(f"当前 version_num: {current!r}")
+
+        if current is None:
+            print("alembic_version 表为空,无需修复")
+            return 0
+
+        if args.dry_run:
+            if current in _WITHDRAWN_0020_REVISIONS:
+                print("[dry-run] 将回退到 0018_users_language_preference")
+            else:
+                print("无需修复")
+            return 0
+
+        if try_repair_withdrawn_0020_revision(conn):
+            conn.commit()
+            print("已回退到 0018_users_language_preference;请执行: uv run alembic upgrade head")
+        else:
+            print("无需修复")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/api/tests/test_alembic_migration_policy.py b/api/tests/test_alembic_migration_policy.py
new file mode 100644
index 0000000..6be073f
--- /dev/null
+++ b/api/tests/test_alembic_migration_policy.py
@@ -0,0 +1,95 @@
+"""Static checks of the Alembic revision chain and project migration policy (no live Postgres needed)."""
+
+from __future__ import annotations
+
+import re
+from pathlib import Path
+
+import pytest
+from alembic.config import Config
+from alembic.script import ScriptDirectory
+
+_API_DIR = Path(__file__).resolve().parent.parent
+_VERSIONS_DIR = _API_DIR / "alembic" / "versions"
+
+# Legacy columns that 0019 must list explicitly (tied to the ORM and past incidents).
+_REQUIRED_LEGACY_COLUMNS = frozenset(
+    {
+        ("segments", "audio_duration_seconds"),
+        ("conversations", "deleted_at"),
+        ("segments", "tts_audio_urls"),
+        ("conversation_messages", "tts_audio_urls"),
+    }
+)
+
+_FORBIDDEN_WITHDRAWN_REVISIONS = frozenset(
+    {
+        "0020_add_tts_audio_urls_column",
+        "0020_backfill_missing_schema",
+        "0020_backfill_all_missing_columns",
+        "0019_backfill_missing_columns",
+    }
+)
+
+
+def _script_dir() -> ScriptDirectory:
+    """Load the Alembic script directory configured by api/alembic.ini."""
+    cfg = Config(str(_API_DIR / "alembic.ini"))
+    return ScriptDirectory.from_config(cfg)
+
+
+def test_single_alembic_head() -> None:
+    heads = _script_dir().get_heads()
+    assert heads == ["0019_align_legacy_schema"], f"unexpected heads: {heads}"
+
+
+def test_no_withdrawn_revision_ids_in_tree() -> None:
+    present = {rev.revision for rev in _script_dir().walk_revisions()}
+    leftover = present & _FORBIDDEN_WITHDRAWN_REVISIONS
+    assert not leftover, f"withdrawn revision still in tree: {sorted(leftover)}"
+
+
+def test_no_withdrawn_migration_files() -> None:
+    # Check every withdrawn id, not only two hand-picked file names.
+    stems = {p.stem for p in _VERSIONS_DIR.glob("*.py")}
+    leftover = stems & _FORBIDDEN_WITHDRAWN_REVISIONS
+    assert not leftover, f"withdrawn migration files present: {sorted(leftover)}"
+
+
+def test_0019_align_legacy_schema_covers_required_columns() -> None:
+    path = _VERSIONS_DIR / "0019_align_legacy_schema.py"
+    src = path.read_text(encoding="utf-8")
+    assert 'revision: str = "0019_align_legacy_schema"' in src
+    assert "Base.metadata" not in src, "0019 must not introspect full ORM metadata"
+    assert "sorted_tables" not in src
+
+    # Match each table name together with the sa.Column declared right after
+    # it; checking the two strings independently would let a column listed
+    # for one table satisfy the requirement for another table.
+    pair = re.compile(r'"(\w+)",\s*sa\.Column\(\s*"(\w+)"')
+    found = set(pair.findall(src))
+
+    missing = _REQUIRED_LEGACY_COLUMNS - found
+    assert not missing, f"0019 missing explicit legacy columns: {missing}"
+
+
+def test_all_revisions_have_unique_ids() -> None:
+    ids = [rev.revision for rev in _script_dir().walk_revisions()]
+    assert len(ids) == len(set(ids)), "duplicate revision ids"
+
+
+def test_revision_chain_reaches_0019_from_0018() -> None:
+    script = _script_dir()
+    rev = script.get_revision("0019_align_legacy_schema")
+    assert rev is not None
+    assert rev.down_revision == "0018_users_language_preference"
+
+
+def test_no_autogenerate_introspection_backfill_pattern() -> None:
+    """Forbid migrations that loop over every ORM table, whatever the loop variable is named."""
+    pattern = re.compile(r"for\s+\w+\s+in\s+Base\.metadata\.sorted_tables")
+    for path in _VERSIONS_DIR.glob("*.py"):
+        text = path.read_text(encoding="utf-8")
+        assert not pattern.search(text), (
+            f"{path.name} uses full-ORM introspection backfill; use explicit column list"
+        )