Merge remote-tracking branch 'origin/development'

This commit is contained in:
Kevin
2026-05-19 16:42:22 +08:00
11 changed files with 384 additions and 4 deletions

View File

@@ -118,13 +118,18 @@ DEEPSEEK_MODEL=deepseek-chat
### 3. 数据库迁移
数据库 schema 由 Alembic 管理。**`app/main.py` 启动时会在线程中执行 `alembic upgrade head`**(见 `app/core/alembic_startup.py`):对连接类错误自动重试;生产环境建议设置 `ALEMBIC_STARTUP_FAIL_FAST=true`,迁移失败则进程退出。仍可手动执行:
数据库 schema 由 Alembic 管理。**`app/main.py` 启动时会在线程中执行 `alembic upgrade head`**(见 `app/core/alembic_startup.py`):对连接类错误自动重试;生产环境建议设置 `ALEMBIC_STARTUP_FAIL_FAST=true`,迁移失败则进程退出。
规范与跨环境排障见 **[docs/alembic-migrations.md](docs/alembic-migrations.md)**(禁止改已部署 revision id、老库用显式 `0019` 补列等)。
```bash
cd api
uv run alembic upgrade head
uv run pytest tests/test_alembic_migration_policy.py -q
```
若库中仍为已撤回的 `0020_*` revision,部署前先执行 `uv run python scripts/repair_alembic_version_after_withdrawn_0020.py`(见上文文档)。
## 快速启动
### 本地开发

View File

@@ -0,0 +1,34 @@
"""Alembic 迁移共享工具(仅用于 versions/ 下的迁移脚本)。"""
from __future__ import annotations
import sqlalchemy as sa
from alembic import op
def table_exists(table_name: str) -> bool:
    """Return whether ``table_name`` is reported by the migration's bind.

    The check inspects the connection returned by ``op.get_bind()``, i.e. the
    database currently being migrated.
    """
    inspector = sa.inspect(op.get_bind())
    known_tables = inspector.get_table_names()
    return table_name in known_tables
def has_column(table_name: str, column_name: str) -> bool:
    """Return whether ``table_name`` exists and has a column ``column_name``.

    A missing table yields ``False`` rather than an inspection error.
    """
    if table_exists(table_name):
        inspector = sa.inspect(op.get_bind())
        for reflected in inspector.get_columns(table_name):
            if reflected["name"] == column_name:
                return True
    return False
def add_column_if_missing(table_name: str, column: sa.Column) -> bool:
    """Add ``column`` to ``table_name`` unless a column of that name exists.

    Returns:
        ``True`` when ``op.add_column`` was executed, ``False`` when the
        column was already present.
    """
    already_present = has_column(table_name, column.name)
    if not already_present:
        op.add_column(table_name, column)
    return not already_present
def drop_column_if_exists(table_name: str, column_name: str) -> bool:
    """Drop ``column_name`` from ``table_name`` when it is present.

    Returns:
        ``True`` when ``op.drop_column`` was executed, ``False`` when there
        was nothing to drop (missing table or missing column).
    """
    present = has_column(table_name, column_name)
    if present:
        op.drop_column(table_name, column_name)
    return present

View File

@@ -6,9 +6,10 @@ chapters 含 story 物化字段markdown_compose_dirty、markdown_composed_at
(阅读片段快照,随 ORM 一并 create_all
已并入原 0002stories-first无 chapter_sections / memoir_images.section_id与原 0003segments.tts_audio_urls
的语义:新库仅由当前 ORM 建表即可,无需后续 ALTER。
conversation_messages会话轮次 durable log由 app.features.conversation.models.ConversationMessage 一并 create_all。
segments.audio_duration_seconds语音条时长秒数历史 API / Redis 回填)由 ORM 一并 create_all无独立迁移
的语义:仅对「全新库」由 create_all 建出;**已有库不会 ALTER**
老库缺列见 0019_align_legacy_schema(segments.audio_duration_seconds、tts_audio_urls;
conversations.deleted_at、conversation_messages.tts_audio_urls 等)。
conversation_messages 表由 ORM 在 0001 create_all 中创建(新库);老库若缺表须单独处理。
story_image_intents 无 source_span主图回填在正文末尾意图仅存 caption / prompt_brief 等)。
新库 / 删库重来:`alembic upgrade head`。

View File

@@ -0,0 +1,64 @@
"""补齐 0001 create_all 未覆盖的老库列(显式清单,禁止内省全库)
0001 对「已存在的表」不会 ALTER。下列列在 ORM / 0001 注释中视为新库默认字段,
但 staging/production 等自 squash 前就存在的库需要本迁移显式 add_column。
新增 ORM 字段时:禁止改已部署 revision id;在本文件追加列定义或新建 0020_* 迁移。
Revision ID: 0019_align_legacy_schema
Revises: 0018_users_language_preference
"""
from __future__ import annotations
import sys
from pathlib import Path
from typing import Sequence, Union
import sqlalchemy as sa
# versions/ -> alembic/
_ALEMBIC_DIR = Path(__file__).resolve().parents[1]
if str(_ALEMBIC_DIR) not in sys.path:
sys.path.insert(0, str(_ALEMBIC_DIR))
from script_helpers import add_column_if_missing, drop_column_if_exists, table_exists
revision: str = "0019_align_legacy_schema"
down_revision: Union[str, None] = "0018_users_language_preference"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
# (table, column) pairs — only the columns that migrations 0002–0018 never
# add via add_column and that legacy databases are commonly missing.
# Every column below is declared nullable. upgrade() walks this tuple in
# order; downgrade() walks it in reverse.
_LEGACY_COLUMNS: tuple[tuple[str, sa.Column], ...] = (
    (
        "segments",
        sa.Column("audio_duration_seconds", sa.Integer(), nullable=True),
    ),
    (
        "conversations",
        sa.Column("deleted_at", sa.DateTime(timezone=True), nullable=True),
    ),
    (
        "segments",
        sa.Column("tts_audio_urls", sa.JSON(), nullable=True),
    ),
    (
        "conversation_messages",
        sa.Column("tts_audio_urls", sa.JSON(), nullable=True),
    ),
)
def upgrade() -> None:
    """Add every column listed in ``_LEGACY_COLUMNS`` that the database lacks.

    Tables that do not exist are skipped, and columns that are already
    present are left untouched.
    """
    for legacy_table, legacy_column in _LEGACY_COLUMNS:
        if table_exists(legacy_table):
            add_column_if_missing(legacy_table, legacy_column)
def downgrade() -> None:
    """Drop the ``_LEGACY_COLUMNS`` columns, walking the list in reverse order.

    Tables that do not exist are skipped, as are columns that are absent.
    """
    for legacy_table, legacy_column in _LEGACY_COLUMNS[::-1]:
        if table_exists(legacy_table):
            drop_column_if_exists(legacy_table, legacy_column.name)

View File

@@ -0,0 +1,33 @@
"""修复已撤回 migration 写入的 alembic_version跨环境一次性兼容"""
from __future__ import annotations
from sqlalchemy import Connection, text
# Revision ids of withdrawn 0020_* migrations. A database whose
# alembic_version holds one of these is rewound by
# try_repair_withdrawn_0020_revision.
# NOTE(review): tests/test_alembic_migration_policy.py additionally forbids
# "0019_backfill_missing_columns", which is not listed here — confirm that no
# database was ever stamped with it.
_WITHDRAWN_0020_REVISIONS = frozenset(
    {
        "0020_add_tts_audio_urls_column",
        "0020_backfill_missing_schema",
        "0020_backfill_all_missing_columns",
    }
)
# Revision id written into alembic_version in place of a withdrawn one.
_REPAIR_TARGET_REVISION = "0018_users_language_preference"
def try_repair_withdrawn_0020_revision(conn: Connection) -> bool:
    """Rewind ``alembic_version`` if it holds a withdrawn 0020_* revision.

    When the current stamp is one of ``_WITHDRAWN_0020_REVISIONS`` it is
    replaced by ``_REPAIR_TARGET_REVISION`` so that
    ``0019_align_legacy_schema`` runs again on the next upgrade.

    Args:
        conn: Open connection to the database being migrated.

    Returns:
        ``True`` when the UPDATE was issued; the caller is responsible for
        committing. ``False`` when the ``alembic_version`` table is missing
        or empty, or when the current stamp is not a withdrawn revision.
    """
    # Local import so the module-level import block stays untouched.
    from sqlalchemy import inspect

    # A brand-new database has no alembic_version table until the first
    # upgrade creates it; selecting from it would raise and block that very
    # first upgrade, so treat "no table" as "nothing to repair".
    if not inspect(conn).has_table("alembic_version"):
        return False

    row = conn.execute(text("SELECT version_num FROM alembic_version")).fetchone()
    if row is None or row[0] not in _WITHDRAWN_0020_REVISIONS:
        return False

    conn.execute(
        text("UPDATE alembic_version SET version_num = :target"),
        {"target": _REPAIR_TARGET_REVISION},
    )
    return True

View File

@@ -21,10 +21,27 @@ logger = get_logger(__name__)
_API_DIR: Final[Path] = Path(__file__).resolve().parent.parent.parent
def _repair_withdrawn_revision_stamp_if_needed() -> None:
    """Rewind a withdrawn 0020_* stamp before Alembic runs.

    Opens a short-lived engine on the application's database URL, asks
    ``try_repair_withdrawn_0020_revision`` to rewrite ``alembic_version`` and
    commits when a rewrite happened. A database without an
    ``alembic_version`` table (never migrated) is left alone.
    """
    from sqlalchemy import create_engine, inspect

    from app.core.alembic_revision_repair import try_repair_withdrawn_0020_revision
    from app.core.db import _database_url

    engine = create_engine(_database_url())
    try:
        with engine.connect() as conn:
            # Fresh databases have no alembic_version table yet; querying it
            # would raise and prevent the initial upgrade from ever running.
            if not inspect(conn).has_table("alembic_version"):
                return
            if try_repair_withdrawn_0020_revision(conn):
                conn.commit()
                logger.warning(
                    "alembic_version 曾为已撤回的 0020_*,已回退到 0018"
                    "将重新执行 0019_align_legacy_schema"
                )
    finally:
        # The engine is only used for this check; release its pool so that
        # repeated startup attempts do not accumulate idle connections.
        engine.dispose()
def _run_alembic_upgrade_once() -> None:
    """Run a single ``alembic upgrade head`` using the project's alembic.ini."""
    # Alembic is imported inside the function rather than at module level.
    from alembic.command import upgrade
    from alembic.config import Config

    # Rewind a withdrawn 0020_* stamp first; presumably Alembic would
    # otherwise fail to locate the stamped revision — verify against the
    # "Can't locate revision" hint in the development script.
    _repair_withdrawn_revision_stamp_if_needed()
    # alembic.ini is resolved relative to _API_DIR, not the working directory.
    cfg = Config(str(_API_DIR / "alembic.ini"))
    upgrade(cfg, "head")

View File

@@ -304,6 +304,9 @@ print_alembic_failure_hint() {
print_warn "PostgreSQL 用户名或密码不匹配;请核对 .env.development 中的 DATABASE_URL"
elif [[ "${log_output}" == *"No such file or directory"* ]] || [[ "${log_output}" == *"can't open file"* ]]; then
print_warn "Alembic 依赖的文件或工作目录可能不正确;请确认在 api/ 目录运行脚本"
elif [[ "${log_output}" == *"Can't locate revision"* ]]; then
print_warn "alembic_version 与当前迁移链不一致(常见于已撤回的 0020_*"
print_warn "将自动修复:重启 development.sh或执行: uv run python scripts/repair_alembic_version_after_withdrawn_0020.py && uv run alembic upgrade head"
fi
}
@@ -413,6 +416,8 @@ run_migrations() {
local log_file
log_file="$(mktemp -t life-echo-alembic.XXXXXX.log)"
uv run python scripts/repair_alembic_version_after_withdrawn_0020.py >>"${log_file}" 2>&1 || true
if uv run alembic upgrade head >"${log_file}" 2>&1; then
print_ok "Alembic 迁移已就绪"
rm -f "${log_file}"

View File

@@ -0,0 +1,59 @@
# Alembic 迁移规范
## 设计原则
1. **`0001` 仅服务全新库**:`create_all` 不会对已有表 `ALTER`。老库缺列必须用 **`0019` 及之后的显式迁移** 补齐。
2. **一条迁移 = 明确 DDL**:禁止用「全库 ORM 内省」替代清单;可审查、可测、各环境行为一致。
3. **已部署的 `revision` 永不改名**:文件名可描述用途,但 `revision = "..."` 一旦上线不得修改。
4. **新 ORM 字段同 PR 必须带迁移**:改 `models.py` 须同时 `alembic/versions/` 新增或扩展迁移(见 CI 测试)。
## 当前链
```
0001_initial → … → 0018_users_language_preference → 0019_align_legacy_schema (head)
```
`0019_align_legacy_schema` 维护「老库相对 0001 squash 常缺」的列清单,见 `alembic/versions/0019_align_legacy_schema.py` 中的 `_LEGACY_COLUMNS`。
## 新增字段流程
1. 修改 ORM `models.py`
2. 若表在 squash 前就存在且 0002–0018 未覆盖该列 → 在 `0019` 的 `_LEGACY_COLUMNS` 追加一行(仅限 `0019` 尚未部署前),或新建 `0020_<short_desc>.py`
3. 本地:`uv run alembic upgrade head`
4. 提交前:`uv run pytest tests/test_alembic_migration_policy.py`
## Staging / Production 发布
| 环境 | 代码来源 | 说明 |
|------|----------|------|
| Staging | `main` push | 迁移在容器启动时 `upgrade head` |
| Production | tag `v*.*.*` | 发 tag 前确认 staging 已成功跑过同一迁移链 |
生产推荐:`ALEMBIC_STARTUP_FAIL_FAST=true`,迁移失败则进程退出。
## 撤回错误迁移 0019/0020 后的库修复
若数据库 `alembic_version` 仍为已删除的 revision(例如 `0020_add_tts_audio_urls_column`、`0020_backfill_missing_schema`),**部署新代码前**先执行:
```bash
cd api
# 仅当 version_num 为已撤回的 0020_* 时
uv run python scripts/repair_alembic_version_after_withdrawn_0020.py
```
或手动执行 SQL(确认列已由旧迁移加过后再 stamp):
```sql
SELECT version_num FROM alembic_version;
-- 若为 0020_add_tts_audio_urls_column 或 0020_backfill_missing_schema
UPDATE alembic_version SET version_num = '0018_users_language_preference';
```
然后部署;`0019_align_legacy_schema` 会幂等补齐缺列。
## 禁止事项
- 修改已上线 migration 的 `revision` / `down_revision`
- 用 `0020_backfill_*` 式全表内省替代显式列清单
- 只在 ORM 加字段、指望 `create_all` 更新线上老库
- Staging 与 Production 长期使用不同迁移链(应同一 commit / 同一 tag 构建)

View File

@@ -0,0 +1,66 @@
#!/usr/bin/env python3
"""将误写入的 0020_* revision 回退到 0018以便重新执行 0019_align_legacy_schema。
用法(在 api 目录DATABASE_URL 已配置):
uv run python scripts/repair_alembic_version_after_withdrawn_0020.py
uv run python scripts/repair_alembic_version_after_withdrawn_0020.py --dry-run
启动时也会自动执行同等逻辑(见 app.core.alembic_startup
"""
from __future__ import annotations
import argparse
import sys
from pathlib import Path
_API_DIR = Path(__file__).resolve().parents[1]
if str(_API_DIR) not in sys.path:
sys.path.insert(0, str(_API_DIR))
from sqlalchemy import create_engine, text
from app.core.alembic_revision_repair import (
_WITHDRAWN_0020_REVISIONS,
try_repair_withdrawn_0020_revision,
)
from app.core.db import _database_url
def main() -> int:
    """Report the current Alembic stamp and rewind it if it was withdrawn.

    With ``--dry-run`` the script only prints what it would do. Without it,
    a withdrawn 0020_* stamp is rewritten via
    ``try_repair_withdrawn_0020_revision`` and committed.

    Returns:
        Process exit code; ``0`` on every path that completes normally.
    """
    # Local import: ``inspect`` is not among this script's module-level imports.
    from sqlalchemy import inspect

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="只检查,不写库",
    )
    args = parser.parse_args()

    engine = create_engine(_database_url())
    try:
        with engine.connect() as conn:
            # A never-migrated database has no alembic_version table; report
            # that instead of crashing on the SELECT below.
            if not inspect(conn).has_table("alembic_version"):
                print("alembic_version 表不存在,无需修复")
                return 0

            row = conn.execute(text("SELECT version_num FROM alembic_version")).fetchone()
            current = row[0] if row else None
            print(f"当前 version_num: {current!r}")
            if current is None:
                print("alembic_version 表为空,无需修复")
                return 0

            if args.dry_run:
                if current in _WITHDRAWN_0020_REVISIONS:
                    print("[dry-run] 将回退到 0018_users_language_preference")
                else:
                    print("无需修复")
                return 0

            if try_repair_withdrawn_0020_revision(conn):
                conn.commit()
                print("已回退到 0018_users_language_preference请执行: uv run alembic upgrade head")
            else:
                print("无需修复")
    finally:
        # One-shot script: release the connection pool explicitly.
        engine.dispose()
    return 0
if __name__ == "__main__":
raise SystemExit(main())

View File

@@ -0,0 +1,95 @@
"""Alembic 迁移链与项目策略的静态校验(不依赖线上 Postgres"""
from __future__ import annotations
import re
from pathlib import Path
import pytest
from alembic.config import Config
from alembic.script import ScriptDirectory
# Directory two levels above this test file; alembic.ini and alembic/ are
# resolved relative to it.
_API_DIR = Path(__file__).resolve().parent.parent
_VERSIONS_DIR = _API_DIR / "alembic" / "versions"
# Legacy-database columns that 0019 must cover explicitly (tied to the ORM
# and to past incidents).
_REQUIRED_LEGACY_COLUMNS = frozenset(
    {
        ("segments", "audio_duration_seconds"),
        ("conversations", "deleted_at"),
        ("segments", "tts_audio_urls"),
        ("conversation_messages", "tts_audio_urls"),
    }
)
# Revision ids that were withdrawn and must never reappear in the tree.
_FORBIDDEN_WITHDRAWN_REVISIONS = frozenset(
    {
        "0020_add_tts_audio_urls_column",
        "0020_backfill_missing_schema",
        "0020_backfill_all_missing_columns",
        "0019_backfill_missing_columns",
    }
)
def _script_dir() -> ScriptDirectory:
    """Load the Alembic script directory described by the project's alembic.ini."""
    ini_path = _API_DIR / "alembic.ini"
    return ScriptDirectory.from_config(Config(str(ini_path)))
def test_single_alembic_head() -> None:
    """The migration tree must have exactly one head: 0019_align_legacy_schema."""
    expected = ["0019_align_legacy_schema"]
    heads = _script_dir().get_heads()
    assert heads == expected, f"unexpected heads: {heads}"
def test_no_withdrawn_revision_ids_in_tree() -> None:
    """No revision reachable in the tree may carry a withdrawn revision id."""
    for script in _script_dir().walk_revisions():
        revision_id = script.revision
        assert revision_id not in _FORBIDDEN_WITHDRAWN_REVISIONS, (
            f"withdrawn revision still in tree: {revision_id}"
        )
def test_no_withdrawn_migration_files() -> None:
    """No file named after a withdrawn revision may remain in versions/.

    Every id in ``_FORBIDDEN_WITHDRAWN_REVISIONS`` is checked, so a newly
    withdrawn revision only has to be registered in that one set.
    """
    present = {path.stem for path in _VERSIONS_DIR.glob("*.py")}
    leftovers = sorted(present & _FORBIDDEN_WITHDRAWN_REVISIONS)
    assert not leftovers, f"withdrawn migration files still present: {leftovers}"
def test_0019_align_legacy_schema_covers_required_columns() -> None:
    """0019 must list every required (table, column) pair explicitly.

    The source is scanned for ``("<table>", sa.Column("<column>", ...`` entries
    so that table and column are matched as a pair. Checking the two names
    independently would let ``("segments", "tts_audio_urls")`` pass as long as
    both strings occur anywhere in the file.
    """
    path = _VERSIONS_DIR / "0019_align_legacy_schema.py"
    src = path.read_text(encoding="utf-8")
    assert 'revision: str = "0019_align_legacy_schema"' in src
    assert "Base.metadata" not in src, "0019 must not introspect full ORM metadata"
    assert "sorted_tables" not in src

    # Matches one entry: opening paren, quoted table name, comma, then a
    # Column(...) call whose first argument is the quoted column name.
    pair_pattern = re.compile(r'\(\s*"(\w+)"\s*,\s*(?:sa\.)?Column\(\s*"(\w+)"')
    found: set[tuple[str, str]] = set(pair_pattern.findall(src))

    missing = _REQUIRED_LEGACY_COLUMNS - found
    assert not missing, f"0019 missing explicit legacy columns: {missing}"
def test_all_revisions_have_unique_ids() -> None:
    """Every revision id in the migration tree must occur exactly once."""
    ids = [script.revision for script in _script_dir().walk_revisions()]
    assert len(set(ids)) == len(ids), "duplicate revision ids"
def test_revision_chain_reaches_0019_from_0018() -> None:
    """0019_align_legacy_schema must exist and descend directly from 0018."""
    head = _script_dir().get_revision("0019_align_legacy_schema")
    assert head is not None
    parent = head.down_revision
    assert parent == "0018_users_language_preference"
def test_no_autogenerate_introspection_backfill_pattern() -> None:
    """Forbid reintroducing "walk every ORM table and backfill columns" migrations."""
    forbidden = re.compile(r"for table in Base\.metadata\.sorted_tables")
    for migration in _VERSIONS_DIR.glob("*.py"):
        contents = migration.read_text(encoding="utf-8")
        assert forbidden.search(contents) is None, (
            f"{migration.name} uses full-ORM introspection backfill; use explicit column list"
        )