* update github actions * update github actions * update github actions * update github actions * update github actions * update github actions * update github actions --------- Co-authored-by: Kevin <kevin@brighteng.org>
172 lines
6.6 KiB
Python
172 lines
6.6 KiB
Python
"""
|
||
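Run the chapter_sections migration in one step: first execute the SQL that
creates the table / adds the columns, then backfill the data and drop the old
columns.

Dependencies: DATABASE_URL in .env (MIGRATION_DATABASE_URL takes precedence
when it is set), plus psycopg, SQLAlchemy and python-dotenv.

Usage (from the api directory):
    python -m scripts.run_chapter_sections_migration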
|
||
"""
|
||
import json
|
||
import logging
|
||
import os
|
||
import sys
|
||
import uuid
|
||
from pathlib import Path
|
||
|
||
# 仅加载 .env,不导入 database(避免 asyncpg 等依赖)
|
||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||
os.chdir(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||
|
||
from dotenv import load_dotenv
|
||
load_dotenv()
|
||
|
||
import psycopg
|
||
from sqlalchemy import create_engine, text
|
||
from sqlalchemy.engine import Engine
|
||
from urllib.parse import urlsplit
|
||
|
||
logging.basicConfig(level=logging.INFO, format="%(message)s")
|
||
logger = logging.getLogger(__name__)
|
||
|
||
|
||
def get_engine() -> Engine:
    """Build the synchronous SQLAlchemy engine used by this migration.

    The connection URL comes from ``MIGRATION_DATABASE_URL`` when that
    variable is set and non-empty, otherwise from ``DATABASE_URL`` (which
    defaults to a local ``life_echo`` database).

    The URL scheme is rewritten to ``postgresql+psycopg`` so the engine uses
    the psycopg driver. Only the scheme prefix is touched; the rest of the
    URL (credentials, host, path, query) is passed through unchanged.

    Returns:
        An ``Engine`` created with ``pool_pre_ping=True``.
    """
    migration_url = os.getenv("MIGRATION_DATABASE_URL")
    database_url = os.getenv("DATABASE_URL", "postgresql://postgres:postgres@localhost:5432/life_echo")
    url = migration_url or database_url

    # region agent log
    # Only presence flags and hostnames are logged, never the full URL.
    logger.info(
        "DEBUG migration env selection: migration_present=%s database_present=%s selected=%s selected_host=%s migration_host=%s database_host=%s",
        bool(migration_url),
        bool(database_url),
        "MIGRATION_DATABASE_URL" if migration_url else "DATABASE_URL",
        urlsplit(url).hostname or "<missing-host>",
        urlsplit(migration_url).hostname if migration_url else "<empty>",
        urlsplit(database_url).hostname or "<missing-host>",
    )
    # endregion agent log

    # Rewrite the scheme only, instead of str.replace() over the whole URL,
    # so an occurrence of "postgresql://" elsewhere in the string is never
    # altered. "postgres://" is not a dialect name SQLAlchemy 1.4+ accepts,
    # and the asyncpg driver cannot back a synchronous Engine, so both are
    # mapped to psycopg as well.
    scheme, separator, remainder = url.partition("://")
    if separator and scheme in ("postgresql", "postgres", "postgresql+asyncpg"):
        url = "postgresql+psycopg://" + remainder

    return create_engine(url, pool_pre_ping=True)
|
||
|
||
|
||
def _split_sql_statements(sql):
    """Split a migration script into individually executable statements.

    Leading ``--`` line comments are skipped. A statement starting with
    ``DO`` is kept whole up to its closing ``$$`` so the PL/pgSQL body is not
    cut at its inner semicolons. Everything else is split on ``;``.

    Limitations: only the bare ``$$`` dollar-quote is recognised, and a ``;``
    inside a string literal or inline comment of an ordinary statement still
    splits that statement.

    Args:
        sql: Full text of the migration file.

    Returns:
        A list of statements, each ending with ``;``.

    Raises:
        ValueError: If a ``DO`` statement has no opening/closing ``$$`` pair.
    """
    statements = []
    rest = sql
    while True:
        rest = rest.lstrip()
        if not rest:
            break

        if rest.startswith("--"):
            # Drop the comment line (or the whole remainder if it is the last line).
            _, _, rest = rest.partition("\n")
            continue

        # Look at the first three characters only; accept any whitespace after DO.
        if rest[:2].upper() == "DO" and rest[2:3].isspace():
            opening = rest.find("$$")
            closing = rest.find("$$", opening + 2) if opening != -1 else -1
            if closing == -1:
                # Silently stopping here would skip the rest of the migration.
                raise ValueError("unterminated DO $$ ... $$ block in migration SQL")
            statements.append(rest[: closing + 2].strip() + ";")
            rest = rest[closing + 2:].lstrip().lstrip(";")
            continue

        # partition() also returns a final statement that has no trailing ';'.
        statement, _, rest = rest.partition(";")
        statement = statement.strip()
        if statement:
            statements.append(statement + ";")
    return statements


def run_sql_migration(engine: Engine):
    """Execute ``migrations/add_chapter_sections.sql`` statement by statement.

    All statements run inside one transaction. Each statement is wrapped in
    its own SAVEPOINT: PostgreSQL aborts the enclosing transaction after any
    failed statement, so without the savepoint an "already exists" error
    could not be skipped and every following statement would fail.

    Args:
        engine: Engine connected to the target database.

    Raises:
        ValueError: If the SQL file contains an unterminated ``DO $$`` block.
        Exception: Any database error whose message does not contain
            "already exists" is re-raised and rolls the transaction back.
    """
    sql_path = Path(__file__).parent.parent / "migrations" / "add_chapter_sections.sql"
    statements = _split_sql_statements(sql_path.read_text(encoding="utf-8"))

    with engine.begin() as conn:
        for number, statement in enumerate(statements, start=1):
            try:
                # The savepoint is rolled back on error; the outer transaction stays usable.
                with conn.begin_nested():
                    conn.execute(text(statement))
            except Exception as exc:
                if "already exists" in str(exc).lower():
                    logger.info(" SQL %s (已存在)", number)
                    continue
                raise
            logger.info(" SQL %s OK", number)
    logger.info("1/2 SQL 迁移完成")
|
||
|
||
|
||
def _decode_images(images_raw):
    """Return the legacy ``chapters.images`` value as a list.

    Accepts either an already-decoded value or a JSON string. Anything that
    is not (or does not decode to) a list yields ``[]``, so callers can use
    ``len()`` and indexing safely.
    """
    if isinstance(images_raw, str):
        try:
            images_raw = json.loads(images_raw)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            return []
    return images_raw if isinstance(images_raw, list) else []


def run_data_migration(engine: Engine):
    """Backfill ``chapter_sections`` from ``chapters.content`` / ``chapters.images``.

    Does nothing when ``chapters.content`` no longer exists. Otherwise, for
    every chapter with non-blank content:

    * the content is split with ``split_narrative_to_sections``;
    * if that yields nothing, one section holding the stripped content is
      inserted without an image;
    * otherwise one section is inserted per segment. Each segment that has
      ``placeholder_info`` consumes the next entry of the chapter's images,
      stored as JSON when that entry is a dict. The first stored image also
      becomes the chapter's ``cover_image``.

    Finally ``chapters.content`` and ``chapters.images`` are dropped.

    The whole migration runs in a single transaction, so a failure rolls
    everything back and a rerun cannot insert duplicate sections for chapters
    that were already processed.

    Args:
        engine: Engine connected to the target database.
    """
    from services.memoir_images.parser import split_narrative_to_sections

    content_column_query = text("""
        SELECT column_name FROM information_schema.columns
        WHERE table_schema = 'public' AND table_name = 'chapters' AND column_name = 'content'
    """)
    chapters_query = text("""
        SELECT id, content, images FROM chapters WHERE content IS NOT NULL AND trim(content) != ''
    """)
    # One statement serves both paths; CAST(NULL AS jsonb) stores NULL.
    insert_section = text("""
        INSERT INTO chapter_sections (id, chapter_id, order_index, content, image, updated_at)
        VALUES (:id, :ch_id, :ord, :content, CAST(:img AS jsonb), NOW())
    """)

    with engine.begin() as conn:
        if conn.execute(content_column_query).fetchone() is None:
            logger.info("chapters.content 已不存在,跳过数据迁移")
            return

        rows = conn.execute(chapters_query).fetchall()

        for ch_id, content, images_raw in rows:
            images = _decode_images(images_raw)
            sections = split_narrative_to_sections(content or "")

            if not sections:
                conn.execute(insert_section, {
                    "id": uuid.uuid4().hex,
                    "ch_id": ch_id,
                    "ord": 0,
                    "content": (content or "").strip(),
                    "img": None,
                })
                logger.info("章节 %s: 1 条 section(无图)", ch_id)
                continue

            first_cover = None
            img_index = 0
            for order_idx, seg in enumerate(sections):
                image_json = None
                if seg.get("placeholder_info") is not None and img_index < len(images):
                    # Each placeholder consumes one image slot, usable or not.
                    candidate = images[img_index]
                    img_index += 1
                    if isinstance(candidate, dict):
                        image_json = json.dumps(candidate)
                        if first_cover is None:
                            first_cover = image_json

                conn.execute(insert_section, {
                    "id": uuid.uuid4().hex,
                    "ch_id": ch_id,
                    "ord": order_idx,
                    "content": seg.get("content") or "",
                    "img": image_json,
                })

            if first_cover:
                conn.execute(
                    text("UPDATE chapters SET cover_image = CAST(:img AS jsonb) WHERE id = :id"),
                    {"img": first_cover, "id": ch_id},
                )
            logger.info("章节 %s: %d 条 sections", ch_id, len(sections))

        # Dropped in the same transaction as the backfill.
        conn.execute(text("ALTER TABLE chapters DROP COLUMN IF EXISTS content"))
        conn.execute(text("ALTER TABLE chapters DROP COLUMN IF EXISTS images"))

    # Reached only after the transaction has committed.
    logger.info("已删除 chapters.content 与 chapters.images")
    logger.info("2/2 数据迁移完成")
|
||
|
||
|
||
if __name__ == "__main__":
    # Script entry point: run the schema step, then the data backfill,
    # both against the same engine.
    logger.info("开始 chapter_sections 迁移…")
    migration_engine = get_engine()
    for migration_step in (run_sql_migration, run_data_migration):
        migration_step(migration_engine)
    logger.info("迁移全部完成")
|