chore/ 删除无用文件

This commit is contained in:
Kevin
2026-03-19 14:36:14 +08:00
parent 2f60858c9c
commit c6e07ce5ca
135 changed files with 2111 additions and 4510 deletions

View File

@@ -1,98 +0,0 @@
"""
将 chapters 的 content + images 迁移到 chapter_sections并删除 chapters.content / chapters.images。
前置:已执行 api/migrations/add_chapter_sections.sql创建 chapter_sections 表、chapters.cover_image 列)。
用法(在 api 目录下):
python -m scripts.migrate_chapters_to_sections
"""
import json
import os
import sys
import uuid
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from sqlalchemy import text
from app.core.db import sync_engine as engine
from app.features.memoir.memoir_images.parser import split_narrative_to_sections
from app.core.logging import get_logger, setup_logging
setup_logging()
logger = get_logger(__name__)
def run():
    """Backfill ``chapter_sections`` from ``chapters.content`` / ``chapters.images``, then drop both columns.

    For every chapter whose content is non-blank, the narrative is split with
    ``split_narrative_to_sections`` and each segment becomes one
    ``chapter_sections`` row.  Segments that carry ``placeholder_info`` consume
    the chapter's legacy images in order; the first usable image is also
    written to ``chapters.cover_image``.  Each chapter is committed on its own.
    After all chapters are processed the two legacy columns are dropped.

    If ``chapters.content`` no longer exists the function logs and returns
    without touching anything.
    """
    with engine.connect() as conn:
        # The presence of chapters.content is the "not migrated yet" marker.
        column_check = conn.execute(text("""
            SELECT column_name FROM information_schema.columns
            WHERE table_schema = 'public' AND table_name = 'chapters' AND column_name = 'content'
        """))
        if column_check.fetchone() is None:
            logger.info("chapters.content 已不存在,跳过迁移")
            return

        # Read the legacy columns for every chapter that has content.
        rows = conn.execute(text("""
            SELECT id, content, images FROM chapters WHERE content IS NOT NULL AND trim(content) != ''
        """)).fetchall()

        for row in rows:
            ch_id, content, images_raw = row[0], row[1], row[2]

            if isinstance(images_raw, str):
                try:
                    images = json.loads(images_raw)
                except ValueError:
                    # Malformed legacy JSON: migrate the text without images
                    # instead of aborting the whole run.
                    images = []
            else:
                images = images_raw or []
            if not isinstance(images, list):
                images = []

            sections = split_narrative_to_sections(content or "")
            if not sections:
                # No placeholders: the whole narrative becomes one image-less section.
                conn.execute(text("""
                    INSERT INTO chapter_sections (id, chapter_id, order_index, content, image, updated_at)
                    VALUES (:id, :ch_id, 0, :content, NULL, NOW())
                """), {"id": uuid.uuid4().hex, "ch_id": ch_id, "content": (content or "").strip()})
                conn.commit()
                logger.info("章节 %s: 1 条 section无图", ch_id)
                continue

            first_cover = None
            img_index = 0
            for order_idx, seg in enumerate(sections):
                seg_content = seg.get("content") or ""
                image_json = None
                if seg.get("placeholder_info") is not None and img_index < len(images):
                    candidate = images[img_index]
                    # Only dict entries are usable; anything else still consumes its slot.
                    image_json = json.dumps(candidate) if isinstance(candidate, dict) else None
                    if first_cover is None and image_json:
                        first_cover = image_json
                    img_index += 1
                # CAST(... AS jsonb) instead of ":img::jsonb": the "::" directly after a
                # bind name interferes with text()'s bind-parameter detection.
                conn.execute(text("""
                    INSERT INTO chapter_sections (id, chapter_id, order_index, content, image, updated_at)
                    VALUES (:id, :ch_id, :ord, :content, CAST(:img AS jsonb), NOW())
                """), {
                    "id": uuid.uuid4().hex,
                    "ch_id": ch_id,
                    "ord": order_idx,
                    "content": seg_content,
                    "img": image_json,
                })

            if first_cover:
                conn.execute(
                    text("UPDATE chapters SET cover_image = CAST(:img AS jsonb) WHERE id = :id"),
                    {"img": first_cover, "id": ch_id},
                )
            conn.commit()
            logger.info("章节 %s: %d 条 sections", ch_id, len(sections))

        # All chapters migrated: remove the legacy columns.
        conn.execute(text("ALTER TABLE chapters DROP COLUMN IF EXISTS content"))
        conn.execute(text("ALTER TABLE chapters DROP COLUMN IF EXISTS images"))
        conn.commit()
        logger.info("已删除 chapters.content 与 chapters.images")
# Run the migration only when this module is executed as a script.
if __name__ == "__main__":
    run()

View File

@@ -1,686 +0,0 @@
"""
重新整理用户历史对话为回忆录章节(远程预览 + 确认后写入)
用法:
cd api
# 第一步:预览(只读远程 DB本地生成新章节输出对比 Markdown
python -m scripts.reprocess_user_memoir preview --phone 13800138000
# 第二步:确认后写入远程 DB
python -m scripts.reprocess_user_memoir apply --phone 13800138000
流程:
preview:
1. SSH 隧道连接远程 PostgreSQL
2. 读取用户现有章节 + 所有历史对话段落
3. 本地调用 LLM 生成新章节(不写入远程 DB
4. 输出对比 Markdown 表格 + 保存结果到 JSON 文件
apply:
1. 读取上次 preview 保存的 JSON 文件
2. SSH 隧道连接远程 PostgreSQL
3. 旧章节 is_active=False写入新章节
"""
import argparse
import json
import os
import sys
import uuid
import time
from datetime import datetime, timezone
from typing import Dict, List, Optional
from dataclasses import dataclass, field, asdict
# 确保 api/ 目录在 sys.path 中
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# 配置由 app.core.config.settings 统一加载
import socket
import subprocess
import signal
from sqlalchemy import create_engine, select
from sqlalchemy.orm import sessionmaker, Session
from app.core.db import Base
from app.features.conversation.models import Conversation, Segment
from app.features.memoir.models import Book, Chapter, ChapterSection, MemoirState
from app.features.user.models import User
from app.core.dependencies import get_llm_provider
from app.agents.state_schema import MemoirStateSchema, SlotData, default_state
from app.agents.memoir.prompts import (
get_creative_title_prompt,
get_narrative_prompt,
get_state_extraction_prompt,
inject_image_placeholder_template,
STAGE_TO_ORDER,
)
from app.features.memoir.memoir_images.json_payload import extract_json_payload
from app.features.memoir.memoir_images.parser import split_narrative_to_sections
from app.core.logging import get_logger, setup_logging
setup_logging()
logger = get_logger(__name__)
# ── SSH / DB 配置 ──────────────────────────────────────────────
# NOTE(review): the defaults below (host address, root SSH login, database
# password) are committed in source.  Every value can be overridden through
# the environment so real credentials do not have to live in the repository;
# the defaults keep the script working exactly as before when nothing is set.
SSH_HOST = os.environ.get("REPROCESS_SSH_HOST", "1.15.29.57")
SSH_PORT = int(os.environ.get("REPROCESS_SSH_PORT", "22"))
SSH_USER = os.environ.get("REPROCESS_SSH_USER", "root")
SSH_KEY_PATH = os.environ.get(
    "REPROCESS_SSH_KEY_PATH",
    os.path.join(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
        "..", "certs", "key.crt",
    ),
)
# PostgreSQL address as seen from the SSH host (the tunnel's far end).
REMOTE_PG_HOST = os.environ.get("REPROCESS_PG_HOST", "127.0.0.1")
REMOTE_PG_PORT = int(os.environ.get("REPROCESS_PG_PORT", "5432"))
PG_USER = os.environ.get("REPROCESS_PG_USER", "postgres")
PG_PASSWORD = os.environ.get("REPROCESS_PG_PASSWORD", "postgres")
PG_DATABASE = os.environ.get("REPROCESS_PG_DATABASE", "life_echo")
# Preview JSON / Markdown files are written next to this script.
OUTPUT_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "output")
# ── 关键字阶段检测 ────────────────────────────────────────────
STAGE_KEYWORDS = {
"childhood": ["童年", "小时候", "出生", "家乡", "小镇"],
"education": ["上学", "学校", "老师", "同学", "教育", "大学"],
"career": ["工作", "职业", "事业", "公司", "同事", "创业"],
"family": ["伴侣", "孩子", "家庭", "家人", "结婚", "父母"],
"belief": ["信念", "价值观", "座右铭", "坚持", "原则"],
}
def _detect_stage(text: str, fallback: str) -> str:
msg = text.lower()
for stage, keywords in STAGE_KEYWORDS.items():
if any(w in msg for w in keywords):
return stage
return fallback
# ── SSH 隧道 + DB 会话 ────────────────────────────────────────
class SshTunnel:
    """Local port forward to the remote PostgreSQL, run as an ``ssh -N -L`` child process.

    Using the system ``ssh`` binary avoids any dependency on a particular
    paramiko version.  Call ``start()`` to open the tunnel and ``stop()`` to
    close it; ``local_bind_port`` is the local port to connect to.
    """

    def __init__(self, local_port: int = 15432):
        self.local_port = local_port
        self._proc: Optional[subprocess.Popen] = None

    def start(self):
        """Spawn ssh and block until the forwarded local port accepts connections.

        Raises:
            RuntimeError: if the ssh process exits before the port is ready,
                or if the port is still not connectable after 30 probes
                (0.5 s pause between probes).  In the timeout case the ssh
                child is stopped before raising so it is not left running.
        """
        key_path = os.path.normpath(SSH_KEY_PATH)
        cmd = [
            "ssh", "-N", "-L",
            f"{self.local_port}:{REMOTE_PG_HOST}:{REMOTE_PG_PORT}",
            "-i", key_path,
            "-p", str(SSH_PORT),
            # NOTE(review): disables host-key verification, so the remote host
            # is not authenticated.  Kept for compatibility with existing usage.
            "-o", "StrictHostKeyChecking=no",
            "-o", "ExitOnForwardFailure=yes",
            "-o", "BatchMode=yes",
            f"{SSH_USER}@{SSH_HOST}",
        ]
        logger.info(f"SSH 隧道: {SSH_USER}@{SSH_HOST}:{SSH_PORT} -> 127.0.0.1:{self.local_port}, key={key_path}")
        self._proc = subprocess.Popen(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE)

        # Poll until the local port is connectable.
        for attempt in range(30):
            if self._proc.poll() is not None:
                err = self._proc.stderr.read().decode(errors="replace") if self._proc.stderr else ""
                raise RuntimeError(f"SSH 隧道进程已退出: {err}")
            try:
                with socket.create_connection(("127.0.0.1", self.local_port), timeout=1):
                    pass
            except OSError:
                # Includes ConnectionRefusedError and connect timeouts: not ready yet.
                time.sleep(0.5)
            else:
                logger.info(f"SSH 隧道已建立, 本地端口: {self.local_port} (耗时 {attempt * 0.5:.1f}s)")
                return

        # Timed out.  stderr is only read once the child has exited, because
        # reading the pipe of a live process would block.
        err = ""
        if self._proc.poll() is not None and self._proc.stderr:
            err = self._proc.stderr.read().decode(errors="replace")
        self.stop()
        raise RuntimeError(f"SSH 隧道端口 {self.local_port} 超时未就绪: {err}")

    def stop(self):
        """Terminate the ssh child if it is running; a no-op otherwise.

        Sends SIGTERM and waits up to 5 s; if the child has not exited by
        then it is killed so the call never leaves an orphaned process.
        """
        proc = self._proc
        if proc is None or proc.poll() is not None:
            return
        proc.send_signal(signal.SIGTERM)
        try:
            proc.wait(timeout=5)
        except subprocess.TimeoutExpired:
            proc.kill()
            proc.wait()
        logger.info("SSH 隧道已关闭")

    @property
    def local_bind_port(self) -> int:
        """Local port that forwards to the remote PostgreSQL."""
        return self.local_port
def open_ssh_tunnel() -> SshTunnel:
    """Create an ``SshTunnel`` on its default local port, start it, and return it."""
    started = SshTunnel()
    started.start()
    return started
def make_session(tunnel: SshTunnel) -> Session:
    """Open a new ORM session against PostgreSQL through *tunnel*'s local port.

    A fresh engine (pool_size=2, max_overflow=2) is created on every call.
    """
    dsn = f"postgresql://{PG_USER}:{PG_PASSWORD}@127.0.0.1:{tunnel.local_bind_port}/{PG_DATABASE}"
    session_factory = sessionmaker(bind=create_engine(dsn, pool_size=2, max_overflow=2))
    return session_factory()
# ── 数据结构:保存生成结果 ────────────────────────────────────
@dataclass
class GeneratedChapter:
    """One chapter produced in memory by ``generate_chapters_in_memory``."""
    category: str  # stage name the chapter was generated for
    title: str  # LLM-generated title, or "<stage> 回忆" when title generation did not succeed
    content: str  # accumulated narrative after ``inject_image_placeholder_template``
    order_index: int  # taken from STAGE_TO_ORDER; 999 when the stage is not listed there
    source_segment_ids: List[str] = field(default_factory=list)  # ids of every segment fed into this chapter
@dataclass
class PreviewResult:
    """Result of the ``preview`` command; serialized to JSON and read back by ``apply``."""
    user_id: str
    phone: str
    nickname: str
    generated_at: str  # ISO-8601 timestamp (UTC) of when the preview was produced
    old_chapters: List[dict] = field(default_factory=list)  # {category, title, content_len, content_preview}
    new_chapters: List[dict] = field(default_factory=list)  # same shape + full content, order_index, source_segment_ids
# ── 核心:本地生成章节 ────────────────────────────────────────
def extract_slots_with_llm(llm, text: str, current_stage: str, stage_slots: dict):
    """Ask the LLM which stage *text* belongs to and which slot values it mentions.

    Returns a ``(detected_stage, slots)`` tuple parsed from the model's JSON
    reply.  ``detected_stage`` defaults to *current_stage* and ``slots`` to an
    empty dict when the reply omits them.  Any failure (prompt building, LLM
    call, JSON parsing) is logged as a warning and yields
    ``(current_stage, {})``.
    """
    try:
        extraction_prompt = get_state_extraction_prompt(
            user_message=text,
            current_stage=current_stage,
            stage_slots=stage_slots,
        )
        # Force a JSON object reply and cap its length.
        json_mode_llm = llm.bind(
            model_kwargs={"response_format": {"type": "json_object"}},
            max_tokens=1024,
        )
        reply = json_mode_llm.invoke(extraction_prompt)
        raw_payload = extract_json_payload(reply.content.strip())
        payload = json.loads(raw_payload)
        stage = payload.get("detected_stage", current_stage)
        slots = payload.get("slots", {}) or {}
        return stage, slots
    except Exception as e:
        logger.warning(f"LLM slot 提取失败: {e}")
        return current_stage, {}
def generate_chapters_in_memory(
    segments: list,  # list of (id, transcript_text)
    llm,
    batch_size: int,
    skip_llm_slots: bool,
) -> List[GeneratedChapter]:
    """Generate chapters purely in memory from conversation segments; no DB is touched.

    Phase 1 assigns every non-blank segment to a stage: keyword detection
    first, then (unless *skip_llm_slots*) the LLM's stage and slot extraction,
    which also updates an in-memory memoir state.
    Phase 2 walks each stage's segments in batches of *batch_size*: the first
    batch also asks the LLM for a title, and every batch asks for a narrative
    that is appended to what has been written so far.  When the LLM call fails
    the raw segment text is appended instead.

    Args:
        segments: ``(segment_id, transcript_text)`` pairs in processing order.
        llm: object exposing ``invoke(prompt)`` whose result has ``.content``.
        batch_size: number of segments sent to the LLM per narrative call.
        skip_llm_slots: when true, only keyword stage detection is used.

    Returns:
        One ``GeneratedChapter`` per stage that received at least one segment.
    """
    state = default_state()
    # 1. Stage detection & slot extraction (state is kept in memory only)
    stage_to_segments: Dict[str, list] = {}
    for idx, (seg_id, text) in enumerate(segments, 1):
        if not text or not text.strip():
            continue
        detected_stage = _detect_stage(text, state.current_stage)
        if not skip_llm_slots:
            try:
                detected_stage, extracted_slots = extract_slots_with_llm(
                    llm, text, state.current_stage, state.slots.get(detected_stage, {})
                )
                # Update the in-memory state slots
                for slot_name, snippet in extracted_slots.items():
                    stage_slots = state.slots.get(detected_stage, {})
                    stage_slots[slot_name] = SlotData(snippet=snippet, segment_ids=[seg_id])
                    state.slots[detected_stage] = stage_slots
                state.current_stage = detected_stage
            except Exception as e:
                logger.warning(f"段落 {idx} slot 提取失败: {e}")
        stage_to_segments.setdefault(detected_stage, []).append((seg_id, text))
        if idx % 20 == 0:
            logger.info(f"阶段检测进度: {idx}/{len(segments)}")
    for stage, segs in stage_to_segments.items():
        logger.info(f"阶段 [{stage}]: {len(segs)} 条段落")
    # 2. Generate each stage batch by batch
    results: List[GeneratedChapter] = []
    for stage, seg_list in stage_to_segments.items():
        title = f"{stage} 回忆"
        existing_content = ""
        all_source_ids: List[str] = []
        slot_snippets = {
            key: value.snippet
            for key, value in (state.slots.get(stage, {}) or {}).items()
            if value.snippet
        }
        for i in range(0, len(seg_list), batch_size):
            batch = seg_list[i : i + batch_size]
            batch_num = i // batch_size + 1
            total_batches = (len(seg_list) + batch_size - 1) // batch_size
            logger.info(f"[{stage}] 处理第 {batch_num}/{total_batches} 批 ({len(batch)} 条)")
            combined_text = "\n\n".join(text for _, text in batch)
            source_ids = [sid for sid, _ in batch]
            all_source_ids.extend(source_ids)
            narrative = combined_text  # fallback: raw transcript text
            try:
                if not existing_content:
                    # First batch -> also generate the chapter title
                    title_prompt = get_creative_title_prompt(
                        stage=stage, emotion="neutral", slots=slot_snippets
                    )
                    title_response = llm.invoke(title_prompt)
                    title = title_response.content.strip().strip('"')
                    logger.info(f"[{stage}] 生成标题: {title}")
                narrative_prompt = get_narrative_prompt(
                    stage=stage,
                    slots=slot_snippets,
                    new_content=combined_text,
                    existing_content=existing_content,
                )
                narrative_response = llm.invoke(narrative_prompt)
                new_narrative = narrative_response.content.strip()
                if existing_content:
                    narrative = f"{existing_content}\n\n{new_narrative}"
                else:
                    narrative = new_narrative
            except Exception as e:
                logger.warning(f"[{stage}] LLM 生成失败: {e}")
                if existing_content:
                    narrative = f"{existing_content}\n\n{combined_text}"
            # Sanity check: the accumulated text must not shrink below 80% of
            # what was already written; otherwise append the raw text instead.
            if existing_content and len(narrative) < len(existing_content) * 0.8:
                logger.warning(f"[{stage}] 内容长度异常, 回退追加模式")
                narrative = f"{existing_content}\n\n{combined_text}"
            existing_content = narrative
            logger.info(f"[{stage}] 批次 {batch_num} 完成, 累计长度: {len(existing_content)}")
            # Pause 1 s between batches (not after the last one)
            if i + batch_size < len(seg_list):
                time.sleep(1)
        # Before saving: run the narrative through inject_image_placeholder_template
        content_to_save = inject_image_placeholder_template(existing_content)
        results.append(GeneratedChapter(
            category=stage,
            title=title,
            content=content_to_save,
            order_index=STAGE_TO_ORDER.get(stage, 999),
            source_segment_ids=all_source_ids,
        ))
    return results
# ── preview 命令 ──────────────────────────────────────────────
def cmd_preview(phone: str, batch_size: int, skip_llm_slots: bool):
    """Run the ``preview`` command for the user with the given phone number.

    Reads the user's active chapters and all conversation segments from the
    remote DB through an SSH tunnel, closes the connection, generates new
    chapters locally, and writes ``preview_<phone>.json`` and
    ``preview_<phone>.md`` into ``OUTPUT_DIR`` (the Markdown is also printed).
    Nothing is written to the database.

    Exits the process with status 1 when the LLM is not configured or the
    user is not found; returns early when the user has no segments.
    """
    # LLM
    llm = getattr(get_llm_provider(), "langchain_llm", None)
    if not llm:
        logger.error("LLM 未配置,请检查 .env 中的 DEEPSEEK_API_KEY")
        sys.exit(1)
    logger.info("LLM 就绪")
    tunnel = open_ssh_tunnel()
    try:
        db = make_session(tunnel)
        try:
            # Look up the user
            user = db.execute(select(User).where(User.phone == phone)).scalar_one_or_none()
            if not user:
                logger.error(f"未找到手机号 {phone} 的用户")
                sys.exit(1)
            # Copy plain values out now; the session is closed before they are used below.
            user_id = user.id
            nickname = user.nickname
            logger.info(f"用户: {nickname} (id={user_id})")
            # Read the currently active chapters with their sections; the
            # chapter body is reassembled from the sections.
            from sqlalchemy.orm import joinedload
            old_chapters = (
                db.execute(
                    select(Chapter)
                    .where(Chapter.user_id == user_id, Chapter.is_active == True)
                    .options(joinedload(Chapter.sections))
                    .order_by(Chapter.order_index)
                )
                .unique()
                .scalars()
                .all()
            )
            old_chapter_data = []
            for ch in old_chapters:
                content = ""
                if getattr(ch, "sections", None):
                    content = "\n\n".join(
                        (s.content or "").strip()
                        for s in sorted(ch.sections, key=lambda x: x.order_index)
                        if (s.content or "").strip()
                    )
                content_len = len(content)
                # Preview is the first 200 characters of the body.
                content_preview = (content[:200] + "") if content_len > 200 else content
                old_chapter_data.append({
                    "category": ch.category,
                    "title": ch.title,
                    "content_len": content_len,
                    "content_preview": content_preview,
                })
            logger.info(f"现有章节: {len(old_chapters)}")
            # Read all of the user's segments, oldest first
            segments_raw = (
                db.execute(
                    select(Segment.id, Segment.transcript_text)
                    .join(Conversation, Segment.conversation_id == Conversation.id)
                    .where(Conversation.user_id == user_id)
                    .order_by(Segment.created_at.asc())
                )
                .all()
            )
            logger.info(f"历史段落: {len(segments_raw)}")
            if not segments_raw:
                logger.warning("没有对话段落,无需处理")
                return
        finally:
            db.close()
    finally:
        tunnel.stop()
    # Generate the new chapters locally (no DB needed from here on)
    seg_tuples = [(row[0], row[1]) for row in segments_raw]
    new_chapters = generate_chapters_in_memory(seg_tuples, llm, batch_size, skip_llm_slots)
    # Build the comparison payload
    new_chapter_data = []
    for ch in new_chapters:
        new_chapter_data.append({
            "category": ch.category,
            "title": ch.title,
            "content_len": len(ch.content),
            "content_preview": (ch.content[:200] + "") if len(ch.content) > 200 else ch.content,
            "content": ch.content,
            "order_index": ch.order_index,
            "source_segment_ids": ch.source_segment_ids,
        })
    result = PreviewResult(
        user_id=user_id,
        phone=phone,
        nickname=nickname,
        generated_at=datetime.now(timezone.utc).isoformat(),
        old_chapters=old_chapter_data,
        new_chapters=new_chapter_data,
    )
    # Save the JSON that the apply command reads back
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    json_path = os.path.join(OUTPUT_DIR, f"preview_{phone}.json")
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(asdict(result), f, ensure_ascii=False, indent=2)
    logger.info(f"预览结果已保存: {json_path}")
    # Write the comparison Markdown
    md_path = os.path.join(OUTPUT_DIR, f"preview_{phone}.md")
    md_lines = _build_comparison_markdown(result)
    with open(md_path, "w", encoding="utf-8") as f:
        f.write(md_lines)
    logger.info(f"对比 Markdown 已保存: {md_path}")
    # Also print it to the terminal
    print("\n" + md_lines)
def _build_comparison_markdown(result: PreviewResult) -> str:
lines = []
lines.append(f"# 回忆录重整对比 — {result.nickname} ({result.phone})")
lines.append(f"\n生成时间: {result.generated_at}\n")
# 总览表格
lines.append("## 总览\n")
lines.append("| 阶段 | 旧标题 | 旧字数 | 新标题 | 新字数 | 变化 |")
lines.append("|------|--------|--------|--------|--------|------|")
old_map = {ch["category"]: ch for ch in result.old_chapters}
new_map = {ch["category"]: ch for ch in result.new_chapters}
all_stages = list(dict.fromkeys(
[ch["category"] for ch in result.old_chapters]
+ [ch["category"] for ch in result.new_chapters]
))
total_old = 0
total_new = 0
for stage in all_stages:
old = old_map.get(stage)
new = new_map.get(stage)
old_title = old["title"] if old else ""
old_len = old["content_len"] if old else 0
new_title = new["title"] if new else ""
new_len = new["content_len"] if new else 0
total_old += old_len
total_new += new_len
diff = new_len - old_len
diff_str = f"+{diff}" if diff >= 0 else str(diff)
lines.append(f"| {stage} | {old_title} | {old_len} | {new_title} | {new_len} | {diff_str} |")
diff_total = total_new - total_old
diff_total_str = f"+{diff_total}" if diff_total >= 0 else str(diff_total)
lines.append(f"| **合计** | | **{total_old}** | | **{total_new}** | **{diff_total_str}** |")
# 各章节详细对比
lines.append("\n---\n")
lines.append("## 各章节详细对比\n")
for stage in all_stages:
old = old_map.get(stage)
new = new_map.get(stage)
lines.append(f"### {stage}\n")
lines.append("**旧内容预览:**\n")
if old:
lines.append(f"> {old['content_preview']}\n")
else:
lines.append("> (无)\n")
lines.append("**新内容预览:**\n")
if new:
lines.append(f"> {new['content_preview']}\n")
else:
lines.append("> (无)\n")
# 新章节完整内容
lines.append("\n---\n")
lines.append("## 新章节完整内容\n")
for ch in result.new_chapters:
lines.append(f"### {ch['title']} ({ch['category']}, {ch['content_len']} 字)\n")
lines.append(ch["content"])
lines.append("\n")
return "\n".join(lines)
# ── apply 命令 ────────────────────────────────────────────────
def cmd_apply(phone: str):
    """Write the chapters saved by the last ``preview`` run into the remote DB.

    Reads ``preview_<phone>.json`` from ``OUTPUT_DIR``, asks for interactive
    confirmation (only the answer ``yes`` proceeds), then in one transaction:

    1. marks the user's active chapters inactive,
    2. replaces the user's ``MemoirState`` with a fresh default state,
    3. inserts the new chapters and their sections (without images),
    4. flags the user's most recently updated ``Book`` as updated, creating
       one if none exists,
    5. marks all of the user's segments as processed.

    Exits the process with status 1 when the preview file is missing.  On any
    error before the commit nothing is committed; the session is closed and
    the tunnel stopped in all cases.
    """
    json_path = os.path.join(OUTPUT_DIR, f"preview_{phone}.json")
    if not os.path.exists(json_path):
        logger.error(f"未找到预览文件: {json_path}")
        logger.error("请先运行 preview 命令")
        sys.exit(1)
    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)
    user_id = data["user_id"]
    new_chapters = data["new_chapters"]
    logger.info(f"将写入 {len(new_chapters)} 个新章节到用户 {data['nickname']} ({user_id})")

    # Interactive confirmation before touching the remote database.
    answer = input("\n确认写入远程数据库? (yes/no): ").strip().lower()
    if answer != "yes":
        logger.info("已取消")
        return

    tunnel = open_ssh_tunnel()
    try:
        db = make_session(tunnel)
        try:
            # 1. Old chapters -> inactive
            old_active = (
                db.execute(
                    select(Chapter).where(
                        Chapter.user_id == user_id, Chapter.is_active == True
                    )
                )
                .scalars()
                .all()
            )
            for ch in old_active:
                ch.is_active = False
            logger.info(f"已将 {len(old_active)} 个旧章节标记为 inactive")

            # 2. Delete the old MemoirState
            old_state = db.execute(
                select(MemoirState).where(MemoirState.user_id == user_id)
            ).scalar_one_or_none()
            if old_state:
                db.delete(old_state)
                # The unit of work emits INSERTs before DELETEs within a single
                # flush; flushing here sends the DELETE before the replacement
                # row for the same user is inserted.
                db.flush()
                logger.info("已删除旧 MemoirState")

            # 3. Create a fresh MemoirState
            ds = default_state()
            db.add(MemoirState(
                id=str(uuid.uuid4()),
                user_id=user_id,
                stage_order=ds.stage_order,
                current_stage=ds.current_stage,
                covered_stages=ds.covered_stages,
                slots={k: {sk: sv.model_dump() for sk, sv in v.items()} for k, v in ds.slots.items()},
            ))

            # 4. Insert the new chapters; the body goes into chapter_sections
            #    and no images are attached at this point.
            last_chapter_id = None
            for ch_data in new_chapters:
                ch_id = str(uuid.uuid4())
                db.add(Chapter(
                    id=ch_id,
                    user_id=user_id,
                    title=ch_data["title"],
                    order_index=ch_data["order_index"],
                    status="completed",
                    category=ch_data["category"],
                    cover_image=None,
                    is_new=True,
                    source_segments=ch_data.get("source_segment_ids", []),
                ))
                # Flush so the chapter row exists before its sections reference it.
                db.flush()
                content = ch_data.get("content") or ""
                sections = split_narrative_to_sections(content)
                if sections:
                    section_texts = [(seg.get("content") or "").strip() for seg in sections]
                else:
                    # No placeholders found: the whole narrative is one section.
                    section_texts = [content.strip()]
                for order_idx, section_text in enumerate(section_texts):
                    db.add(ChapterSection(
                        id=uuid.uuid4().hex,  # 32 hex chars, same as the dash-stripped UUID
                        chapter_id=ch_id,
                        order_index=order_idx,
                        content=section_text,
                        image=None,
                    ))
                last_chapter_id = ch_id
                logger.info(f" 新建章节: [{ch_data['category']}] {ch_data['title']} ({ch_data['content_len']} 字)")

            # 5. Update the Book.  limit(1) picks the most recently updated book,
            #    so scalar_one_or_none() cannot raise when the user has several.
            book = db.execute(
                select(Book)
                .where(Book.user_id == user_id)
                .order_by(Book.updated_at.desc())
                .limit(1)
            ).scalar_one_or_none()
            if not book:
                book = Book(
                    id=str(uuid.uuid4()),
                    user_id=user_id,
                    title="我的回忆录",
                    total_pages=0,
                    total_words=0,
                    cover_image_url=None,
                )
                db.add(book)
            book.has_update = True
            if last_chapter_id:
                book.last_update_chapter_id = last_chapter_id

            # 6. Mark every segment of the user as processed
            segs = (
                db.execute(
                    select(Segment)
                    .join(Conversation, Segment.conversation_id == Conversation.id)
                    .where(Conversation.user_id == user_id)
                )
                .scalars()
                .all()
            )
            for seg in segs:
                seg.processed = True

            db.commit()
            logger.info("远程数据库写入完成!")
        finally:
            db.close()
    finally:
        tunnel.stop()
# ── CLI 入口 ──────────────────────────────────────────────────
def main():
    """Parse the command line and dispatch to the ``preview`` or ``apply`` command."""
    parser = argparse.ArgumentParser(description="重新整理用户历史对话为回忆录章节(远程预览+写入)")
    subcommands = parser.add_subparsers(dest="command", required=True)

    # preview: read-only against the remote DB, generation happens locally
    preview_cmd = subcommands.add_parser("preview", help="预览:读取远程 DB本地生成新章节输出对比")
    preview_cmd.add_argument("--phone", required=True, help="用户手机号")
    preview_cmd.add_argument("--batch-size", type=int, default=5, help="每批段落数(默认 5")
    preview_cmd.add_argument("--skip-llm-slots", action="store_true", help="跳过 LLM slot 提取")

    # apply: writes the saved preview result to the remote DB
    apply_cmd = subcommands.add_parser("apply", help="写入:将 preview 结果写入远程 DB")
    apply_cmd.add_argument("--phone", required=True, help="用户手机号(需与 preview 一致)")

    args = parser.parse_args()
    if args.command == "apply":
        cmd_apply(phone=args.phone)
        return
    if args.command == "preview":
        cmd_preview(
            phone=args.phone,
            batch_size=args.batch_size,
            skip_llm_slots=args.skip_llm_slots,
        )
# Run the CLI only when this module is executed as a script.
if __name__ == "__main__":
    main()

View File

@@ -1,155 +0,0 @@
"""
一键执行 chapter_sections 迁移:先执行 SQL 建表/加列,再回填数据并删列。
依赖:.env 中 DATABASE_URL以及 python-dotenv。
用法(在 api 目录下):
python -m scripts.run_chapter_sections_migration
"""
import json
import os
import sys
import uuid
from pathlib import Path
# 配置由 app.core.config 加载DB 使用 psycopg 同步驱动
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
os.chdir(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from sqlalchemy import create_engine, text
from sqlalchemy.engine import Engine
from app.core.config import settings
from app.core.db import ensure_psycopg_url
from app.core.logging import get_logger, setup_logging
setup_logging()
logger = get_logger(__name__)
def get_engine() -> Engine:
    """Create the engine for this migration.

    Uses ``settings.migration_database_url`` when it is set to a non-blank
    value and ``settings.database_url`` otherwise; the URL is passed through
    ``ensure_psycopg_url`` and the pool is created with ``pool_pre_ping``.
    """
    migration_url = (settings.migration_database_url or "").strip()
    target_url = migration_url if migration_url else settings.database_url
    return create_engine(ensure_psycopg_url(target_url), pool_pre_ping=True)
def _split_sql_statements(sql: str) -> list:
    """Split a SQL script into statements, each returned with a trailing ``;``.

    ``--`` comment lines at a statement boundary are skipped.  A statement
    starting with ``DO `` is cut after its second ``$$`` so the semicolons
    inside the PL/pgSQL body do not split it; everything else is cut at the
    next ``;``.  Text after the last ``;`` (or an unterminated ``DO`` block)
    is ignored.  Semicolons inside string literals are not recognised.
    """
    stmts = []
    rest = sql
    while rest:
        rest = rest.lstrip()
        if rest.startswith("--"):
            # Drop the comment line.
            rest = rest[rest.find("\n") + 1:] if "\n" in rest else ""
            continue
        if rest[:3].upper() == "DO ":
            opening = rest.find("$$")
            if opening == -1:
                break
            closing = rest.find("$$", opening + 2)
            if closing == -1:
                break
            stmts.append(rest[: closing + 2].strip() + ";")
            rest = rest[closing + 2:].lstrip().lstrip(";").lstrip()
            continue
        idx = rest.find(";")
        if idx == -1:
            break
        part = rest[:idx].strip()
        rest = rest[idx + 1:]
        if part and not part.startswith("--"):
            stmts.append(part + ";")
    return stmts


def run_sql_migration(engine: Engine):
    """Execute ``migrations/add_chapter_sections.sql`` statement by statement.

    All statements run in one transaction.  Each statement is wrapped in a
    SAVEPOINT so that an "already exists" error can be skipped without
    leaving the surrounding PostgreSQL transaction in the aborted state,
    which would make every following statement fail.  Any other error is
    re-raised and rolls the whole migration back.
    """
    sql_path = Path(__file__).parent.parent / "migrations" / "add_chapter_sections.sql"
    stmts = _split_sql_statements(sql_path.read_text(encoding="utf-8"))
    with engine.begin() as conn:
        for number, statement in enumerate(stmts, 1):
            try:
                with conn.begin_nested():
                    conn.execute(text(statement))
            except Exception as e:
                if "already exists" in str(e).lower():
                    logger.info(" SQL %s (已存在)", number)
                    continue
                raise
            logger.info(" SQL %s OK", number)
    logger.info("1/2 SQL 迁移完成")
def run_data_migration(engine: Engine):
    """Backfill ``chapter_sections`` from the legacy chapter columns and drop them.

    Does nothing (apart from logging) when ``chapters.content`` no longer
    exists.  Otherwise every chapter with non-blank content is split with
    ``split_narrative_to_sections``; each segment becomes a section row,
    segments with ``placeholder_info`` take the chapter's legacy images in
    order, and the first usable image becomes ``chapters.cover_image``.
    Chapters are committed one by one; finally ``chapters.content`` and
    ``chapters.images`` are dropped.
    """
    from app.features.memoir.memoir_images.parser import split_narrative_to_sections

    insert_plain_section = text("""
        INSERT INTO chapter_sections (id, chapter_id, order_index, content, image, updated_at)
        VALUES (:id, :ch_id, 0, :content, NULL, NOW())
    """)
    insert_section = text("""
        INSERT INTO chapter_sections (id, chapter_id, order_index, content, image, updated_at)
        VALUES (:id, :ch_id, :ord, :content, CAST(:img AS jsonb), NOW())
    """)
    set_cover = text("UPDATE chapters SET cover_image = CAST(:img AS jsonb) WHERE id = :id")

    with engine.connect() as conn:
        content_column = conn.execute(text("""
            SELECT column_name FROM information_schema.columns
            WHERE table_schema = 'public' AND table_name = 'chapters' AND column_name = 'content'
        """)).fetchone()
        if content_column is None:
            logger.info("chapters.content 已不存在,跳过数据迁移")
            return

        chapters = conn.execute(text("""
            SELECT id, content, images FROM chapters WHERE content IS NOT NULL AND trim(content) != ''
        """)).fetchall()

        for chapter in chapters:
            chapter_id, narrative, raw_images = chapter[0], chapter[1], chapter[2]

            # Legacy images arrive either as a JSON string or as an already decoded list.
            legacy_images = []
            if isinstance(raw_images, str):
                try:
                    legacy_images = json.loads(raw_images)
                except Exception:
                    legacy_images = []
            elif isinstance(raw_images, list):
                legacy_images = raw_images

            segments = split_narrative_to_sections(narrative or "")
            if not segments:
                # No placeholders: one section holding the whole text, no image.
                conn.execute(insert_plain_section, {
                    "id": uuid.uuid4().hex,
                    "ch_id": chapter_id,
                    "content": (narrative or "").strip(),
                })
                conn.commit()
                logger.info("章节 %s: 1 条 section无图", chapter_id)
                continue

            cover_json = None
            next_image = 0
            for position, segment in enumerate(segments):
                new_section_id = uuid.uuid4().hex
                segment_text = segment.get("content") or ""
                section_image = None
                if segment.get("placeholder_info") is not None and next_image < len(legacy_images):
                    candidate = legacy_images[next_image]
                    if isinstance(candidate, dict):
                        section_image = json.dumps(candidate)
                    if cover_json is None and section_image:
                        cover_json = section_image
                    # The slot is consumed even when the entry was not usable.
                    next_image += 1
                conn.execute(insert_section, {
                    "id": new_section_id,
                    "ch_id": chapter_id,
                    "ord": position,
                    "content": segment_text,
                    "img": section_image,
                })

            if cover_json:
                conn.execute(set_cover, {"img": cover_json, "id": chapter_id})
            conn.commit()
            logger.info("章节 %s: %d 条 sections", chapter_id, len(segments))

        for legacy_column in ("content", "images"):
            conn.execute(text(f"ALTER TABLE chapters DROP COLUMN IF EXISTS {legacy_column}"))
        conn.commit()
        logger.info("已删除 chapters.content 与 chapters.images")
    logger.info("2/2 数据迁移完成")
# Script entry point: apply the SQL schema migration first, then backfill the data.
if __name__ == "__main__":
    logger.info("开始 chapter_sections 迁移…")
    engine = get_engine()
    run_sql_migration(engine)
    run_data_migration(engine)
    logger.info("迁移全部完成")

View File

@@ -1,196 +0,0 @@
"""
将 chapters.cover_image 与 chapter_sections.image 的 JSON 数据迁移到 memoir_images 表(字段独立列)。
前置:先执行 api/migrations/add_memoir_images_table.sql 建表。
用法(在项目根目录或 api 目录下):
python -m api.scripts.run_memoir_images_migration
"""
import json
import os
import sys
import uuid
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
os.chdir(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from sqlalchemy import create_engine, text
from sqlalchemy.engine import Engine
from urllib.parse import urlsplit
from app.core.config import settings
from app.core.db import ensure_psycopg_url
from app.core.logging import get_logger, setup_logging
setup_logging()
logger = get_logger(__name__)
def get_engine() -> Engine:
    """Create the engine for this migration.

    Uses ``settings.migration_database_url`` when it is set to a non-blank
    value and ``settings.database_url`` otherwise; the URL is passed through
    ``ensure_psycopg_url`` and the pool is created with ``pool_pre_ping``.
    """
    migration_url = (settings.migration_database_url or "").strip()
    target_url = migration_url if migration_url else settings.database_url
    return create_engine(ensure_psycopg_url(target_url), pool_pre_ping=True)
def _row_from_image_json(img: dict | None, chapter_id: str, section_id: str | None, order_index: int) -> dict | None:
if not img or not isinstance(img, dict):
return None
placeholder = (img.get("placeholder") or "").strip()
description = (img.get("description") or "").strip()
if not placeholder and not description:
return None
if not placeholder:
placeholder = f'{{{{IMAGE:{description}}}}}'
created = img.get("created_at")
updated = img.get("updated_at")
if isinstance(created, str) and created:
try:
from datetime import datetime
created = datetime.fromisoformat(created.replace("Z", "+00:00"))
except Exception:
created = None
if isinstance(updated, str) and updated:
try:
from datetime import datetime
updated = datetime.fromisoformat(updated.replace("Z", "+00:00"))
except Exception:
updated = None
return {
"id": str(uuid.uuid4()).replace("-", "")[:32],
"chapter_id": chapter_id,
"section_id": section_id,
"order_index": order_index,
"placeholder": placeholder or None,
"description": description or None,
"status": (img.get("status") or "pending").strip() or "pending",
"prompt": img.get("prompt") or None,
"url": img.get("url") or None,
"storage_key": img.get("storage_key") or None,
"provider": img.get("provider") or None,
"style": img.get("style") or None,
"size": img.get("size") or None,
"error": img.get("error") or None,
"retryable": img.get("retryable") if img.get("retryable") is not None else None,
"created_at": created,
"updated_at": updated,
}
def _split_sql_statements(sql: str) -> list:
    """Split a SQL script into statements, each returned with a trailing ``;``.

    ``--`` comment lines at a statement boundary are skipped.  A statement
    starting with ``DO `` is cut after its second ``$$`` so the semicolons
    inside the PL/pgSQL body do not split it; everything else is cut at the
    next ``;``.  Text after the last ``;`` (or an unterminated ``DO`` block)
    is ignored.  Semicolons inside string literals are not recognised.
    """
    stmts = []
    rest = sql
    while rest:
        rest = rest.lstrip()
        if rest.startswith("--"):
            # Drop the comment line.
            rest = rest[rest.find("\n") + 1:] if "\n" in rest else ""
            continue
        if rest[:3].upper() == "DO ":
            opening = rest.find("$$")
            if opening == -1:
                break
            closing = rest.find("$$", opening + 2)
            if closing == -1:
                break
            stmts.append(rest[: closing + 2].strip() + ";")
            rest = rest[closing + 2:].lstrip().lstrip(";").lstrip()
            continue
        idx = rest.find(";")
        if idx == -1:
            break
        part = rest[:idx].strip()
        rest = rest[idx + 1:]
        if part and not part.startswith("--"):
            stmts.append(part + ";")
    return stmts


def run_sql_migration(engine: Engine):
    """Execute ``migrations/add_memoir_images_table.sql`` statement by statement.

    Logs a warning and returns when the SQL file is missing.  Otherwise all
    statements run in one transaction, each inside a SAVEPOINT so that an
    "already exists" error can be skipped without leaving the surrounding
    PostgreSQL transaction in the aborted state (which would make every
    following statement fail).  Any other error is re-raised and rolls the
    whole migration back.
    """
    from pathlib import Path

    sql_path = Path(__file__).parent.parent / "migrations" / "add_memoir_images_table.sql"
    if not sql_path.exists():
        logger.warning("未找到 %s,请先执行该 SQL 建表", sql_path)
        return
    stmts = _split_sql_statements(sql_path.read_text(encoding="utf-8"))
    with engine.begin() as conn:
        for number, statement in enumerate(stmts, 1):
            try:
                with conn.begin_nested():
                    conn.execute(text(statement))
            except Exception as e:
                if "already exists" in str(e).lower():
                    logger.info(" SQL %s (已存在)", number)
                    continue
                raise
            logger.info(" SQL %s OK", number)
    logger.info("1/2 SQL 迁移完成")
def run_data_migration(engine: Engine):
    """Copy legacy image JSON into ``memoir_images``, one row per image.

    Pass 1 migrates ``chapters.cover_image`` (stored with ``section_id`` NULL
    and ``order_index`` 0).  Pass 2 migrates ``chapter_sections.image``
    (stored with the section's ``order_index + 1``).  An image is skipped when
    its JSON is unusable or when a matching ``memoir_images`` row already
    exists; every inserted row is committed immediately.
    """
    insert_image = text("""
        INSERT INTO memoir_images (
        id, chapter_id, section_id, order_index,
        placeholder, description, status, prompt, url, storage_key,
        provider, style, size, error, retryable, created_at, updated_at
        ) VALUES (
        :id, :chapter_id, :section_id, :order_index,
        :placeholder, :description, :status, :prompt, :url, :storage_key,
        :provider, :style, :size, :error, :retryable, :created_at, :updated_at
        )
    """)
    cover_exists = text("SELECT 1 FROM memoir_images WHERE chapter_id = :ch_id AND section_id IS NULL")
    section_image_exists = text("SELECT 1 FROM memoir_images WHERE section_id = :sec_id")

    # Pass 1: chapter covers
    with engine.connect() as conn:
        covers = conn.execute(text("""
            SELECT id, cover_image FROM chapters WHERE cover_image IS NOT NULL
        """))
        migrated_covers = 0
        for record in covers:
            chapter_id, cover = record[0], record[1]
            if isinstance(cover, str):
                try:
                    cover = json.loads(cover)
                except Exception:
                    cover = None
            if not (cover and isinstance(cover, dict)):
                continue
            if conn.execute(cover_exists, {"ch_id": chapter_id}).fetchone():
                continue
            params = _row_from_image_json(cover, chapter_id, None, 0)
            if not params:
                continue
            conn.execute(insert_image, dict(params))
            conn.commit()
            migrated_covers += 1
        logger.info("封面图迁移: %d", migrated_covers)

    # Pass 2: section images
    with engine.connect() as conn:
        section_rows = conn.execute(text("""
            SELECT id, chapter_id, order_index, image FROM chapter_sections WHERE image IS NOT NULL
        """))
        migrated_sections = 0
        for record in section_rows:
            section_id, chapter_id, section_order, image = record[0], record[1], record[2], record[3]
            if isinstance(image, str):
                try:
                    image = json.loads(image)
                except Exception:
                    image = None
            if not (image and isinstance(image, dict)):
                continue
            if conn.execute(section_image_exists, {"sec_id": section_id}).fetchone():
                continue
            params = _row_from_image_json(image, chapter_id, section_id, section_order + 1)
            if not params:
                continue
            conn.execute(insert_image, dict(params))
            conn.commit()
            migrated_sections += 1
        logger.info("段落配图迁移: %d", migrated_sections)
    logger.info("2/2 数据迁移完成")
# Script entry point: create the memoir_images table first, then copy the image data into it.
if __name__ == "__main__":
    logger.info("开始 memoir_images 迁移…")
    engine = get_engine()
    run_sql_migration(engine)
    run_data_migration(engine)
    logger.info("迁移全部完成")