本次 squash merge 将 codex-story-first-image-intent 的整体改动合入 development,核心内容包括: 1. 后端数据与迁移:新增 stories、story_versions、story_image_intents、chapter_cover_intents、assets 等模型与 Alembic 迁移,建立 story-first、markdown-first、asset-first 的主数据链路。 2. 生成与任务链:引入 StoryBuilderOrchestrator、ChapterComposerOrchestrator、story_image_tasks、chapter_cover_tasks,图片生成从正文占位符改为结构化 intent -> asset -> markdown 回填。 3. 并发与一致性:为 story/chapter intent 增加 claim_token、claimed_at、attempt_count,采用数据库原子 claim 为主、Redis 锁为辅,避免重复生成、锁误删和 processing 卡死。 4. Memoir 读写路径:章节 canonical_markdown 成为正文真源,列表/详情接口补齐 markdown、cover_asset、word_count 等字段,PDF 与 asset 解析链路同步升级。 5. Memory / Retrieval:扩展 transcript ingest、chunking、evidence 检索与 story 聚合基础设施,为后续 story-first RAG 与多 agent 编排提供底座。 6. App 端体验:章节页继续走 MarkdownRenderer 阅读链,同时吸收 fix3-19 的跨平台 UI glitch 修复;更新对话页、首页、文案资源与章节列表映射逻辑。 7. 测试与文档:补充 asset resolver、story image task、章节封面派发、markdown 映射等回归测试,并加入图片占位符退役设计文档。
102 lines
3.0 KiB
Python
102 lines
3.0 KiB
Python
"""
|
||
asset:// 与旧占位符清理。

迁移与渲染共用:从正文移除 {{IMAGE:...}} / {{{{IMAGE:...}}}}。
|
||
"""
|
||
|
||
import re
|
||
from typing import Callable
|
||
|
||
# Legacy inline image placeholders, written either as {{IMAGE:...}} or as
# {{{{IMAGE:...}}}}.  The payload is matched lazily, and re.DOTALL lets it
# span line breaks.
_PLACEHOLDER_RE = re.compile(
    r"\{\{\{\{IMAGE:(.*?)\}\}\}\}|\{\{IMAGE:(.*?)\}\}",
    flags=re.DOTALL,
)

# Markdown image whose target is an asset:// reference.
# Group 1 is the caption (alt text); group 2 is the asset id, limited to
# ASCII letters, digits, underscore and hyphen.
_ASSET_REF_RE = re.compile(r"!\[([^\]]*)\]\(asset://([a-zA-Z0-9_-]+)\)")
|
||
|
||
|
||
def strip_legacy_image_placeholders(text: str | None) -> str:
    """Remove legacy IMAGE placeholders from *text*, keeping the other markdown.

    Every match of ``_PLACEHOLDER_RE`` is deleted and the result is returned
    with leading and trailing whitespace stripped.  ``None`` or an empty
    string yields ``""``.
    """
    cleaned = _PLACEHOLDER_RE.sub("", text) if text else ""
    return cleaned.strip()
|
||
|
||
|
||
def parse_asset_refs(markdown: str) -> list[tuple[int, int, str, str]]:
    """Locate every ``![caption](asset://<id>)`` reference in *markdown*.

    Returns one ``(start, end, caption, asset_id)`` tuple per match of
    ``_ASSET_REF_RE``, in order of appearance; ``start`` and ``end`` are the
    match offsets within the input.  A falsy *markdown* gives an empty list.
    """
    source = markdown or ""
    return [
        (hit.start(), hit.end(), hit.group(1) or "", hit.group(2) or "")
        for hit in _ASSET_REF_RE.finditer(source)
    ]
|
||
|
||
|
||
def collect_asset_ids_from_markdown(markdown: str) -> list[str]:
    """Return the asset ids referenced in *markdown*, in order of appearance.

    Duplicates are kept.  A falsy *markdown* gives an empty list.
    """
    # With two capture groups, findall yields (caption, asset_id) pairs.
    pairs = _ASSET_REF_RE.findall(markdown or "")
    return [asset_id for _caption, asset_id in pairs if asset_id]
|
||
|
||
|
||
def collect_asset_ids_for_chapter(chapter) -> set[str]:
    """Collect the asset ids a chapter refers to.

    Ids come from the chapter's ``canonical_markdown``, from the ``content``
    of each entry in ``sections``, and from ``cover_asset_id`` (converted
    with ``str``).  Every attribute is read with ``getattr`` and treated as
    empty when missing or falsy.
    """
    texts = [getattr(chapter, "canonical_markdown", None) or ""]
    texts.extend(
        getattr(section, "content", None) or ""
        for section in getattr(chapter, "sections", None) or []
    )

    asset_ids: set[str] = {
        asset_id
        for text in texts
        for asset_id in collect_asset_ids_from_markdown(text)
    }

    cover_id = getattr(chapter, "cover_asset_id", None)
    if cover_id:
        asset_ids.add(str(cover_id))
    return asset_ids
|
||
|
||
|
||
def collect_asset_ids_for_chapters(chapters: list) -> set[str]:
    """Return the union of ``collect_asset_ids_for_chapter`` over *chapters*.

    A falsy *chapters* gives an empty set.
    """
    per_chapter = (collect_asset_ids_for_chapter(item) for item in chapters or [])
    return set().union(*per_chapter)
|
||
|
||
|
||
def split_markdown_by_asset_refs(
    markdown: str,
    resolve_asset: Callable[[str], str | None],
) -> list[dict]:
    """Split *markdown* into an ordered list of text and image blocks.

    Text between ``asset://`` image references becomes
    ``{"type": "text", "value": ...}`` with surrounding whitespace stripped;
    runs that are empty after stripping are skipped.  Each reference whose id
    *resolve_asset* maps to a truthy URL becomes
    ``{"type": "image", "url": ..., "caption": ...}``.  References that do
    not resolve produce no block at all.

    Args:
        markdown: Source markdown; a falsy value gives an empty list.
        resolve_asset: Maps an asset id to a URL, or a falsy value when the
            asset cannot be resolved.

    Returns:
        The blocks in document order.
    """
    source = markdown or ""
    blocks: list[dict] = []

    def push_text(fragment: str) -> None:
        # Whitespace-only fragments never become a block.
        stripped = fragment.strip()
        if stripped:
            blocks.append({"type": "text", "value": stripped})

    cursor = 0
    for start, end, caption, asset_id in parse_asset_refs(source):
        push_text(source[cursor:start])
        url = resolve_asset(asset_id) if asset_id else None
        if url:
            blocks.append({"type": "image", "url": url, "caption": caption})
        cursor = end

    # Whatever follows the last reference (or the whole text when there is none).
    push_text(source[cursor:])
    return blocks
|
||
|
||
|
||
def resolve_asset_refs_in_markdown(
    markdown: str,
    resolve_asset: Callable[[str], str | None],
) -> str:
    """Rewrite ``asset://`` image references in *markdown* to resolved URLs.

    Each ``![caption](asset://<id>)`` whose id *resolve_asset* maps to a
    truthy URL is replaced by ``![caption](<url>)``.  References that do not
    resolve are left exactly as written.

    Args:
        markdown: Source markdown.
        resolve_asset: Maps an asset id to a URL, or a falsy value when the
            asset cannot be resolved.

    Returns:
        The rewritten markdown.  When *markdown* or *resolve_asset* is falsy
        the input is returned unchanged (``""`` for a falsy *markdown*).
    """
    if not markdown or not resolve_asset:
        return markdown or ""

    def repl(m) -> str:
        caption, asset_id = m.group(1) or "", m.group(2) or ""
        url = resolve_asset(asset_id) if asset_id else None
        if url:
            # Emit a regular markdown image: keep the caption and swap only
            # the target.  Returning an empty string here would delete every
            # successfully resolved image from the text.
            return f"![{caption}]({url})"
        return m.group(0)

    return _ASSET_REF_RE.sub(repl, markdown)
|