重构回忆录为 story-first / markdown-first 架构并整合图片意图与前端 UI 修复
本次 squash merge 将 codex-story-first-image-intent 的整体改动合入 development,核心内容包括:
1. 后端数据与迁移:新增 stories、story_versions、story_image_intents、chapter_cover_intents、assets 等模型与 Alembic 迁移,建立 story-first、markdown-first、asset-first 的主数据链路。
2. 生成与任务链:引入 StoryBuilderOrchestrator、ChapterComposerOrchestrator、story_image_tasks、chapter_cover_tasks,图片生成从正文占位符改为结构化 intent -> asset -> markdown 回填。
3. 并发与一致性:为 story/chapter intent 增加 claim_token、claimed_at、attempt_count,采用数据库原子 claim 为主、Redis 锁为辅,避免重复生成、锁误删和 processing 卡死。
4. Memoir 读写路径:章节 canonical_markdown 成为正文真源,列表/详情接口补齐 markdown、cover_asset、word_count 等字段,PDF 与 asset 解析链路同步升级。
5. Memory / Retrieval:扩展 transcript ingest、chunking、evidence 检索与 story 聚合基础设施,为后续 story-first RAG 与多 agent 编排提供底座。
6. App 端体验:章节页继续走 MarkdownRenderer 阅读链,同时吸收 fix3-19 的跨平台 UI glitch 修复;更新对话页、首页、文案资源与章节列表映射逻辑。
7. 测试与文档:补充 asset resolver、story image task、章节封面派发、markdown 映射等回归测试,并加入图片占位符退役设计文档。
This commit is contained in:
101
api/app/features/memoir/asset_resolver.py
Normal file
101
api/app/features/memoir/asset_resolver.py
Normal file
@@ -0,0 +1,101 @@
|
||||
"""
|
||||
asset:// 与旧占位符清理。
|
||||
|
||||
迁移与渲染共用:从正文移除 {{IMAGE:...}} / {{{{IMAGE:...}}}}。
|
||||
"""
|
||||
|
||||
import re
|
||||
from typing import Callable
|
||||
|
||||
# Matches a legacy image placeholder in either brace form:
# ``{{{{IMAGE:...}}}}`` or ``{{IMAGE:...}}``. The four-brace alternative is
# listed first so it is tried before the two-brace one. The payload is
# captured lazily, and DOTALL lets it span newlines.
_PLACEHOLDER_RE = re.compile(
    r"\{\{\{\{IMAGE:(.*?)\}\}\}\}|\{\{IMAGE:(.*?)\}\}",
    re.DOTALL,
)
|
||||
|
||||
# Matches a markdown image whose target is an asset reference:
# ``![caption](asset://<id>)``. Group 1 is the caption (may be empty);
# group 2 is the id, restricted to ASCII letters, digits, "_" and "-".
_ASSET_REF_RE = re.compile(r"!\[([^\]]*)\]\(asset://([a-zA-Z0-9_-]+)\)")
|
||||
|
||||
|
||||
def strip_legacy_image_placeholders(text: str | None) -> str:
    """Delete legacy IMAGE placeholders from *text*, keeping other markdown.

    Every match of ``_PLACEHOLDER_RE`` is replaced with the empty string and
    the result is stripped of leading and trailing whitespace. ``None`` or an
    empty string yields ``""``.
    """
    if text:
        cleaned = _PLACEHOLDER_RE.sub("", text)
        return cleaned.strip()
    return ""
|
||||
|
||||
|
||||
def parse_asset_refs(markdown: str) -> list[tuple[int, int, str, str]]:
    """Locate every ``![caption](asset://id)`` reference in *markdown*.

    Returns one ``(start, end, caption, asset_id)`` tuple per match, in order
    of appearance, where ``start``/``end`` are the match offsets within the
    input. A falsy *markdown* produces an empty list.
    """
    source = markdown or ""
    return [
        (hit.start(), hit.end(), hit.group(1) or "", hit.group(2) or "")
        for hit in _ASSET_REF_RE.finditer(source)
    ]
|
||||
|
||||
|
||||
def collect_asset_ids_from_markdown(markdown: str) -> list[str]:
    """Return the asset ids referenced in *markdown*, in order of appearance.

    Duplicates are kept; a falsy *markdown* produces an empty list.
    """
    found: list[str] = []
    for hit in _ASSET_REF_RE.finditer(markdown or ""):
        asset_id = hit.group(2)
        if asset_id:
            found.append(asset_id)
    return found
|
||||
|
||||
|
||||
def collect_asset_ids_for_chapter(chapter) -> set[str]:
    """Gather the asset ids a chapter refers to.

    Ids are taken from the chapter's ``canonical_markdown``, from the
    ``content`` of each entry in ``sections``, and from ``cover_asset_id``
    (converted with ``str``). Each attribute is read with ``getattr`` and a
    ``None`` default, so a missing or falsy attribute contributes nothing.
    """
    canonical = getattr(chapter, "canonical_markdown", None) or ""
    found: set[str] = set(collect_asset_ids_from_markdown(canonical))

    sections = getattr(chapter, "sections", None) or []
    for section in sections:
        section_body = getattr(section, "content", None) or ""
        found.update(collect_asset_ids_from_markdown(section_body))

    cover_id = getattr(chapter, "cover_asset_id", None)
    if not cover_id:
        return found
    found.add(str(cover_id))
    return found
|
||||
|
||||
|
||||
def collect_asset_ids_for_chapters(chapters: list) -> set[str]:
    """Return the union of ``collect_asset_ids_for_chapter`` over *chapters*.

    A falsy *chapters* (``None`` or empty) produces an empty set.
    """
    per_chapter = (collect_asset_ids_for_chapter(item) for item in chapters or [])
    return set().union(*per_chapter)
|
||||
|
||||
|
||||
def split_markdown_by_asset_refs(
    markdown: str,
    resolve_asset: Callable[[str], str | None],
) -> list[dict]:
    """Split *markdown* into an ordered list of text and image blocks.

    Text between asset references becomes ``{"type": "text", "value": ...}``
    (whitespace-stripped; empty pieces are skipped). Each asset reference for
    which ``resolve_asset(asset_id)`` returns a truthy URL becomes
    ``{"type": "image", "url": ..., "caption": ...}``; references that do not
    resolve produce no block at all.
    """
    source = markdown or ""
    blocks: list[dict] = []

    def emit_text(chunk: str) -> None:
        # Whitespace-only stretches between references are not worth a block.
        trimmed = chunk.strip()
        if trimmed:
            blocks.append({"type": "text", "value": trimmed})

    cursor = 0
    for start, end, caption, asset_id in parse_asset_refs(source):
        emit_text(source[cursor:start])
        url = resolve_asset(asset_id) if asset_id else None
        if url:
            blocks.append({"type": "image", "url": url, "caption": caption})
        cursor = end

    # Whatever follows the last reference (or the whole input if there were none).
    emit_text(source[cursor:])
    return blocks
|
||||
|
||||
|
||||
def resolve_asset_refs_in_markdown(
    markdown: str,
    resolve_asset: Callable[[str], str | None],
) -> str:
    """Rewrite ``![caption](asset://id)`` references to point at real URLs.

    For each asset reference in *markdown*, ``resolve_asset(asset_id)`` is
    called; when it returns a truthy URL the reference is replaced with
    ``![caption](url)``, otherwise the original reference text is left as-is.

    Args:
        markdown: Markdown text that may contain ``asset://`` image references.
        resolve_asset: Maps an asset id to a URL, or to ``None`` when the
            asset cannot be resolved.

    Returns:
        The markdown with resolvable references rewritten. If *markdown* is
        falsy, ``""`` is returned; if *resolve_asset* is falsy, *markdown* is
        returned unchanged.
    """
    if not markdown or not resolve_asset:
        return markdown or ""

    def repl(m: re.Match[str]) -> str:
        caption, asset_id = m.group(1) or "", m.group(2) or ""
        url = resolve_asset(asset_id) if asset_id else None
        if url:
            # Emit a regular markdown image; an empty replacement here would
            # silently delete every image that resolved successfully.
            return f"![{caption}]({url})"
        # Unresolved: keep the asset:// reference untouched.
        return m.group(0)

    return _ASSET_REF_RE.sub(repl, markdown)
|
||||
Reference in New Issue
Block a user