2026-03-20 10:30:07 +08:00
|
|
|
|
"""
|
2026-03-22 16:45:57 +08:00
|
|
|
|
asset:// 与正文占位符清理。
|
2026-03-20 10:30:07 +08:00
|
|
|
|
|
2026-03-22 16:45:57 +08:00
|
|
|
|
从正文移除 {{IMAGE:...}} / {{{{IMAGE:...}}}}(历史正文可能仍含此类标记)。
|
2026-03-20 10:30:07 +08:00
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
import re
|
|
|
|
|
|
from typing import Callable
|
|
|
|
|
|
|
|
|
|
|
|
# IMAGE placeholders in either brace depth. The four-brace alternative is tried
# first, so a ``{{{{IMAGE:...}}}}`` marker is consumed whole instead of leaving
# stray braces around a two-brace match. DOTALL lets the placeholder body span
# line breaks.
_PLACEHOLDER_RE = re.compile(
    r"\{\{\{\{IMAGE:(.*?)\}\}\}\}|\{\{IMAGE:(.*?)\}\}",
    flags=re.DOTALL,
)

# Markdown image whose target is an internal asset: ![caption](asset://<id>).
# Group 1 is the caption (may be empty); group 2 is the asset id, limited to
# ASCII letters, digits, "_" and "-".
_ASSET_REF_RE = re.compile(r"!\[([^\]]*)\]\(asset://([a-zA-Z0-9_-]+)\)")

# A run of three or more consecutive newline characters.
_BLANK_RUN_RE = re.compile(r"\n{3,}")
|
2026-03-20 10:30:07 +08:00
|
|
|
|
|
|
|
|
|
|
|
2026-03-22 16:45:57 +08:00
|
|
|
|
def strip_image_placeholders(text: str | None) -> str:
    """Return *text* with every IMAGE placeholder removed.

    Everything matched by ``_PLACEHOLDER_RE`` is deleted; the rest of the
    markdown is kept as-is and the result is stripped of leading and trailing
    whitespace. ``None`` or an empty string yields ``""``.
    """
    if text:
        without_placeholders = _PLACEHOLDER_RE.sub("", text)
        return without_placeholders.strip()
    return ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def parse_asset_refs(markdown: str) -> list[tuple[int, int, str, str]]:
    """Locate every ``_ASSET_REF_RE`` match in *markdown*.

    Returns one ``(start, end, caption, asset_id)`` tuple per match, in the
    order the matches occur. ``start``/``end`` are the match offsets within
    the string. A falsy *markdown* is treated as an empty string.
    """
    source = markdown or ""
    return [
        (hit.start(), hit.end(), hit.group(1) or "", hit.group(2) or "")
        for hit in _ASSET_REF_RE.finditer(source)
    ]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def collect_asset_ids_from_markdown(markdown: str) -> list[str]:
    """List the asset ids of all asset image references in *markdown*.

    Ids are returned in order of appearance and duplicates are kept. A falsy
    *markdown* is treated as an empty string.
    """
    hits = _ASSET_REF_RE.finditer(markdown or "")
    candidate_ids = (hit.group(2) for hit in hits)
    return [asset_id for asset_id in candidate_ids if asset_id]
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-03-20 17:25:42 +08:00
|
|
|
|
def strip_asset_image_refs_from_markdown(markdown: str | None) -> str:
    """Delete every ``![...](asset://...)`` reference and tidy blank lines.

    After the references are removed, each run of three or more newlines is
    collapsed to exactly two, and the result is stripped of leading and
    trailing whitespace. ``None``, empty, or whitespace-only input yields
    ``""``.

    Used for the story single-primary policy: new versions / backfill must
    not accumulate multiple inline asset images.
    """
    is_blank = not markdown or not str(markdown).strip()
    if is_blank:
        return ""
    without_refs = _ASSET_REF_RE.sub("", markdown)
    collapsed = _BLANK_RUN_RE.sub("\n\n", without_refs)
    return collapsed.strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-03-20 10:30:07 +08:00
|
|
|
|
def collect_asset_ids_for_chapter(chapter) -> set[str]:
    """Gather the asset ids associated with *chapter*.

    The result combines:

    * ids referenced in the chapter's ``canonical_markdown``;
    * the chapter's ``cover_asset_id`` (stringified), when truthy;
    * for each entry of ``story_links`` that has a ``story``: ids referenced
      in that story's ``canonical_markdown``, plus the ``asset_id`` of every
      entry in its ``image_intents`` whose ``intent_role`` is ``"primary"``.

    All attributes are read with ``getattr`` defaults, so a missing or falsy
    attribute simply contributes nothing.
    """
    chapter_body = getattr(chapter, "canonical_markdown", None) or ""
    found: set[str] = set(collect_asset_ids_from_markdown(chapter_body))

    cover_id = getattr(chapter, "cover_asset_id", None)
    if cover_id:
        found.add(str(cover_id))

    for link in getattr(chapter, "story_links", None) or []:
        story = getattr(link, "story", None)
        if story is None:
            continue

        story_body = getattr(story, "canonical_markdown", None) or ""
        found.update(collect_asset_ids_from_markdown(story_body))

        for intent in getattr(story, "image_intents", None) or []:
            # Only primary intents contribute their asset id.
            if getattr(intent, "intent_role", None) != "primary":
                continue
            primary_id = getattr(intent, "asset_id", None)
            if primary_id:
                found.add(str(primary_id))

    return found
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def collect_asset_ids_for_chapters(chapters: list) -> set[str]:
    """Return the union of ``collect_asset_ids_for_chapter`` over *chapters*.

    A falsy *chapters* (``None`` or empty) yields an empty set.
    """
    per_chapter = (collect_asset_ids_for_chapter(ch) for ch in chapters or [])
    merged: set[str] = set().union(*per_chapter)
    return merged
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def split_markdown_by_asset_refs(
    markdown: str,
    resolve_asset: Callable[[str], str | None],
) -> list[dict]:
    """Split *markdown* into alternating text and image blocks.

    The text between asset image references becomes
    ``{"type": "text", "value": ...}`` blocks (whitespace-stripped; empty
    pieces are skipped). Each reference whose id *resolve_asset* maps to a
    truthy URL becomes ``{"type": "image", "url": ..., "caption": ...}``.
    References that do not resolve are omitted from the output entirely.
    A falsy *markdown* yields an empty list.
    """
    source = markdown or ""
    blocks: list[dict] = []

    def add_text(chunk: str) -> None:
        # Whitespace-only pieces between references produce no block.
        trimmed = chunk.strip()
        if trimmed:
            blocks.append({"type": "text", "value": trimmed})

    cursor = 0
    for start, end, caption, asset_id in parse_asset_refs(source):
        add_text(source[cursor:start])
        url = resolve_asset(asset_id) if asset_id else None
        if url:
            blocks.append({"type": "image", "url": url, "caption": caption})
        cursor = end

    # Trailing text after the last reference (or the whole text if none).
    add_text(source[cursor:])
    return blocks
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def resolve_asset_refs_in_markdown(
|
|
|
|
|
|
markdown: str,
|
|
|
|
|
|
resolve_asset: Callable[[str], str | None],
|
|
|
|
|
|
) -> str:
|
|
|
|
|
|
if not markdown or not resolve_asset:
|
|
|
|
|
|
return markdown or ""
|
|
|
|
|
|
|
|
|
|
|
|
def repl(m):
|
|
|
|
|
|
caption, asset_id = m.group(1) or "", m.group(2) or ""
|
|
|
|
|
|
url = resolve_asset(asset_id) if asset_id else None
|
|
|
|
|
|
if url:
|
|
|
|
|
|
return f""
|
|
|
|
|
|
return m.group(0)
|
|
|
|
|
|
|
|
|
|
|
|
return _ASSET_REF_RE.sub(repl, markdown)
|