# api/app/features/memoir/memoir_images/parser.py

import re
from typing import Any, Optional

from .schema import IMAGE_STATUS_PENDING

# Matches an image placeholder written either with four braces,
# ``{{{{IMAGE:description}}}}``, or with two braces, ``{{IMAGE:description}}``.
# The four-brace alternative is listed first so it is tried before the
# two-brace one at each position. The description is captured in group 1
# (four-brace form) or group 2 (two-brace form); the capture is non-greedy,
# and re.DOTALL lets it span line breaks.
PLACEHOLDER_RE = re.compile(
    r"\{\{\{\{IMAGE:(.*?)\}\}\}\}|\{\{IMAGE:(.*?)\}\}",
    re.DOTALL,
)


def parse_image_placeholders(
    content: str, max_images: Optional[int] = None
) -> list[dict[str, Any]]:
    """
    Extract image placeholders from ``content`` in order of appearance.

    Placeholders whose description is empty after stripping whitespace are
    ignored and do not count towards ``max_images``.

    Args:
        content: Text to scan. A falsy value (``None`` or ``""``) is treated
            as empty text.
        max_images: Maximum number of placeholders to return. ``None`` means
            no limit; zero or a negative value yields an empty list.

    Returns:
        A list of dicts, one per accepted placeholder, each with:
        - ``index``: zero-based position among the accepted placeholders.
        - ``description``: the stripped description text.
        - ``placeholder``: the exact matched placeholder text.
        - ``start_offset``: offset of the match within ``content``.
    """
    # The cap below is only checked after an item has been appended, so a
    # non-positive cap has to be rejected up front to really return nothing.
    if max_images is not None and max_images <= 0:
        return []

    items: list[dict[str, Any]] = []
    for match in PLACEHOLDER_RE.finditer(content or ""):
        # Only one of the two groups participates in a match; the other is None.
        description = (match.group(1) or match.group(2) or "").strip()
        if not description:
            continue
        items.append(
            {
                "index": len(items),
                "description": description,
                "placeholder": match.group(0),
                "start_offset": match.start(),
            }
        )
        if max_images is not None and len(items) >= max_images:
            break
    return items


def build_initial_image_assets(
    placeholders: list[dict[str, Any]],
    provider: str,
    style: str,
    size: str,
    now_iso: str,
) -> list[dict[str, Any]]:
    """
    Build one initial asset record for every parsed placeholder.

    Each record copies ``index``, ``placeholder`` and ``description`` from the
    placeholder dict, stores the given ``provider``, ``style`` and ``size``,
    sets ``status`` to ``IMAGE_STATUS_PENDING``, leaves ``prompt``, ``url``
    and ``error`` as ``None``, and uses ``now_iso`` for both ``created_at``
    and ``updated_at``.

    Args:
        placeholders: Placeholder dicts providing the ``index``,
            ``placeholder`` and ``description`` keys.
        provider: Value stored under ``provider`` in every record.
        style: Value stored under ``style`` in every record.
        size: Value stored under ``size`` in every record.
        now_iso: Timestamp string stored as creation and update time.

    Returns:
        The asset records, in the same order as ``placeholders``.
    """
    assets: list[dict[str, Any]] = []
    for entry in placeholders:
        record: dict[str, Any] = {
            "index": entry["index"],
            "placeholder": entry["placeholder"],
            "description": entry["description"],
            "prompt": None,
            "url": None,
            "status": IMAGE_STATUS_PENDING,
            "provider": provider,
            "style": style,
            "size": size,
            "error": None,
            "created_at": now_iso,
            "updated_at": now_iso,
        }
        assets.append(record)
    return assets


def split_narrative_to_sections(narrative: str) -> list[dict[str, Any]]:
    """
    Split a narrative containing ``{{IMAGE:...}}`` placeholders into sections.

    The text is cut at every placeholder returned by
    ``parse_image_placeholders``; each placeholder closes the section that
    precedes it. Placeholders that the parser skips (empty description) are
    not cut points and therefore remain inside the surrounding content.

    Args:
        narrative: Body text, possibly containing image placeholders.

    Returns:
        A list of dicts, one per section, each with:
        - ``content``: the section text with the delimiting placeholders
          removed and surrounding whitespace stripped (may be ``""``).
        - ``placeholder_info``: the parsed placeholder dict that follows the
          section, or ``None`` for the final section, which has no image.
        An empty list is returned when ``narrative`` is ``None``, empty, or
        contains only whitespace.
    """
    # ``and`` is required here: it short-circuits on None/"" before calling
    # ``.strip()`` and also rejects whitespace-only text.
    if not (narrative and narrative.strip()):
        return []

    placeholders = parse_image_placeholders(narrative, max_images=None)
    sections: list[dict[str, Any]] = []
    cursor = 0  # offset just past the previously consumed placeholder
    for info in placeholders:
        begin = info["start_offset"]
        sections.append(
            {"content": narrative[cursor:begin].strip(), "placeholder_info": info}
        )
        cursor = begin + len(info["placeholder"])
    # Trailing text after the last placeholder (or the whole text if none).
    sections.append({"content": narrative[cursor:].strip(), "placeholder_info": None})
    return sections