# api/app/features/memoir/memoir_images/parser.py

import json
import re
from typing import Any

from .json_payload import extract_json_payload
from .schema import IMAGE_STATUS_PENDING

# Matches an image placeholder in either the quadruple-brace form
# ({{{{IMAGE:...}}}}) or the double-brace form ({{IMAGE:...}}).  Group 1
# captures the description of the former, group 2 that of the latter, so
# exactly one of them is set per match.  re.DOTALL lets a description
# contain newlines.
PLACEHOLDER_RE = re.compile(
    r"\{\{\{\{IMAGE:(.*?)\}\}\}\}|\{\{IMAGE:(.*?)\}\}",
    re.DOTALL,
)


def parse_image_placeholders(
    content: str,
    max_images: "int | None",
) -> list[dict[str, Any]]:
    """Extract image placeholders from *content* in order of appearance.

    Args:
        content: Text that may contain image placeholders.  ``None`` or an
            empty string yields an empty list.
        max_images: Upper bound on the number of placeholders returned, or
            ``None`` for no limit.  Zero or a negative value yields an
            empty list.

    Returns:
        One dict per placeholder whose description is not blank, with keys:

        - ``index``: 0-based position among the returned items.
        - ``description``: the stripped text following ``IMAGE:``.
        - ``placeholder``: the exact matched text, braces included.
        - ``start_offset``: offset of the match within *content*.
    """
    items: list[dict[str, Any]] = []
    # A non-positive limit means "no images at all"; checking only after the
    # first append would still let one item through.
    if max_images is not None and max_images <= 0:
        return items
    for match in PLACEHOLDER_RE.finditer(content or ""):
        description = (match.group(1) or match.group(2) or "").strip()
        if not description:
            # Blank placeholders are ignored and do not consume an index.
            continue
        items.append(
            {
                "index": len(items),
                "description": description,
                "placeholder": match.group(0),
                "start_offset": match.start(),
            }
        )
        if max_images is not None and len(items) >= max_images:
            break
    return items


def build_initial_image_assets(
    placeholders: list[dict[str, Any]],
    provider: str,
    style: str,
    size: str,
    now_iso: str,
) -> list[dict[str, Any]]:
    """Create one not-yet-generated image asset record per placeholder.

    Args:
        placeholders: Placeholder dicts; each must provide ``index``,
            ``placeholder`` and ``description``.
        provider: Value stored under ``provider`` on every record.
        style: Value stored under ``style`` on every record.
        size: Value stored under ``size`` on every record.
        now_iso: Timestamp string stored as both ``created_at`` and
            ``updated_at``.

    Returns:
        A list of asset dicts in the same order as *placeholders*.  Each has
        ``status`` set to ``IMAGE_STATUS_PENDING`` and ``prompt``, ``url``
        and ``error`` set to ``None``.

    Raises:
        KeyError: If a placeholder lacks one of the required keys.
    """
    assets: list[dict[str, Any]] = []
    for entry in placeholders:
        # Identity fields are copied straight from the parsed placeholder.
        asset: dict[str, Any] = {
            field: entry[field]
            for field in ("index", "placeholder", "description")
        }
        # Everything else starts out in its "nothing generated yet" state.
        asset.update(
            {
                "prompt": None,
                "url": None,
                "status": IMAGE_STATUS_PENDING,
                "provider": provider,
                "style": style,
                "size": size,
                "error": None,
                "created_at": now_iso,
                "updated_at": now_iso,
            }
        )
        assets.append(asset)
    return assets


def split_narrative_to_sections(narrative: str) -> list[dict[str, Any]]:
    """Split a narrative into text sections at its image placeholders.

    Args:
        narrative: Body text, optionally containing ``{{IMAGE:...}}``
            placeholders.

    Returns:
        A list of dicts, one per run of text, each with:

        - ``content``: the stripped text of the section, without the
          placeholder that terminates it.
        - ``placeholder_info``: the placeholder dict (as produced by
          ``parse_image_placeholders``) that follows this section, or
          ``None`` for the last section, which has no image.

        ``None``, empty and whitespace-only input yields ``[]``.  Otherwise
        the list has one more entry than there are recognised placeholders,
        so the last entry may have empty ``content``.  Placeholders with a
        blank description are not recognised by ``parse_image_placeholders``
        and therefore remain inside ``content``.
    """
    # Both checks are needed: `not narrative` covers None and "", the strip
    # covers text that consists of whitespace only.
    if not narrative or not narrative.strip():
        return []
    placeholders = parse_image_placeholders(narrative, max_images=None)
    sections: list[dict[str, Any]] = []
    cursor = 0  # offset just past the previously consumed placeholder
    for info in placeholders:
        begin = info["start_offset"]
        sections.append(
            {"content": narrative[cursor:begin].strip(), "placeholder_info": info}
        )
        cursor = begin + len(info["placeholder"])
    # Whatever follows the last placeholder (or the whole text when there is
    # none) becomes the final, image-less section.
    sections.append(
        {"content": narrative[cursor:].strip(), "placeholder_info": None}
    )
    return sections


def parse_narrative_json(raw: str) -> list[dict[str, Any]]:
    """Parse a narrative that the LLM emitted in JSON form.

    The decoded payload is expected to be an object with a ``paragraphs``
    list whose items are objects carrying ``content`` and, optionally,
    ``image_description``.

    Args:
        raw: Raw LLM output.  It is passed through ``extract_json_payload``
            before being decoded with ``json.loads``.

    Returns:
        The same structure as ``split_narrative_to_sections``: a list of
        dicts with ``content`` (stripped text) and ``placeholder_info``
        (a placeholder dict when the paragraph has a non-blank
        ``image_description``, otherwise ``None``).

        Returns ``[]`` when *raw* is ``None`` or blank, when the payload
        cannot be decoded, or when ``paragraphs`` is missing or not a list.
        Paragraph entries that are not objects are skipped.
    """
    if not raw or not raw.strip():
        return []
    try:
        payload = extract_json_payload(raw)
        data = json.loads(payload)
        paragraphs = data.get("paragraphs") or []
    except (json.JSONDecodeError, TypeError, AttributeError):
        return []
    if not isinstance(paragraphs, list):
        return []

    result: list[dict[str, Any]] = []
    for i, p in enumerate(paragraphs):
        if not isinstance(p, dict):
            continue
        # LLM output is untrusted: a number or list in a text field is
        # treated as missing instead of crashing on .strip().
        content = p.get("content")
        content = content.strip() if isinstance(content, str) else ""
        desc = p.get("image_description")
        desc = desc.strip() if isinstance(desc, str) else ""
        placeholder_info = None
        if desc:
            placeholder_info = {
                "placeholder": f"{{{{IMAGE:{desc}}}}}",
                "description": desc,
                # NOTE(review): this is the paragraph's position in the
                # list, whereas parse_image_placeholders numbers images
                # consecutively - confirm callers expect this.
                "index": i,
                # There is no source text to point into, so the offset is
                # a fixed 0.
                "start_offset": 0,
            }
        result.append({"content": content, "placeholder_info": placeholder_info})
    return result


def parse_narrative_to_sections(narrative: str) -> list[dict[str, Any]]:
    """Parse a narrative into sections, preferring the JSON format.

    Text that starts with ``{`` and mentions ``paragraphs`` is first handed
    to ``parse_narrative_json``.  If that yields no sections, or the text
    does not look like JSON, the placeholder-based
    ``split_narrative_to_sections`` is used instead.

    Args:
        narrative: Raw narrative text, either JSON or plain text with
            ``{{IMAGE:...}}`` placeholders.

    Returns:
        The same structure as ``split_narrative_to_sections``.  ``None``,
        empty and whitespace-only input yields ``[]``.
    """
    if not narrative:
        return []
    stripped = narrative.strip()
    if not stripped:
        # Whitespace-only text has nothing to parse in either format.
        return []
    # Cheap sniff test so plain-text narratives skip the JSON attempt.
    if stripped.startswith("{") and "paragraphs" in stripped:
        segments = parse_narrative_json(narrative)
        if segments:
            return segments
    return split_narrative_to_sections(narrative)
-												fix: 去除LLM直接生成图片占位符逻辑

											
										
										
											2026-03-19 11:18:58 +08:00
+								import json
-												feat(api): add memoir image placeholder parsing

Made-with: Cursor

											
										
										
											2026-03-10 15:59:36 +08:00
+								import re
 								from typing import Any
-												fix: 去除LLM直接生成图片占位符逻辑

											
										
										
											2026-03-19 11:18:58 +08:00
+								from .json_payload import extract_json_payload
-												fix: fix various issues before merging

											
										
										
											2026-03-11 11:27:32 +08:00
+								from .schema import IMAGE_STATUS_PENDING
-												Fix memoir image delivery and Android rendering

											
										
										
											2026-03-11 10:06:12 +08:00
+								PLACEHOLDER_RE = re.compile(
 								    r"\{\{\{\{IMAGE:(.*?)\}\}\}\}|\{\{IMAGE:(.*?)\}\}",
 								    re.DOTALL,
 								)
-												feat(api): add memoir image placeholder parsing

Made-with: Cursor

											
										
										
											2026-03-10 15:59:36 +08:00
 								def parse_image_placeholders(content: str, max_images: int) -> list[dict[str, Any]]:
 								    items: list[dict[str, Any]] = []
 								    for match in PLACEHOLDER_RE.finditer(content or ""):
-												Fix memoir image delivery and Android rendering

											
										
										
											2026-03-11 10:06:12 +08:00
+								        description = (match.group(1) or match.group(2) or "").strip()
-												feat(api): add memoir image placeholder parsing

Made-with: Cursor

											
										
										
											2026-03-10 15:59:36 +08:00
+								        if not description:
 								            continue
 								        items.append(
 								            {
 								                "index": len(items),
 								                "description": description,
 								                "placeholder": match.group(0),
 								                "start_offset": match.start(),
 								            }
 								        )
-												Fix dynamic memoir image limits

											
										
										
											2026-03-11 14:07:02 +08:00
+								        if max_images is not None and len(items) >= max_images:
-												feat(api): add memoir image placeholder parsing

Made-with: Cursor

											
										
										
											2026-03-10 15:59:36 +08:00
+								            break
 								    return items
 								def build_initial_image_assets(
 								    placeholders: list[dict[str, Any]],
 								    provider: str,
 								    style: str,
 								    size: str,
 								    now_iso: str,
 								) -> list[dict[str, Any]]:
 								    return [
 								        {
 								            "index": item["index"],
 								            "placeholder": item["placeholder"],
 								            "description": item["description"],
 								            "prompt": None,
 								            "url": None,
-												fix: fix various issues before merging

											
										
										
											2026-03-11 11:27:32 +08:00
+								            "status": IMAGE_STATUS_PENDING,
-												feat(api): add memoir image placeholder parsing

Made-with: Cursor

											
										
										
											2026-03-10 15:59:36 +08:00
+								            "provider": provider,
 								            "style": style,
 								            "size": size,
 								            "error": None,
 								            "created_at": now_iso,
 								            "updated_at": now_iso,
 								        }
 								        for item in placeholders
 								    ]
-												把“章节正文 + 图片”从 chapters 单表/JSON 结构，重构为“章节 chapter + 段落 section + 图片 memoir_images 独立表”的新数据模型，同时联动修改接口、PDF 导出、异步任务、迁移脚本、测试，以及修复 Android 端聊天列表显示问题。 (#9)

* refactor: 表结构重构，新增段落section和图片image新表

* fix: fix android app import error

* refactor: 重构文件名

* fix: 优化提示词

* fix: 消息气泡显示位置异常问题

---------

Co-authored-by: yangshilin <2157598560@qq.com>
											
										
										
											2026-03-13 11:12:10 +08:00
 								def split_narrative_to_sections(narrative: str) -> list[dict[str, Any]]:
 								    """
 								    将带 {{IMAGE:...}} 占位符的正文按占位符拆成多段。
 								    返回 list[dict]，每项含:
 								    - content: 本段纯文本（不含占位符）
 								    - placeholder_info: 本段后的配图占位信息，或 None（最后一段无图）
 								    """
 								    if not (narrative or narrative.strip()):
 								        return []
 								    placeholders = parse_image_placeholders(narrative, max_images=None)
 								    sections: list[dict[str, Any]] = []
 								    for i in range(len(placeholders) + 1):
 								        if i == 0:
 								            start = 0
 								        else:
 								            prev = placeholders[i - 1]
 								            start = prev["start_offset"] + len(prev["placeholder"])
 								        if i < len(placeholders):
 								            end = placeholders[i]["start_offset"]
 								            placeholder_info = placeholders[i]
 								        else:
 								            end = len(narrative)
 								            placeholder_info = None
 								        content = narrative[start:end]
 								        if isinstance(content, str):
 								            content = content.strip()
-												chore/ 删除无用文件

											
										
										
											2026-03-19 14:36:14 +08:00
+								        sections.append(
 								            {"content": content or "", "placeholder_info": placeholder_info}
 								        )
-												把“章节正文 + 图片”从 chapters 单表/JSON 结构，重构为“章节 chapter + 段落 section + 图片 memoir_images 独立表”的新数据模型，同时联动修改接口、PDF 导出、异步任务、迁移脚本、测试，以及修复 Android 端聊天列表显示问题。 (#9)

* refactor: 表结构重构，新增段落section和图片image新表

* fix: fix android app import error

* refactor: 重构文件名

* fix: 优化提示词

* fix: 消息气泡显示位置异常问题

---------

Co-authored-by: yangshilin <2157598560@qq.com>
											
										
										
											2026-03-13 11:12:10 +08:00
+								    return sections
-												fix: 去除LLM直接生成图片占位符逻辑

											
										
										
											2026-03-19 11:18:58 +08:00
 								def parse_narrative_json(raw: str) -> list[dict[str, Any]]:
 								    """
 								    解析 LLM 输出的 JSON 格式叙事。
 								    返回与 split_narrative_to_sections 相同结构：list[dict]，每项含 content、placeholder_info。
 								    """
 								    if not (raw or raw.strip()):
 								        return []
 								    try:
 								        payload = extract_json_payload(raw)
 								        data = json.loads(payload)
 								        paragraphs = data.get("paragraphs") or []
 								        if not isinstance(paragraphs, list):
 								            return []
 								    except (json.JSONDecodeError, TypeError, AttributeError):
 								        return []
 								    result: list[dict[str, Any]] = []
 								    for i, p in enumerate(paragraphs):
 								        if not isinstance(p, dict):
 								            continue
 								        content = (p.get("content") or "").strip()
 								        desc = (p.get("image_description") or "").strip()
 								        placeholder_info = None
 								        if desc:
 								            placeholder_info = {
 								                "placeholder": f"{{{{IMAGE:{desc}}}}}",
 								                "description": desc,
 								                "index": i,
 								                "start_offset": 0,
 								            }
 								        result.append({"content": content, "placeholder_info": placeholder_info})
 								    return result
 								def parse_narrative_to_sections(narrative: str) -> list[dict[str, Any]]:
 								    """
 								    将 narrative 解析为 sections。优先尝试 JSON 格式，失败则回退到占位符解析。
 								    返回与 split_narrative_to_sections 相同结构。
 								    """
 								    if not (narrative or narrative.strip()):
 								        return []
 								    stripped = narrative.strip()
 								    if stripped.startswith("{") and "paragraphs" in stripped:
 								        segments = parse_narrative_json(narrative)
 								        if segments:
 								            return segments
 								    return split_narrative_to_sections(narrative)