Files
life-echo/api/app/features/memoir/memoir_images/parser.py

85 lines
2.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import re
from typing import Any
from .schema import IMAGE_STATUS_PENDING
# Matches an image placeholder in either of two spellings:
#   {{{{IMAGE:<description>}}}}  -> description captured in group 1
#   {{IMAGE:<description>}}      -> description captured in group 2
# The four-brace alternative is listed first, so it is tried before the
# two-brace one at each position. Descriptions are matched lazily, and
# re.DOTALL lets a description span line breaks.
# NOTE(review): the four-brace form presumably comes from doubled-brace
# escaping in an upstream template -- confirm.
PLACEHOLDER_RE = re.compile(
r"\{\{\{\{IMAGE:(.*?)\}\}\}\}|\{\{IMAGE:(.*?)\}\}",
re.DOTALL,
)
def parse_image_placeholders(content: str, max_images: "int | None") -> list[dict[str, Any]]:
    """Extract image placeholders from *content* in document order.

    Both ``{{IMAGE:...}}`` and ``{{{{IMAGE:...}}}}`` spellings are recognized
    (see ``PLACEHOLDER_RE``). A placeholder whose description is empty after
    stripping is ignored and does not consume an index.

    Args:
        content: Text to scan. ``None`` or an empty string yields ``[]``.
        max_images: Upper bound on the number of returned items. ``None``
            means no limit; zero or a negative value yields ``[]``.

    Returns:
        One dict per kept placeholder, with keys:
        - ``index``: 0-based position among the kept placeholders
        - ``description``: stripped description text
        - ``placeholder``: the exact matched text, braces included
        - ``start_offset``: offset of the match within *content*
    """
    if not content:
        return []
    # Check the cap up front: testing it only after an append would let one
    # item through even when the caller asked for none.
    if max_images is not None and max_images <= 0:
        return []

    items: list[dict[str, Any]] = []
    for match in PLACEHOLDER_RE.finditer(content):
        # Group 1 belongs to the four-brace form, group 2 to the two-brace form.
        description = (match.group(1) or match.group(2) or "").strip()
        if not description:
            continue
        items.append(
            {
                "index": len(items),
                "description": description,
                "placeholder": match.group(0),
                "start_offset": match.start(),
            }
        )
        if max_images is not None and len(items) >= max_images:
            break
    return items
def build_initial_image_assets(
    placeholders: list[dict[str, Any]],
    provider: str,
    style: str,
    size: str,
    now_iso: str,
) -> list[dict[str, Any]]:
    """Create one initial asset record per parsed placeholder.

    Args:
        placeholders: Dicts that each provide ``index``, ``placeholder`` and
            ``description`` keys.
        provider: Value stored under ``provider`` on every asset.
        style: Value stored under ``style`` on every asset.
        size: Value stored under ``size`` on every asset.
        now_iso: Timestamp string stored as both ``created_at`` and
            ``updated_at``.

    Returns:
        A list of asset dicts in the same order as *placeholders*. Each one
        starts with status ``IMAGE_STATUS_PENDING`` and with ``prompt``,
        ``url`` and ``error`` set to ``None``.
    """
    assets: list[dict[str, Any]] = []
    for entry in placeholders:
        asset = {
            # Copied from the placeholder.
            "index": entry["index"],
            "placeholder": entry["placeholder"],
            "description": entry["description"],
            # Not known yet at creation time.
            "prompt": None,
            "url": None,
            "status": IMAGE_STATUS_PENDING,
            # Settings shared by every asset in this batch.
            "provider": provider,
            "style": style,
            "size": size,
            "error": None,
            "created_at": now_iso,
            "updated_at": now_iso,
        }
        assets.append(asset)
    return assets
def split_narrative_to_sections(narrative: str) -> list[dict[str, Any]]:
    """Split a narrative containing ``{{IMAGE:...}}`` placeholders into sections.

    The text is cut at every placeholder returned by
    ``parse_image_placeholders``. Each section holds the text that precedes
    one placeholder, and a final section holds whatever follows the last
    placeholder. Placeholders with an empty description are skipped by the
    parser, so they stay inside the surrounding section text.

    Args:
        narrative: Body text to split. ``None``, an empty string or
            whitespace-only text yields ``[]``.

    Returns:
        A list of dicts, each with:
        - ``content``: the section's text, stripped (may be ``""``)
        - ``placeholder_info``: the placeholder dict that follows this
          section, or ``None`` for the trailing section (no image after it)
    """
    # Both checks are needed: `not narrative` covers None and "", while
    # `.strip()` covers whitespace-only input.
    if not narrative or not narrative.strip():
        return []

    placeholders = parse_image_placeholders(narrative, max_images=None)
    sections: list[dict[str, Any]] = []
    cursor = 0  # offset just past the previously consumed placeholder
    for info in placeholders:
        begin = info["start_offset"]
        sections.append(
            {"content": narrative[cursor:begin].strip(), "placeholder_info": info}
        )
        cursor = begin + len(info["placeholder"])
    # Trailing text after the last placeholder (or the whole text if none).
    sections.append({"content": narrative[cursor:].strip(), "placeholder_info": None})
    return sections