Files
life-echo/api/app/features/memoir/memoir_images/parser.py

85 lines
2.6 KiB
Python
Raw Normal View History

import re
from typing import Any, Optional

from .schema import IMAGE_STATUS_PENDING
# Matches an image placeholder written either as {{{{IMAGE:description}}}} or
# as {{IMAGE:description}}. The quadruple-brace alternative is listed first so
# it is tried before the double-brace one at each position. The description is
# captured lazily in group 1 (quadruple form) or group 2 (double form), and
# re.DOTALL lets it span line breaks.
PLACEHOLDER_RE = re.compile(
r"\{\{\{\{IMAGE:(.*?)\}\}\}\}|\{\{IMAGE:(.*?)\}\}",
re.DOTALL,
)
def parse_image_placeholders(
    content: str, max_images: Optional[int]
) -> list[dict[str, Any]]:
    """Extract image placeholders from ``content`` in order of appearance.

    Placeholders whose description is empty after stripping whitespace are
    ignored and do not count towards ``max_images``.

    Args:
        content: Text to scan. ``None`` or an empty string yields no items.
        max_images: Maximum number of placeholders to return, or ``None`` for
            no limit. Zero or a negative value yields an empty list.

    Returns:
        A list of dicts, one per accepted placeholder, each containing:
        - ``index``: zero-based position among the accepted placeholders.
        - ``description``: the stripped description text.
        - ``placeholder``: the full matched placeholder text.
        - ``start_offset``: offset of the match within ``content``.
    """
    items: list[dict[str, Any]] = []

    # A non-positive limit must be rejected up front: checking the limit only
    # after appending would let one item through when max_images is 0.
    if max_images is not None and max_images <= 0:
        return items

    for match in PLACEHOLDER_RE.finditer(content or ""):
        # Exactly one of the two groups participates, depending on which
        # brace form matched.
        description = (match.group(1) or match.group(2) or "").strip()
        if not description:
            continue
        items.append(
            {
                "index": len(items),
                "description": description,
                "placeholder": match.group(0),
                "start_offset": match.start(),
            }
        )
        if max_images is not None and len(items) >= max_images:
            break
    return items
def build_initial_image_assets(
    placeholders: list[dict[str, Any]],
    provider: str,
    style: str,
    size: str,
    now_iso: str,
) -> list[dict[str, Any]]:
    """Build one initial asset record for every parsed placeholder.

    Args:
        placeholders: Placeholder dicts; each must provide the ``index``,
            ``placeholder`` and ``description`` keys.
        provider: Value stored under ``provider`` in every record.
        style: Value stored under ``style`` in every record.
        size: Value stored under ``size`` in every record.
        now_iso: Timestamp string stored as both ``created_at`` and
            ``updated_at``.

    Returns:
        A list of asset dicts in the same order as ``placeholders``. Each
        record starts with ``status`` set to ``IMAGE_STATUS_PENDING`` and with
        ``prompt``, ``url`` and ``error`` set to ``None``.
    """
    assets: list[dict[str, Any]] = []
    for entry in placeholders:
        record: dict[str, Any] = {
            # Fields copied from the parsed placeholder.
            "index": entry["index"],
            "placeholder": entry["placeholder"],
            "description": entry["description"],
            # Nothing has been generated yet.
            "prompt": None,
            "url": None,
            "status": IMAGE_STATUS_PENDING,
            # Settings shared by every record in this batch.
            "provider": provider,
            "style": style,
            "size": size,
            "error": None,
            "created_at": now_iso,
            "updated_at": now_iso,
        }
        assets.append(record)
    return assets
def split_narrative_to_sections(narrative: str) -> list[dict[str, Any]]:
    """Split a narrative into text sections delimited by image placeholders.

    Every placeholder returned by ``parse_image_placeholders`` closes one
    section; the text after the last placeholder becomes a final section that
    has no image attached.

    Args:
        narrative: Body text that may contain ``{{IMAGE:...}}`` placeholders.

    Returns:
        A list of dicts, one per section, each containing:
        - ``content``: the section's text without the placeholder, stripped of
          surrounding whitespace (may be an empty string).
        - ``placeholder_info``: the placeholder dict that follows this
          section, or ``None`` for the last section.
        The list is empty when ``narrative`` is ``None``, empty, or contains
        only whitespace.
    """
    # Both conditions are needed: the falsy check must short-circuit before
    # .strip() is called on None, and whitespace-only text has nothing to split.
    if not narrative or not narrative.strip():
        return []

    placeholders = parse_image_placeholders(narrative, max_images=None)
    sections: list[dict[str, Any]] = []
    cursor = 0  # offset just past the previously consumed placeholder
    for placeholder in placeholders:
        begin = placeholder["start_offset"]
        sections.append(
            {
                "content": narrative[cursor:begin].strip(),
                "placeholder_info": placeholder,
            }
        )
        cursor = begin + len(placeholder["placeholder"])

    # Trailing text after the last placeholder (or the whole text when there
    # are no placeholders) forms the final, image-less section.
    sections.append(
        {"content": narrative[cursor:].strip(), "placeholder_info": None}
    )
    return sections