import json
import re
from typing import Any, Optional

from .json_payload import extract_json_payload
from .schema import IMAGE_STATUS_PENDING

# Matches an image placeholder in either the quadruple-brace form
# ``{{{{IMAGE:...}}}}`` or the double-brace form ``{{IMAGE:...}}``.
# The quadruple-brace alternative is tried first at each position; the
# description is captured in group 1 or group 2 respectively. DOTALL lets a
# description span several lines.
PLACEHOLDER_RE = re.compile(
    r"\{\{\{\{IMAGE:(.*?)\}\}\}\}|\{\{IMAGE:(.*?)\}\}",
    re.DOTALL,
)


def _clean_text(value: Any) -> str:
    """Return *value* stripped when it is a string, otherwise an empty string."""
    return value.strip() if isinstance(value, str) else ""


def parse_image_placeholders(
    content: str,
    max_images: Optional[int],
) -> list[dict[str, Any]]:
    """Collect the image placeholders found in *content*, in document order.

    Args:
        content: Text to scan. ``None`` or an empty string yields no items.
        max_images: Upper bound on the number of items returned, or ``None``
            for no limit. A value of zero or less yields no items.

    Returns:
        One dict per placeholder with a non-blank description, holding:
        ``index`` (position in the returned list), ``description`` (stripped
        text after ``IMAGE:``), ``placeholder`` (the exact matched text) and
        ``start_offset`` (offset of the match inside *content*).
    """
    items: list[dict[str, Any]] = []
    # Without this guard a limit of 0 would still let the first match through,
    # because the limit below is only checked after an item has been appended.
    if max_images is not None and max_images <= 0:
        return items

    for match in PLACEHOLDER_RE.finditer(content or ""):
        description = (match.group(1) or match.group(2) or "").strip()
        if not description:
            # Placeholders with a blank description are ignored entirely.
            continue
        items.append(
            {
                "index": len(items),
                "description": description,
                "placeholder": match.group(0),
                "start_offset": match.start(),
            }
        )
        if max_images is not None and len(items) >= max_images:
            break
    return items


def build_initial_image_assets(
    placeholders: list[dict[str, Any]],
    provider: str,
    style: str,
    size: str,
    now_iso: str,
) -> list[dict[str, Any]]:
    """Build one pending image-asset record per parsed placeholder.

    Args:
        placeholders: Items shaped like the output of
            ``parse_image_placeholders`` (``index``, ``placeholder`` and
            ``description`` keys are read).
        provider: Value stored under ``provider`` on every record.
        style: Value stored under ``style`` on every record.
        size: Value stored under ``size`` on every record.
        now_iso: Timestamp string stored as both ``created_at`` and
            ``updated_at``.

    Returns:
        A list of asset dicts with ``status`` set to ``IMAGE_STATUS_PENDING``
        and ``prompt``, ``url`` and ``error`` initialised to ``None``.
    """
    return [
        {
            "index": item["index"],
            "placeholder": item["placeholder"],
            "description": item["description"],
            "prompt": None,
            "url": None,
            "status": IMAGE_STATUS_PENDING,
            "provider": provider,
            "style": style,
            "size": size,
            "error": None,
            "created_at": now_iso,
            "updated_at": now_iso,
        }
        for item in placeholders
    ]


def split_narrative_to_sections(narrative: str) -> list[dict[str, Any]]:
    """Split text containing ``{{IMAGE:...}}`` placeholders into sections.

    Args:
        narrative: Body text with embedded image placeholders.

    Returns:
        A list of dicts, each containing:
        - ``content``: the stripped text of the section, without the
          placeholder that terminates it;
        - ``placeholder_info``: the placeholder dict that follows the section,
          or ``None`` for the trailing section (which has no image).
        ``None``, empty or whitespace-only input yields an empty list.
    """
    # ``and`` (not ``or``): reject None/empty first, then whitespace-only text.
    if not (narrative and narrative.strip()):
        return []

    sections: list[dict[str, Any]] = []
    cursor = 0  # offset just past the previously consumed placeholder
    for info in parse_image_placeholders(narrative, max_images=None):
        begin = info["start_offset"]
        sections.append(
            {
                "content": narrative[cursor:begin].strip(),
                "placeholder_info": info,
            }
        )
        cursor = begin + len(info["placeholder"])

    # Text after the last placeholder is always emitted, even when empty.
    sections.append({"content": narrative[cursor:].strip(), "placeholder_info": None})
    return sections


def parse_narrative_json(raw: str) -> list[dict[str, Any]]:
    """Parse a narrative that the LLM returned in JSON form.

    The payload is expected to be an object with a ``paragraphs`` list whose
    entries are objects carrying ``content`` and, optionally,
    ``image_description``.

    Args:
        raw: Raw LLM output; the JSON payload is extracted with
            ``extract_json_payload`` before decoding.

    Returns:
        The same structure as ``split_narrative_to_sections``: a list of dicts
        with ``content`` and ``placeholder_info``. An empty list is returned
        for blank input, undecodable JSON, or a payload without a
        ``paragraphs`` list.
    """
    if not (raw and raw.strip()):
        return []

    try:
        payload = extract_json_payload(raw)
        data = json.loads(payload)
        # A non-dict payload has no ``.get``; the AttributeError is caught below.
        paragraphs = data.get("paragraphs") or []
    except (json.JSONDecodeError, TypeError, AttributeError):
        return []
    if not isinstance(paragraphs, list):
        return []

    result: list[dict[str, Any]] = []
    for i, paragraph in enumerate(paragraphs):
        if not isinstance(paragraph, dict):
            continue
        # LLM output is untrusted: non-string values are treated as empty
        # rather than raising on ``.strip()``.
        content = _clean_text(paragraph.get("content"))
        desc = _clean_text(paragraph.get("image_description"))
        placeholder_info = None
        if desc:
            placeholder_info = {
                "placeholder": f"{{{{IMAGE:{desc}}}}}",
                "description": desc,
                # Index is the paragraph's position in the JSON list, so it
                # can skip values when non-dict entries were ignored.
                "index": i,
                "start_offset": 0,
            }
        result.append({"content": content, "placeholder_info": placeholder_info})
    return result


def parse_narrative_to_sections(narrative: str) -> list[dict[str, Any]]:
    """Parse *narrative* into sections, preferring the JSON format.

    JSON parsing is attempted when the stripped text starts with ``{`` and
    contains the word ``paragraphs``; if that yields nothing, the text is
    split on image placeholders instead.

    Args:
        narrative: Either JSON-formatted LLM output or plain text with
            ``{{IMAGE:...}}`` placeholders.

    Returns:
        The same structure as ``split_narrative_to_sections``. ``None``, empty
        or whitespace-only input yields an empty list.
    """
    if not (narrative and narrative.strip()):
        return []

    stripped = narrative.strip()
    if stripped.startswith("{") and "paragraphs" in stripped:
        segments = parse_narrative_json(narrative)
        if segments:
            return segments
    return split_narrative_to_sections(narrative)