"""Helpers for parsing narrative text into sections and image placeholders."""

import json
import re
from typing import Any, Optional

from app.features.memoir.asset_resolver import strip_image_placeholders

from .json_payload import extract_json_payload
from .schema import IMAGE_STATUS_PENDING

# Matches both the four-brace and the two-brace IMAGE placeholder forms.
# The four-brace alternative comes first so it is not partially consumed
# by the two-brace alternative.
PLACEHOLDER_RE = re.compile(
    r"\{\{\{\{IMAGE:(.*?)\}\}\}\}|\{\{IMAGE:(.*?)\}\}",
    re.DOTALL,
)


def parse_image_placeholders(
    content: str, max_images: Optional[int]
) -> list[dict[str, Any]]:
    """Parse IMAGE placeholders out of body text (offline migration/debugging).

    Args:
        content: Text to scan; ``None`` or empty yields no items.
        max_images: Maximum number of placeholders to return, or ``None``
            for no limit. A value of zero or less yields an empty list.

    Returns:
        One dict per placeholder with a non-empty description, in order of
        appearance, with keys ``index``, ``description``, ``placeholder``
        (the full matched text) and ``start_offset``.
    """
    # A non-positive limit must not let the first match slip through, which
    # would happen if the limit were only checked after appending.
    if max_images is not None and max_images <= 0:
        return []

    items: list[dict[str, Any]] = []
    for match in PLACEHOLDER_RE.finditer(content or ""):
        # Exactly one of the two groups participates, depending on which
        # brace form matched.
        description = (match.group(1) or match.group(2) or "").strip()
        if not description:
            continue
        items.append(
            {
                "index": len(items),
                "description": description,
                "placeholder": match.group(0),
                "start_offset": match.start(),
            }
        )
        if max_images is not None and len(items) >= max_images:
            break
    return items


def build_initial_image_assets(
    placeholders: list[dict[str, Any]],
    provider: str,
    style: str,
    size: str,
    now_iso: str,
) -> list[dict[str, Any]]:
    """Build one pending image-asset record per parsed placeholder.

    Args:
        placeholders: Items carrying ``index``, ``placeholder`` and
            ``description`` keys, as produced by ``parse_image_placeholders``.
        provider: Stored as-is in each record's ``provider`` field.
        style: Stored as-is in each record's ``style`` field.
        size: Stored as-is in each record's ``size`` field.
        now_iso: Timestamp string used for both ``created_at`` and
            ``updated_at``.

    Returns:
        A list of asset dicts with ``prompt``, ``url`` and ``error`` set to
        ``None`` and ``status`` set to ``IMAGE_STATUS_PENDING``.
    """
    return [
        {
            "index": item["index"],
            "placeholder": item["placeholder"],
            "description": item["description"],
            "prompt": None,
            "url": None,
            "status": IMAGE_STATUS_PENDING,
            "provider": provider,
            "style": style,
            "size": size,
            "error": None,
            "created_at": now_iso,
            "updated_at": now_iso,
        }
        for item in placeholders
    ]


def parse_narrative_json(raw: str) -> list[dict[str, Any]]:
    """Parse the LLM's JSON narrative output (``paragraphs``).

    No illustration placeholders are generated from ``image_description``;
    illustrations are handled separately by the story/chapter structured
    flow.

    Args:
        raw: Raw LLM output expected to contain a JSON object with a
            ``paragraphs`` list.

    Returns:
        One ``{"content": ..., "placeholder_info": None}`` dict per paragraph
        that has non-blank string content. An empty list is returned when the
        input is blank, cannot be decoded, or does not have the expected
        shape.
    """
    if not raw or not str(raw).strip():
        return []

    try:
        data = json.loads(extract_json_payload(raw))
    except (json.JSONDecodeError, TypeError, AttributeError):
        return []

    # Valid JSON that is not an object (list, string, null, ...) carries no
    # paragraphs.
    if not isinstance(data, dict):
        return []
    paragraphs = data.get("paragraphs") or []
    if not isinstance(paragraphs, list):
        return []

    result: list[dict[str, Any]] = []
    for paragraph in paragraphs:
        if not isinstance(paragraph, dict):
            continue
        content = paragraph.get("content")
        # Model output is untrusted: skip non-string content (numbers, lists,
        # objects) rather than crashing on ``.strip()``.
        if not isinstance(content, str):
            continue
        content = content.strip()
        if content:
            result.append({"content": content, "placeholder_info": None})
    return result


def split_plain_narrative_into_sections(narrative: str) -> list[dict[str, Any]]:
    """Split a non-JSON narrative into sections.

    Legacy placeholders are stripped first, then the text is split on blank
    lines; no per-paragraph illustrations are produced.

    Args:
        narrative: Plain narrative text; ``None`` or empty yields no sections.

    Returns:
        One ``{"content": ..., "placeholder_info": None}`` dict per non-blank
        chunk, in original order.
    """
    text = strip_image_placeholders(narrative or "")
    if not text.strip():
        return []
    chunks = (chunk.strip() for chunk in text.split("\n\n"))
    return [
        {"content": chunk, "placeholder_info": None}
        for chunk in chunks
        if chunk
    ]


def parse_narrative_to_sections(narrative: str) -> list[dict[str, Any]]:
    """Parse a narrative into sections.

    Text that looks like a JSON object mentioning ``paragraphs`` goes through
    ``parse_narrative_json``; otherwise (or when that yields nothing) the
    placeholders are stripped and the text is split into paragraphs.

    Args:
        narrative: JSON or plain-text narrative.

    Returns:
        A list of ``{"content": ..., "placeholder_info": None}`` dicts; empty
        for blank input.
    """
    if not narrative or not str(narrative).strip():
        return []

    stripped = narrative.strip()
    if stripped.startswith("{") and "paragraphs" in stripped:
        segments = parse_narrative_json(narrative)
        if segments:
            return segments
    # Fall back to plain-text splitting when JSON parsing produced nothing.
    return split_plain_narrative_into_sections(narrative)