2026-03-19 11:18:58 +08:00
|
|
|
|
import json
|
2026-03-10 15:59:36 +08:00
|
|
|
|
import re
|
|
|
|
|
|
from typing import Any
|
|
|
|
|
|
|
2026-03-22 16:45:57 +08:00
|
|
|
|
from app.features.memoir.asset_resolver import strip_image_placeholders
|
2026-03-20 10:30:07 +08:00
|
|
|
|
|
2026-04-02 12:00:00 +08:00
|
|
|
|
from app.core.json_utils import extract_json_payload
|
2026-03-11 11:27:32 +08:00
|
|
|
|
from .schema import IMAGE_STATUS_PENDING
|
|
|
|
|
|
|
2026-03-11 10:06:12 +08:00
|
|
|
|
# Matches an IMAGE placeholder in either of two spellings:
#   {{{{IMAGE:<description>}}}}  -> description captured in group 1
#   {{IMAGE:<description>}}      -> description captured in group 2
# The four-brace alternative is listed first so it is tried before the
# two-brace one at any given position. DOTALL lets a description span
# several lines; the captures are non-greedy so each match stops at the
# first closing brace run.
PLACEHOLDER_RE = re.compile(
    r"\{\{\{\{IMAGE:(.*?)\}\}\}\}|\{\{IMAGE:(.*?)\}\}",
    flags=re.DOTALL,
)
|
2026-03-10 15:59:36 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def parse_image_placeholders(content: str, max_images: int) -> list[dict[str, Any]]:
    """Parse IMAGE placeholders out of body text (offline migration / debugging).

    Args:
        content: Text to scan. ``None`` or an empty string yields no items.
        max_images: Maximum number of placeholders to return. ``None``
            disables the limit; zero or a negative value returns an empty
            list.

    Returns:
        One dict per placeholder with a non-empty description, in order of
        appearance, with the keys ``index`` (0-based position in the
        returned list), ``description`` (stripped text), ``placeholder``
        (the full matched text) and ``start_offset`` (offset of the match
        in ``content``).
    """
    # The limit used to be checked only after an item had been appended, so
    # a limit of 0 (or a negative one) still produced one placeholder.
    if max_images is not None and max_images <= 0:
        return []

    items: list[dict[str, Any]] = []
    for match in PLACEHOLDER_RE.finditer(content or ""):
        # Exactly one of the two capture groups is set, depending on which
        # brace spelling matched.
        description = (match.group(1) or match.group(2) or "").strip()
        if not description:
            # Empty placeholders are skipped and do not count towards the limit.
            continue
        items.append(
            {
                "index": len(items),
                "description": description,
                "placeholder": match.group(0),
                "start_offset": match.start(),
            }
        )
        if max_images is not None and len(items) >= max_images:
            break
    return items
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def build_initial_image_assets(
    placeholders: list[dict[str, Any]],
    provider: str,
    style: str,
    size: str,
    now_iso: str,
) -> list[dict[str, Any]]:
    """Create one initial image-asset record per parsed placeholder.

    Args:
        placeholders: Dicts that each provide the keys ``index``,
            ``placeholder`` and ``description``.
        provider: Value stored under ``provider`` in every record.
        style: Value stored under ``style`` in every record.
        size: Value stored under ``size`` in every record.
        now_iso: Timestamp string stored as both ``created_at`` and
            ``updated_at``.

    Returns:
        A list of records in the same order as ``placeholders``. Each record
        starts with ``status`` set to ``IMAGE_STATUS_PENDING`` and with
        ``prompt``, ``url`` and ``error`` set to ``None``.

    Raises:
        KeyError: If a placeholder dict lacks one of the required keys.
    """
    assets: list[dict[str, Any]] = []
    for entry in placeholders:
        record: dict[str, Any] = {
            # Copied from the parsed placeholder.
            "index": entry["index"],
            "placeholder": entry["placeholder"],
            "description": entry["description"],
            # Not known yet at creation time.
            "prompt": None,
            "url": None,
            "status": IMAGE_STATUS_PENDING,
            # Shared settings supplied by the caller.
            "provider": provider,
            "style": style,
            "size": size,
            "error": None,
            "created_at": now_iso,
            "updated_at": now_iso,
        }
        assets.append(record)
    return assets
|
2026-03-13 11:12:10 +08:00
|
|
|
|
|
|
|
|
|
|
|
2026-03-19 11:18:58 +08:00
|
|
|
|
def parse_narrative_json(raw: str) -> list[dict[str, Any]]:
    """Parse an LLM-produced JSON narrative into a list of sections.

    The payload is expected to be a JSON object with a ``paragraphs`` list
    whose entries are objects carrying a ``content`` string. No illustration
    placeholders are derived from ``image_description`` here; illustrations
    are handled separately by the story/chapter structured flow.

    Args:
        raw: Raw LLM output. ``None``, empty or whitespace-only input yields
            an empty list.

    Returns:
        One ``{"content": <stripped text>, "placeholder_info": None}`` dict
        per paragraph with non-empty string content, in input order. An
        empty list is returned when the payload cannot be decoded, is not an
        object, or has no usable ``paragraphs`` list.
    """
    if not raw or not str(raw).strip():
        return []

    try:
        payload = extract_json_payload(raw)
        data = json.loads(payload)
        # AttributeError covers a decoded payload that is not a JSON object.
        paragraphs = data.get("paragraphs") or []
    except (json.JSONDecodeError, TypeError, AttributeError):
        return []
    if not isinstance(paragraphs, list):
        return []

    result: list[dict[str, Any]] = []
    for p in paragraphs:
        if not isinstance(p, dict):
            continue
        content = p.get("content")
        # LLM output is untrusted: "content" may be a number, list or object.
        # Calling .strip() on such a value used to raise AttributeError
        # outside the try block; malformed paragraphs are skipped instead.
        if not isinstance(content, str):
            continue
        content = content.strip()
        if content:
            result.append({"content": content, "placeholder_info": None})
    return result
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-03-20 10:30:07 +08:00
|
|
|
|
def split_plain_narrative_into_sections(narrative: str) -> list[dict[str, Any]]:
    """Split a non-JSON narrative into sections on blank lines.

    Leftover image placeholders are removed with ``strip_image_placeholders``
    before splitting; no per-paragraph illustration data is produced.

    Args:
        narrative: Plain-text narrative. ``None`` is treated as empty.

    Returns:
        One ``{"content": <stripped paragraph>, "placeholder_info": None}``
        dict per non-empty paragraph, in order. Empty when nothing but
        whitespace remains after placeholder removal.
    """
    text = strip_image_placeholders(narrative or "")
    if not text.strip():
        return []
    # A blank line is a newline, optional whitespace, then another newline.
    # Splitting on the literal "\n\n" missed CRLF line endings ("\r\n\r\n")
    # and blank lines that contain only spaces or tabs.
    chunks = re.split(r"\n\s*\n", text)
    parts = [chunk.strip() for chunk in chunks if chunk.strip()]
    return [{"content": part, "placeholder_info": None} for part in parts]
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-03-19 11:18:58 +08:00
|
|
|
|
def parse_narrative_to_sections(narrative: str) -> list[dict[str, Any]]:
    """Turn a narrative into a list of section dicts.

    Text that starts with ``{`` and mentions ``paragraphs`` is first handed
    to ``parse_narrative_json``. If that path is not taken, or yields
    nothing, the text is processed by
    ``split_plain_narrative_into_sections``, which strips placeholders and
    splits into paragraphs.

    Args:
        narrative: Narrative text, either JSON or plain prose.

    Returns:
        The parsed sections, or an empty list for empty or whitespace-only
        input.
    """
    if not narrative:
        return []
    if not str(narrative).strip():
        return []

    candidate = narrative.strip()
    looks_like_json = candidate.startswith("{") and "paragraphs" in candidate
    json_sections = parse_narrative_json(narrative) if looks_like_json else []
    # An empty JSON result falls through to the plain-text splitter.
    return json_sections or split_plain_narrative_into_sections(narrative)
|