fix: 去除LLM直接生成图片占位符逻辑

2026-03-19 11:18:58 +08:00
parent 67fb5d2cb6
commit f3629efec3
6 changed files with 160 additions and 19 deletions
--- a/api/app/features/memoir/memoir_images/parser.py
+++ b/api/app/features/memoir/memoir_images/parser.py
@@ -1,6 +1,8 @@
+import json
 import re
 from typing import Any

+from .json_payload import extract_json_payload
 from .schema import IMAGE_STATUS_PENDING

 PLACEHOLDER_RE = re.compile(
@@ -82,3 +84,52 @@ def split_narrative_to_sections(narrative: str) -> list[dict[str, Any]]:
            content = content.strip()
        sections.append({"content": content or "", "placeholder_info": placeholder_info})
    return sections
+
+
+def parse_narrative_json(raw: str) -> list[dict[str, Any]]:
+    """
+    Parse a narrative that the LLM emitted as JSON.
+
+    The payload is expected to be an object with a "paragraphs" list whose
+    items are objects carrying "content" and, optionally, "image_description".
+
+    Returns the same structure as split_narrative_to_sections: a list of dicts,
+    each with "content" and "placeholder_info" (None when the paragraph has no
+    image description). Returns [] for None, empty or whitespace-only input,
+    for unparseable JSON, and when the top level is not an object holding a
+    list under "paragraphs".
+    """
+    # The two checks must be tested separately: `not (raw or raw.strip())`
+    # calls None.strip() for None and lets whitespace-only strings through.
+    if not raw or not raw.strip():
+        return []
+    try:
+        data = json.loads(extract_json_payload(raw))
+    except (json.JSONDecodeError, TypeError, AttributeError):
+        return []
+    if not isinstance(data, dict):
+        return []
+    paragraphs = data.get("paragraphs") or []
+    if not isinstance(paragraphs, list):
+        return []
+
+    result: list[dict[str, Any]] = []
+    for i, p in enumerate(paragraphs):
+        if not isinstance(p, dict):
+            continue
+        # LLM output is untrusted: a non-string field (number, list, ...) is
+        # treated as missing instead of crashing on .strip().
+        content = p.get("content")
+        content = content.strip() if isinstance(content, str) else ""
+        desc = p.get("image_description")
+        desc = desc.strip() if isinstance(desc, str) else ""
+        placeholder_info = None
+        if desc:
+            placeholder_info = {
+                "placeholder": f"{{{{IMAGE:{desc}}}}}",
+                "description": desc,
+                # Position in the raw "paragraphs" list, counting skipped items.
+                "index": i,
+                "start_offset": 0,
+            }
+        result.append({"content": content, "placeholder_info": placeholder_info})
+    return result
+
+
+def parse_narrative_to_sections(narrative: str) -> list[dict[str, Any]]:
+    """
+    Parse a narrative into sections.
+
+    The JSON format is tried first when the text starts with "{" and mentions
+    "paragraphs"; if that yields no sections, or the text does not look like
+    JSON, parsing falls back to split_narrative_to_sections.
+
+    Returns the same structure as split_narrative_to_sections. None, empty or
+    whitespace-only input yields [].
+    """
+    # The two checks must be tested separately: `not (narrative or
+    # narrative.strip())` calls None.strip() for None and never matches a
+    # whitespace-only string.
+    if not narrative or not narrative.strip():
+        return []
+    stripped = narrative.strip()
+    # Cheap pre-check so ordinary prose is not sent through the JSON parser.
+    if stripped.startswith("{") and "paragraphs" in stripped:
+        segments = parse_narrative_json(narrative)
+        if segments:
+            return segments
+    return split_narrative_to_sections(narrative)