fix: 去除LLM直接生成图片占位符逻辑
This commit is contained in:
@@ -10,7 +10,7 @@ from app.core.logging import get_logger
|
||||
|
||||
from app.agents.memoir.prompts import (
|
||||
get_creative_title_prompt,
|
||||
get_narrative_prompt,
|
||||
get_narrative_json_prompt,
|
||||
)
|
||||
|
||||
logger = get_logger(__name__)
|
||||
@@ -61,7 +61,7 @@ class NarrativeAgent:
|
||||
return f"{existing_content}\n\n{new_content}"
|
||||
return new_content
|
||||
try:
|
||||
prompt = get_narrative_prompt(
|
||||
prompt = get_narrative_json_prompt(
|
||||
stage=stage,
|
||||
slots=slots,
|
||||
new_content=new_content,
|
||||
|
||||
@@ -15,7 +15,7 @@ from app.core.task_tracker import task_tracker
|
||||
from app.agents.state_schema import MemoirStateSchema
|
||||
from app.agents.memoir.prompts import (
|
||||
get_creative_title_prompt,
|
||||
get_narrative_prompt,
|
||||
get_narrative_json_prompt,
|
||||
get_state_extraction_prompt,
|
||||
)
|
||||
|
||||
@@ -141,7 +141,7 @@ class MemoirGenerator:
|
||||
return f"{existing_content}\n\n{new_content}"
|
||||
return new_content
|
||||
try:
|
||||
prompt = get_narrative_prompt(
|
||||
prompt = get_narrative_json_prompt(
|
||||
stage=stage,
|
||||
slots=slots,
|
||||
new_content=new_content,
|
||||
|
||||
@@ -349,3 +349,53 @@ def get_narrative_prompt(
|
||||
|
||||
只输出新对话内容的改写结果(包含图片占位符)。如果对话中没有值得记录的人生经历内容,输出空字符串。
|
||||
"""
|
||||
|
||||
|
||||
def get_narrative_json_prompt(
    stage: str,
    slots: dict,
    new_content: str,
    existing_content: str = "",
    user_profile: str = "",
    birth_year: Optional[int] = None,
) -> str:
    """Build the prompt asking the LLM to rewrite new dialogue as narrative, output as JSON.

    The prompt instructs the model to reply with pure JSON of the form
    ``{"paragraphs": [{"content": ..., "image_description": ...}, ...]}``
    and to reply with ``{"paragraphs": []}`` when the dialogue contains
    nothing worth recording.

    Args:
        stage: Stage label interpolated into the prompt.
        slots: Known information; interpolated with its default string form.
        new_content: The new dialogue text to be rewritten.
        existing_content: Previously written narrative. Only its tail is
            included, as continuity context.
        user_profile: Optional profile text; omitted from the prompt when empty.
        birth_year: Forwarded to ``_build_age_hint`` together with ``stage``.

    Returns:
        The full prompt string, beginning with the output of
        ``get_system_prompt()``.
    """
    context_tail = ""
    if existing_content:
        # Keep at most the last 300 characters of the existing narrative.
        context_tail = existing_content[-300:] if len(existing_content) > 300 else existing_content
    # Each optional section collapses to an empty string when it has no data.
    context_section = f"\n\n【衔接上下文(已有内容的末尾,仅供参考衔接,不要重复)】:\n{context_tail}" if context_tail else ""
    profile_section = f"\n\n用户基本信息:\n{user_profile}" if user_profile else ""
    # NOTE(review): _build_age_hint is defined outside this view; presumably it
    # returns a time/age hint string (or something falsy) - confirm.
    age_hint = _build_age_hint(stage, birth_year)
    time_section = f"\n时间参考:{age_hint}" if age_hint else ""

    # Doubled braces below are f-string escapes for literal "{" / "}" in the JSON sample.
    return f"""{get_system_prompt()}

请将以下新的对话内容改写为第一人称文学叙述,并输出 **纯 JSON**,不要包含任何其他文字或 markdown 代码块。

阶段:{stage}
可用信息:{slots}{profile_section}{time_section}

新的对话内容:
{new_content}
{context_section}

## 要求
1. 从对话中提炼与人生经历相关的核心内容,过滤语气词、寒暄、与AI的交互
2. 使用第一人称,改写为流畅的书面叙述,不要直接引用对话原话
3. 只输出新内容的改写,不要重复已有内容
4. 每 200-300 字左右一个段落,每个段落配一张图
5. 如有衔接上下文,确保新内容与之自然衔接

## 输出格式(严格 JSON)
{{
"paragraphs": [
{{"content": "段落正文", "image_description": "该段配图的场景描述,具体有画面感"}},
...
]
}}

- content: 本段纯正文,不含占位符
- image_description: 该段配图的场景描述,具体、有画面感,便于生成图片。示例:南方小镇的青石板路,两旁是白墙黑瓦的老房子

如果对话中没有值得记录的人生经历内容,输出:{{"paragraphs": []}}
"""
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
import json
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
from .json_payload import extract_json_payload
|
||||
from .schema import IMAGE_STATUS_PENDING
|
||||
|
||||
PLACEHOLDER_RE = re.compile(
|
||||
@@ -82,3 +84,52 @@ def split_narrative_to_sections(narrative: str) -> list[dict[str, Any]]:
|
||||
content = content.strip()
|
||||
sections.append({"content": content or "", "placeholder_info": placeholder_info})
|
||||
return sections
|
||||
|
||||
|
||||
def parse_narrative_json(raw: str) -> list[dict[str, Any]]:
    """Parse a JSON-formatted narrative produced by the LLM.

    The expected payload is ``{"paragraphs": [{"content": ...,
    "image_description": ...}, ...]}``.

    Args:
        raw: Raw LLM output. ``None``, empty and whitespace-only values are
            treated as "no narrative".

    Returns:
        A list of dicts with keys ``content`` and ``placeholder_info``.
        ``placeholder_info`` is ``None`` when the paragraph has no image
        description; otherwise it is a dict with ``placeholder``,
        ``description``, ``index`` and ``start_offset``. An empty list is
        returned when the input is blank, is not valid JSON, or does not
        carry a ``paragraphs`` list.
    """
    # Test each condition separately: the previous `not (raw or raw.strip())`
    # raised AttributeError on None and let whitespace-only input through.
    if not raw or not raw.strip():
        return []

    try:
        payload = extract_json_payload(raw)
        data = json.loads(payload)
        paragraphs = data.get("paragraphs") or []
    except (json.JSONDecodeError, TypeError, AttributeError):
        # Malformed JSON, a non-string payload, or a top-level non-object.
        return []
    if not isinstance(paragraphs, list):
        return []

    result: list[dict[str, Any]] = []
    for i, p in enumerate(paragraphs):
        if not isinstance(p, dict):
            continue
        # LLM output is untrusted: a numeric or nested value here used to
        # crash on .strip(); non-string values are treated as missing.
        raw_content = p.get("content")
        raw_desc = p.get("image_description")
        content = raw_content.strip() if isinstance(raw_content, str) else ""
        desc = raw_desc.strip() if isinstance(raw_desc, str) else ""

        placeholder_info = None
        if desc:
            placeholder_info = {
                # Renders as the literal text {{IMAGE:<description>}}.
                "placeholder": f"{{{{IMAGE:{desc}}}}}",
                "description": desc,
                # Position in the original paragraphs list (skipped
                # non-dict entries still consume an index).
                "index": i,
                "start_offset": 0,
            }
        result.append({"content": content, "placeholder_info": placeholder_info})
    return result
|
||||
|
||||
|
||||
def parse_narrative_to_sections(narrative: str) -> list[dict[str, Any]]:
    """Parse a narrative into sections, preferring JSON over placeholder text.

    Args:
        narrative: Either a JSON narrative (an object with a ``paragraphs``
            key) or plain text that is handed to
            ``split_narrative_to_sections``.

    Returns:
        A list of section dicts in the structure produced by
        ``split_narrative_to_sections``. Blank input, and a well-formed JSON
        narrative without usable paragraphs, both yield an empty list.
    """
    # Test each condition separately: the previous `not (narrative or
    # narrative.strip())` raised AttributeError on None and let
    # whitespace-only input through.
    if not narrative or not narrative.strip():
        return []

    stripped = narrative.strip()
    if stripped.startswith("{") and "paragraphs" in stripped:
        segments = parse_narrative_json(narrative)
        if segments:
            return segments
        # A well-formed JSON narrative with nothing to record (for example
        # {"paragraphs": []}) must not fall through to the placeholder
        # splitter, which would treat the raw JSON text as prose.
        try:
            decoded = json.loads(stripped)
        except json.JSONDecodeError:
            decoded = None
        if isinstance(decoded, dict) and "paragraphs" in decoded:
            return []

    # Not JSON, or JSON that could not be decoded: use placeholder parsing.
    return split_narrative_to_sections(narrative)
|
||||
|
||||
@@ -28,7 +28,7 @@ from app.core.dependencies import get_llm_provider
|
||||
from app.agents.state_schema import MemoirStateSchema, SlotData, default_state
|
||||
from app.agents.memoir.prompts import (
|
||||
STAGE_TO_ORDER,
|
||||
get_narrative_prompt,
|
||||
get_narrative_json_prompt,
|
||||
inject_image_placeholder_template,
|
||||
)
|
||||
from app.agents.memoir import MemoirOrchestrator
|
||||
@@ -38,6 +38,7 @@ from app.agents.chat.prompts_profile import format_user_profile_context
|
||||
from app.features.memoir.memoir_images.parser import (
|
||||
build_initial_image_assets,
|
||||
parse_image_placeholders,
|
||||
parse_narrative_to_sections,
|
||||
split_narrative_to_sections,
|
||||
)
|
||||
import hashlib
|
||||
@@ -67,6 +68,14 @@ logger = get_logger(__name__)
|
||||
_REDIS_CLIENTS: dict[bool, redis.Redis] = {}
|
||||
|
||||
|
||||
def _is_json_narrative(text: str) -> bool:
|
||||
"""检测 narrative 是否为 JSON 格式(paragraphs 结构)"""
|
||||
if not text or not text.strip():
|
||||
return False
|
||||
s = text.strip()
|
||||
return s.startswith("{") and "paragraphs" in s
|
||||
|
||||
|
||||
def _get_llm():
|
||||
"""Celery 任务内获取 LangChain LLM(通过 port)"""
|
||||
try:
|
||||
@@ -328,7 +337,7 @@ def _save_narrative_to_sections(db: Session, chapter, narrative: str, title: str
|
||||
img_settings = MemoirImageSettings.from_env()
|
||||
prompt_service = MemoirImagePromptService(llm=None, settings=img_settings) if img_settings.enabled else None
|
||||
|
||||
segments = split_narrative_to_sections(narrative_to_parse)
|
||||
segments = parse_narrative_to_sections(narrative_to_parse)
|
||||
if not segments:
|
||||
sec = ChapterSection(
|
||||
id=str(uuid.uuid4()),
|
||||
@@ -368,8 +377,11 @@ def _save_narrative_to_sections(db: Session, chapter, narrative: str, title: str
|
||||
chapter.source_segments = list(set((chapter.source_segments or []) + (source_segments or [])))
|
||||
return chapter
|
||||
|
||||
# 每 3 个 section 对应 1 张图片,其他 section 的 image_id 为空
|
||||
def _should_have_image(order_idx: int) -> bool:
|
||||
def _should_have_image(seg: dict, order_idx: int) -> bool:
|
||||
"""有 placeholder_info 的段落配图;无则兼容旧格式(每 3 段 1 图)"""
|
||||
ph = seg.get("placeholder_info")
|
||||
if ph and ph.get("description"):
|
||||
return True
|
||||
return (order_idx % 3) == 2
|
||||
|
||||
def _placeholder_for_segment(seg: dict, order_idx: int) -> dict | None:
|
||||
@@ -385,7 +397,7 @@ def _save_narrative_to_sections(db: Session, chapter, narrative: str, title: str
|
||||
order_idx = order_base + i
|
||||
content = (seg.get("content") or "").strip()
|
||||
image_asset = None
|
||||
if img_settings.enabled and _should_have_image(order_idx):
|
||||
if img_settings.enabled and _should_have_image(seg, order_idx):
|
||||
ph = _placeholder_for_segment(seg, order_idx)
|
||||
style = prompt_service.CATEGORY_STYLE_MAP.get(category, img_settings.default_style) if prompt_service else img_settings.default_style
|
||||
image_asset = build_initial_image_assets(
|
||||
@@ -640,12 +652,14 @@ def process_memoir_segments(self, user_id: str, segment_ids: List[str]):
|
||||
birth_year=birth_year,
|
||||
llm=llm,
|
||||
)
|
||||
if existing_content:
|
||||
if _is_json_narrative(new_narrative):
|
||||
narrative = new_narrative
|
||||
elif existing_content:
|
||||
narrative = f"{existing_content}\n\n{new_narrative}"
|
||||
else:
|
||||
narrative = new_narrative
|
||||
|
||||
if existing_content and len(narrative) < len(existing_content) * 0.8:
|
||||
if existing_content and not _is_json_narrative(narrative) and len(narrative) < len(existing_content) * 0.8:
|
||||
logger.warning(
|
||||
"内容长度异常: existing=%d, new=%d, category=%s. 回退为追加模式",
|
||||
len(existing_content),
|
||||
@@ -654,7 +668,8 @@ def process_memoir_segments(self, user_id: str, segment_ids: List[str]):
|
||||
)
|
||||
narrative = f"{existing_content}\n\n{combined_text}"
|
||||
|
||||
narrative = inject_placeholders(narrative)
|
||||
if not _is_json_narrative(narrative):
|
||||
narrative = inject_placeholders(narrative)
|
||||
calculated_order_index = STAGE_TO_ORDER.get(chapter_category, 999)
|
||||
|
||||
chapter = _save_narrative_to_sections(
|
||||
@@ -777,7 +792,7 @@ def generate_chapter_content(self, user_id: str, stage: str, new_content: str):
|
||||
)
|
||||
|
||||
if llm:
|
||||
prompt = get_narrative_prompt(
|
||||
prompt = get_narrative_json_prompt(
|
||||
stage=stage,
|
||||
slots={},
|
||||
new_content=new_content,
|
||||
@@ -785,24 +800,25 @@ def generate_chapter_content(self, user_id: str, stage: str, new_content: str):
|
||||
)
|
||||
response = llm.invoke(prompt)
|
||||
new_narrative = response.content.strip()
|
||||
# 追加而非替换
|
||||
if existing_content:
|
||||
if _is_json_narrative(new_narrative):
|
||||
narrative = new_narrative
|
||||
elif existing_content:
|
||||
narrative = f"{existing_content}\n\n{new_narrative}"
|
||||
else:
|
||||
narrative = new_narrative
|
||||
else:
|
||||
narrative = f"{existing_content}\n\n{new_content}" if existing_content else new_content
|
||||
|
||||
# 安全检查:新内容不应比旧内容短
|
||||
if existing_content and len(narrative) < len(existing_content) * 0.8:
|
||||
# 安全检查:新内容不应比旧内容短(仅非 JSON 格式)
|
||||
if existing_content and not _is_json_narrative(narrative) and len(narrative) < len(existing_content) * 0.8:
|
||||
logger.warning(
|
||||
f"内容长度异常: existing={len(existing_content)}, "
|
||||
f"new={len(narrative)}, stage={stage}. 回退为追加模式"
|
||||
)
|
||||
narrative = f"{existing_content}\n\n{new_content}"
|
||||
|
||||
# 入库前:占位符位置用正则匹配后拼上固定模板
|
||||
narrative = inject_image_placeholder_template(narrative)
|
||||
if not _is_json_narrative(narrative):
|
||||
narrative = inject_image_placeholder_template(narrative)
|
||||
calculated_order_index = STAGE_TO_ORDER.get(stage, 999)
|
||||
title = chapter.title if chapter else f"{stage} 回忆"
|
||||
chapter = _save_narrative_to_sections(
|
||||
|
||||
@@ -3,6 +3,8 @@ import unittest
|
||||
from app.features.memoir.memoir_images.parser import (
|
||||
build_initial_image_assets,
|
||||
parse_image_placeholders,
|
||||
parse_narrative_json,
|
||||
parse_narrative_to_sections,
|
||||
)
|
||||
|
||||
|
||||
@@ -52,3 +54,25 @@ class MemoirImageParserTest(unittest.TestCase):
|
||||
self.assertEqual(len(items), 1)
|
||||
self.assertEqual(items[0]["placeholder"], "{{IMAGE:1938年初的上海弄堂口,冬日萧瑟}}")
|
||||
self.assertEqual(items[0]["description"], "1938年初的上海弄堂口,冬日萧瑟")
|
||||
|
||||
def test_parse_narrative_json_returns_sections_with_content_and_placeholder_info(self):
    """parse_narrative_json yields one section per paragraph with its content and image description."""
    raw = '{"paragraphs": [{"content": "那年春天。", "image_description": "南方小镇的青石板路"}, {"content": "奶奶坐在藤椅上。", "image_description": "奶奶的藤椅"}]}'
    segments = parse_narrative_json(raw)
    # Both paragraphs are kept, in input order.
    self.assertEqual(len(segments), 2)
    self.assertEqual(segments[0]["content"], "那年春天。")
    self.assertEqual(segments[0]["placeholder_info"]["description"], "南方小镇的青石板路")
    self.assertEqual(segments[1]["content"], "奶奶坐在藤椅上。")
    self.assertEqual(segments[1]["placeholder_info"]["description"], "奶奶的藤椅")
|
||||
|
||||
def test_parse_narrative_to_sections_prefers_json_then_fallback_to_placeholder(self):
    """JSON input is parsed as JSON; non-JSON input goes through placeholder parsing."""
    # Case 1: a JSON narrative is parsed into sections directly.
    json_raw = '{"paragraphs": [{"content": "段落一", "image_description": "图一"}]}'
    segments = parse_narrative_to_sections(json_raw)
    self.assertEqual(len(segments), 1)
    self.assertEqual(segments[0]["content"], "段落一")
    self.assertEqual(segments[0]["placeholder_info"]["description"], "图一")

    # Case 2: plain text with an image placeholder uses the fallback parser.
    # NOTE(review): this is a plain (non-f) string, so the text really
    # contains four braces on each side, whereas another test in this file
    # uses the two-brace form {{IMAGE:...}}. Looks like a leftover f-string
    # escape - confirm against PLACEHOLDER_RE.
    placeholder_raw = "正文。\n\n{{{{IMAGE:描述}}}}\n\n结尾。"
    segments2 = parse_narrative_to_sections(placeholder_raw)
    self.assertEqual(len(segments2), 2)
    self.assertIn("正文", segments2[0]["content"])
    self.assertEqual(segments2[0]["placeholder_info"]["description"], "描述")
|
||||
|
||||
Reference in New Issue
Block a user