def get_narrative_json_prompt(
    stage: str,
    slots: dict,
    new_content: str,
    existing_content: str = "",
    user_profile: str = "",
    birth_year: Optional[int] = None,
) -> str:
    """Build the prompt that rewrites new dialogue into a JSON narrative.

    The model is asked to answer with ``{"paragraphs": [{content,
    image_description}, ...]}`` and nothing else.

    Args:
        stage: Life stage the dialogue belongs to; interpolated into the prompt.
        slots: Known facts for the stage; interpolated with their default
            string formatting.
        new_content: The new dialogue text to be rewritten.
        existing_content: Narrative already stored; only its last 300
            characters are shown to the model as bridging context.
        user_profile: Optional profile text appended to the prompt.
        birth_year: Optional birth year forwarded to ``_build_age_hint``.

    Returns:
        The complete prompt string.
    """
    # A negative slice start is clamped, so this yields the whole text when
    # it is 300 characters or shorter.
    tail = existing_content[-300:] if existing_content else ""

    context_section = ""
    if tail:
        context_section = f"\n\n【衔接上下文(已有内容的末尾,仅供参考衔接,不要重复)】:\n{tail}"

    profile_section = ""
    if user_profile:
        profile_section = f"\n\n用户基本信息:\n{user_profile}"

    hint = _build_age_hint(stage, birth_year)
    time_section = f"\n时间参考:{hint}" if hint else ""

    return f"""{get_system_prompt()}

请将以下新的对话内容改写为第一人称文学叙述,并输出 **纯 JSON**,不要包含任何其他文字或 markdown 代码块。

阶段:{stage}
可用信息:{slots}{profile_section}{time_section}

新的对话内容:
{new_content}
{context_section}

## 要求
1. 从对话中提炼与人生经历相关的核心内容,过滤语气词、寒暄、与AI的交互
2. 使用第一人称,改写为流畅的书面叙述,不要直接引用对话原话
3. 只输出新内容的改写,不要重复已有内容
4. 每 200-300 字左右一个段落,每个段落配一张图
5. 如有衔接上下文,确保新内容与之自然衔接

## 输出格式(严格 JSON)
{{
  "paragraphs": [
    {{"content": "段落正文", "image_description": "该段配图的场景描述,具体有画面感"}},
    ...
  ]
}}

- content: 本段纯正文,不含占位符
- image_description: 该段配图的场景描述,具体、有画面感,便于生成图片。示例:南方小镇的青石板路,两旁是白墙黑瓦的老房子

如果对话中没有值得记录的人生经历内容,输出:{{"paragraphs": []}}
"""
def _stripped_text(value: Any) -> str:
    """Return ``value`` stripped when it is a string, otherwise an empty string."""
    return value.strip() if isinstance(value, str) else ""


def _load_paragraphs(raw: str) -> "list[Any] | None":
    """Return the ``paragraphs`` list of a JSON narrative, or None if unusable."""
    try:
        data = json.loads(extract_json_payload(raw))
    except (json.JSONDecodeError, TypeError, AttributeError):
        return None
    if not isinstance(data, dict):
        return None
    paragraphs = data.get("paragraphs")
    return paragraphs if isinstance(paragraphs, list) else None


def _paragraphs_to_sections(paragraphs: list) -> list[dict[str, Any]]:
    """Convert decoded paragraph objects into section dicts."""
    sections: list[dict[str, Any]] = []
    for index, paragraph in enumerate(paragraphs):
        if not isinstance(paragraph, dict):
            continue
        # The model output is untrusted: null, numbers or nested objects are
        # treated as empty text instead of raising on ``.strip()``.
        content = _stripped_text(paragraph.get("content"))
        description = _stripped_text(paragraph.get("image_description"))
        placeholder_info = None
        if description:
            placeholder_info = {
                # Renders as ``{{IMAGE:<description>}}``.
                "placeholder": f"{{{{IMAGE:{description}}}}}",
                "description": description,
                # Position in the raw ``paragraphs`` list (skipped entries
                # still consume an index).
                "index": index,
                "start_offset": 0,
            }
        sections.append({"content": content, "placeholder_info": placeholder_info})
    return sections


def parse_narrative_json(raw: str) -> list[dict[str, Any]]:
    """Parse a JSON-formatted narrative produced by the LLM.

    Args:
        raw: Model output expected to contain
            ``{"paragraphs": [{"content", "image_description"}, ...]}``.

    Returns:
        A list of dicts with ``content`` and ``placeholder_info`` keys (the
        same shape ``split_narrative_to_sections`` appends). The list is
        empty when ``raw`` is None or blank, cannot be decoded, or has no
        ``paragraphs`` list.
    """
    if not raw or not raw.strip():
        return []
    paragraphs = _load_paragraphs(raw)
    if paragraphs is None:
        return []
    return _paragraphs_to_sections(paragraphs)


def parse_narrative_to_sections(narrative: str) -> list[dict[str, Any]]:
    """Parse a narrative into sections, preferring JSON over placeholders.

    Args:
        narrative: Either a JSON narrative or legacy text with image
            placeholders.

    Returns:
        A list of section dicts. Empty for None or blank input, and for a
        well-formed JSON narrative whose ``paragraphs`` list is empty.
        Anything that is not usable JSON is handed to
        ``split_narrative_to_sections``.
    """
    if not narrative or not narrative.strip():
        return []
    stripped = narrative.strip()
    if stripped.startswith("{") and "paragraphs" in stripped:
        paragraphs = _load_paragraphs(narrative)
        if paragraphs is not None:
            sections = _paragraphs_to_sections(paragraphs)
            # An explicitly empty list means "nothing to record"; do not
            # fall back and store the raw JSON text as narrative content.
            if sections or not paragraphs:
                return sections
    return split_narrative_to_sections(narrative)
text or not text.strip(): + return False + s = text.strip() + return s.startswith("{") and "paragraphs" in s + + def _get_llm(): """Celery 任务内获取 LangChain LLM(通过 port)""" try: @@ -328,7 +337,7 @@ def _save_narrative_to_sections(db: Session, chapter, narrative: str, title: str img_settings = MemoirImageSettings.from_env() prompt_service = MemoirImagePromptService(llm=None, settings=img_settings) if img_settings.enabled else None - segments = split_narrative_to_sections(narrative_to_parse) + segments = parse_narrative_to_sections(narrative_to_parse) if not segments: sec = ChapterSection( id=str(uuid.uuid4()), @@ -368,8 +377,11 @@ def _save_narrative_to_sections(db: Session, chapter, narrative: str, title: str chapter.source_segments = list(set((chapter.source_segments or []) + (source_segments or []))) return chapter - # 每 3 个 section 对应 1 张图片,其他 section 的 image_id 为空 - def _should_have_image(order_idx: int) -> bool: + def _should_have_image(seg: dict, order_idx: int) -> bool: + """有 placeholder_info 的段落配图;无则兼容旧格式(每 3 段 1 图)""" + ph = seg.get("placeholder_info") + if ph and ph.get("description"): + return True return (order_idx % 3) == 2 def _placeholder_for_segment(seg: dict, order_idx: int) -> dict | None: @@ -385,7 +397,7 @@ def _save_narrative_to_sections(db: Session, chapter, narrative: str, title: str order_idx = order_base + i content = (seg.get("content") or "").strip() image_asset = None - if img_settings.enabled and _should_have_image(order_idx): + if img_settings.enabled and _should_have_image(seg, order_idx): ph = _placeholder_for_segment(seg, order_idx) style = prompt_service.CATEGORY_STYLE_MAP.get(category, img_settings.default_style) if prompt_service else img_settings.default_style image_asset = build_initial_image_assets( @@ -640,12 +652,14 @@ def process_memoir_segments(self, user_id: str, segment_ids: List[str]): birth_year=birth_year, llm=llm, ) - if existing_content: + if _is_json_narrative(new_narrative): + narrative = new_narrative + elif 
existing_content: narrative = f"{existing_content}\n\n{new_narrative}" else: narrative = new_narrative - if existing_content and len(narrative) < len(existing_content) * 0.8: + if existing_content and not _is_json_narrative(narrative) and len(narrative) < len(existing_content) * 0.8: logger.warning( "内容长度异常: existing=%d, new=%d, category=%s. 回退为追加模式", len(existing_content), @@ -654,7 +668,8 @@ def process_memoir_segments(self, user_id: str, segment_ids: List[str]): ) narrative = f"{existing_content}\n\n{combined_text}" - narrative = inject_placeholders(narrative) + if not _is_json_narrative(narrative): + narrative = inject_placeholders(narrative) calculated_order_index = STAGE_TO_ORDER.get(chapter_category, 999) chapter = _save_narrative_to_sections( @@ -777,7 +792,7 @@ def generate_chapter_content(self, user_id: str, stage: str, new_content: str): ) if llm: - prompt = get_narrative_prompt( + prompt = get_narrative_json_prompt( stage=stage, slots={}, new_content=new_content, @@ -785,24 +800,25 @@ def generate_chapter_content(self, user_id: str, stage: str, new_content: str): ) response = llm.invoke(prompt) new_narrative = response.content.strip() - # 追加而非替换 - if existing_content: + if _is_json_narrative(new_narrative): + narrative = new_narrative + elif existing_content: narrative = f"{existing_content}\n\n{new_narrative}" else: narrative = new_narrative else: narrative = f"{existing_content}\n\n{new_content}" if existing_content else new_content - # 安全检查:新内容不应比旧内容短 - if existing_content and len(narrative) < len(existing_content) * 0.8: + # 安全检查:新内容不应比旧内容短(仅非 JSON 格式) + if existing_content and not _is_json_narrative(narrative) and len(narrative) < len(existing_content) * 0.8: logger.warning( f"内容长度异常: existing={len(existing_content)}, " f"new={len(narrative)}, stage={stage}. 
def test_parse_narrative_json_returns_sections_with_content_and_placeholder_info(self):
    """Each JSON paragraph becomes one section with its text and image description."""
    raw = '{"paragraphs": [{"content": "那年春天。", "image_description": "南方小镇的青石板路"}, {"content": "奶奶坐在藤椅上。", "image_description": "奶奶的藤椅"}]}'
    expected = (
        ("那年春天。", "南方小镇的青石板路"),
        ("奶奶坐在藤椅上。", "奶奶的藤椅"),
    )

    segments = parse_narrative_json(raw)

    self.assertEqual(len(segments), 2)
    for segment, (content, description) in zip(segments, expected):
        self.assertEqual(segment["content"], content)
        self.assertEqual(segment["placeholder_info"]["description"], description)
def test_parse_narrative_to_sections_prefers_json_then_fallback_to_placeholder(self):
    """JSON input is parsed as JSON; placeholder text uses the legacy parser."""
    json_raw = '{"paragraphs": [{"content": "段落一", "image_description": "图一"}]}'
    segments = parse_narrative_to_sections(json_raw)
    self.assertEqual(len(segments), 1)
    self.assertEqual(segments[0]["content"], "段落一")
    self.assertEqual(segments[0]["placeholder_info"]["description"], "图一")

    # This is a plain string, not an f-string, so braces are not escaped:
    # the canonical placeholder is written with exactly two braces per side.
    placeholder_raw = "正文。\n\n{{IMAGE:描述}}\n\n结尾。"
    segments2 = parse_narrative_to_sections(placeholder_raw)
    self.assertEqual(len(segments2), 2)
    self.assertIn("正文", segments2[0]["content"])
    self.assertEqual(segments2[0]["placeholder_info"]["description"], "描述")