# life-echo/api/app/agents/memoir/prompts.py

"""
回忆录整理 Agent 提示词模板
"""

from __future__ import annotations

import json
from typing import Optional

from app.agents.chat.background_voice import get_background_voice_narrative_block
from app.agents.chat.occupation_context import get_occupation_narrative_hint
from app.agents.stage_constants import STAGE_ERA_HINTS, STAGE_SLOT_KEYS
from app.agents.style_profiles import MemoirStyleProfile


def _memoir_fidelity_core_rules_en() -> str:
    """English-lite version of the fact-boundary rules 1–4."""
    return """## Fact boundary (must follow; takes precedence over style)
1. **The body may only expand on the content in the "User's oral memory this turn" block.** If the input includes a "Reference memory snippets" block, you must not write its specifics as the user's first-hand experience this turn; at most use one short bridging sentence, and never introduce names, places, dates, dialogue, or numbers that appear only in the snippets.
2. **No fabrication.** Do not add people, dialogue, places, dates, events, causes, or numbers the user did not state. Do not invent inner monologue or "typical era" filler. If the user did not state an outcome (selected, accepted, rejected, etc.), do not write a definite conclusion. Prefer neutral, partial wording when uncertain.
3. **Do not pad for length.** Short input → short output. Paragraph count and length follow the material.
4. Allowed: removing fillers and small talk, reordering for clarity, merging redundant references, lifting spoken language to written prose. Do not invent details to "make the writing nicer."

## Encouraged operations (not fabrication)
- Lift colloquial speech to clean written English: trim filler, smooth syntax, choose more precise verbs.
- Add short bridging sentences ("Looking back," "In those days") as long as they introduce no new entities.
- Render emotions already stated in the oral memory in slightly more literary phrasing (the user said "it was hard," you may write "it weighed on me") — provided you add no new scenes, numbers, or actions.
- Merge synonymous repeated statements for tighter narration.
- Correct obvious speech-to-text typos.
- **Era / cultural texture (only with anchored facts)**: when the oral memory or profile fields make the year, region, or environment clear, you may use period-appropriate vocabulary and ambient texture as a touch — but you may not invent specific people, events, dialogue, or scenes."""


def _memoir_fidelity_user_profile_rules_en() -> str:
    return """## User profile and stage information
- The "About the user" / "Time reference" blocks may only be used for items that are explicitly listed.
- **Cultural / era texture (encouraged when anchored)**: when this turn's oral memory clearly belongs to the same era or place that profile facts describe, you may weave the era and place into the prose as **language and atmosphere** (forms of address, regional expressions, period feel). You still may not turn profile facts alone into a specific event the user did not narrate this turn.
- Do not put concrete biographical details from the profile into the body unless the user actually mentioned them this turn."""


def _memoir_fidelity_core_rules() -> str:
    """事实边界 1–4 条（与文体第 5 条拆分，供 story 叙事与标题等复用）。"""
    return """## 事实边界（必须遵守，优先于文采）
1. **正文只能展开「本段用户口述」区块中的内容**。若输入中有「相关记忆摘录」等参考区，其中信息**不得**写成本人本轮亲口经历的细节；最多用一两句作主题衔接，且不得引入摘录里才有的具体人名、地点、时间、对话、数字。**若口述未提及具体场合**（如聚餐、酒席、当晚、前一晚等），不得借用摘录中的场合描写写成本轮亲历。
2. **禁止编造**：不得新增用户未提及的具体人物姓名、对话原文、地点、时间、事件经过、因果、数字；不得推断性心理描写或「典型年代场景」填充。**口述未明确结果、结局或对方最终决定时**，不得用常识补全为确定断言（例如未清楚表达落选、未通过、被拒绝等，则不得写「未能被选中」「最终没有录用」等）；只写已明确的过程与事实，不确定处宁可略写或使用中性表述。
3. **禁止为凑字数扩写**：材料短则输出短；段落数量与长度随材料而定。
4. 允许：去除口语赘词与寒暄、调整语序、合并重复指代、把口语改为书面语；**不得**用虚构细节「让文章更好看」。

## 以下操作是鼓励的（不算编造）
- 口语转书面语：删语气词、用成语/四字词替换口语表达、调整语序
- 过渡句与衔接句：如「那段日子」「回想起来」等，只要不引入新的实体
- 基于口述已有情感的书面化渲染（如口述说「难受」，可改为「心里不好受」）——前提是不新增具体场景、数字、动作
- 合并同义重复表述，让叙述更紧凑
- 纠正明显的语音识别错字
- **时代与文化语感（仅限已锚定信息）**：当口述（或「时间参考」、slots）已点明年份阶段、地域或典型生活环境时，可用与之**相称**的年代/地域**语汇与泛指性生活氛围**作烘托（如口述已提「分粮」「票证」「赶集」则可写相应语感），**不得**凭此新增口述未出现的人物、事件、对话、具体场景经过"""


def _memoir_fidelity_user_profile_rules() -> str:
    return """## 用户档案与阶段信息
- 「用户基本信息」「时间参考」仅可使用其中**已写明**的条目。
- **文化/时代渗透（鼓励，须咬合）**：当本段口述已提及或与口述主题**明确同一脉络**（如口述讲童年老家、档案写明籍贯/成长地一致）时，可将档案中的年代、地域、身份背景化入正文为**语言与氛围**（称谓习惯、地域说法、时代体感），使叙述更文学；**禁止**单靠档案写出一段口述未发生的具体人事经过，仍须遵守「事实边界」关于摘录区与禁止编造的规定。
- 档案中的具体经历细节不得写入正文，除非用户在本段口述里已提及或明确关联。"""


def get_memoir_fidelity_system_prompt(language: str = "zh") -> str:
    """Build the accuracy-first system prompt with a plain narrative style rule.

    The prompt is the shared fact-boundary rules 1-4, a rule 5 demanding a
    plain, restrained tone, and the profile-usage rules.

    Args:
        language: ``"en"`` selects the English prompt; any other value
            produces the Chinese prompt.

    Returns:
        The assembled system prompt text.
    """
    if language != "en":
        core_rules = _memoir_fidelity_core_rules()
        profile_rules = _memoir_fidelity_user_profile_rules()
        return f"""你是回忆录编辑助手，任务是把用户口述整理为第一人称书面叙述。

{core_rules}
5. **叙述风格平实**：少用抒情、比喻与文学铺陈；像清楚记事，不要写成散文。

{profile_rules}"""

    core_rules_en = _memoir_fidelity_core_rules_en()
    profile_rules_en = _memoir_fidelity_user_profile_rules_en()
    return f"""You are a memoir editor. Your task is to lift the user's oral memory into first-person written prose.

{core_rules_en}
5. **Plain narrative tone.** Keep description and metaphor restrained; clear chronicle, not lyrical essay.

{profile_rules_en}"""


def get_memoir_fidelity_facts_only_prompt(language: str = "zh") -> str:
    """Build the fact-boundary system prompt that permits a literary memoir voice.

    It shares rules 1-4 with ``get_memoir_fidelity_system_prompt``; only rule 5
    differs, allowing biographer-style prose while still forbidding
    fabrication.

    Args:
        language: ``"en"`` selects the English prompt; any other value
            produces the Chinese prompt.

    Returns:
        The assembled system prompt text.
    """
    if language != "en":
        core_rules = _memoir_fidelity_core_rules()
        profile_rules = _memoir_fidelity_user_profile_rules()
        return f"""你是回忆录编辑助手，任务是把用户口述整理为第一人称书面叙述。

{core_rules}
5. **文体**：在遵守第 1–4 条的前提下，以**第一人称、偏文学性的回忆录散文**落笔（场景与情绪随材料起伏，避免简讯式罗列），将口语改写为**优雅、连贯、可诵读**的叙述；在口述或合法档案锚点已存在的范围内，鼓励**时代与文化语感**浸润正文；可在**不引入新事实**的前提下做结构组织（段内分段、句间承接、伏笔式指代同一已出现人物/事物）；文采服务于真实内容，**不得**用虚构描写替代或填补事实。

{profile_rules}"""

    core_rules_en = _memoir_fidelity_core_rules_en()
    profile_rules_en = _memoir_fidelity_user_profile_rules_en()
    return f"""You are a memoir editor. Your task is to lift the user's oral memory into first-person written prose.

{core_rules_en}
5. **Style**: while obeying rules 1–4, write in a **first-person, lightly literary memoir voice** (scenes and emotion follow the material, never list-like reporting). Polish the speech into **graceful, flowing, readable** prose; where the oral memory or profile already anchors an era or region, you may let period vocabulary and atmosphere season the writing. You may organize the structure (paragraph splits within a single oral block, transitions, callbacks to people/things already named) **without introducing new facts**. Style serves truth; never use invented imagery to fill in missing facts.

{profile_rules_en}"""


def _memoir_editor_narrative_style_block(language: str = "zh") -> str:
    """Return the biographer-style rewriting notes for the given language.

    The text is delegated to a fresh ``MemoirStyleProfile`` instance, which
    keeps the memoir style separate from the chat style.
    """
    style_profile = MemoirStyleProfile()
    return style_profile.render_narrative_style_block(language=language)


def get_narrative_editor_system_prompt(
    background_voice: str = "default",
    occupation: str = "",
    language: str = "zh",
) -> str:
    """Build the system prompt for story/chapter narrative writing.

    The prompt combines the facts-only fidelity prompt with the narrative
    style block. On the non-English path, the occupation hint and the
    background-voice block are appended when they are non-empty.

    Args:
        background_voice: Voice key passed to the occupation and
            background-voice helpers.
        occupation: Occupation text passed to the occupation hint helper.
        language: ``"en"`` returns the base prompt only; any other value
            also appends the addendums.

    Returns:
        The assembled system prompt text.
    """
    fidelity_part = get_memoir_fidelity_facts_only_prompt(language=language)
    style_part = _memoir_editor_narrative_style_block(language=language)
    prompt = f"""{fidelity_part}

{style_part}"""
    if language == "en":
        # The occupation/background-voice addendums are skipped on the English path.
        return prompt

    # Both helpers are always called, occupation hint first, as in the original flow.
    occupation_hint = get_occupation_narrative_hint(occupation, background_voice)
    voice_block = get_background_voice_narrative_block(background_voice)
    for addendum in (occupation_hint, voice_block):
        if addendum:
            prompt = f"{prompt}\n\n{addendum}"
    return prompt


def _short_classification_edit_prefix(language: str = "zh") -> str:
    """章节分类专用短系统前缀。"""
    if language == "en":
        return """You are a memoir editor. Ignore filler and small talk; classify only by **substantive life-experience content**.
Keep: events, relationships, places and times, emotions and beliefs. Filter out: pure greetings, AI-interaction, unrelated chit-chat."""
    return """你是回忆录编辑。先忽略语气词与寒暄，只根据**与人生经历有关的实质内容**判断归类。
保留：事件、人物关系、地点时间、情感与信念。过滤：纯寒暄、与 AI 的交互、无关闲聊。"""


def get_chapter_classification_json_prompt(
    segments_text: str, language: str = "zh"
) -> str:
    """Build the chapter-classification prompt that asks for a JSON object.

    Per the original note, the prompt is meant to be used together with
    ``invoke_json_object``.

    Args:
        segments_text: Dialogue text to classify; inserted verbatim.
        language: ``"en"`` selects the English prompt; any other value
            produces the Chinese prompt.

    Returns:
        The prompt text, ending with the note on how ``none`` is handled.
    """
    if language != "en":
        prefix = _short_classification_edit_prefix()
        return f"""{prefix}

## 章节 key（英文）
childhood, education, career_early, career_achievement, career_challenge, family, beliefs, summary；不足以成篇则 **none**。

当去掉寒暄后仅为档案式点状信息、无可讲述叙事骨架（无事件/场景/过程/互动/情绪展开）→ **none**；短但有画面的微型故事应归入最贴类别。

对话内容：
{segments_text}

输出形状（仅此对象）：
{{"category": "childhood|education|career_early|career_achievement|career_challenge|family|beliefs|summary|none"}}

若你返回 **none**，服务端会将本段映射到 **summary** 章节并仍写入回忆录正文（不落库丢弃）。"""

    prefix_en = _short_classification_edit_prefix("en")
    return f"""{prefix_en}

## Chapter keys
childhood, education, career_early, career_achievement, career_challenge, family, beliefs, summary; if not enough to form a story → **none**.

If, after stripping greetings, only profile-style point facts remain with no narrative spine (no event / scene / process / interaction / emotion arc) → **none**; a short but vivid micro-story belongs in the closest category.

Dialogue content:
{segments_text}

Output shape (only this object):
{{"category": "childhood|education|career_early|career_achievement|career_challenge|family|beliefs|summary|none"}}

If you return **none**, the server will map this batch to the **summary** chapter and still write it into the memoir body (it is not dropped)."""


def get_state_extraction_prompt(
    user_message: str,
    current_stage: str,
    stage_slots: dict,
    language: str = "zh",
) -> str:
    """Build the prompt that extracts structured slots and detects the life stage.

    Args:
        user_message: The user's utterance; inserted verbatim.
        current_stage: Stage key the system is currently tracking.
        stage_slots: Mapping whose keys are listed as the slots allowed for
            the current stage.
        language: ``"en"`` selects the English prompt; any other value
            produces the Chinese prompt.

    Returns:
        The prompt text, which asks the model to return JSON only.
    """
    allowed_slot_keys = list(stage_slots.keys())
    # Reference table of every stage and its slot keys, rendered as JSON.
    slots_by_stage = {
        stage_key: list(slot_names)
        for stage_key, slot_names in STAGE_SLOT_KEYS.items()
    }
    slots_by_stage_json = json.dumps(slots_by_stage, ensure_ascii=False, indent=2)

    if language != "en":
        return f"""你是回忆录访谈信息抽取助手。从用户话语中提取结构化信息，判断用户实际在谈论哪个人生阶段。
只提取口述中确有依据的片段，不得编造或推测。

你需要从用户话语中**先提炼与人生经历相关的核心内容**，然后抽取结构化信息（slots 仅填口述中确有依据的片段）。

系统当前跟踪的阶段：{current_stage}
该阶段可填 slots：{allowed_slot_keys}

所有阶段及其 slots 参考：
{slots_by_stage_json}

用户话语：
{user_message}

请只返回 JSON，格式如下：
{{
  "detected_stage": "childhood|education|career|family|belief",
  "slots": {{
    "slot_key": "snippet"
  }},
  "emotion": "neutral|warm|low|highlight",
  "is_new_chapter": true
}}

要求：
1. **先忽略话语中的语气词、填充词、寒暄、与AI的交互指令等无关内容**，只关注涉及人生经历的实质信息
2. **仅当 slots 非空时**：detected_stage 必须根据用户话语的实际内容判断；用户可能在聊与系统当前阶段不同的人生阶段
3. slots 的 key 必须属于 detected_stage 对应的 slot 列表
4. slots 只填写确实提到的、与人生经历相关的实质内容
5. **snippet 应是提炼后的核心信息**，去除语气词和冗余表达，50 字以内
6. 如果用户话语中没有任何与人生经历相关的实质内容（如纯粹的寒暄、元话语「整理回忆」、指令、语气词），**slots 必须为空对象**，且 **detected_stage 必须恰好等于系统当前跟踪的阶段**（「不明确」时不得另猜阶段）
"""

    return f"""You are a memoir interview information extractor. From the user's utterance, extract structured information and decide which life stage they are actually talking about.
Only extract snippets that are clearly supported by the oral memory; do not fabricate or guess.

You should first distill the **substantive life-experience content** from the user's words, then extract structured slots (only when there is clear evidence in the oral memory).

System currently tracking stage: {current_stage}
Allowed slots for this stage: {allowed_slot_keys}

All stages and their slots:
{slots_by_stage_json}

User utterance:
{user_message}

Return JSON only, in this shape:
{{
  "detected_stage": "childhood|education|career|family|belief",
  "slots": {{
    "slot_key": "snippet"
  }},
  "emotion": "neutral|warm|low|highlight",
  "is_new_chapter": true
}}

Requirements:
1. **First strip filler, AI-interaction commands, greetings, and small talk** — focus only on real life-experience content.
2. **Only when slots is non-empty**, detected_stage must reflect what the user actually talked about; the user may discuss a different stage than the system is tracking.
3. The keys in `slots` must belong to the slot list of `detected_stage`.
4. Only fill slots with substantive, life-experience content the user actually mentioned.
5. **Snippets are distilled cores** — strip filler, keep within ~50 characters where possible.
6. If the utterance has no real life-experience content (pure small talk, meta-instructions like "organize my memories", commands, fillers), `slots` must be the empty object and `detected_stage` must equal the system's current stage."""


def get_batch_memoir_phase1_prep_prompt(
    *,
    system_current_stage: str,
    slots_snapshot: dict,
    segment_items: list[tuple[str, str]],
    language: str = "zh",
) -> str:
    """Build the phase-1 batch prompt: per-segment extraction plus chapter classification.

    Several oral segments are handled in one JSON response.

    Args:
        system_current_stage: Stage key the system is currently tracking.
        slots_snapshot: Already gathered slots, rendered as JSON for context.
        segment_items: ``(segment_id, user_text)`` pairs; per the original
            note they must be in time order.
        language: ``"en"`` selects the English prompt; any other value
            produces the Chinese prompt.

    Returns:
        The prompt text asking for a JSON object with a ``segments`` array.
    """
    use_english = language == "en"
    # Only the label in front of each segment's text differs between languages.
    text_label = "text: " if use_english else "文本："
    segment_listing = "\n".join(
        f"- id={seg_id}\n  {text_label}{seg_text}"
        for seg_id, seg_text in segment_items
    )
    stage_slot_listing = "\n".join(
        f"- {stage_key}: {', '.join(slot_names)}"
        for stage_key, slot_names in STAGE_SLOT_KEYS.items()
    )
    snapshot_json = json.dumps(slots_snapshot, ensure_ascii=False, indent=2)

    if use_english:
        return f"""You are a memoir interview assistant. Below are several user oral memory segments (in time order). For **each segment**:
1) Extract information (slots, detected_stage) — same rules as single-segment extraction.
2) Classify the chapter (chapter_category) — same rules as single-segment classification.

System currently tracking stage (chat stage key): {system_current_stage}
Slot summary already gathered (context only — do not invent details that did not appear):
{snapshot_json}

`detected_stage` allowed values: childhood | education | career | family | belief
The keys in `slots` must belong to the slot list for that stage:
{stage_slot_listing}

`chapter_category` allowed values: childhood | education | career_early | career_achievement | career_challenge | family | beliefs | summary | **none**
(Profile-only points or pure small talk → **none**, same as single-segment classification.)

Per-segment task (the `segments` array MUST cover every id below in the same order):
{segment_listing}

Return JSON object only (no markdown), shaped:
{{
  "segments": [
    {{
      "id": "<same as input id>",
      "detected_stage": "childhood|education|career|family|belief",
      "slots": {{ "slot_key": "snippet within ~50 chars" }},
      "chapter_category": "childhood|education|career_early|career_achievement|career_challenge|family|beliefs|summary|none"
    }}
  ]
}}

Same as single-segment extraction: **only when `slots` is non-empty** does `detected_stage` follow the content; if no life-experience content exists this segment, `slots` must be empty and `detected_stage` must equal the current system stage `{system_current_stage}`."""

    return f"""你是回忆录访谈助手。下面有多段用户口述（按时间顺序），请**逐段**完成：
1）信息抽取（slots、detected_stage）——规则与单段抽取相同；
2）章节分类（chapter_category）——规则与单段分类相同。

系统当前跟踪的人生阶段（chat stage key）：{system_current_stage}
当前各阶段已占用的 slots 摘要（仅作语境，勿编造未出现的细节）：
{snapshot_json}

detected_stage 仅允许：childhood | education | career | family | belief
slots 的 key 必须属于该 detected_stage 对应集合：
{stage_slot_listing}

chapter_category 仅允许：childhood | education | career_early | career_achievement | career_challenge | family | beliefs | summary | **none**
（不足以成篇的档案点/纯寒暄 → **none**；与单段分类一致。）

逐段任务（按下列列表顺序，**segments 数组须覆盖每一行 id，且顺序一致**）：
{segment_listing}

输出 JSON 对象（无 markdown），格式：
{{
  "segments": [
    {{
      "id": "<与输入相同的 segment id>",
      "detected_stage": "childhood|education|career|family|belief",
      "slots": {{ "slot_key": "snippet 50 字以内" }},
      "chapter_category": "childhood|education|career_early|career_achievement|career_challenge|family|beliefs|summary|none"
    }}
  ]
}}

与单段抽取一致：**仅当 slots 非空时** detected_stage 才按内容推断；若本段无人生经历实质、slots 为空，则 detected_stage 必须等于系统当前跟踪阶段 {system_current_stage}。
"""


def _build_age_hint(stage: str, birth_year: Optional[int] = None) -> str:
    """根据人生阶段和出生年份推算大致年龄区间（`STAGE_ERA_HINTS`，仅作提示）。"""
    if not birth_year:
        return ""
    age_range = STAGE_ERA_HINTS.get(stage)
    if not age_range:
        return ""
    year_start = birth_year + age_range[0]
    year_end = birth_year + age_range[1]
    return f"大约 {year_start}-{year_end} 年（{age_range[0]}-{age_range[1]} 岁）"


def get_creative_title_prompt(
    stage: str,
    emotion: str,
    slots: dict,
    user_profile: str = "",
    birth_year: Optional[int] = None,
    language: str = "zh",
) -> str:
    """Build the prompt that generates one memoir story title.

    The title must summarize facts or themes present in the oral memory or
    slots; purely invented imagery is forbidden by the prompt.

    Args:
        stage: Life-stage key; shown in the prompt and used for the age hint.
        emotion: Emotion label shown in the prompt.
        slots: Available slot information; interpolated as-is.
        user_profile: Optional profile text; its section is omitted when empty.
        birth_year: Optional birth year used to derive the time reference.
        language: ``"en"`` selects the English prompt; any other value
            produces the Chinese prompt.

    Returns:
        The prompt text asking for a single title line.
    """
    age_hint = _build_age_hint(stage, birth_year)

    if language != "en":
        profile_section = ""
        if user_profile:
            profile_section = f"\n用户基本信息：\n{user_profile}"
        time_section = ""
        if age_hint:
            time_section = f"\n时间参考：{age_hint}"
        fidelity_prompt = get_memoir_fidelity_facts_only_prompt()
        return f"""{fidelity_prompt}

请根据下面「阶段、情绪、可用信息」生成 **1 个**回忆录故事标题。

阶段：{stage}
情绪：{emotion}
可用信息（含口述 slots 与档案）：{slots}{profile_section}{time_section}

要求：
1. 格式：「时间标注 · 标题正文」（时间标注可用年龄、年代或阶段，须与上列信息一致；勿编造未出现的年份）。
2. 标题正文 **12–18 字**，须概括用户口述或 slots 中已出现的主题/事实；可以用书面化的概括与凝练表达，但**禁止虚构**口述中不存在的人、事、地、物。
3. **标题中的具体事实**（职务升迁链、部队番号驻地、战役名、生死去向等）必须能在正文摘录或其它已给出的 slots 中找到**逐字**依据；不得仅凭阶段名或年龄提示臆补未出现的履历词。
4. 语言凝练、有回忆录感，不需要平白直叙也不需要堆砌辞藻。

### 标题示例（事实均来自 slots/口述，非意象编造；格式供参照）
- 可用信息含童年、过河、大雨 → `6岁前后 · 雨天里爷爷背我过河`
- 可用信息含宿舍、方便面、食堂 → `求学阶段 · 食堂不合口时的方便面充饥`

只输出标题这一行文字，不要加引号或书名号。
"""

    profile_section_en = ""
    if user_profile:
        profile_section_en = f"\nAbout the user:\n{user_profile}"
    time_section_en = ""
    if age_hint:
        time_section_en = f"\nTime reference: {age_hint}"
    fidelity_prompt_en = get_memoir_fidelity_facts_only_prompt(language="en")
    return f"""{fidelity_prompt_en}

Generate **one** memoir story title based on the stage, emotion, and available information below.

Stage: {stage}
Emotion: {emotion}
Available information (oral slots and profile): {slots}{profile_section_en}{time_section_en}

Requirements:
1. Format: "Time tag · Title body" (the time tag may use age, era, or stage; it must be consistent with the information above; do not invent years).
2. The title body should be **6–12 words**, concisely summarizing a theme or fact present in the oral memory or slots; literary phrasing is welcome but **invention is forbidden**.
3. Any **specific facts in the title** (job titles, unit names, battles, names, life-or-death outcomes) must have **literal evidence** in the oral excerpt or other slots; do not extrapolate from the stage name or age hint.
4. Be concise; memoir-flavored; neither flat nor florid.

### Examples (facts come from slots/oral memory; the format is illustrative)
- Slots include childhood, river, heavy rain → `Around age 6 · Grandfather carrying me across the river in the rain`
- Slots include dorm, instant noodles, cafeteria → `Student years · Instant noodles when the cafeteria did not suit me`

Output only the title line — no quotes, no brackets.
"""


def get_creative_title_json_prompt(
    stage: str,
    emotion: str,
    slots: dict,
    user_profile: str = "",
    birth_year: Optional[int] = None,
    language: str = "zh",
) -> str:
    """Build the story-title prompt with a JSON output example appended.

    The base text comes from ``get_creative_title_prompt`` with trailing
    whitespace stripped; an example ``{"title": "..."}`` object follows. Per
    the original note, it is meant to be used with ``invoke_json_object``.

    Args:
        stage, emotion, slots, user_profile, birth_year, language: Forwarded
            unchanged to ``get_creative_title_prompt``.

    Returns:
        The prompt text ending with the JSON example and a newline.
    """
    title_prompt = get_creative_title_prompt(
        stage=stage,
        emotion=emotion,
        slots=slots,
        user_profile=user_profile,
        birth_year=birth_year,
        language=language,
    )
    if language == "en":
        example_heading = "\n\nExample output (only this JSON object):"
        example_json = '\n{"title":"Full title on one line (with time tag · body format)"}\n'
    else:
        example_heading = "\n\n输出示例（仅此 JSON 对象）："
        example_json = '\n{"title":"完整标题一行（含时间标注 · 正文格式）"}\n'
    return title_prompt.rstrip() + example_heading + example_json


def get_narrative_json_prompt(
    stage: str,
    slots: dict,
    new_content: str,
    existing_content: str = "",
    user_profile: str = "",
    birth_year: Optional[int] = None,
    background_voice: str = "default",
    occupation: str = "",
    language: str = "zh",
) -> str:
    """Build the prompt that rewrites new oral content into narrative JSON.

    The requested output shape is ``{"paragraphs": [{"content": ...}, ...]}``.

    Args:
        stage: Life-stage key; shown in the prompt and used for the age hint.
        slots: Available slot information; interpolated as-is.
        new_content: Input material for this turn; inserted verbatim.
        existing_content: Existing story text; only its last 300 characters
            are included, as bridging context.
        user_profile: Optional profile text; its section is omitted when empty.
        birth_year: Optional birth year used to derive the time reference.
        background_voice: Forwarded to the narrative editor system prompt.
        occupation: Forwarded to the narrative editor system prompt.
        language: ``"en"`` selects the English prompt; any other value
            produces the Chinese prompt.

    Returns:
        The full prompt text.
    """
    # Slicing already returns the whole string when it is 300 characters or shorter.
    context_tail = existing_content[-300:] if existing_content else ""
    age_hint = _build_age_hint(stage, birth_year)

    if language != "en":
        context_section = ""
        if context_tail:
            context_section = f"\n\n【衔接上下文（已有内容的末尾，仅供参考衔接，不要重复）】：\n{context_tail}"
        profile_section = ""
        if user_profile:
            profile_section = f"\n\n用户基本信息：\n{user_profile}"
        time_section = ""
        if age_hint:
            time_section = f"\n时间参考：{age_hint}"
        editor_prompt = get_narrative_editor_system_prompt(
            background_voice=background_voice, occupation=occupation
        )
        return f"""{editor_prompt}

请将「本段用户口述」改写为第一人称书面叙述，并输出 **纯 JSON**（无 markdown 围栏）。

阶段：{stage}
可用信息（slots）：{slots}{profile_section}{time_section}

输入材料：
{new_content}
{context_section}

## 要求
1. **格式与输出**：只输出 JSON；第一人称；不使用 `#`、`##`、表格；`content` 仅含正文。
2. **事实与取材**：遵守事实边界，不补写未给出的细节。只展开「本段用户口述」；若有参考摘录区，不得把摘录中的具体事实写成本轮亲历；过滤语气词与寒暄；不重复已有故事全文；本批同一主题/事件链；段落数量与长度随材料，禁止为凑字数编造。
3. **不推断结局**：用户未明确说结果（是否录取、是否被选中等）时，不要凭常识补全为确定结论。

## 输出格式（严格 JSON）
{{
  "paragraphs": [
    {{"content": "段落正文"}},
    ...
  ]
}}

- content：仅含正文。

若无值得记录的内容：{{"paragraphs": []}}
"""

    context_section_en = ""
    if context_tail:
        context_section_en = f"\n\n[Bridging context — tail of the existing story, for continuity only; do not repeat]:\n{context_tail}"
    profile_section_en = ""
    if user_profile:
        profile_section_en = f"\n\nAbout the user:\n{user_profile}"
    time_section_en = ""
    if age_hint:
        time_section_en = f"\nTime reference: {age_hint}"
    editor_prompt_en = get_narrative_editor_system_prompt(
        background_voice=background_voice, occupation=occupation, language="en"
    )
    return f"""{editor_prompt_en}

Rewrite the "User's oral memory this turn" block into first-person written prose and return **pure JSON** (no markdown fences).

Stage: {stage}
Available information (slots): {slots}{profile_section_en}{time_section_en}

Input material:
{new_content}
{context_section_en}

## Requirements
1. **Format**: JSON only; first person; no `#`, `##`, no tables; `content` is body text only.
2. **Facts and material**: obey the fact boundary; do not fill in details that were not given. Expand only the "User's oral memory this turn"; if a reference-snippet block is included, do not write its specifics as the user's first-hand experience this turn; strip filler and small talk; do not repeat the full body of an existing story; stay within the same theme/event chain; paragraph count and length follow the material; do not pad for length.
3. **Do not infer outcomes**: when the user did not state a result (admitted, accepted, etc.), do not fill in a definite conclusion based on common sense.

## Output schema (strict JSON)
{{
  "paragraphs": [
    {{"content": "paragraph body"}},
    ...
  ]
}}

- content: body text only.

If nothing is worth recording: {{"paragraphs": []}}
"""


# Whole-story merge: avoid an over-long context by keeping the head and tail
# and omitting the middle (character-level limits).
NARRATIVE_MERGE_EXISTING_MAX_CHARS = 14_000
NARRATIVE_MERGE_HEAD_CHARS = 7_000
NARRATIVE_MERGE_TAIL_CHARS = 7_000


def clip_existing_story_body_for_merge(existing_markdown: str) -> str:
    """Clip a very long existing story body to head + tail for the merge prompt.

    Args:
        existing_markdown: Existing story text; ``None`` or empty is treated
            as an empty string, and surrounding whitespace is stripped.

    Returns:
        The stripped text when it is at most
        ``NARRATIVE_MERGE_EXISTING_MAX_CHARS`` characters long. Otherwise the
        first ``NARRATIVE_MERGE_HEAD_CHARS`` and last
        ``NARRATIVE_MERGE_TAIL_CHARS`` characters, joined by an omission
        marker and followed by a note that no facts may be dropped.
    """
    body = (existing_markdown or "").strip()
    # An empty body also takes this path and comes back as "".
    if len(body) <= NARRATIVE_MERGE_EXISTING_MAX_CHARS:
        return body

    opening = body[:NARRATIVE_MERGE_HEAD_CHARS]
    closing = body[-NARRATIVE_MERGE_TAIL_CHARS:]
    omission_marker = "\n\n【…中间省略…】\n\n"
    keep_facts_note = "\n\n（上文为已有故事正文节选，合并时须保留其中全部事实，不得因省略而删事实。）"
    return opening + omission_marker + closing + keep_facts_note


def get_narrative_merge_json_prompt(
    stage: str,
    slots: dict,
    new_content: str,
    existing_content: str,
    user_profile: str = "",
    birth_year: Optional[int] = None,
    background_voice: str = "default",
    occupation: str = "",
    language: str = "zh",
) -> str:
    """Build the prompt that merges an existing story with this turn's oral memory.

    The model is asked to produce one first-person narrative covering the whole
    story, ordered chronologically, as ``{"paragraphs": [{"content": ...}]}``.

    Args:
        stage: Life-stage key; shown in the prompt and used for the age hint.
        slots: Available slot information; interpolated as-is.
        new_content: This turn's oral memory (and reference material).
        existing_content: Existing story body; clipped via
            ``clip_existing_story_body_for_merge`` before inclusion.
        user_profile: Optional profile text; its section is omitted when empty.
        birth_year: Optional birth year used to derive the time reference.
        background_voice: Forwarded to the narrative editor system prompt.
        occupation: Forwarded to the narrative editor system prompt.
        language: ``"en"`` selects the English prompt; any other value
            produces the Chinese prompt.

    Returns:
        The full prompt text.
    """
    clipped_body = clip_existing_story_body_for_merge(existing_content)
    age_hint = _build_age_hint(stage, birth_year)

    if language != "en":
        existing_section = ""
        if clipped_body:
            existing_section = f"\n\n【已有故事正文（须全部保留事实，仅调整顺序与衔接；不得编造）】：\n{clipped_body}"
        profile_section = ""
        if user_profile:
            profile_section = f"\n\n用户基本信息：\n{user_profile}"
        time_section = ""
        if age_hint:
            time_section = f"\n时间参考：{age_hint}"
        editor_prompt = get_narrative_editor_system_prompt(
            background_voice=background_voice, occupation=occupation
        )
        return f"""{editor_prompt}

你正在**扩写并重组**一则已有回忆录故事：必须把「已有故事」中的事实全部保留在输出中（可合并重复表述、调整语序），并融入「本段用户口述」中的新事实；按**事件发生的时间顺序**排列段落（早→晚）；禁止丢弃未矛盾的旧内容。

阶段：{stage}
可用信息（slots）：{slots}{profile_section}{time_section}

【本段用户口述与参考（含证据摘录时遵守系统事实边界）】：
{new_content}
{existing_section}

## 要求
1. **全文输出**：`paragraphs` 须为重组后的**完整故事正文**（非仅本段）。
2. **事实边界**：遵守事实边界，不补写未给出的细节。不得新增「已有」或「本段」未出现的人名、地点、时间、对话、数字；第一人称、优雅书面语须符合上文传记作家文体说明；不用 `#`、`##`、表格。
3. 若本段与旧文完全重复或无新信息，可输出与旧文等价重组的正文（不得无故缩短到明显少于旧文）。
4. **不推断结局**：本段未明确结果时，不要补全落选/未通过等确定说法，除非旧文中已有同一事实。

## 输出格式（严格 JSON）
{{
  "paragraphs": [
    {{"content": "段落正文"}},
    ...
  ]
}}

若无任何可保留内容：{{"paragraphs": []}}
"""

    existing_section_en = ""
    if clipped_body:
        existing_section_en = f"\n\n[Existing story body — keep all of its facts; reorder and bridge only; do not fabricate]:\n{clipped_body}"
    profile_section_en = ""
    if user_profile:
        profile_section_en = f"\n\nAbout the user:\n{user_profile}"
    time_section_en = ""
    if age_hint:
        time_section_en = f"\nTime reference: {age_hint}"
    editor_prompt_en = get_narrative_editor_system_prompt(
        background_voice=background_voice, occupation=occupation, language="en"
    )
    return f"""{editor_prompt_en}

You are **expanding and reorganizing** an existing memoir story: you must keep every fact from the existing story in the output (you may merge redundant phrasing and adjust order), and weave in the new facts from "User's oral memory this turn"; order paragraphs by **chronological order of events** (earliest → latest); do not drop existing content unless the new memory contradicts it.

Stage: {stage}
Available information (slots): {slots}{profile_section_en}{time_section_en}

[User's oral memory this turn and reference — when an evidence-snippet block is present, follow the fact boundary]:
{new_content}
{existing_section_en}

## Requirements
1. **Full body output**: `paragraphs` must be the **complete reorganized story body** (not just this turn's segment).
2. **Fact boundary**: obey the fact boundary; do not fill in missing details. Do not add people, places, dates, dialogue, or numbers that appear in neither the existing body nor this turn; write first-person, graceful prose; no `#`, `##`, no tables.
3. If this turn fully overlaps the old body or adds no new information, return a faithful reorganized version of the old body (do not arbitrarily shorten it).
4. **Do not infer outcomes**: when this turn does not state an outcome, do not assert a definite outcome unless the old body already states the same fact.

## Output schema (strict JSON)
{{
  "paragraphs": [
    {{"content": "paragraph body"}},
    ...
  ]
}}

If nothing can be retained: {{"paragraphs": []}}
"""


def story_route_merge_hint_for_category(chapter_category: str) -> str:
    """Return the append/new routing tendency hint for a chapter category.

    The hint text is embedded in the story-routing prompts.

    Args:
        chapter_category: Chapter category key; ``None`` or empty is allowed
            and surrounding whitespace is ignored.

    Returns:
        A Chinese Markdown snippet. Unrecognized categories get the general
        hint.
    """
    category = (chapter_category or "").strip()

    if category in ("beliefs", "summary"):
        return (
            "### 本章类别路由倾向（强主题容器）\n"
            "- 多条短感悟、同一价值维度、同一总结脉络的补充 → **优先 append_story**，"
            "选最匹配的一条候选 id。\n"
            "- 仅在用户明确讲述**与所有候选主题明显不相关**、且可独立成篇的长经历时，才用 new_story。"
        )

    if category == "family":
        return (
            "### 本章类别路由倾向（家庭）\n"
            "- **默认 append_story**：同一家庭成员、同一居住环境、婚姻育儿、节日团聚、童年与父母的回忆等，"
            "只要仍围绕已出现的人物或关系网络补充细节，一律并入最匹配的候选，不要因为换了个场景就 new_story。\n"
            "- 仅当口述出现**完全新的人物组合 + 可独立成篇的新事件链**（与所有候选正文都接不上）时，才 new_story。"
        )

    if category in ("childhood", "education"):
        return (
            "### 本章类别路由倾向（童年 / 求学 — 少拆分）\n"
            "- **默认 append_story**：同一成长阶段里，地点（老家、学校）、父母职业、玩伴、游戏影视、"
            "怀旧细节等**主题延续**的补充，即使分段讲述，也应并入已有童年/求学故事，避免多篇开头重复交代背景。\n"
            "- **仅当**口述出现**另一条清晰可辨的事件链**（时间/地点/人物线换了且与候选明显不是同一脉络）时，才 new_story。"
        )

    if category in ("career_early", "career_achievement", "career_challenge"):
        return (
            "### 本章类别路由倾向（经历叙事）\n"
            "- 以具体事件链为主：**不同事件 / 时期 / 地点** → 可 new_story。\n"
            "- 明显是**同一段经历的续叙、补充细节** → append_story。"
        )

    return (
        "### 本章类别路由倾向（一般）\n"
        "- 同时参考「主题连续性」与「事件切换」两类信号做判断。"
    )


def get_story_route_prompt(
    *,
    chapter_category: str,
    chapter_title: str,
    batch_transcript: str,
    candidate_stories_json: str,
) -> str:
    """Build the routing prompt that chooses between a new story and an append.

    Per the original note this is used for Celery batches. A "story" is a
    stretch of life experience that can be told on its own, and batches
    reaching this step already have a concrete chapter category (including
    ``summary`` when the model returned ``none`` or a scattered-profile
    heuristic applied).

    Args:
        chapter_category: Category key; also selects the category hint.
        chapter_title: Title of the current chapter.
        batch_transcript: Merged oral text of this batch.
        candidate_stories_json: Candidate stories, already serialized.

    Returns:
        The prompt text asking for a single strict JSON object.
    """
    category_hint = story_route_merge_hint_for_category(chapter_category)
    prompt = f"""你是回忆录编辑助手。根据本批用户口述与【候选故事】决定 append_story 或 new_story。

## 两层决策标准（必须先在心里过一遍）
1. **主题连续性信号**：价值观、关系模式、长期总结、同一反思维度；口述是否像在**同一主题容器**里加厚？
2. **事件切换信号**：是否出现**新人物组合、新地点、新时间段、新事件因果链**，与候选正文明显是**另一段经历**？

- 类别 **beliefs / summary**：更重主题连续性；除非事件切换信号极强，否则倾向 append。
- 类别 **career_* / childhood / education**：更重事件链；不同事件可 new，同一经历续聊则 append。
- 类别 **family**：两类信号兼顾——原则/关系反思倾向 append；明确新事件链可 new。

{category_hint}

**路由边界（必须遵守）**：仅根据下方「本批口述合并文本」判断；不得将系统检索摘要、记忆摘录等当作本批口述内容来匹配候选。

**候选故事说明**：列表项可能含 `summary`、`body_for_route`（正文摘要）或 `opening_snippet`（无 summary 时的纯文本开头提要）；仅含 `preview` 者为索引项，信息不全。**append 时优先匹配带 summary / body / opening_snippet 的条目**；索引项仅作候选 id 备忘。

当前章节（写作容器）：
- category: {chapter_category}
- title: {chapter_title}

【本批口述合并文本】
{batch_transcript}

【候选故事】（append 时 target_story_id 必须来自下列 id，且原样复制）
{candidate_stories_json}

## 输出 JSON（仅此一个对象，不要 markdown）
{{
  "decision": "new_story" | "append_story",
  "target_story_id": "<uuid 或 null；append 时必填且必须来自候选>",
  "reason": "<一句中文理由>"
}}

规则：
- **不要**只因「不太确定」就选 new_story；在主题可并入某一候选时应 append_story。
- 仅当口述与**所有**候选在两层标准下都明显不兼容时，才选 new_story。
- 若已有候选故事（列表非空）且口述是对同一人生阶段的**补述**，却找不到精确 id：仍应 **append_story** 到最相近的一条，而不是 new_story。
"""
    return prompt


def get_story_batch_plan_prompt(
    *,
    chapter_category: str,
    chapter_title: str,
    segments_json: str,
    candidate_stories_json: str,
) -> str:
    """Build the prompt that splits a batch of segments into write units.

    All segments share one chapter category. The model groups them into
    consecutive units and marks each unit as a new story or an append.

    Args:
        chapter_category: Category key; also selects the category hint.
        chapter_title: Title of the current chapter.
        segments_json: Oral segments of this batch, already serialized.
        candidate_stories_json: Candidate stories, already serialized.

    Returns:
        The prompt text asking for a single strict JSON object with ``units``.
    """
    category_hint = story_route_merge_hint_for_category(chapter_category)
    prompt = f"""你是回忆录编辑助手。下面同一章节类别下有一批**按时间顺序**的用户口述片段（每段有 id 与文本）。

## 两层决策标准（每一块都要应用）
1. **主题连续性信号**：价值观、关系模式、长期总结、同一反思维度。
2. **事件切换信号**：新人物组合、新地点、新时间段、新事件因果链。

各类别倾向与单段路由一致：beliefs/summary 重主题连续性；career/childhood/education 重事件链；family 兼顾。

{category_hint}

## 「故事」定义（必须遵守）
一段「故事」= **可独立讲述的一段人生经历**。**同一主题容器内的连续口述**应并入同一块 append，而不是切碎成多个 new_story。

## 任务
将本批 segment **划分为连续若干块**（每块至少一个 segment，顺序不能打乱；每个 segment 必须恰好属于一块）。对每一块决定：
- **append_story**：与某一候选在两层标准下可合并，且能对应到具体 candidate id
- **new_story**：该块与**所有**候选都明显不兼容，或确认为独立新经历

**候选故事说明**：条目可能含 `summary` / `body_for_route` / `opening_snippet`；仅 `preview` 者为索引项。**优先用带摘要、正文摘要或开头提要的条目做 append 目标**。

当前章节（写作容器）：
- category: {chapter_category}
- title: {chapter_title}

【本批口述片段】（JSON 数组，顺序即口述顺序）
{segments_json}

【候选故事】（append 时 target_story_id 必须来自下列 id，且原样复制）
{candidate_stories_json}

## 输出 JSON（仅此一个对象，不要 markdown）
{{
  "units": [
    {{
      "segment_ids": ["<按顺序列出本块包含的 segment id>"],
      "decision": "new_story" | "append_story",
      "target_story_id": "<uuid 或 null；append 时必填且必须来自候选>",
      "reason": "<一句中文理由，可选>"
    }}
  ]
}}

规则：
- `units` 中所有 `segment_ids` 拼接后，必须**不重不漏**地覆盖本批全部 id，且顺序与【本批口述片段】数组一致
- **不要**仅因不确定就对整块选 new_story；能并入候选时应 append_story
- **同一批里 new_story 单元至多 1 个**：除非口述中同时存在**至少两条**与所有候选都不兼容、且彼此也明显无关的独立长经历，否则禁止拆成多个 new_story；连续多段若都在补充同一主题，应合并为**一块 append_story**。
- 候选列表非空时，优先把本批当作「加厚已有篇章」，而不是再开新篇。
"""
    return prompt


def format_narrative_user_content(
    oral_text: str, evidence_text: str = "", language: str = "zh"
) -> str:
    """Format the oral memory and retrieved snippets as separate labelled blocks.

    The separation lets the narrative model tell first-hand content from
    reference material.

    Args:
        oral_text: This turn's oral memory; ``None`` is treated as empty and
            surrounding whitespace is stripped.
        evidence_text: Reference snippets; when empty after stripping, only
            the oral block is produced.
        language: ``"en"`` selects English block headers; any other value
            uses the Chinese headers.

    Returns:
        The oral block, optionally followed by a blank line and the
        reference block.
    """
    oral_body = (oral_text or "").strip()
    evidence_body = (evidence_text or "").strip()

    if language == "en":
        oral_header = "[User's oral memory this turn]\n"
        evidence_header = "[Reference memory snippets (not this turn's oral memory; do NOT write their specifics as the user's first-hand experience this turn — bridging only)]\n"
    else:
        oral_header = "【本段用户口述】\n"
        evidence_header = "【仅供参考的相关记忆摘录（非本段口述；不得把其中具体事实写成本轮亲历经历，仅可作主题衔接）】\n"

    oral_block = oral_header + oral_body
    if not evidence_body:
        return oral_block
    return oral_block + "\n\n" + evidence_header + evidence_body