# life-echo/api/app/agents/memoir/prompts.py

"""
回忆录整理 Agent 提示词模板
"""

import json
import re
from typing import Optional

# Chinese chapter names keyed by chapter category key.
CHAPTER_CATEGORIES = {
    "childhood": "童年与成长背景",
    "education": "教育经历与青年时期",
    "career_early": "崭露头角",
    "career_achievement": "主要成就与巅峰时刻",
    "career_challenge": "挫折、挑战与重大转折",
    "family": "家庭与情感",
    "beliefs": "信念与价值观",
    "summary": "人生总结",
}

# The same category keys as CHAPTER_CATEGORIES, as an ordered list
# (presumably the display order of chapters — confirm against callers).
CHAPTER_ORDER = [
    "childhood",
    "education",
    "career_early",
    "career_achievement",
    "career_challenge",
    "family",
    "beliefs",
    "summary",
]

# Maps stage / category keys to their index in CHAPTER_ORDER. The coarse
# aliases "career" and "belief" share the positions of "career_early" and
# "beliefs" respectively.
STAGE_TO_ORDER = {
    "childhood": 0,
    "education": 1,
    "career": 2,
    "career_early": 2,
    "career_achievement": 3,
    "career_challenge": 4,
    "family": 5,
    "belief": 6,
    "beliefs": 6,
    "summary": 7,
}

# Fixed image-style description that inject_image_placeholder_template puts in
# front of the description inside every IMAGE placeholder.
IMAGE_PLACEHOLDER_TEMPLATE = (
    "温暖怀旧风格，年代感复古色调，柔和光影，朴素温馨氛围，安静治愈，低饱和度，"
    "质感柔和细腻，简约构图，充满岁月沉淀感与故事感，高清唯美插画封面，不要包含文字，"
    "要适合老年人审美，画面要真实可信、让老年人产生共鸣与代入感，"
    "场景环境、建筑风格、服饰器物必须严格符合所述时代背景和地域特色，"
    "有朦胧怀旧的年代感。"
)

# Matches an IMAGE placeholder wrapped in one or more "{{" ... "}}" pairs;
# group 2 captures the text after "IMAGE:". re.DOTALL has no effect on this
# pattern because it contains no ".".
_IMAGE_PLACEHOLDER_ANY_BRACES_RE = re.compile(
    r"(\{\{)+IMAGE:\s*([^}]+)(\}\})+",
    re.DOTALL,
)


def inject_image_placeholder_template(content: str) -> str:
    """
    Prefix the description of every IMAGE placeholder in ``content`` with the
    fixed style template, emitting the placeholder with four-brace delimiters.

    Per the original note, the online write path no longer uses this; it is
    kept for offline migration scripts that process historical data.

    Empty or whitespace-only input is returned unchanged. A placeholder whose
    description already starts with the template is normalised rather than
    prefixed a second time.
    """
    if not content or not content.strip():
        return content

    opening = "{{{{IMAGE:"
    closing = "}}}}"

    def _rewrite(found: re.Match) -> str:
        description = (found.group(2) or "").strip()
        if not description:
            # Nothing to describe: leave the placeholder exactly as written.
            return found.group(0)
        if description.startswith(IMAGE_PLACEHOLDER_TEMPLATE):
            # Already templated: keep only the trailing user description.
            description = (
                description[len(IMAGE_PLACEHOLDER_TEMPLATE) :].lstrip("。").strip()
            )
            if not description:
                return opening + IMAGE_PLACEHOLDER_TEMPLATE + closing
        # NOTE(review): IMAGE_PLACEHOLDER_TEMPLATE already ends with "。", so
        # this produces "。。" before the description — confirm this is intended.
        return opening + IMAGE_PLACEHOLDER_TEMPLATE + "。" + description + closing

    return _IMAGE_PLACEHOLDER_ANY_BRACES_RE.sub(_rewrite, content)


def get_memoir_editor_system_prompt() -> str:
    """Return the system prompt for the memoir-editing agent.

    The prompt covers turning spoken dialogue into written prose and sorting it
    into chapters; it is distinct from the system prompt used for interview
    conversations.
    """
    return """你是一位专业的传记作家和文字编辑，擅长将口语化的对话内容整理成优雅的书面语回忆录章节。

你的任务：
1. 接收对话段落文本（口语化，可能来自语音转写）
2. **先提炼对话中与人生经历相关的核心内容**，过滤掉无关信息
3. 识别内容主题，归类到对应章节（童年/教育/事业/家庭/信念/总结）
4. 将口语化表达改写为书面语，保持原意和情感
5. 生成合适的章节标题和段落结构
6. 提取关键信息，形成连贯的叙述

## 内容筛选原则（最重要）
对话中往往夹杂大量与回忆录无关的噪音，你必须严格筛选，只保留有价值的内容：

应该保留的内容：
- 具体的人生事件、经历、故事
- 提到的人物及其关系（家人、朋友、同事、恩师等）
- 地点、时间、场景描写
- 用户的情感表达、内心感受
- 人生感悟、价值观、信念
- 具体的细节（食物、声音、画面等）

应该过滤掉的内容：
- 语气词、填充词（嗯、啊、那个、就是说、对对对、然后呢等）
- 对话中的寒暄、问候（你好、谢谢、好的等）
- 用户与AI助手之间的交互指令（你帮我、我想问、你说得对等）
- 重复、冗余的表述（取核心含义即可）
- 与个人经历完全无关的闲聊内容

## 改写原则
- 保持用户的真实情感
- 使用优雅但不失亲切的书面语，不要直接引用对话原话
- 适当添加过渡句，使段落连贯
- 保留生动的细节，但将口语表达改写为书面叙述
- 去除口语中的填充词和无意义重复
- 保持时间顺序和逻辑清晰

## 章节分类规则
- 童年相关 → "童年与成长背景"
- 学校、老师、同学 → "教育经历与青年时期"
- 工作、职业、成就 → "主要成就与巅峰时刻" 或 "崭露头角"
- 困难、挫折 → "挫折、挑战与重大转折"
- 伴侣、孩子、家庭生活 → "家庭与情感"
- 价值观、信念、座右铭 → "信念与价值观"
- 总结、感悟、展望 → "人生总结"
"""


def _memoir_fidelity_core_rules() -> str:
    """Return fact-boundary rules 1-4.

    Kept separate from the style rule (rule 5) so that the story-narrative and
    title prompts can reuse the same factual constraints.
    """
    return """## 事实边界（必须遵守，优先于文采）
1. **正文只能展开「本段用户口述」区块中的内容**。若输入中有「相关记忆摘录」等参考区，其中信息**不得**写成本人本轮亲口经历的细节；最多用一两句作主题衔接，且不得引入摘录里才有的具体人名、地点、时间、对话、数字。
2. **禁止编造**：不得新增用户未提及的具体人物姓名、对话原文、地点、时间、事件经过、因果、数字；不得推断性心理描写或「典型年代场景」填充。
3. **禁止为凑字数扩写**：材料短则输出短；段落数量与长度随材料而定。
4. 允许：去除口语赘词与寒暄、调整语序、合并重复指代、把口语改为书面语；**不得**用虚构细节「让文章更好看」。"""


def _memoir_fidelity_user_profile_rules() -> str:
    """Return the prompt section restricting use of user-profile and time-reference data."""
    return """## 用户档案与阶段信息
- 「用户基本信息」「时间参考」仅可使用其中**已写明**的条目；不得把档案中的出生地等写进正文，除非用户在本段口述里已提及或明确关联。"""


def get_memoir_fidelity_system_prompt() -> str:
    """Return the accuracy-first system prompt used for narrative/title generation.

    Combines the shared fact-boundary rules with a plain-style rule 5 and the
    user-profile rules. Fabricating facts is forbidden. This prompt is separate
    from ``get_memoir_editor_system_prompt``.
    """
    intro = "你是回忆录编辑助手，任务是把用户口述整理为第一人称书面叙述。"
    plain_style_rule = (
        "5. **叙述风格平实**：少用抒情、比喻与文学铺陈；像清楚记事，不要写成散文。"
    )
    sections = (
        intro,
        # Rule 5 continues the numbered list that the core rules end with.
        _memoir_fidelity_core_rules() + "\n" + plain_style_rule,
        _memoir_fidelity_user_profile_rules(),
    )
    return "\n\n".join(sections)


def get_memoir_fidelity_facts_only_prompt() -> str:
    """Return the fidelity prompt variant that permits literary polish.

    Uses the same fact rules 1-4 as ``get_memoir_fidelity_system_prompt``, but
    rule 5 allows biographer-style prose while still forbidding fabrication.
    """
    intro = "你是回忆录编辑助手，任务是把用户口述整理为第一人称书面叙述。"
    literary_style_rule = (
        "5. **文体**：在遵守第 1–4 条的前提下，可将口语改写为**优雅、连贯的回忆录书面语**"
        "（适当过渡句，保留并书面化用户已提及的细节与情感）；"
        "文采服务于真实内容，**不得**用虚构描写替代或填补事实。"
    )
    sections = (
        intro,
        # Rule 5 continues the numbered list that the core rules end with.
        _memoir_fidelity_core_rules() + "\n" + literary_style_rule,
        _memoir_fidelity_user_profile_rules(),
    )
    return "\n\n".join(sections)


def _memoir_editor_narrative_style_block() -> str:
    """Return the biographer-style rewriting guidance for chapter story text.

    Mirrors the rewriting principles in ``get_memoir_editor_system_prompt``; the
    text itself states that the fact-boundary rules above it must also be obeyed.
    """
    return """## 传记作家文体（须同时遵守上文「事实边界」）
你是一位专业的传记作家和文字编辑，擅长将口语化的对话内容整理成优雅的书面语回忆录章节。

### 提炼与筛选
对话中往往夹杂噪音，须严格筛选：保留具体事件、人物关系、时地、情感与信念、用户已提及的细节；过滤语气词、寒暄、与 AI 的交互、无关闲聊、重复冗余。

### 改写原则
- 保持用户的真实情感
- 使用优雅但不失亲切的书面语，不要直接引用对话原话
- 适当添加过渡句，使段落连贯
- 保留生动的细节，但将口语表达改写为书面叙述
- 去除口语中的填充词和无意义重复
- 保持时间顺序和逻辑清晰

### 输出格式约束
- 使用第一人称
- 不使用 Markdown 标题（#、##）、不使用表格
- 如有「衔接上下文」，仅保持语气与时间线连贯，不重复已有段落全文"""


def get_narrative_editor_system_prompt() -> str:
    """Return the system prompt for story/chapter narration.

    Joins the facts-only fidelity prompt and the biographer-style block with a
    blank line. Per the original note, it is used when a chapter displays the
    story text directly.
    """
    fidelity_rules = get_memoir_fidelity_facts_only_prompt()
    style_block = _memoir_editor_narrative_style_block()
    return "\n\n".join((fidelity_rules, style_block))


def _short_classification_edit_prefix() -> str:
    """Return the short system prefix used for chapter classification.

    A compact alternative to repeating the whole of
    ``get_memoir_editor_system_prompt``.
    """
    return """你是回忆录编辑。先忽略语气词与寒暄，只根据**与人生经历有关的实质内容**判断归类。
保留：事件、人物关系、地点时间、情感与信念。过滤：纯寒暄、与 AI 的交互、无关闲聊。"""


def get_chapter_classification_prompt(segments_text: str) -> str:
    """Build the chapter-classification prompt with a plain-text answer.

    The prompt is the short classification prefix followed by the category
    list, the rules for answering ``none``, examples, and the dialogue text.
    The model is asked to reply with a single category key or ``none``.

    Args:
        segments_text: Dialogue text to classify; inserted verbatim.

    Returns:
        The complete prompt string.
    """
    return f"""{_short_classification_edit_prefix()}

请分析以下对话内容，判断应归类到哪个章节类别，或是否不足以写入回忆录正文。

## 章节类别
- childhood: 童年与成长背景
- education: 教育经历与青年时期
- career_early: 崭露头角（早期事业）
- career_achievement: 主要成就与巅峰时刻
- career_challenge: 挫折、挑战与重大转折
- family: 家庭与情感
- beliefs: 信念与价值观
- summary: 人生总结

## 何时必须返回 none（与「零散档案点」区分）
若去掉寒暄后，内容仅为**档案式点状信息**，**没有可讲述的叙事骨架**（无事件、场景、过程、互动或情绪展开），则必须返回 **none**，例如：
- 仅出生年份、籍贯一笔、职业名词、姓名等单句事实；
- 仅罗列事实、无画面与过程的短答。

以下情况**不是** none：篇幅短但已构成**微型故事**（有画面、动作、对话、转折、感受），应归入最贴合的章节类别。

## 示例（仅作判断参考）
- 应返回 none：「我1999年出生的。」「籍贯上海。」「工程师。」
- 应返回 childhood（或其它合适类别）：「小学时有次下大雨，爷爷背我过河，鞋全湿了，他一直笑。」

对话内容：
{segments_text}

请只返回章节类别英文 key（如：childhood），不要返回其它说明。
若内容不足以独立成篇、仅为零散信息，返回 none。"""


def get_chapter_classification_json_prompt(segments_text: str) -> str:
    """Build the chapter-classification prompt with a JSON answer.

    The model is asked to reply with ``{"category": ...}``. Per the original
    note, this is meant to be used together with ``invoke_json_object``.

    Args:
        segments_text: Dialogue text to classify; inserted verbatim.

    Returns:
        The complete prompt string.
    """
    # NOTE(review): the prompt text below refers the model to
    # `get_chapter_classification_prompt` by name, but that function's rules are
    # not included here; only the parenthetical summary reaches the model.
    # Consider inlining the "none" rules — confirm with prompt owners.
    return f"""{_short_classification_edit_prefix()}

## 章节 key（英文）
childhood, education, career_early, career_achievement, career_challenge, family, beliefs, summary；不足以成篇则 **none**。

规则与「何时必须返回 none」同 `get_chapter_classification_prompt`（档案点、无叙事骨架 → none）。

对话内容：
{segments_text}

**JSON 输出**：`response_format=json_object`，只输出：
{{"category": "childhood|education|career_early|career_achievement|career_challenge|family|beliefs|summary|none"}}
不要其它文字。

若你返回 **none**，服务端会将本段映射到 **summary** 章节并仍写入回忆录正文（不落库丢弃）。"""


def get_state_extraction_prompt(
    user_message: str, current_stage: str, stage_slots: dict
) -> str:
    """Build the prompt that extracts structured slots and detects the life stage.

    Args:
        user_message: The user's utterance; inserted verbatim.
        current_stage: The stage the system is currently tracking.
        stage_slots: Slots of the current stage; only its keys are shown to the
            model.

    Returns:
        A prompt that starts with the fidelity system prompt and asks for a
        single JSON object with ``detected_stage``, ``slots``, ``emotion`` and
        ``is_new_chapter``.
    """
    slot_keys = [*stage_slots.keys()]
    # Reference table of every stage and its slot keys, shown to the model.
    all_stage_slots = {
        "childhood": ["place", "people", "daily_life", "emotion", "turning_event"],
        "education": ["school", "city", "motivation", "challenge", "change"],
        "career": ["job", "environment", "decision", "pressure", "growth"],
        "family": ["relationship", "conflict", "support", "responsibility", "change"],
        "belief": ["value", "regret", "pride", "lesson"],
    }
    fidelity_rules = get_memoir_fidelity_system_prompt()
    stage_slots_reference = json.dumps(all_stage_slots, ensure_ascii=False, indent=2)

    return f"""{fidelity_rules}

你需要从用户话语中**先提炼与人生经历相关的核心内容**，然后抽取结构化信息，并判断用户实际在谈论哪个人生阶段（slots 仅填口述中确有依据的片段）。

**JSON 输出**：接口已启用 `response_format=json_object`，你必须只输出一个合法 JSON 对象，不要 markdown 代码块或其它文字。

系统当前跟踪的阶段：{current_stage}
该阶段可填 slots：{slot_keys}

所有阶段及其 slots 参考：
{stage_slots_reference}

用户话语：
{user_message}

请只返回 JSON，格式如下：
{{
  "detected_stage": "childhood|education|career|family|belief",
  "slots": {{
    "slot_key": "snippet"
  }},
  "emotion": "neutral|warm|low|highlight",
  "is_new_chapter": true
}}

要求：
1. **先忽略话语中的语气词、填充词、寒暄、与AI的交互指令等无关内容**，只关注涉及人生经历的实质信息
2. **detected_stage 必须根据用户话语的实际内容判断**，不要默认沿用系统当前阶段。用户可能在聊不同阶段的事情
3. slots 的 key 必须属于 detected_stage 对应的 slot 列表
4. slots 只填写确实提到的、与人生经历相关的实质内容
5. **snippet 应是提炼后的核心信息**，去除语气词和冗余表达，50 字以内
6. 如果用户话语中没有任何与人生经历相关的实质内容（如纯粹的寒暄、指令、语气词），slots 为空对象
"""


def _build_age_hint(stage: str, birth_year: Optional[int] = None) -> str:
    """根据人生阶段和出生年份推算大致年龄区间"""
    if not birth_year:
        return ""
    stage_age_ranges = {
        "childhood": (0, 12),
        "education": (6, 22),
        "career": (18, 60),
        "career_early": (18, 30),
        "career_achievement": (25, 55),
        "career_challenge": (20, 55),
        "family": (20, 60),
        "belief": (30, 70),
        "beliefs": (30, 70),
        "summary": (50, 80),
    }
    age_range = stage_age_ranges.get(stage)
    if not age_range:
        return ""
    year_start = birth_year + age_range[0]
    year_end = birth_year + age_range[1]
    return f"大约 {year_start}-{year_end} 年（{age_range[0]}-{age_range[1]} 岁）"


def get_creative_title_prompt(
    stage: str,
    emotion: str,
    slots: dict,
    user_profile: str = "",
    birth_year: Optional[int] = None,
) -> str:
    """Build the prompt that asks for one story title.

    The title must summarise facts or themes from the user's account; purely
    imagistic, invented titles are forbidden by the prompt.

    Args:
        stage: Life stage key.
        emotion: Emotion label shown to the model.
        slots: Extracted slot data; rendered with its default string form.
        user_profile: Optional profile text; omitted from the prompt when empty.
        birth_year: Optional birth year used to derive a time reference.

    Returns:
        The complete prompt, beginning with the fidelity system prompt.
    """
    age_hint = _build_age_hint(stage, birth_year)

    profile_section = ""
    if user_profile:
        profile_section = f"\n用户基本信息：\n{user_profile}"

    time_section = ""
    if age_hint:
        time_section = f"\n时间参考：{age_hint}"

    fidelity_rules = get_memoir_fidelity_system_prompt()

    return f"""{fidelity_rules}

请根据下面「阶段、情绪、可用信息」生成 **1 个**回忆录故事标题。

阶段：{stage}
情绪：{emotion}
可用信息（含口述 slots 与档案）：{slots}{profile_section}{time_section}

要求：
1. 格式：「时间标注 · 标题正文」（时间标注可用年龄、年代或阶段，须与上列信息一致；勿编造未出现的年份）。
2. 标题正文 **12–18 字**，必须概括 **用户口述或 slots 中已出现的主题/事实**；**禁止**文学意象与比喻（如未提巷子/蝉鸣则不得写）。
3. **平实**概括，不得引入口述中不存在的人、事、地、物。

只输出标题这一行文字，不要加引号或书名号。
"""


def get_creative_title_json_prompt(
    stage: str,
    emotion: str,
    slots: dict,
    user_profile: str = "",
    birth_year: Optional[int] = None,
) -> str:
    """Build the story-title prompt with a JSON answer of the form ``{"title": "..."}``.

    Takes the plain-text title prompt, strips its trailing whitespace and
    appends the JSON output instruction. Per the original note, this is meant
    to be used together with ``invoke_json_object``.
    """
    plain_prompt = get_creative_title_prompt(
        stage=stage,
        emotion=emotion,
        slots=slots,
        user_profile=user_profile,
        birth_year=birth_year,
    )
    json_instruction = "".join(
        (
            "\n\n**JSON 输出**：`response_format=json_object`，只输出：",
            '\n{"title":"完整标题一行（含时间标注 · 正文格式）"}\n',
            "不要其它文字。",
        )
    )
    return plain_prompt.rstrip() + json_instruction


def get_narrative_prompt(
    stage: str,
    slots: dict,
    new_content: str,
    existing_content: str = "",
    user_profile: str = "",
    birth_year: Optional[int] = None,
    archived_summaries: str = "",
) -> str:
    """Build the prompt that rewrites new dialogue as plain-text narrative.

    The model is asked to output only the rewrite of the new content, without
    repeating what already exists.

    Args:
        stage: Life stage key.
        slots: Extracted slot data; rendered with its default string form.
        new_content: Input material for this turn; inserted verbatim.
        existing_content: Existing story text; only its last 300 characters are
            shown, as continuity context.
        user_profile: Optional profile text.
        birth_year: Optional birth year used to derive a time reference.
        archived_summaries: Optional summaries of deleted chapters in this
            category, shown for reference only.

    Returns:
        The complete prompt, beginning with the narrative editor system prompt.
    """
    # Slicing already returns the whole string when it is 300 characters or fewer.
    context_tail = existing_content[-300:] if existing_content else ""

    context_section = ""
    if context_tail:
        context_section = (
            f"\n\n【衔接上下文（已有内容的末尾，仅供参考衔接，不要重复）】：\n{context_tail}"
        )

    archived_section = ""
    if archived_summaries:
        archived_section = (
            f"\n\n【已删除的该类别历史章节（仅供参考，请勿直接使用或重复）】：\n{archived_summaries}"
        )

    profile_section = ""
    if user_profile:
        profile_section = f"\n\n用户基本信息：\n{user_profile}"

    age_hint = _build_age_hint(stage, birth_year)
    time_section = ""
    if age_hint:
        time_section = f"\n时间参考：{age_hint}"

    system_rules = get_narrative_editor_system_prompt()

    return f"""{system_rules}

阶段：{stage}
可用信息（slots，仅可复述其中已出现事实）：{slots}{profile_section}{time_section}

输入材料（请严格区分「本段口述」与参考区，规则见系统说明）：
{new_content}
{context_section}
{archived_section}

## 步骤
1. 从「本段用户口述」提炼可写事实；丢弃语气词、寒暄、与 AI 的交互。
2. 改写为第一人称书面叙述（优雅、连贯，可适当过渡；可调整语序与用词），**不得**新增事实。
3. 若材料中无值得记录的人生经历内容，输出空字符串。

## 格式
- 不要插入章节标题或 `#`、`##`；不要用 Markdown 表格。
- 不要写入与「本段用户口述」无关的交互套话。

只输出改写后的正文。无内容则输出空字符串。
"""


def get_narrative_json_prompt(
    stage: str,
    slots: dict,
    new_content: str,
    existing_content: str = "",
    user_profile: str = "",
    birth_year: Optional[int] = None,
) -> str:
    """Build the prompt that rewrites new dialogue as narrative, answered in JSON.

    The requested output is ``{"paragraphs": [{"content": ...}, ...]}``; the
    schema in the prompt contains only the ``content`` field.

    Args:
        stage: Life stage key.
        slots: Extracted slot data; rendered with its default string form.
        new_content: Input material for this turn; inserted verbatim.
        existing_content: Existing story text; only its last 300 characters are
            shown, as continuity context.
        user_profile: Optional profile text.
        birth_year: Optional birth year used to derive a time reference.

    Returns:
        The complete prompt, beginning with the narrative editor system prompt.
    """
    # Slicing already returns the whole string when it is 300 characters or fewer.
    context_tail = existing_content[-300:] if existing_content else ""

    context_section = ""
    if context_tail:
        context_section = (
            f"\n\n【衔接上下文（已有内容的末尾，仅供参考衔接，不要重复）】：\n{context_tail}"
        )

    profile_section = ""
    if user_profile:
        profile_section = f"\n\n用户基本信息：\n{user_profile}"

    age_hint = _build_age_hint(stage, birth_year)
    time_section = ""
    if age_hint:
        time_section = f"\n时间参考：{age_hint}"

    system_rules = get_narrative_editor_system_prompt()

    return f"""{system_rules}

请将「本段用户口述」改写为第一人称书面叙述，并输出 **纯 JSON**，不要包含任何其他文字或 markdown 代码块。
**JSON 输出**：接口已启用 `response_format=json_object`（与 DeepSeek JSON 模式一致），只输出一个合法 JSON 对象。

阶段：{stage}
可用信息（slots）：{slots}{profile_section}{time_section}

输入材料：
{new_content}
{context_section}

## 要求
1. **只展开「本段用户口述」**；若有参考摘录区，不得把摘录中的具体事实写成本轮亲历经历（见系统说明）。
2. 过滤语气词、寒暄、与 AI 的交互；不重复已有故事全文；本批只写同一主题/事件链。
3. 段落数量与每段长度**随材料而定**，禁止为凑字数编造。
4. 使用第一人称、**优雅书面语**（可适当过渡与铺陈，须基于口述事实）；不要直接引用原话；不要用 `#`、`##`、表格。

## 输出格式（严格 JSON）
{{
  "paragraphs": [
    {{"content": "段落正文"}},
    ...
  ]
}}

- content：仅含正文。

若无值得记录的内容：{{"paragraphs": []}}
"""


# Avoid over-long context when merging a whole story: keep the head and the
# tail and omit the middle (limits are counted in characters).
NARRATIVE_MERGE_EXISTING_MAX_CHARS = 14000
NARRATIVE_MERGE_HEAD_CHARS = 7000
NARRATIVE_MERGE_TAIL_CHARS = 7000


def clip_existing_story_body_for_merge(existing_markdown: str) -> str:
    """Shorten a very long story body to head + tail for the append-merge prompt.

    Args:
        existing_markdown: Existing story text; ``None`` is treated as empty.

    Returns:
        The stripped text when it is within
        ``NARRATIVE_MERGE_EXISTING_MAX_CHARS``. Otherwise the first
        ``NARRATIVE_MERGE_HEAD_CHARS`` and last ``NARRATIVE_MERGE_TAIL_CHARS``
        characters, separated by an omission marker and followed by a note
        telling the model not to drop facts because of the omission.
    """
    body = (existing_markdown or "").strip()
    if not body or len(body) <= NARRATIVE_MERGE_EXISTING_MAX_CHARS:
        return body

    leading = body[:NARRATIVE_MERGE_HEAD_CHARS]
    trailing = body[-NARRATIVE_MERGE_TAIL_CHARS:]
    omission_marker = "\n\n【…中间省略…】\n\n"
    preservation_note = (
        "\n\n（上文为已有故事正文节选，合并时须保留其中全部事实，不得因省略而删事实。）"
    )
    return "".join((leading, omission_marker, trailing, preservation_note))


def get_narrative_merge_json_prompt(
    stage: str,
    slots: dict,
    new_content: str,
    existing_content: str,
    user_profile: str = "",
    birth_year: Optional[int] = None,
) -> str:
    """
    Build the prompt for appending to an existing story.

    The existing text (clipped to head + tail when very long) and the new oral
    account are to be merged into one first-person narrative, ordered by when
    events happened, and returned as JSON ``paragraphs`` covering the whole
    story.

    Args:
        stage: Life stage key.
        slots: Extracted slot data; rendered with its default string form.
        new_content: This turn's oral account and reference material.
        existing_content: Full text of the existing story.
        user_profile: Optional profile text.
        birth_year: Optional birth year used to derive a time reference.

    Returns:
        The complete prompt, beginning with the narrative editor system prompt.
    """
    clipped = clip_existing_story_body_for_merge(existing_content)

    existing_section = ""
    if clipped:
        existing_section = (
            f"\n\n【已有故事正文（须全部保留事实，仅调整顺序与衔接；不得编造）】：\n{clipped}"
        )

    profile_section = ""
    if user_profile:
        profile_section = f"\n\n用户基本信息：\n{user_profile}"

    age_hint = _build_age_hint(stage, birth_year)
    time_section = ""
    if age_hint:
        time_section = f"\n时间参考：{age_hint}"

    system_rules = get_narrative_editor_system_prompt()

    return f"""{system_rules}

你正在**扩写并重组**一则已有回忆录故事：必须把「已有故事」中的事实全部保留在输出中（可合并重复表述、调整语序），并融入「本段用户口述」中的新事实；按**事件发生的时间顺序**排列段落（早→晚）；禁止丢弃未矛盾的旧内容。

**JSON 输出**：接口已启用 `response_format=json_object`，只输出一个合法 JSON 对象，不要 markdown 代码块。

阶段：{stage}
可用信息（slots）：{slots}{profile_section}{time_section}

【本段用户口述与参考（含证据摘录时遵守系统事实边界）】：
{new_content}
{existing_section}

## 要求
1. 输出为**完整故事正文**（不是仅写本段）：`paragraphs` 须包含重组后的**全文**。
2. **禁止编造**：不得新增用户未在「已有」或「本段」中出现的人名、地点、时间、对话、数字。
3. 若本段与旧文完全重复或无新信息，可仅输出与旧文等价重组后的正文（不得无故缩短到明显少于旧文）。
4. 使用第一人称、**优雅书面语**（与系统说明中的传记作家文体一致）；不要用 `#`、`##`、表格。

## 输出格式（严格 JSON）
{{
  "paragraphs": [
    {{"content": "段落正文"}},
    ...
  ]
}}

若无任何可保留内容：{{"paragraphs": []}}
"""


def get_story_route_prompt(
    *,
    chapter_category: str,
    chapter_title: str,
    batch_transcript: str,
    candidate_stories_json: str,
) -> str:
    """Build the routing prompt: write a new story or append to an existing one.

    Per the original note, this is used by a Celery batch step and the answer
    must be strict JSON. A "story" is a self-contained stretch of life
    experience; batches reaching this step have already been assigned a concrete
    chapter category (including ``summary``, which is used when the model
    returned ``none`` or a scattered-profile heuristic applied).

    Args:
        chapter_category: Category key of the current chapter.
        chapter_title: Title of the current chapter.
        batch_transcript: Merged oral text of this batch; inserted verbatim.
        candidate_stories_json: Candidate stories, already serialised as text.

    Returns:
        The complete prompt string.
    """
    return f"""你是回忆录编辑助手。根据本批用户口述与候选故事列表，决定：
- append_story：内容明显延续、补充某一已有故事的主题与时间线，且能对应到具体 candidate id
- new_story：新话题、新人生阶段片段，或与所有候选故事都不够贴合

**JSON 输出**：接口已启用 `response_format=json_object`，只输出下面 schema 的一个合法 JSON 对象，不要 markdown。

「故事」在此指：**可独立讲述的一段人生经历**——单一主题或同一事件链；不要假设本批里包含多个互不相关的故事（多段由系统其它步骤处理）。

**new_story_title 与 reason 只能依据口述中已有信息概括，不得编造口述未出现的人、事、地、物。**

当前章节（写作容器）：
- category: {chapter_category}
- title: {chapter_title}

【本批口述合并文本】
{batch_transcript}

【候选故事】（仅允许在 append 时选择其中的 id；id 必须原样复制）
{candidate_stories_json}

## 输出 JSON（仅此一个对象，不要 markdown）
{{
  "decision": "new_story" | "append_story",
  "target_story_id": "<uuid 或 null；append 时必填且必须来自候选>",
  "new_story_title": "<短标题，6-20 字；new_story 时必填，append 时可 null>",
  "reason": "<一句中文理由>"
}}

规则：
- 若无法自信匹配某一候选，选 new_story
- new_story_title 应概括本批新内容，不要与候选标题重复
"""


def get_story_batch_plan_prompt(
    *,
    chapter_category: str,
    chapter_title: str,
    segments_json: str,
    candidate_stories_json: str,
) -> str:
    """Build the planning prompt for several segments in one chapter category.

    The model is asked to split the segments into consecutive write units, each
    marked ``new_story`` or ``append_story``, and to answer in strict JSON.

    Args:
        chapter_category: Category key of the current chapter.
        chapter_title: Title of the current chapter.
        segments_json: The batch's segments, already serialised as a JSON array.
        candidate_stories_json: Candidate stories, already serialised as text.

    Returns:
        The complete prompt string.
    """
    return f"""你是回忆录编辑助手。下面同一章节类别下有一批**按时间顺序**的用户口述片段（每段有 id 与文本）。

**JSON 输出**：接口已启用 `response_format=json_object`，只输出下面 schema 的一个合法 JSON 对象，不要 markdown。

## 「故事」定义（必须遵守）
一段「故事」= **可独立讲述的一段人生经历**：单一主题或同一事件链，能单独成篇。若话题切换、时间线跳到另一件事、人物/主线明显变化，应作为**新的故事**（new_story），而不是塞进同一段 append。

**new_story_title 与 reason 只能依据各 segment 文本中已有信息，不得编造口述未出现的事实。**

## 任务
将本批 segment **划分为连续若干块**（每块包含至少一个 segment，顺序不能打乱；每个 segment 必须恰好属于一块）。对每一块决定：
- **append_story**：内容明显延续、补充**某一已有候选故事**的主题与时间线，且能对应到具体 candidate id
- **new_story**：新话题、与所有候选故事都不够贴合、或应独立成篇的片段

当前章节（写作容器）：
- category: {chapter_category}
- title: {chapter_title}

【本批口述片段】（JSON 数组，顺序即口述顺序）
{segments_json}

【候选故事】（仅允许在 append 时选择其中的 id；id 必须原样复制）
{candidate_stories_json}

## 输出 JSON（仅此一个对象，不要 markdown）
{{
  "units": [
    {{
      "segment_ids": ["<按顺序列出本块包含的 segment id>"],
      "decision": "new_story" | "append_story",
      "target_story_id": "<uuid 或 null；append 时必填且必须来自候选>",
      "new_story_title": "<短标题，6-20 字；new_story 时必填，append 时可 null>",
      "reason": "<一句中文理由，可选>"
    }}
  ]
}}

规则：
- `units` 中所有 `segment_ids` 拼接后，必须**不重不漏**地覆盖本批全部 id，且顺序与【本批口述片段】数组一致
- 若无法自信匹配某一候选，对该块选 new_story
- new_story_title 应概括该块内容，不要与候选标题重复
"""


def format_narrative_user_content(oral_text: str, evidence_text: str = "") -> str:
    """
    Lay out the oral account and retrieved excerpts as separate labelled
    sections, so the narrative model can tell first-hand material from
    reference material.

    Both inputs are stripped; ``None`` is treated as empty. When there is no
    evidence text, only the oral section is returned.
    """
    spoken = (oral_text or "").strip()
    reference = (evidence_text or "").strip()

    oral_block = "【本段用户口述】\n" + spoken
    if reference:
        reference_heading = (
            "【仅供参考的相关记忆摘录（非本段口述；不得把其中具体事实写成本轮亲历经历，仅可作主题衔接）】\n"
        )
        return oral_block + "\n\n" + reference_heading + reference
    return oral_block


def format_evidence_chunks_for_prompt(evidence: dict) -> str:
    """Format retrieved evidence as short plain text for the narrative prompt.

    Reads up to 10 ``relevant_chunks``, 3 ``relevant_summaries``, 5
    ``relevant_facts``, 5 ``timeline_hints`` and 3 ``relevant_stories`` from
    ``evidence`` and joins the non-empty entries with blank lines.

    Chunks and facts may be dicts or attribute-style objects; summaries,
    timeline hints and stories are only read when they are dicts.

    Args:
        evidence: Result mapping of ``retrieve_evidence`` /
            ``retrieve_evidence_sync`` (per the original note). ``None`` or an
            empty mapping yields "".

    Returns:
        The formatted text, or "" when nothing usable was found.
    """

    def _text(record, key: str) -> str:
        """Return ``record[key]`` / ``record.key`` as stripped text; ``None`` becomes ""."""
        if isinstance(record, dict):
            raw = record.get(key)
        else:
            raw = getattr(record, key, None)
        return "" if raw is None else str(raw).strip()

    evidence = evidence or {}
    parts: list[str] = []

    for chunk in (evidence.get("relevant_chunks") or [])[:10]:
        content = _text(chunk, "content")
        if content:  # skips missing and whitespace-only chunks
            parts.append(content)

    for summary in (evidence.get("relevant_summaries") or [])[:3]:
        if not isinstance(summary, dict):
            continue
        body = _text(summary, "content")
        if body:
            kind = _text(summary, "summary_type")
            label = f"[摘要:{kind}]" if kind else "[摘要]"
            parts.append(f"{label} {body}")

    for fact in (evidence.get("relevant_facts") or [])[:5]:
        subject = _text(fact, "subject")
        predicate = _text(fact, "predicate")
        # A fact with neither subject nor predicate carries no information;
        # this guard now applies to dict and attribute-style facts alike.
        if not (subject or predicate):
            continue
        # The object is rendered with str(), so a dict value appears in its
        # Python repr form, as it did for dict facts before.
        obj = _text(fact, "object_json")
        parts.append(" ".join(x for x in (subject, predicate, obj) if x))

    for hint in (evidence.get("timeline_hints") or [])[:5]:
        if not isinstance(hint, dict):
            continue
        line = " ".join(
            x
            for x in (
                _text(hint, "event_year"),
                _text(hint, "title"),
                _text(hint, "description"),
            )
            if x
        )
        if line:
            parts.append(line)

    for story in (evidence.get("relevant_stories") or [])[:3]:
        if not isinstance(story, dict):
            continue
        line = " ".join(x for x in (_text(story, "title"), _text(story, "summary")) if x)
        if line:
            parts.append(line)

    return "\n\n".join(parts)


# Backward compatibility: in legacy code, get_system_prompt means the
# memoir-editor system prompt. Do not confuse it with the interview module's
# get_system_prompt.
get_system_prompt = get_memoir_editor_system_prompt