feat(api): 访谈人格/回复长度策略、口述归一、背景语气与输入净稿全链路

Chat 访谈
- 新增 persona 系统（default / warm_listener / curious_guide）与 background_voice 语气层
- 回复长度由 compute_reply_plan 统一决策（brief / standard / expanded），融合信息密度启发式
- 输入净稿（input_normalize）：编排层可选 rules/llm 归一用户口语后再喂模型与记忆检索
- 记忆证据注入：按用户话检索 memory evidence 并注入 prompt

Memoir 回忆录
- 口述归一（oral_normalize）：segment 原文保留，story 管线取派生净稿作叙事输入
- segment 入队批次门闸：累计字数 + 最长等待秒数，减少零碎提交
- fidelity_check / prompts / narrative_agent 微调
- Alembic 0005：清理跨章节 story 外键

Infra
- Dockerfile 加入 ffmpeg
- pyproject.toml 新增依赖并同步 uv.lock
- .env.example / .env.production 补全新配置项

Tests
- 新增 test_background_voice、test_chat_input_normalize、test_experience_regressions
- 扩展 test_interview_prompts、test_interview_reply_length、test_story_route_oral_invariant

Made-with: Cursor
2026-03-31 23:55:26 +08:00
parent 42ae2a5e91
commit 69a673e6c6
44 changed files with 2998 additions and 259 deletions
--- a/api/app/agents/memoir/classification_agent.py
+++ b/api/app/agents/memoir/classification_agent.py
@@ -10,6 +10,7 @@ from __future__ import annotations

 import json
 import re
+from dataclasses import dataclass
 from typing import Any

 from app.agents.memoir.prompts import (
@@ -95,6 +96,14 @@ def _normalize_llm_category(raw: str) -> str:
    return s


+@dataclass(frozen=True)
+class ChapterClassifyResult:
+    """章节分类结果；``llm_said_none`` 仅当走 LLM 且解析为 none 时为 True（fragment 启发式不为 True）。"""
+
+    category: str
+    llm_said_none: bool = False
+
+
 def _parse_category_from_llm_response(raw: str) -> str:
    """优先解析 JSON ``{"category": "..."}``，失败则按纯文本 key 处理。"""
    s = (raw or "").strip()
@@ -119,10 +128,11 @@ class ClassificationAgent:
        llm: Any,
        *,
        segment_id: str | None = None,
-    ) -> str:
+    ) -> ChapterClassifyResult:
        """
        分类到 8 个章节类别之一。
-        LLM 返回 none 或启发式为零散档案时，返回 ``summary``（仍走回忆录流水线）。
+        LLM 返回 none 或启发式为零散档案时，``category`` 为 ``summary``（仍可走回忆录流水线；
+        ``llm_said_none`` 仅在 LLM 明确返回 none 时为 True，供空转抑制判断）。
        llm 需支持 .invoke(prompt) 同步调用。
        """
        if _looks_like_fragment_only(text):
@@ -133,7 +143,10 @@ class ClassificationAgent:
                len(text or ""),
                _SUMMARY_FALLBACK_CATEGORY,
            )
-            return _SUMMARY_FALLBACK_CATEGORY
+            return ChapterClassifyResult(
+                category=_SUMMARY_FALLBACK_CATEGORY,
+                llm_said_none=False,
+            )

        if llm:
            try:
@@ -153,14 +166,18 @@ class ClassificationAgent:
                        len(text or ""),
                        _SUMMARY_FALLBACK_CATEGORY,
                    )
-                    return _SUMMARY_FALLBACK_CATEGORY
+                    return ChapterClassifyResult(
+                        category=_SUMMARY_FALLBACK_CATEGORY,
+                        llm_said_none=True,
+                    )
                if category in CHAPTER_CATEGORIES:
-                    return category
+                    return ChapterClassifyResult(category=category, llm_said_none=False)
            except Exception as e:
                logger.warning("ClassificationAgent LLM 章节分类失败: {}", e)

        stage = _detect_stage(text, fallback_stage)
-        return _STAGE_TO_DEFAULT_CATEGORY.get(
+        cat = _STAGE_TO_DEFAULT_CATEGORY.get(
            stage,
            _STAGE_TO_DEFAULT_CATEGORY.get(fallback_stage, "childhood"),
        )
+        return ChapterClassifyResult(category=cat, llm_said_none=False)