feat(api): 访谈人格/回复长度策略、口述归一、背景语气与输入净稿全链路

Chat 访谈
- 新增 persona 系统(default / warm_listener / curious_guide)与 background_voice 语气层
- 回复长度由 compute_reply_plan 统一决策(brief / standard / expanded),融合信息密度启发式
- 输入净稿(input_normalize):编排层可选 rules/llm 归一用户口语后再喂模型与记忆检索
- 记忆证据注入:按用户话检索 memory evidence 并注入 prompt

Memoir 回忆录
- 口述归一(oral_normalize):segment 原文保留,story 管线取派生净稿作叙事输入
- segment 入队批次门闸:累计字数 + 最长等待秒数,减少零碎提交
- fidelity_check / prompts / narrative_agent 微调
- Alembic 0005:清理跨章节 story 外键

Infra
- Dockerfile 加入 ffmpeg
- pyproject.toml 新增依赖并同步 uv.lock
- .env.example / .env.production 补全新配置项

Tests
- 新增 test_background_voice、test_chat_input_normalize、test_experience_regressions
- 扩展 test_interview_prompts、test_interview_reply_length、test_story_route_oral_invariant

Made-with: Cursor
This commit is contained in:
Kevin
2026-03-31 23:55:26 +08:00
parent 42ae2a5e91
commit 69a673e6c6
44 changed files with 2998 additions and 259 deletions

View File

@@ -0,0 +1,123 @@
"""
从用户档案「职业」等文本推断访谈/叙事语气维度(干部形、军队形)。
与 chat_interview_persona(温柔倾听等)正交,可叠加。
"""
from __future__ import annotations
from typing import Final, Literal
# Closed set of background-voice identifiers: "default" (no extra tone layer),
# "cadre" (official / government-office register) and "military".
BackgroundVoice = Literal["default", "cadre", "military"]
# Military keywords take priority: occupation text containing any of these is
# classified as "military", which avoids a clash with the generic cadre
# keywords (e.g. 「文职干部」 also contains 「干部」).
_MILITARY_NEEDLES: Final[tuple[str, ...]] = (
    # Personnel and units
    "军人",
    "军官",
    "士兵",
    "部队",
    # Service lifecycle: enlistment, service, discharge, transfer to civilian work
    "入伍",
    "服役",
    "退伍",
    "转业",
    # Forces and branches
    "武警",
    "解放军",
    "陆军",
    "海军",
    "空军",
    "火箭军",
    # Places and institutions
    "军区",
    "军营",
    "军校",
    # Civilian staff attached to the military
    "文职干部",
    "军队文职",
    # Service status
    "现役",
    "预备役",
)
# Cadre / government-office keywords. Overly short, ambiguous words are left
# out on purpose to avoid false positives: 「机关」 also occurs in 机关枪
# ("machine gun") and 「主任」 in 班主任 ("homeroom teacher").
_CADRE_NEEDLES: Final[tuple[str, ...]] = (
    # Civil service and Party/government organs
    "公务员",
    "党政机关",
    "党政",
    "组织部",
    # Party secretaries
    "党委书记",
    "党组书记",
    "书记",
    # Leadership titles
    "处长",
    "科长",
    "局长",
    "厅长",
    "部长",
    # State-owned enterprises and public institutions
    "国企",
    "事业单位",
    # Generic cadre term and administrative rank levels
    "干部",
    "科级",
    "处级",
    "厅级",
)
def infer_background_voice(occupation: str | None) -> BackgroundVoice:
    """Infer the background voice from free-form occupation text.

    Matching is a case-folded substring search. Military keywords are
    checked before cadre keywords, so text containing both kinds resolves
    to ``"military"``.

    Args:
        occupation: Free-form occupation text; may be ``None``.

    Returns:
        ``"military"`` if any entry of ``_MILITARY_NEEDLES`` occurs in the
        text, else ``"cadre"`` if any entry of ``_CADRE_NEEDLES`` occurs,
        else ``"default"`` (also returned for missing, empty, or
        whitespace-only input).
    """
    cleaned = str(occupation).strip() if occupation else ""
    if not cleaned:
        return "default"

    haystack = cleaned.casefold()
    # Order matters: the military check must run first so it wins over cadre.
    if any(needle.casefold() in haystack for needle in _MILITARY_NEEDLES):
        return "military"
    if any(needle.casefold() in haystack for needle in _CADRE_NEEDLES):
        return "cadre"
    return "default"
def normalize_background_voice(voice: str | None) -> BackgroundVoice:
    """Coerce *voice* into one of the three background-voice values.

    Callers may pass either an already-normalised enum value or raw
    occupation text.

    Args:
        voice: ``"default"``, ``"cadre"`` or ``"military"`` (surrounding
            whitespace and letter case are ignored), free-form occupation
            text, or ``None``.

    Returns:
        The matching enum value when *voice* names one; ``"default"`` for
        ``None`` or an empty string; otherwise whatever
        ``infer_background_voice`` derives from the stripped text.
    """
    if not voice:
        return "default"

    text = voice.strip()
    # Case-fold before comparing so that "Military" / "CADRE" are recognised
    # as enum values; a case-sensitive check would send them down the
    # occupation-inference path instead.
    key = text.casefold()
    # Return literals (not the input string) so the declared return type holds
    # without a type-checker suppression.
    if key == "military":
        return "military"
    if key == "cadre":
        return "cadre"
    if key == "default":
        return "default"
    return infer_background_voice(text)
def get_background_voice_chat_block(voice: str | None) -> str:
    """Return the "background voice" prompt section for the chat interview.

    The section is meant to be injected into the interview guided/opening
    prompts. *voice* is first resolved with ``normalize_background_voice``.

    Args:
        voice: An enum value, raw occupation text, or ``None``.

    Returns:
        An empty string when the resolved voice is ``"default"``; the
        military-register section for ``"military"``; the cadre /
        government-office section for anything else.
    """
    resolved = normalize_background_voice(voice)
    if resolved == "default":
        return ""

    military_section = (
        "## 背景语气:军队语境(仅语气,不编造事实)\n"
        "称呼得体、句子简洁利落、条理清楚;避免网络梗与油滑套话。\n"
        "先简短接住对方,再**最多一个**具体问题;不写命令式、不做思想政治表态。\n"
        "涉及纪律、集体、任务等措辞,**仅当用户口述已出现相关事实时**自然呼应,禁止堆砌军事化辞藻或虚构经历。"
    )
    cadre_section = (
        "## 背景语气:干部/机关语境(仅语气,不编造事实)\n"
        "稳重、有分寸,敬语适度;句子可略完整,但仍控制总字数,避免官样文章与排比空话。\n"
        "先回应对方内容,再**最多一个**具体问题;不写公文套话、不做政治评价。\n"
        "涉及职务与组织时,**不得编造**用户未提及的职级、单位与荣誉。"
    )
    # Any non-default, non-military value falls through to the cadre section.
    return military_section if resolved == "military" else cadre_section
def get_background_voice_narrative_block(voice: str | None) -> str:
    """Return the writing-style supplement appended to the narrative system prompt.

    *voice* is first resolved with ``normalize_background_voice``.

    Args:
        voice: An enum value, raw occupation text, or ``None``.

    Returns:
        An empty string when the resolved voice is ``"default"``; the
        military style supplement for ``"military"``; the cadre /
        government-office style supplement for anything else.
    """
    resolved = normalize_background_voice(voice)

    if resolved == "military":
        return (
            "## 背景文体(军队,须遵守上文事实边界)\n"
            "叙事紧凑、层次清楚;若口述已出现纪律、集体、任务等语境,可适度用书面语呼应,**禁止**堆砌口号式军事辞藻或虚构军旅细节。\n"
            "不新增军衔、单位番号、表彰等口述未出现的信息。"
        )
    if resolved != "default":
        # Every remaining non-default value is treated as the cadre register.
        return (
            "## 背景文体(干部/机关,须遵守上文事实边界)\n"
            "段落层次清晰,用语庄重自然,避免口语碎词与段子感;**不得编造**职务、荣誉、单位名称与组织细节。\n"
            "文采服务于真实内容,不写成公文或汇报腔。"
        )
    return ""