Chat 访谈 - 新增 persona 系统(default / warm_listener / curious_guide)与 background_voice 语气层 - 回复长度由 compute_reply_plan 统一决策(brief / standard / expanded),融合信息密度启发式 - 输入净稿(input_normalize):编排层可选 rules/llm 归一用户口语后再喂模型与记忆检索 - 记忆证据注入:按用户话检索 memory evidence 并注入 prompt Memoir 回忆录 - 口述归一(oral_normalize):segment 原文保留,story 管线取派生净稿作叙事输入 - segment 入队批次门闸:累计字数 + 最长等待秒数,减少零碎提交 - fidelity_check / prompts / narrative_agent 微调 - Alembic 0005:清理跨章节 story 外键 Infra - Dockerfile 加入 ffmpeg - pyproject.toml 新增依赖并同步 uv.lock - .env.example / .env.production 补全新配置项 Tests - 新增 test_background_voice、test_chat_input_normalize、test_experience_regressions - 扩展 test_interview_prompts、test_interview_reply_length、test_story_route_oral_invariant Made-with: Cursor
124 lines
4.1 KiB
Python
124 lines
4.1 KiB
Python
"""
|
||
从用户档案「职业」等文本推断访谈/叙事语气维度(干部形、军队形)。
|
||
与 chat_interview_persona(温柔倾听等)正交,可叠加。
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
from typing import Final, Literal
|
||
|
||
# Closed set of background-voice labels handled by this module:
# "default" (no extra tone layer), "cadre" and "military".
BackgroundVoice = Literal["default", "cadre", "military"]
|
||
|
||
# Military keywords take priority: occupation text containing terms such as
# "军…" / "部队" selects the military voice, so it does not collide with the
# broader "干部" (cadre) keywords. Matching is by substring.
_MILITARY_NEEDLES: Final[tuple[str, ...]] = (
    # Roles
    "军人", "军官", "士兵",
    # Unit and service life-cycle
    "部队", "入伍", "服役", "退伍", "转业",
    # Forces and branches
    "武警", "解放军", "陆军", "海军", "空军", "火箭军",
    # Commands, camps and academies
    "军区", "军营", "军校",
    # Civilian posts in the military and service status
    "文职干部", "军队文职", "现役", "预备役",
)
|
||
|
||
# Cadre / government-office keywords. Overly short words are deliberately left
# out to avoid false hits: e.g. "机关" would also match "机关枪" (machine gun)
# and "主任" would also match "班主任" (homeroom teacher).
_CADRE_NEEDLES: Final[tuple[str, ...]] = (
    # Civil service and party/government organs
    "公务员", "党政机关", "党政", "组织部",
    # Secretaries
    "党委书记", "党组书记", "书记",
    # Heads of divisions, sections, bureaus, departments and ministries
    "处长", "科长", "局长", "厅长", "部长",
    # Employer types and the generic "cadre" term
    "国企", "事业单位", "干部",
    # Administrative ranks
    "科级", "处级", "厅级",
)
|
||
|
||
|
||
def infer_background_voice(occupation: str | None) -> BackgroundVoice:
    """Infer the background voice from a free-text occupation.

    The text is stripped and case-folded, then searched for keyword
    substrings. Military keywords are checked before cadre keywords, so a
    text that contains both kinds resolves to ``"military"``.

    Args:
        occupation: Free-text occupation; may be ``None`` or blank.

    Returns:
        ``"military"`` or ``"cadre"`` on a keyword hit, otherwise
        ``"default"`` (also for missing or blank input).
    """
    text = "" if not occupation else str(occupation).strip()
    if not text:
        return "default"

    haystack = text.casefold()
    # Military first: it must win over the broader cadre keywords.
    if any(needle.casefold() in haystack for needle in _MILITARY_NEEDLES):
        return "military"
    if any(needle.casefold() in haystack for needle in _CADRE_NEEDLES):
        return "cadre"
    return "default"
|
||
|
||
|
||
def normalize_background_voice(voice: str | None) -> BackgroundVoice:
    """Resolve *voice* to one of the canonical background-voice labels.

    Callers may pass either an already-normalized label (``"default"``,
    ``"cadre"``, ``"military"``) or raw occupation text.

    Labels are matched after stripping and case-folding, so ``"Military"`` or
    ``" CADRE "`` resolve to their canonical form instead of falling through
    to keyword inference (where they would end up as ``"default"``). Anything
    that is not a label is handed to ``infer_background_voice``.

    Args:
        voice: A canonical label, free-text occupation, or ``None``.

    Returns:
        The canonical label; ``"default"`` for missing or blank input.
    """
    if not voice:
        return "default"

    # Coerce like infer_background_voice does, so a stray non-str value does
    # not raise AttributeError on .strip().
    token = str(voice).strip()
    if not token:
        return "default"

    # Explicit literal returns keep the type checker satisfied without a
    # `type: ignore`.
    label = token.casefold()
    if label == "default":
        return "default"
    if label == "cadre":
        return "cadre"
    if label == "military":
        return "military"

    return infer_background_voice(token)
|
||
|
||
|
||
def get_background_voice_chat_block(voice: str | None) -> str:
    """Build the "background tone" section injected into the interview guided/opening prompt.

    Args:
        voice: A canonical label or raw occupation text; it is resolved with
            ``normalize_background_voice``.

    Returns:
        The prompt section for the military or cadre voice, or an empty
        string for the default voice.
    """
    tone = normalize_background_voice(voice)

    if tone == "default":
        fragments: tuple[str, ...] = ()
    elif tone == "military":
        fragments = (
            "## 背景语气:军队语境(仅语气,不编造事实)\n",
            "称呼得体、句子简洁利落、条理清楚;避免网络梗与油滑套话。\n",
            "先简短接住对方,再**最多一个**具体问题;不写命令式、不做思想政治表态。\n",
            "涉及纪律、集体、任务等措辞,**仅当用户口述已出现相关事实时**自然呼应,禁止堆砌军事化辞藻或虚构经历。",
        )
    else:
        # Any remaining value is treated as the cadre voice.
        fragments = (
            "## 背景语气:干部/机关语境(仅语气,不编造事实)\n",
            "稳重、有分寸,敬语适度;句子可略完整,但仍控制总字数,避免官样文章与排比空话。\n",
            "先回应对方内容,再**最多一个**具体问题;不写公文套话、不做政治评价。\n",
            "涉及职务与组织时,**不得编造**用户未提及的职级、单位与荣誉。",
        )

    return "".join(fragments)
|
||
|
||
|
||
def get_background_voice_narrative_block(voice: str | None) -> str:
    """Build the style supplement appended after the narrative system prompt.

    Args:
        voice: A canonical label or raw occupation text; it is resolved with
            ``normalize_background_voice``.

    Returns:
        The style section for the military or cadre voice, or an empty
        string for the default voice.
    """
    military_style = (
        "## 背景文体(军队,须遵守上文事实边界)\n"
        "叙事紧凑、层次清楚;若口述已出现纪律、集体、任务等语境,可适度用书面语呼应,**禁止**堆砌口号式军事辞藻或虚构军旅细节。\n"
        "不新增军衔、单位番号、表彰等口述未出现的信息。"
    )
    cadre_style = (
        "## 背景文体(干部/机关,须遵守上文事实边界)\n"
        "段落层次清晰,用语庄重自然,避免口语碎词与段子感;**不得编造**职务、荣誉、单位名称与组织细节。\n"
        "文采服务于真实内容,不写成公文或汇报腔。"
    )

    resolved = normalize_background_voice(voice)
    if resolved == "default":
        return ""
    # Any value other than "default" / "military" is treated as the cadre voice.
    return military_style if resolved == "military" else cadre_style
|