feat(api): 访谈人格/回复长度策略、口述归一、背景语气与输入净稿全链路
Chat 访谈
- 新增 persona 系统(default / warm_listener / curious_guide)与 background_voice 语气层
- 回复长度由 compute_reply_plan 统一决策(brief / standard / expanded),融合信息密度启发式
- 输入净稿(input_normalize):编排层可选 rules/llm 归一用户口语后再喂模型与记忆检索
- 记忆证据注入:按用户话检索 memory evidence 并注入 prompt

Memoir 回忆录
- 口述归一(oral_normalize):segment 原文保留,story 管线取派生净稿作叙事输入
- segment 入队批次门闸:累计字数 + 最长等待秒数,减少零碎提交
- fidelity_check / prompts / narrative_agent 微调
- Alembic 0005:清理跨章节 story 外键

Infra
- Dockerfile 加入 ffmpeg
- pyproject.toml 新增依赖并同步 uv.lock
- .env.example / .env.production 补全新配置项

Tests
- 新增 test_background_voice、test_chat_input_normalize、test_experience_regressions
- 扩展 test_interview_prompts、test_interview_reply_length、test_story_route_oral_invariant

Made-with: Cursor
This commit is contained in:
123
api/app/agents/chat/background_voice.py
Normal file
123
api/app/agents/chat/background_voice.py
Normal file
@@ -0,0 +1,123 @@
|
||||
"""
|
||||
从用户档案「职业」等文本推断访谈/叙事语气维度(干部形、军队形)。
|
||||
与 chat_interview_persona(温柔倾听等)正交,可叠加。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Final, Literal
|
||||
|
||||
# Closed set of background-voice labels; used as the return type of the
# inference/normalization helpers in this module.
BackgroundVoice = Literal["default", "cadre", "military"]
|
||||
|
||||
# Military-family keywords are checked first: occupation text containing
# terms such as 「军」 or 「部队」 resolves to the military voice, which keeps
# it from colliding with the generic 「干部」 cadre keyword.
_MILITARY_NEEDLES: Final[tuple[str, ...]] = (
    # personnel
    "军人", "军官", "士兵",
    # service life cycle
    "部队", "入伍", "服役", "退伍", "转业",
    # forces and branches
    "武警", "解放军", "陆军", "海军", "空军", "火箭军",
    # institutions
    "军区", "军营", "军校",
    # civilian posts within the military
    "文职干部", "军队文职",
    # service status
    "现役", "预备役",
)
|
||||
|
||||
# Cadre / government-office keywords. Overly short needles are avoided to
# prevent false matches: e.g. 「机关」 would hit 机关枪 (machine gun) and
# 「主任」 would hit 班主任 (homeroom teacher).
_CADRE_NEEDLES: Final[tuple[str, ...]] = (
    # civil service and party/government organs
    "公务员", "党政机关", "党政", "组织部",
    # secretaries
    "党委书记", "党组书记", "书记",
    # heads of offices, sections, bureaus, departments, ministries
    "处长", "科长", "局长", "厅长", "部长",
    # state-owned enterprises and public institutions
    "国企", "事业单位",
    # generic cadre term and rank levels
    "干部", "科级", "处级", "厅级",
)
|
||||
|
||||
|
||||
def infer_background_voice(occupation: str | None) -> BackgroundVoice:
    """Infer the background voice from free-text occupation.

    Military keywords are tested before cadre keywords, so text that matches
    both lists resolves to ``"military"``. Missing, blank, or unmatched input
    resolves to ``"default"``.
    """
    # Normalise once; a falsy or whitespace-only value collapses to "".
    text = str(occupation).strip().casefold() if occupation else ""
    if not text:
        return "default"

    if any(needle.casefold() in text for needle in _MILITARY_NEEDLES):
        return "military"
    if any(needle.casefold() in text for needle in _CADRE_NEEDLES):
        return "cadre"
    return "default"
|
||||
|
||||
|
||||
def normalize_background_voice(voice: str | None) -> BackgroundVoice:
    """Coerce *voice* to one of ``"default"``, ``"cadre"`` or ``"military"``.

    Callers may pass either an already-normalized label or raw occupation
    text. Labels are recognised after trimming whitespace and ignoring case,
    so ``" Military "`` is accepted as ``"military"`` instead of silently
    degrading to ``"default"``. Anything else is handed to
    ``infer_background_voice`` for keyword matching.

    Returns ``"default"`` for ``None``, empty, or whitespace-only input.
    """
    if not voice:
        return "default"

    # str() mirrors infer_background_voice, which also tolerates non-str input.
    stripped = str(voice).strip()
    if not stripped:
        return "default"

    label = stripped.casefold()
    if label in ("default", "cadre", "military"):
        return label  # type: ignore[return-value]
    return infer_background_voice(stripped)
|
||||
|
||||
|
||||
def get_background_voice_chat_block(voice: str | None) -> str:
    """Return the "background voice" prompt section for interview chat.

    *voice* is passed through ``normalize_background_voice`` first. The
    ``"default"`` voice yields an empty string; ``"military"`` and any other
    resolved value (i.e. cadre) yield their respective prompt paragraphs.
    """
    resolved = normalize_background_voice(voice)
    if resolved == "default":
        return ""

    military_block = (
        "## 背景语气:军队语境(仅语气,不编造事实)\n"
        "称呼得体、句子简洁利落、条理清楚;避免网络梗与油滑套话。\n"
        "先简短接住对方,再**最多一个**具体问题;不写命令式、不做思想政治表态。\n"
        "涉及纪律、集体、任务等措辞,**仅当用户口述已出现相关事实时**自然呼应,禁止堆砌军事化辞藻或虚构经历。"
    )
    cadre_block = (
        "## 背景语气:干部/机关语境(仅语气,不编造事实)\n"
        "稳重、有分寸,敬语适度;句子可略完整,但仍控制总字数,避免官样文章与排比空话。\n"
        "先回应对方内容,再**最多一个**具体问题;不写公文套话、不做政治评价。\n"
        "涉及职务与组织时,**不得编造**用户未提及的职级、单位与荣誉。"
    )
    # Anything that is neither default nor military falls through to cadre.
    return military_block if resolved == "military" else cadre_block
|
||||
|
||||
|
||||
def get_background_voice_narrative_block(voice: str | None) -> str:
    """Return the style supplement appended after the narrative system prompt.

    *voice* is passed through ``normalize_background_voice`` first. The
    ``"default"`` voice yields an empty string; ``"military"`` and any other
    resolved value (i.e. cadre) yield their respective style paragraphs.
    """
    resolved = normalize_background_voice(voice)
    if resolved == "default":
        return ""

    military_style = (
        "## 背景文体(军队,须遵守上文事实边界)\n"
        "叙事紧凑、层次清楚;若口述已出现纪律、集体、任务等语境,可适度用书面语呼应,**禁止**堆砌口号式军事辞藻或虚构军旅细节。\n"
        "不新增军衔、单位番号、表彰等口述未出现的信息。"
    )
    cadre_style = (
        "## 背景文体(干部/机关,须遵守上文事实边界)\n"
        "段落层次清晰,用语庄重自然,避免口语碎词与段子感;**不得编造**职务、荣誉、单位名称与组织细节。\n"
        "文采服务于真实内容,不写成公文或汇报腔。"
    )
    # Anything that is neither default nor military falls through to cadre.
    return military_style if resolved == "military" else cadre_style
|
||||
Reference in New Issue
Block a user