Files
life-echo/api/app/agents/chat/background_voice.py
Kevin 69a673e6c6 feat(api): 访谈人格/回复长度策略、口述归一、背景语气与输入净稿全链路
Chat 访谈
- 新增 persona 系统(default / warm_listener / curious_guide)与 background_voice 语气层
- 回复长度由 compute_reply_plan 统一决策(brief / standard / expanded),融合信息密度启发式
- 输入净稿(input_normalize):编排层可选 rules/llm 归一用户口语后再喂模型与记忆检索
- 记忆证据注入:按用户话检索 memory evidence 并注入 prompt

Memoir 回忆录
- 口述归一(oral_normalize):segment 原文保留,story 管线取派生净稿作叙事输入
- segment 入队批次门闸:累计字数 + 最长等待秒数,减少零碎提交
- fidelity_check / prompts / narrative_agent 微调
- Alembic 0005:清理跨章节 story 外键

Infra
- Dockerfile 加入 ffmpeg
- pyproject.toml 新增依赖并同步 uv.lock
- .env.example / .env.production 补全新配置项

Tests
- 新增 test_background_voice、test_chat_input_normalize、test_experience_regressions
- 扩展 test_interview_prompts、test_interview_reply_length、test_story_route_oral_invariant

Made-with: Cursor
2026-03-31 23:55:26 +08:00

124 lines
4.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
从用户档案「职业」等文本推断访谈/叙事语气维度(干部形、军队形)。
与 chat_interview_persona温柔倾听等正交可叠加。
"""
from __future__ import annotations
from typing import Final, Literal
# Closed set of background-voice identifiers handled by this module:
# "default" (no extra tone block is emitted), "cadre" and "military".
BackgroundVoice = Literal["default", "cadre", "military"]
# Military keywords. infer_background_voice tests these before the cadre
# keywords, so text that matches both groups (e.g. one containing 文职干部)
# resolves to "military" instead of colliding with the generic 干部 keyword.
_MILITARY_NEEDLES: Final[tuple[str, ...]] = (
    # Roles
    "军人",
    "军官",
    "士兵",
    "部队",
    # Service life cycle (enlisting, serving, leaving)
    "入伍",
    "服役",
    "退伍",
    "转业",
    # Forces and service branches
    "武警",
    "解放军",
    "陆军",
    "海军",
    "空军",
    "火箭军",
    # Places and institutions
    "军区",
    "军营",
    "军校",
    # Civilian posts within the military
    "文职干部",
    "军队文职",
    # Service status
    "现役",
    "预备役",
)
# Cadre / government-office keywords. Very short words are deliberately left
# out to avoid false positives: 机关 would also match 机关枪 (machine gun) and
# 主任 would also match 班主任 (homeroom teacher).
# NOTE(review): matching is by substring, so 党政 already covers 党政机关 and
# 书记 already covers 党委书记 / 党组书记; the longer entries look redundant
# but harmless — confirm before pruning.
_CADRE_NEEDLES: Final[tuple[str, ...]] = (
    "公务员",
    "党政机关",
    "党政",
    "组织部",
    "党委书记",
    "党组书记",
    "书记",
    "处长",
    "科长",
    "局长",
    "厅长",
    "部长",
    "国企",
    "事业单位",
    "干部",
    "科级",
    "处级",
    "厅级",
)
def infer_background_voice(occupation: str | None) -> BackgroundVoice:
    """Infer the background voice from free-form occupation text.

    Matching is a case-insensitive substring search. Military keywords are
    tested before cadre keywords, so text containing both kinds resolves to
    ``"military"``.

    Args:
        occupation: Raw occupation text; may be ``None`` or blank.

    Returns:
        ``"military"`` or ``"cadre"`` when a keyword is found, otherwise
        ``"default"`` (also for missing or whitespace-only input).
    """
    # Collapse "missing" and "blank" into one empty-text guard.
    haystack = str(occupation).strip().casefold() if occupation else ""
    if not haystack:
        return "default"

    def _mentions_any(needles: tuple[str, ...]) -> bool:
        # True when at least one keyword occurs anywhere in the text.
        return any(needle.casefold() in haystack for needle in needles)

    if _mentions_any(_MILITARY_NEEDLES):
        return "military"
    if _mentions_any(_CADRE_NEEDLES):
        return "cadre"
    return "default"
def normalize_background_voice(voice: str | None) -> BackgroundVoice:
    """Coerce *voice* into one of the ``BackgroundVoice`` values.

    Callers may pass either an already-normalized enum name or raw
    occupation text.

    Args:
        voice: An enum name (``"default"``, ``"cadre"``, ``"military"``,
            in any letter case and with optional surrounding whitespace),
            raw occupation text, or ``None``.

    Returns:
        The matching enum value; for any other non-empty text, whatever
        ``infer_background_voice`` derives from it; ``"default"`` for
        ``None`` or an empty string.
    """
    if not voice:
        return "default"
    stripped = voice.strip()
    # Enum names are compared case-insensitively, in line with the casefolded
    # keyword matching of infer_background_voice; otherwise "Military" or
    # "CADRE" would be treated as occupation text and fall back to "default".
    key = stripped.casefold()
    if key in ("default", "cadre", "military"):
        return key  # type: ignore[return-value]
    return infer_background_voice(stripped)
def get_background_voice_chat_block(voice: str | None) -> str:
    """Return the "background voice" section for interview chat prompts.

    The section is meant to be injected into the guided/opening interview
    prompts. It only steers tone; both variants tell the model not to
    fabricate facts.

    Args:
        voice: Enum name or raw occupation text, resolved through
            ``normalize_background_voice``.

    Returns:
        The military or cadre prompt section, or an empty string when the
        resolved voice is ``"default"``.
    """
    military_section = (
        "## 背景语气:军队语境(仅语气,不编造事实)\n"
        "称呼得体、句子简洁利落、条理清楚;避免网络梗与油滑套话。\n"
        "先简短接住对方,再**最多一个**具体问题;不写命令式、不做思想政治表态。\n"
        "涉及纪律、集体、任务等措辞,**仅当用户口述已出现相关事实时**自然呼应,禁止堆砌军事化辞藻或虚构经历。"
    )
    cadre_section = (
        "## 背景语气:干部/机关语境(仅语气,不编造事实)\n"
        "稳重、有分寸,敬语适度;句子可略完整,但仍控制总字数,避免官样文章与排比空话。\n"
        "先回应对方内容,再**最多一个**具体问题;不写公文套话、不做政治评价。\n"
        "涉及职务与组织时,**不得编造**用户未提及的职级、单位与荣誉。"
    )

    resolved = normalize_background_voice(voice)
    if resolved == "default":
        return ""
    # Anything that is neither "default" nor "military" gets the cadre text.
    return military_section if resolved == "military" else cadre_section
def get_background_voice_narrative_block(voice: str | None) -> str:
    """Return the stylistic supplement appended after the narrative system prompt.

    Both variants stay within the fact boundaries set by the preceding prompt
    text and forbid inventing details.

    Args:
        voice: Enum name or raw occupation text, resolved through
            ``normalize_background_voice``.

    Returns:
        The military or cadre supplement, or an empty string when the
        resolved voice is ``"default"``.
    """
    military_supplement = (
        "## 背景文体(军队,须遵守上文事实边界)\n"
        "叙事紧凑、层次清楚;若口述已出现纪律、集体、任务等语境,可适度用书面语呼应,**禁止**堆砌口号式军事辞藻或虚构军旅细节。\n"
        "不新增军衔、单位番号、表彰等口述未出现的信息。"
    )
    cadre_supplement = (
        "## 背景文体(干部/机关,须遵守上文事实边界)\n"
        "段落层次清晰,用语庄重自然,避免口语碎词与段子感;**不得编造**职务、荣誉、单位名称与组织细节。\n"
        "文采服务于真实内容,不写成公文或汇报腔。"
    )

    resolved = normalize_background_voice(voice)
    if resolved == "default":
        return ""
    if resolved != "military":
        # Every remaining non-military voice uses the cadre supplement.
        return cadre_supplement
    return military_supplement