- 回忆录:事实边界补充允许清单;传记文体示例与 JSON 叙事要求对齐 - default 职业提示 occupation_context;cadre/military 退休语境 - GET 章节读路径零写入,prepare_chapter_read_view + markdown_for_response - 文本归一抽到 core/text_normalize;移除弃用 reply 策略与 recompose_chapters_for_story - ConversationService:WS 连接/用户段落/结束对话;对外错误固定文案 - 测试:HTTP 脱敏契约、章节读视图、occupation 与 background_voice
312 lines
8.4 KiB
Python
312 lines
8.4 KiB
Python
"""
|
||
访谈回复长度:由用户本轮文本 + 启发式(新细节 / 闲聊 / 信息密度)决定档位,
|
||
与 max_tokens、max_chars_per_segment 联动;单一 ReplyPlan 供 prompt 与截断共用。
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
from dataclasses import dataclass
|
||
from enum import Enum
|
||
from typing import TYPE_CHECKING
|
||
|
||
from app.agents.chat.background_voice import normalize_background_voice
|
||
|
||
if TYPE_CHECKING:
|
||
from app.core.config import Settings
|
||
|
||
|
||
class ReplyLengthMode(str, Enum):
    """Length tier for an interview reply.

    Members are plain strings as well as enum members, so they compare equal
    to their literal values.
    """

    # Very short reply.
    brief = "brief"
    # Default reply length.
    standard = "standard"
    # Somewhat longer reply, for turns worth a fuller follow-up.
    expanded = "expanded"
|
||
|
||
|
||
# Character-count buckets for the user's current turn. Lengths are len() of the
# stripped text, so every CJK character counts as one.
_LEN_BRIEF_MAX = 20  # at or below this length the turn counts as short
_LEN_MID_EXPAND_MIN = 40  # lower bound of the mid-length bucket
_LEN_LONG_MIN = 80  # lower bound of the long bucket
|
||
|
||
|
||
def heuristic_likely_new_detail(user_message: str) -> bool:
    """Cheap guess at whether this turn introduces a new name, relation or plot point.

    The result is shared by follow-up triggering and reply-length selection.

    Args:
        user_message: Raw text of the user's current turn; ``None`` or an empty
            string is treated as empty.

    Returns:
        True when the stripped text has at least two characters and contains
        any of the cue substrings below; False otherwise.
    """
    text = (user_message or "").strip()
    # A single character (or nothing) is never treated as a new detail.
    if len(text) >= 2:
        for cue in (
            "叫",
            "名字",
            "名叫",
            "同桌",
            "初恋",
            "现实里",
            "戏里",
            "饰演",
            "演我",
            "第一次",
            "认识",
            "没想到",
            "猜猜",
        ):
            if cue in text:
                return True
    return False
|
||
|
||
|
||
def heuristic_information_rich(user_message: str) -> bool:
    """Cheap guess at whether a turn is information-dense even if it is short.

    Looks for turning points, major life events and time anchors so that such
    turns are not squeezed into the ``brief`` tier by mistake.

    Args:
        user_message: Raw text of the user's current turn; ``None`` or an empty
            string is treated as empty.

    Returns:
        True when the stripped text has at least two characters and contains
        any of the cue substrings below; False otherwise.
    """
    dense_cues = (
        "突然",
        "那年",
        "后来",
        "记得",
        "第一次",
        "没想到",
        "离开",
        "去世",
        "走了",
        "结婚",
        "离婚",
        "生病",
        "辍学",
        "退学",
        "下岗",
        "破产",
        "我爸",
        "我妈",
        "爷爷",
        "奶奶",
    )
    text = (user_message or "").strip()
    if len(text) < 2:
        return False
    # Substring test of every cue against the turn; stops at the first hit.
    return any(map(text.__contains__, dense_cues))
|
||
|
||
|
||
def heuristic_likely_emotional(user_message: str) -> bool:
    """Cheap guess at whether the user is expressing fairly strong emotion.

    Emotional turns need more room for an empathetic follow-up and should not
    be squeezed into the ``brief`` tier.

    Args:
        user_message: Raw text of the user's current turn; ``None`` or an empty
            string is treated as empty.

    Returns:
        True when the stripped text has at least four characters and contains
        any of the emotion cue substrings below; False otherwise.
    """
    emotion_cues = (
        "想哭",
        "哭了",
        "难过",
        "伤心",
        "心酸",
        "感动",
        "激动",
        "害怕",
        "委屈",
        "后悔",
        "对不起",
        "愧疚",
        "感激",
        "谢谢你",
        "想念",
        "想他",
        "想她",
        "舍不得",
        "不容易",
        "太难了",
        "崩溃",
        "绝望",
        "幸福",
        "骄傲",
        "自豪",
    )
    text = (user_message or "").strip()
    # Turns shorter than four characters are never classified as emotional.
    return len(text) >= 4 and any(cue in text for cue in emotion_cues)
|
||
|
||
|
||
def heuristic_likely_chit_chat(user_message: str) -> bool:
    """Cheap guess at whether this turn is mostly small talk.

    Longer sentences are also accepted when they are pure greetings or
    weather talk with nothing substantive in them.

    Args:
        user_message: Raw text of the user's current turn; ``None`` or an empty
            string is treated as empty.

    Returns:
        - False for stripped text longer than 200 characters.
        - For 49-200 characters: True only if a small-talk cue occurs in the
          first 100 characters and the turn is neither information-rich nor
          likely to contain a new detail.
        - For 48 characters or fewer: True if any small-talk cue occurs, or the
          whole turn is one of a few bare acknowledgements.
    """
    text = (user_message or "").strip()
    size = len(text)
    if size > 200:
        return False

    small_talk_cues = (
        "天气",
        "谢谢",
        "哈哈",
        "呵呵",
        "在吗",
        "吃了吗",
        "早上好",
        "晚安",
        "闲聊",
        "逗你",
    )

    def has_cue(fragment: str) -> bool:
        return any(cue in fragment for cue in small_talk_cues)

    if size <= 48:
        bare_acknowledgements = ("嗯", "好", "行的", "谢谢", "哈哈", "可以", "没事")
        return has_cue(text) or (size <= 8 and text in bare_acknowledgements)

    # Longer turn: only the opening 100 characters are scanned for cues, and
    # any sign of real content overrides the small-talk verdict.
    return has_cue(text[:100]) and not (
        heuristic_information_rich(text) or heuristic_likely_new_detail(text)
    )
|
||
|
||
|
||
@dataclass(frozen=True)
class ReplyPlan:
    """Single immutable plan for one reply.

    The tier shown in the prompt and the numeric caps used for truncation come
    from the same object, so they stay consistent (including any
    background-voice adjustment applied to the caps).
    """

    # Length tier presented to the prompt.
    mode: ReplyLengthMode
    # Token cap for the reply.
    max_tokens: int
    # Character cap for each reply segment.
    max_chars_per_segment: int
    # Maximum number of reply segments.
    max_segments: int
    # Heuristic flags computed from the user's current turn.
    likely_new_detail: bool
    likely_chit_chat: bool
    information_rich: bool
|
||
|
||
|
||
def compute_reply_plan(
    user_message: str,
    *,
    background_voice: str | None,
    settings: "Settings",
) -> ReplyPlan:
    """Pick the reply-length plan for one user turn.

    Information content and emotion take priority; raw length comes second:

    - chit-chat with nothing substantive -> brief if short, otherwise standard
    - short input (<= 20 chars), nothing substantive -> brief
    - short input with new detail / dense info / strong emotion -> standard
    - 21-39 chars -> standard
    - 40 chars and up -> expanded if substantive, otherwise standard
      (the 40-79 and 80+ buckets currently resolve identically)

    Args:
        user_message: Raw text of the user's current turn; ``None`` or an empty
            string is treated as empty.
        background_voice: Background-voice key, forwarded unchanged to the
            plan builder.
        settings: Application settings; ``chat_interview_max_segments`` is
            read here and the object is forwarded to the plan builder.

    Returns:
        The ReplyPlan for the selected tier, carrying the new-detail,
        chit-chat and information-rich flags computed for this turn.
    """
    text = (user_message or "").strip()
    length = len(text)
    segment_cap = int(settings.chat_interview_max_segments)

    new_detail = heuristic_likely_new_detail(text)
    chit_chat = heuristic_likely_chit_chat(text)
    dense = heuristic_information_rich(text)
    emotional = heuristic_likely_emotional(text)
    has_substance = new_detail or dense or emotional

    is_short = length <= _LEN_BRIEF_MAX
    if chit_chat and not has_substance:
        # Pure small talk never earns an expanded reply.
        mode = ReplyLengthMode.brief if is_short else ReplyLengthMode.standard
    elif is_short:
        mode = ReplyLengthMode.standard if has_substance else ReplyLengthMode.brief
    elif length < _LEN_MID_EXPAND_MIN or not has_substance:
        mode = ReplyLengthMode.standard
    else:
        mode = ReplyLengthMode.expanded

    return _plan_from_mode(
        mode,
        max_segments=segment_cap,
        settings=settings,
        background_voice=background_voice,
        likely_new=new_detail,
        likely_chit=chit_chat,
        info_rich=dense,
    )
|
||
|
||
|
||
def _plan_from_mode(
    mode: ReplyLengthMode,
    *,
    max_segments: int,
    settings: "Settings",
    background_voice: str | None,
    likely_new: bool,
    likely_chit: bool,
    info_rich: bool,
) -> ReplyPlan:
    """Build the ReplyPlan for ``mode`` from the per-tier caps in ``settings``.

    Args:
        mode: Requested length tier. Anything other than brief or expanded is
            treated as standard.
        max_segments: Segment cap stored on the plan unchanged.
        settings: Source of the per-tier token and per-segment character caps.
        background_voice: Passed on to the background-voice adjustment.
        likely_new: Stored as ``likely_new_detail``.
        likely_chit: Stored as ``likely_chit_chat``.
        info_rich: Stored as ``information_rich``.

    Returns:
        The plan for the tier, after ``bump_reply_plan_for_background_voice``
        has been applied to it.
    """
    if mode == ReplyLengthMode.brief:
        resolved_mode = mode
        token_cap = int(settings.chat_interview_brief_max_tokens)
        char_cap = int(settings.chat_interview_brief_max_chars_per_segment)
    elif mode == ReplyLengthMode.expanded:
        resolved_mode = mode
        token_cap = int(settings.chat_interview_expanded_max_tokens)
        char_cap = int(settings.chat_interview_expanded_max_chars_per_segment)
    else:
        # Fallback tier: the stored mode is always the standard member.
        resolved_mode = ReplyLengthMode.standard
        token_cap = int(settings.chat_interview_max_tokens)
        char_cap = int(settings.chat_interview_max_chars_per_segment)

    unadjusted = ReplyPlan(
        mode=resolved_mode,
        max_tokens=token_cap,
        max_chars_per_segment=char_cap,
        max_segments=max_segments,
        likely_new_detail=likely_new,
        likely_chit_chat=likely_chit,
        information_rich=info_rich,
    )
    return bump_reply_plan_for_background_voice(
        unadjusted, background_voice=background_voice, settings=settings
    )
|
||
|
||
|
||
def bump_reply_plan_for_background_voice(
    plan: ReplyPlan,
    *,
    background_voice: str | None,
    settings: "Settings",
) -> ReplyPlan:
    """Slightly raise the caps of a standard plan for non-default background voices.

    Only the token cap and the per-segment character cap change; the tier on
    the plan stays ``standard``. The original note describes the non-default
    voices as cadre / military backgrounds; the exact set depends on
    ``normalize_background_voice`` and is not visible here.

    Args:
        plan: Plan to adjust.
        background_voice: Background-voice key; normalized before comparison.
        settings: Optional source of
            ``chat_interview_cadre_military_standard_extra_tokens`` and
            ``chat_interview_cadre_military_standard_extra_chars``; each
            defaults to 0 when the attribute is absent.

    Returns:
        ``plan`` itself when the normalized voice is ``"default"`` or the plan
        is not in the standard tier; otherwise a new ReplyPlan with both caps
        increased by the configured extras.
    """
    voice_is_default = normalize_background_voice(background_voice) == "default"
    if voice_is_default or plan.mode != ReplyLengthMode.standard:
        return plan

    token_bonus = int(
        getattr(settings, "chat_interview_cadre_military_standard_extra_tokens", 0)
    )
    char_bonus = int(
        getattr(settings, "chat_interview_cadre_military_standard_extra_chars", 0)
    )
    return ReplyPlan(
        mode=plan.mode,
        max_tokens=plan.max_tokens + token_bonus,
        max_chars_per_segment=plan.max_chars_per_segment + char_bonus,
        max_segments=plan.max_segments,
        likely_new_detail=plan.likely_new_detail,
        likely_chit_chat=plan.likely_chit_chat,
        information_rich=plan.information_rich,
    )
|