Files
life-echo/api/app/agents/chat/reply_limits.py
yangshilin 9af2060259 fix:
1. 修复安卓部分机型顶部安全区遮挡回忆录标题的问题;
2. 降低封面图生成阈值和展示逻辑,独立封面图未生成时,使用正文图;
3. 去掉“嗯。”生硬回答,去掉不合理段首承接词;
4. 新增章节封面所需最少插图数的配置项
2026-04-16 20:42:54 +08:00

148 lines
5.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""访谈/资料追问:回复条数与单条字数硬限制(不靠长 prompt"""
from __future__ import annotations
import re
def strip_markdown_for_chat(text: str) -> str:
    """Reduce common Markdown emitted by the model to plain text for chat bubbles.

    This is a short list of predictable substitutions, not a Markdown parser.
    Line breaks are preserved, and a bare ``[SPLIT]`` marker (one that is not
    immediately followed by ``(...)``) passes through unchanged.

    Args:
        text: Raw model output. Falsy values are returned as-is.

    Returns:
        The text with code fences, image/link syntax, ATX heading markers,
        list markers, bold/strike/emphasis markers, backticks and ``---``
        rules removed.
    """
    if not text:
        return text
    s = text
    # Fenced code blocks (optionally with a language tag on the opening line):
    # keep the inner text, drop the fences.
    s = re.sub(
        r"```(?:[^\n`]*)\n([\s\S]*?)```",
        r"\1",
        s,
        flags=re.MULTILINE,
    )
    # Any fence left over (unterminated block) is simply removed.
    s = s.replace("```", "")
    # Images ![alt](url) -> alt; links [label](url) -> label.
    s = re.sub(r"!\[([^\]]*)\]\([^)]*\)", r"\1", s)
    s = re.sub(r"\[([^\]]*)\]\([^)]*\)", r"\1", s)
    # The three line-start patterns below use [ \t] rather than \s on purpose:
    # \s also matches "\n", which let a marker match start on the blank line
    # above it and swallow the paragraph break.
    # ATX headings.
    s = re.sub(r"(?m)^#{1,6}[ \t]+", "", s)
    # Unordered list markers: only -, * or + at line start followed by a
    # space/tab, so dashes inside running text are not touched.
    s = re.sub(r"(?m)^[ \t]*[-*+][ \t]+", "", s)
    # Ordered list markers "N. " at line start only.
    s = re.sub(r"(?m)^[ \t]*\d+\.[ \t]+", "", s)
    # Bold / strikethrough markers.
    s = s.replace("**", "").replace("__", "")
    s = s.replace("~~", "")
    # Inline code backticks.
    s = s.replace("`", "")
    # Remaining single-marker emphasis (*word* or _word_), never across lines.
    s = re.sub(r"(?<![*])\*([^*\n]+)\*(?![*])", r"\1", s)
    s = re.sub(r"(?<![_])_([^_\n]+)_(?![_])", r"\1", s)
    # Horizontal rules.
    s = re.sub(r"(?m)^\s*---+\s*$", "", s)
    return s
def strip_parenthetical_asides_for_chat(text: str) -> str:
    """Remove parenthetical asides, both full-width and ASCII, from a reply.

    Every parenthesised span is deleted, so an informative note written in
    parentheses is removed just like a stage direction. Deletion repeats until
    the text stops changing; because each pattern only matches a pair with no
    further opener inside it, nested asides are peeled innermost-first.

    The original docstring asks that this run after ``strip_markdown_for_chat``
    so the ``(...)`` of Markdown links has already been consumed.

    Args:
        text: Reply text. Falsy values are returned as-is.

    Returns:
        The text without parenthesised spans, with runs of spaces/tabs
        collapsed to one space and outer whitespace stripped.
    """
    if not text:
        return text
    s = text
    while True:
        before = s
        # Full-width pair U+FF08 ... U+FF09. Written as escapes so the pattern
        # survives tooling that drops "ambiguous" Unicode characters.
        s = re.sub(r"\uFF08[^\uFF08\uFF09]*\uFF09", "", s)
        # ASCII pair.
        s = re.sub(r"\([^()]*\)", "", s)
        # Deleting a span can leave doubled spaces behind.
        s = re.sub(r"[ \t]{2,}", " ", s)
        if s == before:
            break
    return s.strip()
def strip_leading_en_period_ack_for_chat(text: str) -> str:
    """Drop a stiff leading "嗯。" acknowledgement from the start of a reply.

    Only the very beginning of the string is examined, so "嗯。" in the middle
    of a sentence is kept. Repeated acknowledgements ("嗯。嗯嗯。") are removed
    together. A leading "嗯" that is NOT followed by a full stop (for example
    "嗯" followed by a comma) is left alone.

    Args:
        text: Reply text; ``None`` is treated as an empty string.

    Returns:
        The stripped text with any leading acknowledgement removed.
    """
    s = (text or "").strip()
    if not s:
        return s
    # One or more "嗯" followed by at least one full stop, repeated.
    # Accepted full stops: U+3002 (ideographic), U+FF0E (full-width) and
    # ASCII ".". They are listed explicitly in a character class: an empty
    # alternative here would make the full stop optional and strip bare "嗯".
    s2 = re.sub(r"^(?:嗯+[\u3002\uFF0E.]+\s*)+", "", s)
    return s2.strip()
def segments_from_llm_response(
    response_text: str,
    *,
    max_segments: int = 3,
    min_paragraph_chars: int = 12,
) -> list[str]:
    """Split one model reply into the segments sent as separate chat bubbles.

    The text is first cleaned with ``strip_markdown_for_chat`` and
    ``strip_parenthetical_asides_for_chat``. A literal ``[SPLIT]`` marker takes
    priority; if it yields more than one non-blank piece, those pieces are
    returned. Otherwise the single piece is split on blank lines, which covers
    replies that contain several paragraphs but no ``[SPLIT]``.

    Args:
        response_text: Raw model output; ``None`` is treated as empty.
        max_segments: Upper bound on the number of segments returned from a
            successful split.
        min_paragraph_chars: Paragraphs shorter than this are discarded in the
            blank-line split; if fewer than two remain, the reply is returned
            as a single segment.

    Returns:
        A list of segments, each with a leading "嗯。" acknowledgement removed.
        Empty list when the cleaned text is empty.
    """
    cleaned = strip_parenthetical_asides_for_chat(
        strip_markdown_for_chat((response_text or "").strip())
    )
    if not cleaned:
        return []

    def _trimmed_nonblank(pieces):
        # Strip the leading acknowledgement from each piece, drop blank ones.
        kept = []
        for piece in pieces:
            trimmed = strip_leading_en_period_ack_for_chat(piece)
            if trimmed.strip():
                kept.append(trimmed)
        return kept

    by_marker = _trimmed_nonblank(cleaned.split("[SPLIT]"))
    if len(by_marker) > 1:
        return by_marker[:max_segments]

    if by_marker:
        whole = by_marker[0]
    else:
        whole = strip_leading_en_period_ack_for_chat(cleaned)
    whole = strip_leading_en_period_ack_for_chat(whole)

    # No line break at all: nothing to split on.
    if "\n" not in whole:
        return [whole]

    by_blank_line = _trimmed_nonblank(re.split(r"\n\s*\n+", whole))
    long_enough = [p for p in by_blank_line if len(p) >= min_paragraph_chars]
    # Fall back to one bubble unless at least two real paragraphs survive.
    if len(by_blank_line) < 2 or len(long_enough) < 2:
        return [whole]
    return long_enough[:max_segments]
def nonempty_segments_or_fallback(
    segments: list[str],
    *,
    fallback: str,
) -> list[str]:
    """Drop blank segments, substituting a single fallback if none remain.

    Args:
        segments: Candidate segments; ``None`` or whitespace-only entries are
            discarded. Kept entries are returned unmodified (not stripped).
        fallback: Text used when every segment is blank; it is stripped first.

    Returns:
        The non-blank segments, or a one-element list holding the stripped
        fallback (which is ``""`` when the fallback itself is blank).
    """
    kept: list[str] = []
    for segment in segments:
        if (segment or "").strip():
            kept.append(segment)
    if not kept:
        # Always hand back exactly one element, even if it is the empty string.
        kept.append((fallback or "").strip())
    return kept
def truncate_chat_segments(
    segments: list[str],
    *,
    max_segments: int,
    max_chars_per_segment: int,
) -> list[str]:
    """Keep the first ``max_segments`` segments and cap each one's length.

    Length is counted in characters (``len``), not bytes. A segment longer
    than the cap is cut and ends with a single "…" (U+2026), so the result is
    at most ``max_chars_per_segment`` characters long.

    Args:
        segments: Candidate segments. Blank entries are skipped, but they still
            count toward ``max_segments`` because the slice is taken first.
        max_segments: Number of leading entries considered.
        max_chars_per_segment: Maximum characters per returned segment. Values
            below 1 cannot hold any text, so an over-long segment collapses to
            just the ellipsis.

    Returns:
        The stripped, possibly truncated segments.
    """
    if not segments:
        return []
    # Reserve one character for the ellipsis. Clamp at 0 so a tiny limit does
    # not turn into a negative slice bound (which would keep almost all text).
    keep = max(max_chars_per_segment - 1, 0)
    out: list[str] = []
    for raw in segments[:max_segments]:
        s = (raw or "").strip()
        if not s:
            continue
        if len(s) > max_chars_per_segment:
            # rstrip so the ellipsis never follows dangling whitespace.
            s = s[:keep].rstrip() + "\u2026"
        out.append(s)
    return out