fix/ 修复AI聊天时回复markdown导致聊天气泡布局问题
This commit is contained in:
@@ -2,9 +2,11 @@
|
||||
|
||||
|
||||
def chat_output_rules() -> str:
|
||||
"""用户可见回复共用禁令(括号/元注释/采访腔/编造等)。"""
|
||||
"""用户可见回复共用禁令(括号/元注释/采访腔/编造/Markdown 等)。"""
|
||||
return (
|
||||
"**禁止**输出括号、括号内的策略/舞台说明(例如「(先接住情绪)」「(共情)」)、"
|
||||
"**禁止**输出 Markdown 或类排版符号:不要出现标题井号、加粗/斜体星号与下划线、"
|
||||
"反引号代码、`[]()` 链接、列表符号或渲染用符号;只输出连贯口语,**可以**在需要分两气泡时使用字面量 "
|
||||
"`[SPLIT]`(仅此一处方括号用法);**禁止**输出括号、括号内的策略/舞台说明(例如「(先接住情绪)」「(共情)」)、"
|
||||
"思考过程或任何元注释——这些只存在于系统指令里,**绝不可**出现在你对用户说的话中;"
|
||||
"采访腔(「我注意到」「我想了解」);重复确认对方已经说过或能推断出的信息;编造对方没说的细节。"
|
||||
)
|
||||
|
||||
@@ -181,7 +181,7 @@ def get_opening_prompt(
|
||||
|
||||
{style_examples}
|
||||
|
||||
直接输出(仅自然口语):"""
|
||||
直接输出(仅自然口语,无 Markdown):"""
|
||||
|
||||
|
||||
def _build_era_context(current_stage: str, user_profile_context: str) -> str:
|
||||
@@ -455,6 +455,6 @@ def get_guided_conversation_prompt(
|
||||
## 不要做的
|
||||
{chat_output_rules()}
|
||||
|
||||
直接输出(仅自然口语,无任何括号前缀或旁白):"""
|
||||
直接输出(仅自然口语,无 Markdown,无任何括号前缀或旁白):"""
|
||||
|
||||
return prompt
|
||||
|
||||
@@ -5,6 +5,44 @@ from __future__ import annotations
|
||||
import re
|
||||
|
||||
|
||||
def strip_markdown_for_chat(text: str) -> str:
    """Reduce common Markdown markup to plain text for chat-bubble display.

    This is a deliberately small set of regex/string substitutions, not a
    Markdown parser. Line breaks and the literal ``[SPLIT]`` token are kept.

    Args:
        text: Raw model reply. Falsy values (``""`` or ``None``) are returned
            unchanged.

    Returns:
        ``text`` with fences, links/images, headings, list markers, bold,
        strikethrough, backticks, single-marker emphasis and ``---`` rules
        removed.
    """
    if not text:
        return text
    s = text
    # Fenced code blocks (with optional language tag on the opening line):
    # keep the inner text, drop the fences.
    s = re.sub(r"```(?:[^\n`]*)\n([\s\S]*?)```", r"\1", s)
    # Any unmatched fence that is left over.
    s = s.replace("```", "")
    # Image ![alt](url) -> alt, then link [label](url) -> label. A bare
    # "[SPLIT]" is untouched because it is not followed by "(".
    s = re.sub(r"!\[([^\]]*)\]\([^)]*\)", r"\1", s)
    s = re.sub(r"\[([^\]]*)\]\([^)]*\)", r"\1", s)
    # The line-start patterns below use [ \t] rather than \s on purpose:
    # \s also matches "\n", so a match could begin on a preceding blank line
    # and swallow it, merging two paragraphs into one.
    # ATX headings ("# " .. "###### "), including a marker alone on its line.
    s = re.sub(r"(?m)^#{1,6}(?:[ \t]+|$)", "", s)
    # Unordered list markers: -, * or + at line start followed by a space.
    s = re.sub(r"(?m)^[ \t]*[-*+][ \t]+", "", s)
    # Ordered list markers: "<digits>. " at line start.
    s = re.sub(r"(?m)^[ \t]*\d+\.[ \t]+", "", s)
    # Bold and strikethrough delimiters.
    s = s.replace("**", "").replace("__", "")
    s = s.replace("~~", "")
    # Inline code backticks.
    s = s.replace("`", "")
    # Remaining single-marker emphasis, *word* or _word_, within one line.
    s = re.sub(r"(?<![*])\*([^*\n]+)\*(?![*])", r"\1", s)
    s = re.sub(r"(?<![_])_([^_\n]+)_(?![_])", r"\1", s)
    # Horizontal rule made of three or more dashes on its own line.
    s = re.sub(r"(?m)^\s*---+\s*$", "", s)
    return s
|
||||
|
||||
|
||||
def segments_from_llm_response(
|
||||
response_text: str,
|
||||
*,
|
||||
@@ -15,7 +53,7 @@ def segments_from_llm_response(
|
||||
优先按字面 [SPLIT] 拆段;若模型只输出一段、但用空行写了多段,再按段落拆。
|
||||
解决「两段话 + 换行」却未写 [SPLIT] 时仍要拆气泡 / 多段 TTS 的情况。
|
||||
"""
|
||||
text = (response_text or "").strip()
|
||||
text = strip_markdown_for_chat((response_text or "").strip())
|
||||
if not text:
|
||||
return []
|
||||
primary = [p.strip() for p in text.split("[SPLIT]") if p.strip()]
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
from app.agents.chat.reply_limits import (
|
||||
nonempty_segments_or_fallback,
|
||||
segments_from_llm_response,
|
||||
strip_markdown_for_chat,
|
||||
)
|
||||
|
||||
|
||||
@@ -23,3 +24,20 @@ def test_short_paragraphs_not_split():
|
||||
|
||||
def test_nonempty_fallback_when_all_blank():
    """An empty string plus a whitespace-only string yields just the fallback."""
    blank_segments = ["", " "]
    result = nonempty_segments_or_fallback(blank_segments, fallback="ok")
    assert result == ["ok"]
|
||||
|
||||
|
||||
def test_split_marker_strips_markdown():
    """Segments split on [SPLIT] come back without bold/italic markers."""
    raw_reply = "**A**[SPLIT]_B_"
    segments = segments_from_llm_response(raw_reply, max_segments=3)
    assert segments == ["A", "B"]
|
||||
|
||||
|
||||
def test_paragraph_split_strips_markdown():
    """Two blank-line-separated paragraphs are split and stripped of Markdown."""
    first_paragraph = "**太为你高兴了!在上海大剧院的舞台绽放,聚光灯下的你。**"
    second_paragraph = "[详情](https://e.com)说到舞台,我忽然想起你黄浦江边的童年。"
    expected_segments = [
        "太为你高兴了!在上海大剧院的舞台绽放,聚光灯下的你。",
        "详情说到舞台,我忽然想起你黄浦江边的童年。",
    ]
    segments = segments_from_llm_response(
        f"{first_paragraph}\n\n{second_paragraph}",
        max_segments=3,
    )
    assert segments == expected_segments
|
||||
|
||||
|
||||
def test_strip_markdown_for_chat_preserves_split_token():
    """The literal [SPLIT] token is still present after Markdown stripping."""
    cleaned = strip_markdown_for_chat("a **b** [SPLIT] c")
    assert "[SPLIT]" in cleaned
|
||||
|
||||
Reference in New Issue
Block a user