fix/ 修复AI聊天时回复markdown导致聊天气泡布局问题

2026-04-03 14:06:55 +08:00
parent 4cfa3843a7
commit 828a29748e
7 changed files with 131 additions and 362 deletions
--- a/api/app/agents/chat/output_rules.py
+++ b/api/app/agents/chat/output_rules.py
@@ -2,9 +2,11 @@


 def chat_output_rules() -> str:
-    """用户可见回复共用禁令（括号/元注释/采访腔/编造等）。"""
+    """Shared bans for user-visible replies (brackets, meta notes, interview tone, fabrication, Markdown)."""
    return (
-        "**禁止**输出括号、括号内的策略/舞台说明（例如「（先接住情绪）」「（共情）」）、"
+        "**禁止**输出 Markdown 或类排版符号：不要出现标题井号、加粗/斜体星号与下划线、"
+        "反引号代码、`[]()` 链接、列表符号或渲染用符号；只输出连贯口语，**可以**在需要分两气泡时使用字面量 "
+        "`[SPLIT]`（仅此一处方括号用法）；**禁止**输出括号、括号内的策略/舞台说明（例如「（先接住情绪）」「（共情）」）、"
        "思考过程或任何元注释——这些只存在于系统指令里，**绝不可**出现在你对用户说的话中；"
        "采访腔（「我注意到」「我想了解」）；重复确认对方已经说过或能推断出的信息；编造对方没说的细节。"
    )
--- a/api/app/agents/chat/prompts_conversation.py
+++ b/api/app/agents/chat/prompts_conversation.py
@@ -181,7 +181,7 @@ def get_opening_prompt(

 {style_examples}

-直接输出（仅自然口语）："""
+直接输出（仅自然口语，无 Markdown）："""


 def _build_era_context(current_stage: str, user_profile_context: str) -> str:
@@ -455,6 +455,6 @@ def get_guided_conversation_prompt(
 ## 不要做的
 {chat_output_rules()}

-直接输出（仅自然口语，无任何括号前缀或旁白）："""
+直接输出（仅自然口语，无 Markdown，无任何括号前缀或旁白）："""

    return prompt
--- a/api/app/agents/chat/reply_limits.py
+++ b/api/app/agents/chat/reply_limits.py
@@ -5,6 +5,44 @@ from __future__ import annotations
 import re


+def strip_markdown_for_chat(text: str) -> str:
+    """
+    Strip common Markdown the model occasionally emits so chat bubbles show plain text.
+    Keeps line breaks, blank lines and the literal [SPLIT]; deliberately not a full parser.
+    """
+    if not text:
+        return text
+    s = text
+    # Fenced code block (with optional language tag): keep the body, drop the fences
+    s = re.sub(
+        r"```(?:[^\n`]*)\n([\s\S]*?)```",
+        r"\1",
+        s,
+        flags=re.MULTILINE,
+    )
+    s = s.replace("```", "")
+    # Image ![alt](url) -> alt; link [label](url) -> label; [SPLIT](...) is left intact
+    s = re.sub(r"!\[([^\]]*)\]\([^)]*\)", r"\1", s)
+    s = re.sub(r"\[(?!SPLIT\])([^\]]*)\]\([^)]*\)", r"\1", s)
+    # ATX headings; [^\S\n] (whitespace except newline) so a match never eats a line break
+    s = re.sub(r"(?m)^#{1,6}[^\S\n]+", "", s)
+    # Bullet markers at line start (-, *, + then a space); preceding blank lines survive
+    s = re.sub(r"(?m)^[^\S\n]*[-*+][^\S\n]+", "", s)
+    # Ordered-list markers "N. " at line start only
+    s = re.sub(r"(?m)^[^\S\n]*\d+\.[^\S\n]+", "", s)
+    # Common bold / strikethrough markers
+    s = s.replace("**", "").replace("__", "")
+    s = s.replace("~~", "")
+    # Inline backticks
+    s = s.replace("`", "")
+    # Lone emphasis: *word* or _word_ (never across lines)
+    s = re.sub(r"(?<![*])\*([^*\n]+)\*(?![*])", r"\1", s)
+    s = re.sub(r"(?<![_])_([^_\n]+)_(?![_])", r"\1", s)
+    # Horizontal rule
+    s = re.sub(r"(?m)^[^\S\n]*---+[^\S\n]*$", "", s)
+    return s
+
+
 def segments_from_llm_response(
    response_text: str,
    *,
@@ -15,7 +53,7 @@ def segments_from_llm_response(
    优先按字面 [SPLIT] 拆段；若模型只输出一段、但用空行写了多段，再按段落拆。
    解决「两段话 + 换行」却未写 [SPLIT] 时仍要拆气泡 / 多段 TTS 的情况。
    """
-    text = (response_text or "").strip()
+    text = strip_markdown_for_chat((response_text or "").strip())
    if not text:
        return []
    primary = [p.strip() for p in text.split("[SPLIT]") if p.strip()]
--- a/api/tests/test_reply_segments.py
+++ b/api/tests/test_reply_segments.py
@@ -3,6 +3,7 @@
 from app.agents.chat.reply_limits import (
    nonempty_segments_or_fallback,
    segments_from_llm_response,
+    strip_markdown_for_chat,
 )


@@ -23,3 +24,20 @@ def test_short_paragraphs_not_split():

 def test_nonempty_fallback_when_all_blank():
    assert nonempty_segments_or_fallback(["", "  "], fallback="ok") == ["ok"]
+
+
+def test_split_marker_strips_markdown():  # each [SPLIT] segment comes back without bold/emphasis markers
+    assert segments_from_llm_response("**A**[SPLIT]_B_", max_segments=3) == ["A", "B"]
+
+
+def test_paragraph_split_strips_markdown():  # blank-line paragraphs become segments with bold/link markup removed
+    a = "**太为你高兴了！在上海大剧院的舞台绽放，聚光灯下的你。**"
+    b = "[详情](https://e.com)说到舞台，我忽然想起你黄浦江边的童年。"
+    assert segments_from_llm_response(f"{a}\n\n{b}", max_segments=3) == [
+        "太为你高兴了！在上海大剧院的舞台绽放，聚光灯下的你。",
+        "详情说到舞台，我忽然想起你黄浦江边的童年。",
+    ]
+
+
+def test_strip_markdown_for_chat_preserves_split_token():  # the literal [SPLIT] survives Markdown stripping
+    assert "[SPLIT]" in strip_markdown_for_chat("a **b** [SPLIT] c")