feat(chat): host-style memoir prompts and strip parenthetical stage directions

- Add strip_parenthetical_asides_for_chat to the reply pipeline, applied before [SPLIT] handling
- Expand output_rules bans (performative parenthetical asides) and set the voice as a warm host
- Refocus opening/guided prompts on pulling the conversation toward memoir oral history
- Align interview opening fallbacks with the memoir-first tone
- Add unit tests for parenthetical stripping
2026-04-10 13:55:08 +08:00
parent deeacfb7ee
commit df6eafeae2
5 changed files with 69 additions and 24 deletions
--- a/api/app/agents/chat/reply_limits.py
+++ b/api/app/agents/chat/reply_limits.py
@@ -43,6 +43,25 @@ def strip_markdown_for_chat(text: str) -> str:
    return s


+def strip_parenthetical_asides_for_chat(text: str) -> str:
+    """
+    Remove performative parenthetical asides from model output (full-width "（…）" and half-width "(...)"), repeating until no removable pair remains.
+
+    In the oral-memoir setting assistant replies almost never need inline notes; explanatory asides such as "（约1993年）" are removed too, a deliberate product trade-off
+    consistent with banning stage directions like "（轻轻笑）". Must be called after strip_markdown_for_chat (the () in links has already been handled by then).
+    """
+    if not text:
+        return text  # empty input is returned unchanged
+    s = text
+    prev: str | None = None  # text as it was before the latest pass
+    while prev != s:  # stop once a full pass changes nothing
+        prev = s
+        s = re.sub(r"（[^）]*）", "", s)  # NOTE(review): ends at the first "）", so nested "（a（b）c）" leaves "c）" — confirm intended
+        s = re.sub(r"\([^)]*\)", "", s)  # half-width pair, same first-closing-bracket behavior
+    s = re.sub(r"[ \t]{2,}", " ", s)  # collapse runs of two or more spaces/tabs into one space
+    return s.strip()  # trim leading and trailing whitespace
+
+
 def segments_from_llm_response(
    response_text: str,
    *,
@@ -54,6 +73,7 @@ def segments_from_llm_response(
    解决「两段话 + 换行」却未写 [SPLIT] 时仍要拆气泡 / 多段 TTS 的情况。
    """
    text = strip_markdown_for_chat((response_text or "").strip())
+    text = strip_parenthetical_asides_for_chat(text)
    if not text:
        return []
    primary = [p.strip() for p in text.split("[SPLIT]") if p.strip()]