fix(tts): gate auto reply by ENABLE_TTS; allow on-demand and manual playback

- Pipeline: skip _send_tts_audio only for non-manual when ENABLE_TTS=false; remove enable_tts early return from handle_tts_request_on_demand.
- Tencent TTS: PrimaryLanguage/chunking follow user language preference only.
- Expo: let manual tts_audio bypass late-segment playback gate after interrupt.
- Docs: clarify ENABLE_TTS vs tts_request in api/.env.example and TTSProvider port.
- Tests: add manual bypass cases; adjust pipeline language tests for en+Chinese text.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-11 17:15:02 +08:00
parent ccdc4e4277
commit 93be60f74c
7 changed files with 101 additions and 17 deletions
--- a/api/app/adapters/tts/tencent_tts.py
+++ b/api/app/adapters/tts/tencent_tts.py
@@ -82,9 +82,8 @@ class TencentTTSProvider:
        self._secret_id = secret_id
        self._secret_key = secret_key
        self._voice_type = voice_type
-        # 英文音色未单独配置时回落到 501004（月华，腾讯云大模型音色，支持中英混合）。
-        # 大模型音色 501xxx 系列在 PrimaryLanguage=1/2 下均支持中英混读，不会被 Tencent
-        # 以 InvalidParameterValue.PrimaryLanguage 拒绝；与之对应必须配合 ModelType=1。
+        # 英文音色未单独配置时回落到 501004（月华，腾讯云大模型音色）。
+        # 大模型音色 501xxx 须配合 ModelType=1（见 Tencent TextToVoice 文档）。
        self._voice_type_en = voice_type_en if voice_type_en is not None else 501004
        self._codec = codec
        self._client = None
@@ -211,6 +210,8 @@ class TencentTTSProvider:
            )
            return b""

+        # ``language`` 由 pipeline 从用户 ``language_preference`` 解析（仅 'en' / 其它→中文路径），
+        # 与助手正文实际语种无关：产品规则是 TTS 主语言跟用户语言一致。
        is_en = (language or "zh").strip().lower() == "en"
        primary_language = PRIMARY_LANGUAGE_EN if is_en else PRIMARY_LANGUAGE_ZH
        default_voice = self._voice_type_en if is_en else self._voice_type
--- a/api/app/features/conversation/ws/pipeline.py
+++ b/api/app/features/conversation/ws/pipeline.py
@@ -115,7 +115,8 @@ async def _send_tts_audio(
        settings.enable_tts,
        settings.tts_provider,
    )
-    if not settings.enable_tts:
+    # enable_tts：仅禁用「助手回复自动生成 TTS」（want_tts 路径）；用户点喇叭（manual=True）仍可合成。
+    if not manual and not settings.enable_tts:
        logger.info(
            "pipeline._send_tts_audio result conversation_id={} chunk_index={} ok=False "
            "url_set=False audio_bytes_len=0 reason=enable_tts_false",
@@ -269,14 +270,6 @@ async def handle_tts_request_on_demand(
        settings.enable_tts,
        settings.tts_provider,
    )
-    if not settings.enable_tts:
-        logger.info(
-            "pipeline.handle_tts_request_on_demand result ok=False reason=未开启语音合成 "
-            "conversation_id={} assistant_message_id={}",
-            conversation_id,
-            assistant_message_id,
-        )
-        return False, "未开启语音合成"

    conv = await db.get(Conversation, conversation_id)
    if not conv or conv.user_id != user_id or conv.deleted_at is not None:
--- a/api/app/ports/tts.py
+++ b/api/app/ports/tts.py
@@ -10,6 +10,6 @@ class TTSProvider(Protocol):
    ) -> bytes:
        """Convert text to speech audio bytes.

-        language: 'zh' or 'en'. Adapters that natively detect language may ignore it.
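+        language: 'zh' or 'en' — callers should pass the user's language preference (independent of the language of the text itself); each adapter interprets it according to its own capabilities.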
        """
        ...