fix(tts): gate auto reply by ENABLE_TTS; allow on-demand and manual playback

- Pipeline: skip _send_tts_audio only for non-manual (auto-reply) requests when
  ENABLE_TTS=false; remove the enable_tts early return from
  handle_tts_request_on_demand.
- Tencent TTS: PrimaryLanguage/chunking follow user language preference only.
- Expo: let manual tts_audio bypass late-segment playback gate after interrupt.
- Docs: clarify ENABLE_TTS vs tts_request in api/.env.example and TTSProvider port.
- Tests: add manual bypass cases; adjust pipeline language tests for en+Chinese text.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Kevin
2026-05-11 17:15:02 +08:00
parent ccdc4e4277
commit 93be60f74c
7 changed files with 101 additions and 17 deletions

View File

@@ -82,9 +82,8 @@ class TencentTTSProvider:
self._secret_id = secret_id
self._secret_key = secret_key
self._voice_type = voice_type
# 英文音色未单独配置时回落到 501004(月华,腾讯云大模型音色,支持中英混合)。
# 大模型音色 501xxx 系列在 PrimaryLanguage=1/2 下均支持中英混读,不会被 Tencent
# 以 InvalidParameterValue.PrimaryLanguage 拒绝;与之对应必须配合 ModelType=1。
# 英文音色未单独配置时回落到 501004(月华,腾讯云大模型音色)。
# 大模型音色 501xxx 须配合 ModelType=1(见 Tencent TextToVoice 文档)。
self._voice_type_en = voice_type_en if voice_type_en is not None else 501004
self._codec = codec
self._client = None
@@ -211,6 +210,8 @@ class TencentTTSProvider:
)
return b""
# ``language`` 由 pipeline 从用户 ``language_preference`` 解析(仅 'en' / 其它→中文路径),
# 与助手正文实际语种无关:产品规则是 TTS 主语言跟用户语言一致。
is_en = (language or "zh").strip().lower() == "en"
primary_language = PRIMARY_LANGUAGE_EN if is_en else PRIMARY_LANGUAGE_ZH
default_voice = self._voice_type_en if is_en else self._voice_type

View File

@@ -115,7 +115,8 @@ async def _send_tts_audio(
settings.enable_tts,
settings.tts_provider,
)
if not settings.enable_tts:
# enable_tts:仅禁用「助手回复自动生成 TTS」(want_tts 路径);用户点喇叭(manual=True)仍可合成。
if not manual and not settings.enable_tts:
logger.info(
"pipeline._send_tts_audio result conversation_id={} chunk_index={} ok=False "
"url_set=False audio_bytes_len=0 reason=enable_tts_false",
@@ -269,14 +270,6 @@ async def handle_tts_request_on_demand(
settings.enable_tts,
settings.tts_provider,
)
if not settings.enable_tts:
logger.info(
"pipeline.handle_tts_request_on_demand result ok=False reason=未开启语音合成 "
"conversation_id={} assistant_message_id={}",
conversation_id,
assistant_message_id,
)
return False, "未开启语音合成"
conv = await db.get(Conversation, conversation_id)
if not conv or conv.user_id != user_id or conv.deleted_at is not None:

View File

@@ -10,6 +10,6 @@ class TTSProvider(Protocol):
) -> bytes:
"""Convert text to speech audio bytes.
language: 'zh' or 'en'. Adapters that natively detect language may ignore it.
language: 'zh' or 'en' — 调用方应使用用户语言偏好(与正文语种无关);各 adapter 按自身能力解释。
"""
...