fix(tts): gate auto reply by ENABLE_TTS; allow on-demand and manual playback
- Pipeline: skip _send_tts_audio only for non-manual requests when ENABLE_TTS=false; remove the enable_tts early return from handle_tts_request_on_demand.
- Tencent TTS: PrimaryLanguage/chunking follow the user's language preference only.
- Expo: let manual tts_audio bypass the late-segment playback gate after an interrupt.
- Docs: clarify ENABLE_TTS vs tts_request in api/.env.example and the TTSProvider port.
- Tests: add manual bypass cases; adjust pipeline language tests for en+Chinese text.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -82,9 +82,8 @@ class TencentTTSProvider:
|
||||
self._secret_id = secret_id
|
||||
self._secret_key = secret_key
|
||||
self._voice_type = voice_type
|
||||
# 英文音色未单独配置时回落到 501004(月华,腾讯云大模型音色,支持中英混合)。
|
||||
# 大模型音色 501xxx 系列在 PrimaryLanguage=1/2 下均支持中英混读,不会被 Tencent
|
||||
# 以 InvalidParameterValue.PrimaryLanguage 拒绝;与之对应必须配合 ModelType=1。
|
||||
# 英文音色未单独配置时回落到 501004(月华,腾讯云大模型音色)。
|
||||
# 大模型音色 501xxx 须配合 ModelType=1(见 Tencent TextToVoice 文档)。
|
||||
self._voice_type_en = voice_type_en if voice_type_en is not None else 501004
|
||||
self._codec = codec
|
||||
self._client = None
|
||||
@@ -211,6 +210,8 @@ class TencentTTSProvider:
|
||||
)
|
||||
return b""
|
||||
|
||||
# ``language`` 由 pipeline 从用户 ``language_preference`` 解析(仅 'en' / 其它→中文路径),
|
||||
# 与助手正文实际语种无关:产品规则是 TTS 主语言跟用户语言一致。
|
||||
is_en = (language or "zh").strip().lower() == "en"
|
||||
primary_language = PRIMARY_LANGUAGE_EN if is_en else PRIMARY_LANGUAGE_ZH
|
||||
default_voice = self._voice_type_en if is_en else self._voice_type
|
||||
|
||||
@@ -115,7 +115,8 @@ async def _send_tts_audio(
|
||||
settings.enable_tts,
|
||||
settings.tts_provider,
|
||||
)
|
||||
if not settings.enable_tts:
|
||||
# enable_tts:仅禁用「助手回复自动生成 TTS」(want_tts 路径);用户点喇叭(manual=True)仍可合成。
|
||||
if not manual and not settings.enable_tts:
|
||||
logger.info(
|
||||
"pipeline._send_tts_audio result conversation_id={} chunk_index={} ok=False "
|
||||
"url_set=False audio_bytes_len=0 reason=enable_tts_false",
|
||||
@@ -269,14 +270,6 @@ async def handle_tts_request_on_demand(
|
||||
settings.enable_tts,
|
||||
settings.tts_provider,
|
||||
)
|
||||
if not settings.enable_tts:
|
||||
logger.info(
|
||||
"pipeline.handle_tts_request_on_demand result ok=False reason=未开启语音合成 "
|
||||
"conversation_id={} assistant_message_id={}",
|
||||
conversation_id,
|
||||
assistant_message_id,
|
||||
)
|
||||
return False, "未开启语音合成"
|
||||
|
||||
conv = await db.get(Conversation, conversation_id)
|
||||
if not conv or conv.user_id != user_id or conv.deleted_at is not None:
|
||||
|
||||
@@ -10,6 +10,6 @@ class TTSProvider(Protocol):
|
||||
) -> bytes:
|
||||
"""Convert text to speech audio bytes.
|
||||
|
||||
language: 'zh' or 'en'. Adapters that natively detect language may ignore it.
|
||||
language: 'zh' or 'en' — 调用方应使用用户语言偏好(与正文语种无关);各 adapter 按自身能力解释。
|
||||
"""
|
||||
...
|
||||
|
||||
Reference in New Issue
Block a user