Files
life-echo/api/tests/test_pipeline_language_skip_tts.py
Kevin 93be60f74c fix(tts): gate auto reply by ENABLE_TTS; allow on-demand and manual playback
- Pipeline: skip _send_tts_audio only for non-manual when ENABLE_TTS=false;
  remove enable_tts early return from handle_tts_request_on_demand.
- Tencent TTS: PrimaryLanguage/chunking follow user language preference only.
- Expo: let manual tts_audio bypass late-segment playback gate after interrupt.
- Docs: clarify ENABLE_TTS vs tts_request in api/.env.example and TTSProvider port.
- Tests: add manual bypass cases; adjust pipeline language tests for en+Chinese text.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-11 17:15:02 +08:00

224 lines
8.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""WS pipeline 语言解析与 Tencent TTS 英文合成参数。"""
from __future__ import annotations
from types import SimpleNamespace
from unittest.mock import MagicMock, patch
import pytest
from app.adapters.tts.tencent_tts import (
MODEL_TYPE_LLM,
PRIMARY_LANGUAGE_EN,
PRIMARY_LANGUAGE_ZH,
TencentTTSProvider,
)
from app.features.conversation.ws.pipeline import _resolve_user_language
# ── pipeline._resolve_user_language ─────────────────────────────────
def test_resolve_user_language_zh_default_when_missing() -> None:
    """Check that absent or ``zh`` preferences all resolve to ``"zh"``.

    Covers: no user object at all, a user without the attribute, a user
    whose preference is ``None``, and a user whose preference is ``"zh"``.
    """
    candidates = (
        None,
        SimpleNamespace(),
        SimpleNamespace(language_preference=None),
        SimpleNamespace(language_preference="zh"),
    )
    for user in candidates:
        assert _resolve_user_language(user) == "zh"
def test_resolve_user_language_en_only_for_en_token() -> None:
    """Check that ``en`` is recognised regardless of case or padding.

    The lowercase, uppercase and whitespace-padded spellings are each
    expected to resolve to ``"en"``.
    """
    for raw_preference in ("en", "EN", " en "):
        user = SimpleNamespace(language_preference=raw_preference)
        assert _resolve_user_language(user) == "en"
def test_resolve_user_language_unknown_falls_back_to_zh() -> None:
    """Check that an unsupported or empty preference resolves to ``"zh"``."""
    for raw_preference in ("ja", ""):
        user = SimpleNamespace(language_preference=raw_preference)
        assert _resolve_user_language(user) == "zh"
# ── TencentTTSProvider 语言分支 ──────────────────────────────────────
@pytest.mark.asyncio
async def test_tencent_tts_zh_uses_primary_language_1_and_zh_voice() -> None:
    """Check the arguments forwarded to ``_synthesize_sync`` for ``language="zh"``.

    The private sync call is replaced by a recorder; the test then verifies
    that the Chinese primary-language constant and the configured voice type
    were passed, and that the recorder's bytes are returned unchanged.
    """
    provider = TencentTTSProvider(
        secret_id="id",
        secret_key="key",
        voice_type=501004,
        voice_type_en=501004,
    )
    recorded: dict = {}

    def fake_sync(text: str, voice_type: int, primary_language: int) -> bytes:
        # Remember every argument of the (patched) low-level call.
        recorded.update(
            text=text,
            voice_type=voice_type,
            primary_language=primary_language,
        )
        return b"AUDIO"

    with patch.object(provider, "_synthesize_sync", side_effect=fake_sync):
        audio = await provider.synthesize("你好", language="zh")

    assert audio == b"AUDIO"
    assert recorded["primary_language"] == PRIMARY_LANGUAGE_ZH
    assert recorded["voice_type"] == 501004
@pytest.mark.asyncio
async def test_tencent_tts_en_user_language_uses_primary_en_even_if_text_is_chinese() -> None:
    """Check that the primary language follows the user preference, not the text.

    Even when the user language is ``en`` and the text itself is Chinese,
    the English primary-language constant must be forwarded to the
    low-level synthesis call.
    """
    provider = TencentTTSProvider(
        secret_id="id",
        secret_key="key",
        voice_type=501004,
        voice_type_en=501004,
    )
    recorded: dict = {}

    def fake_sync(text: str, voice_type: int, primary_language: int) -> bytes:
        # Only the language and voice arguments are captured here.
        recorded.update(primary_language=primary_language, voice_type=voice_type)
        return b"OK"

    with patch.object(provider, "_synthesize_sync", side_effect=fake_sync):
        audio = await provider.synthesize("这是中文回复。", language="en")

    assert audio == b"OK"
    assert recorded["primary_language"] == PRIMARY_LANGUAGE_EN
@pytest.mark.asyncio
async def test_tencent_tts_en_uses_primary_language_2_and_en_voice() -> None:
    """Check the arguments forwarded to ``_synthesize_sync`` for ``language="en"``.

    With English text and an English preference, the English
    primary-language constant and the configured English voice type are
    expected, and the recorder's bytes must be returned unchanged.
    """
    provider = TencentTTSProvider(
        secret_id="id",
        secret_key="key",
        voice_type=501004,
        voice_type_en=501004,
    )
    recorded: dict = {}

    def fake_sync(text: str, voice_type: int, primary_language: int) -> bytes:
        # Remember every argument of the (patched) low-level call.
        recorded.update(
            text=text,
            voice_type=voice_type,
            primary_language=primary_language,
        )
        return b"AUDIO_EN"

    with patch.object(provider, "_synthesize_sync", side_effect=fake_sync):
        audio = await provider.synthesize("Hello there.", language="en")

    assert audio == b"AUDIO_EN"
    assert recorded["primary_language"] == PRIMARY_LANGUAGE_EN
    assert recorded["voice_type"] == 501004
@pytest.mark.asyncio
async def test_tencent_tts_en_uses_relaxed_chunk_size() -> None:
    """Check that the English path chunks less aggressively than the zh path.

    A 399-character text without sentence terminators is synthesized twice:
    with ``language="en"`` it must reach the low-level call as one piece,
    with ``language="zh"`` it must be split into several pieces.
    """
    provider = TencentTTSProvider(
        secret_id="id",
        secret_key="key",
        voice_type=501004,
        voice_type_en=501004,
    )

    def length_recorder(sink: list[int]):
        """Build a ``_synthesize_sync`` stand-in that logs each chunk length."""

        def fake(text: str, voice_type: int, primary_language: int) -> bytes:
            sink.append(len(text))
            return b"X"

        return fake

    en_lengths: list[int] = []
    zh_lengths: list[int] = []
    long_text = ("Word " * 80).strip()  # 399 chars, no sentence terminators

    for language, sink in (("en", en_lengths), ("zh", zh_lengths)):
        with patch.object(
            provider, "_synthesize_sync", side_effect=length_recorder(sink)
        ):
            await provider.synthesize(long_text, language=language)

    # English sends the whole text in a single request; Chinese must split it.
    assert en_lengths == [len(long_text)]
    assert len(zh_lengths) > 1
@pytest.mark.asyncio
async def test_tencent_tts_returns_empty_when_credentials_missing() -> None:
    """Check that synthesis yields empty bytes when both credentials are blank."""
    unconfigured = TencentTTSProvider(secret_id="", secret_key="")
    audio = await unconfigured.synthesize("Hello", language="en")
    assert audio == b""
@pytest.mark.asyncio
async def test_tencent_tts_voice_type_en_falls_back_to_english_voice_when_unset() -> None:
    """Check the default English voice when ``voice_type_en`` is not supplied.

    Per the original author's note, the expected fallback is 501004
    (the "Yuehua" LLM voice, natively mixed Chinese/English).
    """
    provider = TencentTTSProvider(secret_id="id", secret_key="key")
    recorded: dict = {}

    def fake_sync(text: str, voice_type: int, primary_language: int) -> bytes:
        recorded["voice_type"] = voice_type
        return b"X"

    with patch.object(provider, "_synthesize_sync", side_effect=fake_sync):
        await provider.synthesize("Hi", language="en")

    chosen_voice = recorded["voice_type"]
    assert chosen_voice == 501004
    # Regression guard: explicitly rule out the legacy Chinese voices
    # (1001 / 1002 and the like) as a fallback.
    assert chosen_voice not in (1001, 1002)
# ── Key regression: _synthesize_sync must set ModelType=1 in the request (required for LLM voice routing) ──
@pytest.mark.asyncio
async def test_tencent_tts_synthesize_sync_sets_model_type_1() -> None:
    """Check that the SDK request carries ``ModelType=1`` for the LLM voice.

    Per the original author's note, voice 501004 ("Yuehua") is an LLM voice
    and TextToVoice must explicitly send ModelType=1, otherwise the legacy
    model rejects the request and empty audio is returned silently. The SDK
    client is mocked here so the request fields can be captured and checked.
    """
    from base64 import b64encode

    provider = TencentTTSProvider(
        secret_id="id",
        secret_key="key",
        voice_type=501004,
        voice_type_en=501004,
    )
    captured: dict = {}
    request_fields = (
        "VoiceType",
        "PrimaryLanguage",
        "ModelType",
        "Codec",
        "SampleRate",
        "Text",
    )

    def _fake_text_to_voice(req):
        # Snapshot the request attributes, then answer with base64 audio.
        for field in request_fields:
            captured[field] = getattr(req, field)
        response = MagicMock()
        response.Audio = b64encode(b"AUDIO").decode("ascii")
        response.RequestId = "req-test"
        return response

    sdk_client = MagicMock()
    sdk_client.TextToVoice.side_effect = _fake_text_to_voice

    with patch.object(provider, "_get_client", return_value=sdk_client):
        audio = await provider.synthesize("你好", language="zh")

    assert audio == b"AUDIO"
    assert captured["ModelType"] == MODEL_TYPE_LLM == 1
    assert captured["VoiceType"] == 501004
    assert captured["PrimaryLanguage"] == PRIMARY_LANGUAGE_ZH
    assert captured["Text"] == "你好"
# ── Port compatibility: the OpenAI implementation accepts the language kwarg ──
@pytest.mark.asyncio
async def test_openai_tts_accepts_language_kwarg() -> None:
    """Check port-signature compatibility of the OpenAI adapter.

    The adapter must accept the ``language`` keyword even if it does not
    use it; with an empty API key both calls are expected to return ``b""``.
    """
    from app.adapters.tts.openai_tts import OpenAITTSProvider

    provider = OpenAITTSProvider(api_key="")  # No client → returns b""
    for text, language in (("hi", "en"), ("你好", "zh")):
        assert await provider.synthesize(text, language=language) == b""