"""WS pipeline 语言解析与 Tencent TTS 英文合成参数。""" from __future__ import annotations from types import SimpleNamespace from unittest.mock import MagicMock, patch import pytest from app.adapters.tts.tencent_tts import ( MODEL_TYPE_LLM, PRIMARY_LANGUAGE_EN, PRIMARY_LANGUAGE_ZH, TencentTTSProvider, ) from app.features.conversation.ws.pipeline import _resolve_user_language # ── pipeline._resolve_user_language ───────────────────────────────── def test_resolve_user_language_zh_default_when_missing() -> None: assert _resolve_user_language(None) == "zh" assert _resolve_user_language(SimpleNamespace()) == "zh" assert _resolve_user_language(SimpleNamespace(language_preference=None)) == "zh" assert _resolve_user_language(SimpleNamespace(language_preference="zh")) == "zh" def test_resolve_user_language_en_only_for_en_token() -> None: assert _resolve_user_language(SimpleNamespace(language_preference="en")) == "en" assert _resolve_user_language(SimpleNamespace(language_preference="EN")) == "en" assert _resolve_user_language(SimpleNamespace(language_preference=" en ")) == "en" def test_resolve_user_language_unknown_falls_back_to_zh() -> None: assert _resolve_user_language(SimpleNamespace(language_preference="ja")) == "zh" assert _resolve_user_language(SimpleNamespace(language_preference="")) == "zh" # ── TencentTTSProvider 语言分支 ────────────────────────────────────── @pytest.mark.asyncio async def test_tencent_tts_zh_uses_primary_language_1_and_zh_voice() -> None: provider = TencentTTSProvider( secret_id="id", secret_key="key", voice_type=501004, voice_type_en=501004, ) seen: dict = {} def fake_sync(text: str, voice_type: int, primary_language: int) -> bytes: seen["text"] = text seen["voice_type"] = voice_type seen["primary_language"] = primary_language return b"AUDIO" with patch.object(provider, "_synthesize_sync", side_effect=fake_sync): out = await provider.synthesize("你好", language="zh") assert out == b"AUDIO" assert seen["primary_language"] == PRIMARY_LANGUAGE_ZH assert seen["voice_type"] == 501004 @pytest.mark.asyncio async def test_tencent_tts_en_user_language_uses_primary_en_even_if_text_is_chinese() -> None: """主语言与用户偏好一致:即使用户语言为 en 且正文为中文,也向 Tencent 提交 PrimaryLanguage=2。""" provider = TencentTTSProvider( secret_id="id", secret_key="key", voice_type=501004, voice_type_en=501004, ) seen: dict = {} def fake_sync(text: str, voice_type: int, primary_language: int) -> bytes: seen["primary_language"] = primary_language seen["voice_type"] = voice_type return b"OK" with patch.object(provider, "_synthesize_sync", side_effect=fake_sync): out = await provider.synthesize("这是中文回复。", language="en") assert out == b"OK" assert seen["primary_language"] == PRIMARY_LANGUAGE_EN @pytest.mark.asyncio async def test_tencent_tts_en_uses_primary_language_2_and_en_voice() -> None: provider = TencentTTSProvider( secret_id="id", secret_key="key", voice_type=501004, voice_type_en=501004, ) seen: dict = {} def fake_sync(text: str, voice_type: int, primary_language: int) -> bytes: seen["text"] = text seen["voice_type"] = voice_type seen["primary_language"] = primary_language return b"AUDIO_EN" with patch.object(provider, "_synthesize_sync", side_effect=fake_sync): out = await provider.synthesize("Hello there.", language="en") assert out == b"AUDIO_EN" assert seen["primary_language"] == PRIMARY_LANGUAGE_EN assert seen["voice_type"] == 501004 @pytest.mark.asyncio async def test_tencent_tts_en_uses_relaxed_chunk_size() -> None: """English text up to ~480 letters fits in a single chunk; zh path would split it.""" provider = TencentTTSProvider( secret_id="id", secret_key="key", voice_type=501004, voice_type_en=501004, ) en_chunks: list[int] = [] zh_chunks: list[int] = [] def fake_en(text: str, voice_type: int, primary_language: int) -> bytes: en_chunks.append(len(text)) return b"X" def fake_zh(text: str, voice_type: int, primary_language: int) -> bytes: zh_chunks.append(len(text)) return b"X" text_400 = ("Word " * 80).strip() # 399 chars, no sentence terminators with patch.object(provider, "_synthesize_sync", side_effect=fake_en): await provider.synthesize(text_400, language="en") with patch.object(provider, "_synthesize_sync", side_effect=fake_zh): await provider.synthesize(text_400, language="zh") # English allows the 400-char text in a single request; Chinese path must split assert en_chunks == [len(text_400)] assert len(zh_chunks) > 1 @pytest.mark.asyncio async def test_tencent_tts_returns_empty_when_credentials_missing() -> None: provider = TencentTTSProvider(secret_id="", secret_key="") out = await provider.synthesize("Hello", language="en") assert out == b"" @pytest.mark.asyncio async def test_tencent_tts_voice_type_en_falls_back_to_english_voice_when_unset() -> None: """缺省 voice_type_en 时回落到 501004(月华,大模型音色,原生中英混合)。""" provider = TencentTTSProvider(secret_id="id", secret_key="key") seen: dict = {} def fake_sync(text: str, voice_type: int, primary_language: int) -> bytes: seen["voice_type"] = voice_type return b"X" with patch.object(provider, "_synthesize_sync", side_effect=fake_sync): await provider.synthesize("Hi", language="en") assert seen["voice_type"] == 501004 # 显式断言不是中文老精品音色(防止回归):禁止回落到 1001 / 1002 等 assert seen["voice_type"] not in (1001, 1002) # ── 关键回归:_synthesize_sync 必须在请求中设置 ModelType=1(大模型音色路由所需) ── @pytest.mark.asyncio async def test_tencent_tts_synthesize_sync_sets_model_type_1() -> None: """501004 月华属于大模型音色,TextToVoice 必须显式带 ModelType=1,否则会被旧模型 拒绝并静默返回空音频。这里 mock SDK client 捕获 req.ModelType 防止回归。""" import base64 as _b64 provider = TencentTTSProvider( secret_id="id", secret_key="key", voice_type=501004, voice_type_en=501004, ) captured: dict = {} def _fake_text_to_voice(req): captured["VoiceType"] = req.VoiceType captured["PrimaryLanguage"] = req.PrimaryLanguage captured["ModelType"] = req.ModelType captured["Codec"] = req.Codec captured["SampleRate"] = req.SampleRate captured["Text"] = req.Text fake_resp = MagicMock() fake_resp.Audio = _b64.b64encode(b"AUDIO").decode("ascii") fake_resp.RequestId = "req-test" return fake_resp fake_client = MagicMock() fake_client.TextToVoice.side_effect = _fake_text_to_voice with patch.object(provider, "_get_client", return_value=fake_client): out = await provider.synthesize("你好", language="zh") assert out == b"AUDIO" assert captured["ModelType"] == MODEL_TYPE_LLM == 1 assert captured["VoiceType"] == 501004 assert captured["PrimaryLanguage"] == PRIMARY_LANGUAGE_ZH assert captured["Text"] == "你好" # ── port 兼容性:OpenAI 实现接受 language kwarg ────────────────────── @pytest.mark.asyncio async def test_openai_tts_accepts_language_kwarg() -> None: """端口签名兼容:OpenAI adapter 必须接受 language(即使不使用)。""" from app.adapters.tts.openai_tts import OpenAITTSProvider provider = OpenAITTSProvider(api_key="") # No client → returns b"" assert await provider.synthesize("hi", language="en") == b"" assert await provider.synthesize("你好", language="zh") == b""