"""Tencent Cloud TTS adapter — implements TTSProvider port."""

import asyncio
import base64
import io
import re
import uuid
import wave

from app.core.logging import get_logger

logger = get_logger(__name__)

# OpenAI voice name -> Tencent VoiceType ID
VOICE_MAP: dict[str, int] = {
    "alloy": 1001,
    "echo": 1002,
    "fable": 1003,
    "onyx": 1004,
    "nova": 1005,
    "shimmer": 1006,
}

# Chinese: 150 characters / English: 500 letters; use the conservative value.
MAX_CHARS_PER_REQUEST = 150

# Characters that end a sentence: ASCII and full-width forms, plus newline.
_SENTENCE_ENDINGS = "。！？!?.\n"

# One match is a (possibly empty) body followed by a run of terminators, or a
# trailing body that has no terminator. Concatenating every match reproduces
# the input, so no text is lost by splitting with this pattern.
_SENTENCE_RE = re.compile(
    f"[^{_SENTENCE_ENDINGS}]*[{_SENTENCE_ENDINGS}]+|[^{_SENTENCE_ENDINGS}]+"
)


def _split_evenly(text: str, max_chars: int) -> list[str]:
    """Cut *text* into the fewest near-equal pieces of at most *max_chars*.

    Splitting evenly (instead of filling each piece to the limit) avoids
    leaving a tiny tail, such as a lone punctuation mark, as its own piece.
    The caller must pass non-empty text and a positive ``max_chars``.
    """
    piece_count = -(-len(text) // max_chars)  # ceiling division
    piece_size = -(-len(text) // piece_count)
    return [text[i:i + piece_size] for i in range(0, len(text), piece_size)]


def _chunk_text(text: str, max_chars: int = MAX_CHARS_PER_REQUEST) -> list[str]:
    """Split text into chunks that each fit within the per-request limit.

    Whole sentences (body plus trailing terminators) are packed greedily into
    a chunk until the next sentence would overflow it. A single sentence that
    is longer than ``max_chars`` is cut into near-equal pieces.

    Args:
        text: Text to split. Surrounding whitespace is ignored.
        max_chars: Maximum length of any returned chunk.

    Returns:
        Non-empty, stripped chunks in reading order, each at most
        ``max_chars`` characters long. Empty list for blank input.

    Raises:
        ValueError: If the text needs splitting and ``max_chars`` is not
            positive.
    """
    text = text.strip()
    if not text:
        return []
    if max_chars <= 0:
        raise ValueError("max_chars must be a positive integer")
    if len(text) <= max_chars:
        return [text]

    chunks: list[str] = []
    current = ""
    for sentence in _SENTENCE_RE.findall(text):
        if len(current) + len(sentence) <= max_chars:
            current += sentence
            continue

        # The sentence does not fit: close the chunk built so far.
        if current.strip():
            chunks.append(current.strip())

        if len(sentence) <= max_chars:
            current = sentence
            continue

        # Oversized sentence: emit all pieces but the last, and keep the last
        # one open so that following short sentences can still join it.
        *full_pieces, current = _split_evenly(sentence, max_chars)
        chunks.extend(piece.strip() for piece in full_pieces if piece.strip())

    if current.strip():
        chunks.append(current.strip())
    return chunks


def _join_audio(parts: list[bytes], codec: str) -> bytes:
    """Combine per-chunk audio payloads into a single payload.

    For every codec except WAV the payloads are concatenated as-is. WAV
    payloads each carry their own header, so they are re-written as one file
    whose header covers all frames. If the payloads cannot be parsed as WAV,
    the function logs the error and falls back to plain concatenation.
    """
    if len(parts) <= 1 or codec.lower() != "wav":
        return b"".join(parts)

    try:
        merged = io.BytesIO()
        with wave.open(merged, "wb") as writer:
            for index, part in enumerate(parts):
                with wave.open(io.BytesIO(part), "rb") as reader:
                    if index == 0:
                        # All chunks come from the same request settings, so
                        # the first header describes the whole stream.
                        writer.setparams(reader.getparams())
                    writer.writeframes(reader.readframes(reader.getnframes()))
        return merged.getvalue()
    except (wave.Error, EOFError) as e:
        logger.error("Tencent TTS WAV merge failed, using raw concatenation: {}", e)
        return b"".join(parts)


class TencentTTSProvider:
    """Text-to-speech provider backed by the Tencent Cloud TTS SDK.

    The SDK is imported lazily, so constructing the provider does not require
    the ``tencentcloud`` package. Failures are logged and reported to the
    caller as empty bytes rather than raised.
    """

    def __init__(
        self,
        secret_id: str,
        secret_key: str,
        voice_type: int = 1001,
        codec: str = "mp3",
    ):
        """Store credentials and defaults; the SDK client is created on demand.

        Args:
            secret_id: Tencent Cloud API secret id.
            secret_key: Tencent Cloud API secret key.
            voice_type: VoiceType id used for the default voice and for voice
                names missing from ``VOICE_MAP``.
            codec: Value sent as the request ``Codec``.
        """
        self._secret_id = secret_id
        self._secret_key = secret_key
        self._voice_type = voice_type
        self._codec = codec
        # Lazily created SDK client; stays None until the first success.
        self._client = None

    def _get_client(self):
        """Return the cached SDK client, creating it on first use.

        Returns:
            The TTS client, or ``None`` if the SDK could not be imported or
            the client could not be built (the error is logged). A failed
            attempt is not cached, so the next call tries again.
        """
        if self._client is not None:
            return self._client
        try:
            from tencentcloud.common import credential
            from tencentcloud.common.profile.client_profile import ClientProfile
            from tencentcloud.common.profile.http_profile import HttpProfile
            from tencentcloud.tts.v20190823 import tts_client

            cred = credential.Credential(self._secret_id, self._secret_key)
            http_profile = HttpProfile()
            http_profile.endpoint = "tts.tencentcloudapi.com"
            client_profile = ClientProfile()
            client_profile.httpProfile = http_profile
            self._client = tts_client.TtsClient(cred, "", client_profile)
            return self._client
        except Exception as e:
            logger.error("Tencent TTS client init failed: {}", e)
            return None

    def _synthesize_sync(self, text: str, voice_type: int) -> bytes:
        """Synthesize one chunk with a blocking SDK call.

        Args:
            text: Text for a single request.
            voice_type: Tencent VoiceType id.

        Returns:
            Decoded audio bytes, or ``b""`` if the client is unavailable, the
            response has no audio, or any error occurs (errors are logged).
        """
        client = self._get_client()
        if client is None:
            return b""

        # Import separately from the request: the handler below names
        # TencentCloudSDKException, which would be unbound (and crash the
        # except matching) if this import failed inside the same try block.
        try:
            from tencentcloud.common.exception.tencent_cloud_sdk_exception import (
                TencentCloudSDKException,
            )
            from tencentcloud.tts.v20190823 import models
        except ImportError as e:
            logger.error("Tencent TTS synthesize failed: {}", e)
            return b""

        try:
            req = models.TextToVoiceRequest()
            req.Text = text
            req.SessionId = uuid.uuid4().hex
            req.VoiceType = voice_type
            # NOTE(review): 1 presumably selects Chinese as the primary
            # language in Tencent's API — confirm against the API reference.
            req.PrimaryLanguage = 1
            req.SampleRate = 16000
            req.Codec = self._codec
            resp = client.TextToVoice(req)
            if not resp or not resp.Audio:
                return b""
            # The SDK returns the audio base64-encoded.
            return base64.b64decode(resp.Audio)
        except TencentCloudSDKException as e:
            logger.error("Tencent TTS SDK error: {}", e)
            return b""
        except Exception as e:
            logger.error("Tencent TTS synthesize failed: {}", e)
            return b""

    async def synthesize(self, text: str, voice: str = "alloy") -> bytes:
        """Synthesize *text* to audio, splitting it into API-sized requests.

        Chunks are synthesized one after another in a worker thread. The
        result is all-or-nothing: if any chunk yields no audio, the whole
        call returns ``b""``.

        Args:
            text: Text to speak. Blank or ``None`` yields ``b""``.
            voice: OpenAI-style voice name, matched case-insensitively.
                "alloy" (or ``None``) and unknown names use the configured
                default VoiceType.

        Returns:
            The combined audio in the configured codec, or ``b""`` on failure
            or when credentials are missing.
        """
        if not self._secret_id or not self._secret_key:
            logger.error("Tencent TTS credentials not configured")
            return b""

        # Default "alloy" aligns with OpenAI TTS naming; Tencent uses
        # VoiceType IDs from settings.
        voice_key = (voice or "alloy").lower()
        if voice_key == "alloy":
            voice_type = self._voice_type
        else:
            voice_type = VOICE_MAP.get(voice_key, self._voice_type)

        chunks = _chunk_text(text or "")
        if not chunks:
            return b""

        results: list[bytes] = []
        for chunk in chunks:
            audio = await asyncio.to_thread(self._synthesize_sync, chunk, voice_type)
            if not audio:
                return b""
            results.append(audio)
        return _join_audio(results, self._codec)