# api/app/adapters/tts/tencent_tts.py

"""Tencent Cloud TTS adapter — implements TTSProvider port.

API: https://cloud.tencent.com/document/product/1073/37995
"""

import asyncio
import base64
import uuid

from app.core.logging import get_logger

logger = get_logger(__name__)


class TencentTTSProvider:
    """Text-to-speech provider backed by Tencent Cloud's ``TextToVoice`` API.

    The Tencent SDK is imported lazily, the first time a client is needed, so
    this class can be constructed even when the SDK is not installed. All
    failures are logged and reported to the caller as empty bytes rather than
    raised.
    """

    # The API's per-request text limit is documented as roughly 150 Chinese
    # characters or 500 English letters. Both limits are expressed on a single
    # integer scale so mixed text can be measured without floating point:
    # 500 narrow chars * 3 == 150 wide chars * 10 == 1500.
    _TEXT_BUDGET = 1500
    _NARROW_CHAR_COST = 3  # ASCII characters
    _WIDE_CHAR_COST = 10  # everything else (CJK, full-width punctuation, ...)

    def __init__(
        self,
        secret_id: str,
        secret_key: str,
        voice_type: int = 603004,
        codec: str = "mp3",
        sample_rate: int = 16000,
        primary_language: int = 1,
    ):
        """Store credentials and synthesis settings; no network I/O happens here.

        Args:
            secret_id: Tencent Cloud API secret id.
            secret_key: Tencent Cloud API secret key.
            voice_type: Value sent as the request's ``VoiceType``.
            codec: Value sent as the request's ``Codec``.
            sample_rate: Value sent as the request's ``SampleRate``.
            primary_language: Value sent as the request's ``PrimaryLanguage``.
                Defaults to 1 (Chinese), the value that used to be hard-coded.
        """
        self._secret_id = secret_id
        self._secret_key = secret_key
        self._voice_type = voice_type
        self._codec = codec
        self._sample_rate = sample_rate
        self._primary_language = primary_language
        self._client = None  # created on first use by _get_client()

    @classmethod
    def _truncate_text(cls, text: str) -> str:
        """Return the longest prefix of *text* that fits the request text limit.

        ASCII characters and all other characters are charged different costs
        so that pure-ASCII text is cut at 500 characters and pure-CJK text at
        150 characters, with mixed text falling proportionally in between.
        """
        # Fast path: short enough even if every character were wide.
        if len(text) * cls._WIDE_CHAR_COST <= cls._TEXT_BUDGET:
            return text
        remaining = cls._TEXT_BUDGET
        for index, char in enumerate(text):
            remaining -= (
                cls._NARROW_CHAR_COST if char.isascii() else cls._WIDE_CHAR_COST
            )
            if remaining < 0:
                return text[:index]
        return text

    def _get_client(self):
        """Return the cached SDK client, creating it on first use.

        Returns:
            The ``TtsClient`` instance, or ``None`` when the SDK cannot be
            imported or the client cannot be constructed (the error is logged).
            A failed attempt is not cached, so the next call retries.
        """
        if self._client is not None:
            return self._client
        try:
            from tencentcloud.common import credential
            from tencentcloud.common.profile.client_profile import ClientProfile
            from tencentcloud.common.profile.http_profile import HttpProfile
            from tencentcloud.tts.v20190823 import tts_client

            cred = credential.Credential(self._secret_id, self._secret_key)
            http_profile = HttpProfile()
            http_profile.endpoint = "tts.tencentcloudapi.com"
            client_profile = ClientProfile()
            client_profile.httpProfile = http_profile
            # The region argument is passed as an empty string.
            self._client = tts_client.TtsClient(cred, "", client_profile)
            return self._client
        except Exception as e:
            logger.error("Tencent TTS client init failed: %s", e)
            return None

    def _synthesize_sync(self, text: str) -> bytes:
        """Blocking synthesis call; intended to run in a worker thread.

        Args:
            text: Text to synthesize. It is truncated to the API's length
                limit before being sent.

        Returns:
            The decoded audio bytes, or ``b""`` when no client is available or
            the response carries no audio. Exceptions raised by the SDK call
            propagate to the caller.
        """
        client = self._get_client()
        if client is None:
            return b""
        from tencentcloud.tts.v20190823 import models

        req = models.TextToVoiceRequest()
        req.Text = self._truncate_text(text)
        req.SessionId = f"tts-{uuid.uuid4().hex}"
        req.VoiceType = self._voice_type
        req.Codec = self._codec
        req.SampleRate = self._sample_rate
        req.PrimaryLanguage = self._primary_language

        resp = client.TextToVoice(req)
        if resp.Audio:
            # The response carries the audio as base64 text.
            return base64.b64decode(resp.Audio)
        return b""

    async def synthesize(self, text: str, voice: str = "alloy") -> bytes:
        """Convert text to speech without blocking the event loop.

        Args:
            text: Text to synthesize.
            voice: Accepted but not used by this adapter; the voice is chosen
                by the ``voice_type`` given at construction. Presumably kept
                for compatibility with the TTSProvider port.

        Returns:
            Audio bytes in the configured codec (mp3 by default), or ``b""``
            when *text* or either credential is empty, or when synthesis fails
            (the failure is logged, never raised).
        """
        if not text or not self._secret_id or not self._secret_key:
            return b""
        try:
            return await asyncio.to_thread(self._synthesize_sync, text)
        except Exception as e:
            logger.error("Tencent TTS synthesize failed: %s", e)
            return b""