80 lines
2.6 KiB
Python
80 lines
2.6 KiB
Python
|
|
"""Tencent Cloud TTS adapter — implements TTSProvider port.

API: https://cloud.tencent.com/document/product/1073/37995
"""
|
|||
|
|
|
|||
|
|
import asyncio
import base64
import uuid

from app.core.logging import get_logger
|
|||
|
|
|
|||
|
|
# Module-level logger, named after this module.
logger = get_logger(__name__)
|
|||
|
|
|
|||
|
|
|
|||
|
|
class TencentTTSProvider:
    """Text-to-speech provider backed by the Tencent Cloud TTS SDK.

    The SDK is imported lazily, so this module can be imported without
    ``tencentcloud`` installed. The SDK client is created on first use and
    cached on the instance. Failures inside :meth:`synthesize` are logged
    and reported to the caller as empty bytes, never as exceptions.
    """

    # Maximum number of characters of input text sent in one request.
    _MAX_TEXT_CHARS = 500

    def __init__(
        self,
        secret_id: str,
        secret_key: str,
        voice_type: int = 603004,
        codec: str = "mp3",
        sample_rate: int = 16000,
    ) -> None:
        """Store credentials and synthesis settings; no SDK call is made here.

        Args:
            secret_id: Tencent Cloud API secret id.
            secret_key: Tencent Cloud API secret key.
            voice_type: Value sent as ``VoiceType`` on each request.
            codec: Value sent as ``Codec`` on each request.
            sample_rate: Value sent as ``SampleRate`` on each request.
        """
        self._secret_id = secret_id
        self._secret_key = secret_key
        self._voice_type = voice_type
        self._codec = codec
        self._sample_rate = sample_rate
        # Created lazily by _get_client(); stays None until the first success.
        self._client = None

    def _get_client(self):
        """Return the cached SDK client, creating it on first use.

        Returns:
            The ``TtsClient`` instance, or ``None`` if the SDK could not be
            imported or the client could not be constructed. A failure is
            logged and not cached, so the next call tries again.
        """
        if self._client is not None:
            return self._client
        try:
            # Imported here so a missing SDK only disables synthesis instead
            # of breaking import of this module.
            from tencentcloud.common import credential
            from tencentcloud.common.profile.client_profile import ClientProfile
            from tencentcloud.common.profile.http_profile import HttpProfile
            from tencentcloud.tts.v20190823 import tts_client

            cred = credential.Credential(self._secret_id, self._secret_key)
            http_profile = HttpProfile()
            http_profile.endpoint = "tts.tencentcloudapi.com"
            client_profile = ClientProfile()
            client_profile.httpProfile = http_profile
            # An empty string is passed as the region argument.
            self._client = tts_client.TtsClient(cred, "", client_profile)
            return self._client
        except Exception as e:
            logger.error("Tencent TTS client init failed: %s", e)
            return None

    def _synthesize_sync(self, text: str) -> bytes:
        """Run one blocking TextToVoice request (meant to run off the event loop).

        Args:
            text: Text to synthesize; only the first ``_MAX_TEXT_CHARS``
                characters are sent.

        Returns:
            The base64-decoded audio from the response, or ``b""`` when no
            client is available or the response carries no audio.

        Raises:
            Exception: Whatever the SDK import or request raises; this
                method does not catch errors itself.
        """
        client = self._get_client()
        if not client:
            return b""
        from tencentcloud.tts.v20190823 import models

        req = models.TextToVoiceRequest()
        # Roughly 150 Chinese characters or 500 English letters; truncate
        # conservatively.
        # NOTE(review): a 500-character slice is above the ~150 figure given
        # for Chinese, so long Chinese text may still exceed the service
        # limit — confirm against the API documentation.
        req.Text = text[: self._MAX_TEXT_CHARS]
        # Fresh session id for every request.
        req.SessionId = f"tts-{uuid.uuid4().hex}"
        req.VoiceType = self._voice_type
        req.Codec = self._codec
        req.SampleRate = self._sample_rate
        req.PrimaryLanguage = 1  # 1 = Chinese

        resp = client.TextToVoice(req)
        if resp.Audio:
            return base64.b64decode(resp.Audio)
        return b""

    async def synthesize(self, text: str, voice: str = "alloy") -> bytes:
        """Convert text to speech.

        Args:
            text: Text to synthesize.
            voice: Accepted but not used by this implementation; the voice
                is the ``voice_type`` given to the constructor.

        Returns:
            Audio bytes in the configured codec (mp3 by default). Returns
            ``b""`` when ``text`` is empty, when either credential is
            missing, or when synthesis fails (the error is logged).
        """
        if not text or not self._secret_id or not self._secret_key:
            return b""
        try:
            # The SDK call is blocking, so run it in a worker thread.
            return await asyncio.to_thread(self._synthesize_sync, text)
        except Exception as e:
            logger.error("Tencent TTS synthesize failed: %s", e)
            return b""
|