2026-03-19 09:11:25 +08:00
|
|
|
"""Tencent Cloud TTS adapter — implements TTSProvider port."""
|
|
|
|
|
|
|
|
|
|
import asyncio
|
|
|
|
|
import base64
|
|
|
|
|
import re
|
|
|
|
|
import uuid
|
|
|
|
|
|
|
|
|
|
from app.core.logging import get_logger
|
|
|
|
|
|
|
|
|
|
# Module-level logger obtained from the project's logging helper, keyed by
# this module's name.
logger = get_logger(__name__)
|
|
|
|
|
|
|
|
|
|
# OpenAI voice name -> Tencent VoiceType ID.
# Keys are lowercase: TencentTTSProvider.synthesize lowercases the requested
# voice before looking it up here.
VOICE_MAP: dict[str, int] = {
    "alloy": 1001,
    "echo": 1002,
    "fable": 1003,
    "onyx": 1004,
    "nova": 1005,
    "shimmer": 1006,
}
|
|
|
|
|
|
|
|
|
|
# 中文 150 字 / 英文 500 字母,取保守值
|
|
|
|
|
MAX_CHARS_PER_REQUEST = 150
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _chunk_text(text: str, max_chars: int = MAX_CHARS_PER_REQUEST) -> list[str]:
|
|
|
|
|
"""Split text into chunks within API limit."""
|
|
|
|
|
text = text.strip()
|
|
|
|
|
if not text:
|
|
|
|
|
return []
|
|
|
|
|
if len(text) <= max_chars:
|
|
|
|
|
return [text]
|
|
|
|
|
|
|
|
|
|
chunks: list[str] = []
|
|
|
|
|
# Split by sentence boundaries first
|
|
|
|
|
pattern = r"[。!?.!?\n]+"
|
|
|
|
|
parts = re.split(f"({pattern})", text)
|
|
|
|
|
current = ""
|
|
|
|
|
for i, p in enumerate(parts):
|
|
|
|
|
if re.match(pattern, p):
|
|
|
|
|
current += p
|
|
|
|
|
if current.strip():
|
|
|
|
|
chunks.append(current.strip())
|
|
|
|
|
current = ""
|
|
|
|
|
else:
|
|
|
|
|
if len(current) + len(p) <= max_chars:
|
|
|
|
|
current += p
|
|
|
|
|
else:
|
|
|
|
|
if current.strip():
|
|
|
|
|
chunks.append(current.strip())
|
|
|
|
|
current = ""
|
|
|
|
|
# Single part exceeds limit, split by length
|
|
|
|
|
while p:
|
|
|
|
|
chunk = p[:max_chars]
|
|
|
|
|
p = p[max_chars:]
|
|
|
|
|
chunks.append(chunk)
|
|
|
|
|
if current.strip():
|
|
|
|
|
chunks.append(current.strip())
|
|
|
|
|
return chunks
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TencentTTSProvider:
    """Text-to-speech provider backed by the Tencent Cloud ``TextToVoice`` API.

    The Tencent SDK is imported lazily, so the module can be imported without
    the SDK installed. Failures are logged and reported to the caller as empty
    bytes rather than raised.
    """

    def __init__(
        self,
        secret_id: str,
        secret_key: str,
        voice_type: int = 1001,
        codec: str = "mp3",
    ):
        """Store credentials and synthesis defaults; no network I/O happens here.

        Args:
            secret_id: Tencent Cloud API secret id.
            secret_key: Tencent Cloud API secret key.
            voice_type: Default Tencent VoiceType ID, used for the "alloy"
                voice and for voice names missing from ``VOICE_MAP``.
            codec: Audio codec requested from the API.
        """
        self._secret_id = secret_id
        self._secret_key = secret_key
        self._voice_type = voice_type
        self._codec = codec
        # Created on first use by _get_client().
        self._client = None

    def _get_client(self):
        """Return the cached SDK client, building it on first use.

        Returns:
            The ``TtsClient`` instance, or ``None`` (after logging) when the
            SDK cannot be imported or the client cannot be constructed.
        """
        if self._client is not None:
            return self._client
        try:
            from tencentcloud.common import credential
            from tencentcloud.common.profile.client_profile import ClientProfile
            from tencentcloud.common.profile.http_profile import HttpProfile
            from tencentcloud.tts.v20190823 import tts_client

            cred = credential.Credential(self._secret_id, self._secret_key)
            http_profile = HttpProfile()
            http_profile.endpoint = "tts.tencentcloudapi.com"
            client_profile = ClientProfile()
            client_profile.httpProfile = http_profile
            # The region argument is passed as an empty string.
            self._client = tts_client.TtsClient(cred, "", client_profile)
            return self._client
        except Exception as e:
            logger.error("Tencent TTS client init failed: %s", e)
            return None

    def _synthesize_sync(self, text: str, voice_type: int) -> bytes:
        """Synthesize one chunk of text with a blocking SDK call.

        Args:
            text: Text for a single request (expected to be within the API
                length limit).
            voice_type: Tencent VoiceType ID to use.

        Returns:
            Decoded audio bytes, or ``b""`` when the client is unavailable,
            the request fails, or the response carries no audio.
        """
        client = self._get_client()
        if not client:
            return b""

        # Import outside the request's try block: if this import failed inside
        # it, evaluating ``except TencentCloudSDKException`` would itself raise
        # UnboundLocalError instead of returning b"".
        try:
            from tencentcloud.common.exception.tencent_cloud_sdk_exception import (
                TencentCloudSDKException,
            )
            from tencentcloud.tts.v20190823 import models
        except Exception as e:
            logger.error("Tencent TTS synthesize failed: %s", e)
            return b""

        try:
            req = models.TextToVoiceRequest()
            req.Text = text
            # A fresh session id per request.
            req.SessionId = uuid.uuid4().hex
            req.VoiceType = voice_type
            # NOTE(review): presumably 1 selects Chinese per the TextToVoice
            # API; confirm against the API reference.
            req.PrimaryLanguage = 1
            req.SampleRate = 16000
            req.Codec = self._codec

            resp = client.TextToVoice(req)
            if not resp or not resp.Audio:
                return b""
            # The response carries the audio base64-encoded.
            return base64.b64decode(resp.Audio)
        except TencentCloudSDKException as e:
            logger.error("Tencent TTS SDK error: %s", e)
            return b""
        except Exception as e:
            logger.error("Tencent TTS synthesize failed: %s", e)
            return b""

    async def synthesize(self, text: str, voice: str = "alloy") -> bytes:
        """Synthesize *text* and return the concatenated audio bytes.

        The text is split into request-sized chunks, each chunk is synthesized
        in a worker thread (the SDK call is blocking), and the results are
        joined in order.

        Args:
            text: Text to speak.
            voice: OpenAI-style voice name, matched case-insensitively.
                "alloy", a missing value, and unknown names use the
                provider's configured default VoiceType.

        Returns:
            Audio bytes for the whole text, or ``b""`` when credentials are
            missing, the text is empty, or any chunk fails (partial audio is
            never returned).
        """
        if not self._secret_id or not self._secret_key:
            logger.error("Tencent TTS credentials not configured")
            return b""

        # Default "alloy" aligns with OpenAI TTS naming; Tencent uses VoiceType
        # IDs from settings. Tolerate None/empty by treating it as the default.
        v = (voice or "alloy").lower()
        if v == "alloy":
            voice_type = self._voice_type
        else:
            voice_type = VOICE_MAP.get(v, self._voice_type)

        chunks = _chunk_text(text)
        if not chunks:
            return b""

        # NOTE(review): plain byte concatenation assumes the codec's output
        # can be joined back to back; confirm for container formats.
        results: list[bytes] = []
        total = len(chunks)
        for index, chunk in enumerate(chunks, start=1):
            audio = await asyncio.to_thread(self._synthesize_sync, chunk, voice_type)
            if not audio:
                logger.error(
                    "Tencent TTS chunk %d/%d produced no audio; aborting", index, total
                )
                return b""
            results.append(audio)

        return b"".join(results)
|