feat/tts (#15)

Co-authored-by: Kevin <kevin@brighteng.org>
This commit is contained in:
Sully
2026-03-19 09:11:25 +08:00
committed by GitHub
parent faf7607bf9
commit 92b7848c48
5 changed files with 192 additions and 1 deletions

View File

@@ -0,0 +1,144 @@
"""Tencent Cloud TTS adapter — implements TTSProvider port."""
import asyncio
import base64
import re
import uuid
from app.core.logging import get_logger
logger = get_logger(__name__)
# Lookup table translating OpenAI-style voice names (lower-case keys) into the
# integer ``VoiceType`` IDs that the Tencent TTS request expects.
VOICE_MAP: dict[str, int] = {
    "alloy": 1001,
    "echo": 1002,
    "fable": 1003,
    "onyx": 1004,
    "nova": 1005,
    "shimmer": 1006,
}
# 中文 150 字 / 英文 500 字母,取保守值
MAX_CHARS_PER_REQUEST = 150
def _chunk_text(text: str, max_chars: int = MAX_CHARS_PER_REQUEST) -> list[str]:
"""Split text into chunks within API limit."""
text = text.strip()
if not text:
return []
if len(text) <= max_chars:
return [text]
chunks: list[str] = []
# Split by sentence boundaries first
pattern = r"[。!?.!?\n]+"
parts = re.split(f"({pattern})", text)
current = ""
for i, p in enumerate(parts):
if re.match(pattern, p):
current += p
if current.strip():
chunks.append(current.strip())
current = ""
else:
if len(current) + len(p) <= max_chars:
current += p
else:
if current.strip():
chunks.append(current.strip())
current = ""
# Single part exceeds limit, split by length
while p:
chunk = p[:max_chars]
p = p[max_chars:]
chunks.append(chunk)
if current.strip():
chunks.append(current.strip())
return chunks
class TencentTTSProvider:
    """Text-to-speech provider backed by the Tencent Cloud ``TextToVoice`` API.

    The Tencent SDK is imported lazily, on first use. Every failure
    (missing credentials, SDK problems, empty responses) is logged and
    reported to the caller as empty ``bytes`` rather than as an exception.
    """

    def __init__(
        self,
        secret_id: str,
        secret_key: str,
        voice_type: int = 1001,
        codec: str = "mp3",
        *,
        sample_rate: int = 16000,
        primary_language: int = 1,
    ):
        """Store the configuration; no network or SDK access happens here.

        Args:
            secret_id: Tencent Cloud API secret ID.
            secret_key: Tencent Cloud API secret key.
            voice_type: Fallback ``VoiceType`` used when the requested voice
                name is not in ``VOICE_MAP``.
            codec: Value sent as the request's ``Codec``.
            sample_rate: Value sent as the request's ``SampleRate``.
            primary_language: Value sent as the request's ``PrimaryLanguage``
                (per the Tencent docs 1 is Chinese, 2 is English -- confirm
                against the current API reference before changing).
        """
        self._secret_id = secret_id
        self._secret_key = secret_key
        self._voice_type = voice_type
        self._codec = codec
        self._sample_rate = sample_rate
        self._primary_language = primary_language
        self._client = None  # created on first use by _get_client()

    def _get_client(self):
        """Return the cached SDK client, creating it on first use.

        Returns:
            The ``TtsClient`` instance, or ``None`` when the SDK cannot be
            imported or the client cannot be constructed. A failed attempt
            is not cached, so the next call tries again.
        """
        if self._client is not None:
            return self._client
        try:
            from tencentcloud.common import credential
            from tencentcloud.common.profile.client_profile import ClientProfile
            from tencentcloud.common.profile.http_profile import HttpProfile
            from tencentcloud.tts.v20190823 import tts_client

            cred = credential.Credential(self._secret_id, self._secret_key)
            http_profile = HttpProfile()
            http_profile.endpoint = "tts.tencentcloudapi.com"
            client_profile = ClientProfile()
            client_profile.httpProfile = http_profile
            # The region argument is passed as an empty string.
            self._client = tts_client.TtsClient(cred, "", client_profile)
            return self._client
        except Exception as e:
            logger.error("Tencent TTS client init failed: %s", e)
            return None

    def _synthesize_sync(self, text: str, voice_type: int) -> bytes:
        """Synthesize one chunk of text with a blocking SDK call.

        Args:
            text: Text for a single request (the caller keeps it within the
                API's length limit).
            voice_type: Tencent ``VoiceType`` ID to use.

        Returns:
            The decoded audio bytes, or ``b""`` on any failure.
        """
        client = self._get_client()
        if not client:
            return b""

        # Import in a separate step: were these imports inside the request
        # ``try`` below, a failing import would reach
        # ``except TencentCloudSDKException`` with that name still unbound
        # and raise UnboundLocalError instead of being handled.
        try:
            from tencentcloud.common.exception.tencent_cloud_sdk_exception import (
                TencentCloudSDKException,
            )
            from tencentcloud.tts.v20190823 import models
        except ImportError as e:
            logger.error("Tencent TTS SDK import failed: %s", e)
            return b""

        try:
            req = models.TextToVoiceRequest()
            req.Text = text
            req.SessionId = uuid.uuid4().hex  # fresh ID for every request
            req.VoiceType = voice_type
            req.PrimaryLanguage = self._primary_language
            req.SampleRate = self._sample_rate
            req.Codec = self._codec
            resp = client.TextToVoice(req)
            if not resp or not resp.Audio:
                logger.error("Tencent TTS returned no audio")
                return b""
            # The response carries the audio as a base64 string.
            return base64.b64decode(resp.Audio)
        except TencentCloudSDKException as e:
            logger.error("Tencent TTS SDK error: %s", e)
            return b""
        except Exception as e:
            logger.error("Tencent TTS synthesize failed: %s", e)
            return b""

    async def synthesize(self, text: str, voice: str = "alloy") -> bytes:
        """Convert *text* to audio, splitting it into API-sized requests.

        Args:
            text: Text to speak; it is split with ``_chunk_text``.
            voice: OpenAI-style voice name, matched case-insensitively
                against ``VOICE_MAP``. Unknown (or non-string) values fall
                back to the ``voice_type`` given to the constructor.

        Returns:
            The audio of all chunks concatenated in order, or ``b""`` when
            credentials are missing, the text is blank, or any chunk fails
            (partial audio is discarded).
        """
        if not self._secret_id or not self._secret_key:
            logger.error("Tencent TTS credentials not configured")
            return b""

        voice_key = voice.lower() if isinstance(voice, str) else ""
        voice_type = VOICE_MAP.get(voice_key, self._voice_type)

        chunks = _chunk_text(text)
        if not chunks:
            return b""

        results: list[bytes] = []
        total = len(chunks)
        for index, chunk in enumerate(chunks, start=1):
            # The SDK call blocks, so run it in a worker thread to keep the
            # event loop responsive. Chunks are processed one at a time.
            audio = await asyncio.to_thread(
                self._synthesize_sync, chunk, voice_type
            )
            if not audio:
                logger.error(
                    "Tencent TTS chunk %d/%d failed; discarding partial audio",
                    index,
                    total,
                )
                return b""
            results.append(audio)

        # NOTE(review): plain byte concatenation assumes the codec tolerates
        # back-to-back segments (as mp3 generally does); a container format
        # with a per-file header may need real merging -- confirm before
        # configuring another codec.
        return b"".join(results)