# api/app/adapters/tts/tencent_tts.py

"""Tencent Cloud TTS adapter — implements TTSProvider port."""

import asyncio
import base64
import re
import uuid

from app.core.logging import get_logger

logger = get_logger(__name__)

# Maps OpenAI-style voice names to Tencent Cloud TTS VoiceType IDs.
# TencentTTSProvider.synthesize() lower-cases the requested voice and looks it
# up here. Two cases bypass the table and use the provider's configured
# default voice_type instead: the name "alloy" (so its 1001 entry below is not
# what synthesize() uses) and any name that is missing from the table.
VOICE_MAP: dict[str, int] = {
    "alloy": 1001,
    "echo": 1002,
    "fable": 1003,
    "onyx": 1004,
    "nova": 1005,
    "shimmer": 1006,
}

# 中文 150 字 / 英文 500 字母，取保守值
MAX_CHARS_PER_REQUEST = 150


def _chunk_text(text: str, max_chars: int = MAX_CHARS_PER_REQUEST) -> list[str]:
    """Split text into chunks within API limit."""
    text = text.strip()
    if not text:
        return []
    if len(text) <= max_chars:
        return [text]

    chunks: list[str] = []
    # Split by sentence boundaries first
    pattern = r"[。！？.!?\n]+"
    parts = re.split(f"({pattern})", text)
    current = ""
    for i, p in enumerate(parts):
        if re.match(pattern, p):
            current += p
            if current.strip():
                chunks.append(current.strip())
                current = ""
        else:
            if len(current) + len(p) <= max_chars:
                current += p
            else:
                if current.strip():
                    chunks.append(current.strip())
                    current = ""
                # Single part exceeds limit, split by length
                while p:
                    chunk = p[:max_chars]
                    p = p[max_chars:]
                    chunks.append(chunk)
    if current.strip():
        chunks.append(current.strip())
    return chunks


class TencentTTSProvider:
    """Text-to-speech provider backed by the Tencent Cloud TTS SDK.

    The SDK is imported lazily, so the module can be loaded without it.
    Every failure path logs the problem and returns empty bytes instead of
    raising.
    """

    def __init__(
        self,
        secret_id: str,
        secret_key: str,
        voice_type: int = 1001,
        codec: str = "mp3",
    ) -> None:
        """Store credentials and synthesis defaults; no network I/O happens here.

        Args:
            secret_id: Tencent Cloud API secret ID.
            secret_key: Tencent Cloud API secret key.
            voice_type: VoiceType ID used for "alloy" and for unknown voices.
            codec: Value sent as the request's ``Codec`` field.
        """
        self._secret_id = secret_id
        self._secret_key = secret_key
        self._voice_type = voice_type
        self._codec = codec
        # Created on first use by _get_client().
        self._client = None

    def _get_client(self):
        """Return the cached SDK client, creating it on first use.

        Returns:
            The client, or ``None`` when the SDK cannot be imported or the
            client cannot be constructed. A failure is not cached, so the
            next call tries again.
        """
        if self._client is not None:
            return self._client
        try:
            from tencentcloud.common import credential
            from tencentcloud.common.profile.client_profile import ClientProfile
            from tencentcloud.common.profile.http_profile import HttpProfile
            from tencentcloud.tts.v20190823 import tts_client

            cred = credential.Credential(self._secret_id, self._secret_key)
            http_profile = HttpProfile()
            http_profile.endpoint = "tts.tencentcloudapi.com"
            client_profile = ClientProfile()
            client_profile.httpProfile = http_profile
            # The region argument is deliberately left empty, as before.
            self._client = tts_client.TtsClient(cred, "", client_profile)
            return self._client
        except Exception as e:
            logger.error("Tencent TTS client init failed: %s", e)
            return None

    def _synthesize_sync(self, text: str, voice_type: int) -> bytes:
        """Synthesize one chunk of text with a blocking SDK call.

        Args:
            text: Text for a single request; the caller keeps it within the
                API length limit.
            voice_type: Tencent VoiceType ID to speak with.

        Returns:
            Decoded audio bytes, or ``b""`` on any failure (no client, SDK
            error, empty response).
        """
        client = self._get_client()
        if client is None:
            return b""

        # Import outside the guarded block: if the exception class itself
        # failed to import inside the ``try``, evaluating the ``except``
        # clause below would raise UnboundLocalError instead of returning b"".
        try:
            from tencentcloud.common.exception.tencent_cloud_sdk_exception import (
                TencentCloudSDKException,
            )
            from tencentcloud.tts.v20190823 import models
        except ImportError as e:
            logger.error("Tencent TTS synthesize failed: %s", e)
            return b""

        try:
            req = models.TextToVoiceRequest()
            req.Text = text
            req.SessionId = uuid.uuid4().hex
            req.VoiceType = voice_type
            # NOTE(review): presumably 1 selects Chinese as the primary
            # language; confirm against the TextToVoice API reference.
            req.PrimaryLanguage = 1
            req.SampleRate = 16000
            req.Codec = self._codec

            resp = client.TextToVoice(req)
            if not resp or not resp.Audio:
                # Previously silent; log it so an empty result can be traced.
                logger.error("Tencent TTS returned an empty audio payload")
                return b""
            return base64.b64decode(resp.Audio)
        except TencentCloudSDKException as e:
            logger.error("Tencent TTS SDK error: %s", e)
            return b""
        except Exception as e:
            logger.error("Tencent TTS synthesize failed: %s", e)
            return b""

    async def synthesize(self, text: str, voice: str = "alloy") -> bytes:
        """Convert *text* to audio, splitting it into API-sized requests.

        Chunks are synthesized one after another in a worker thread and the
        resulting audio is concatenated in order.

        Args:
            text: Text to speak.
            voice: OpenAI-style voice name (case-insensitive). "alloy", a
                missing value and unknown names all use the configured
                default voice type; other names are looked up in VOICE_MAP.

        Returns:
            The concatenated audio, or ``b""`` when credentials are missing,
            the text is empty, or any chunk fails. Partial audio is never
            returned.
        """
        if not self._secret_id or not self._secret_key:
            logger.error("Tencent TTS credentials not configured")
            return b""

        # Default "alloy" aligns with OpenAI TTS naming; Tencent uses VoiceType IDs from settings.
        v = (voice or "alloy").lower()
        if v == "alloy":
            voice_type = self._voice_type
        else:
            voice_type = VOICE_MAP.get(v, self._voice_type)

        chunks = _chunk_text(text)
        if not chunks:
            return b""

        results: list[bytes] = []
        for index, chunk in enumerate(chunks, start=1):
            audio = await asyncio.to_thread(self._synthesize_sync, chunk, voice_type)
            if not audio:
                logger.error(
                    "Tencent TTS aborted: chunk %d of %d produced no audio",
                    index,
                    len(chunks),
                )
                return b""
            results.append(audio)

        # NOTE(review): plain byte concatenation assumes the codec tolerates
        # it (e.g. frame-based mp3). Formats with a per-file header, such as
        # wav, would need real merging; confirm before using other codecs.
        return b"".join(results)
feat/tts (#15) Co-authored-by: Kevin <kevin@brighteng.org> 2026-03-19 09:11:25 +08:00			`"""Tencent Cloud TTS adapter — implements TTSProvider port."""`

			`import asyncio`
			`import base64`
			`import re`
			`import uuid`

			`from app.core.logging import get_logger`

			`logger = get_logger(__name__)`

			`# OpenAI voice name -> Tencent VoiceType ID`
			`VOICE_MAP: dict[str, int] = {`
			`"alloy": 1001,`
			`"echo": 1002,`
			`"fable": 1003,`
			`"onyx": 1004,`
			`"nova": 1005,`
			`"shimmer": 1006,`
			`}`

			`# 中文 150 字 / 英文 500 字母，取保守值`
			`MAX_CHARS_PER_REQUEST = 150`


			`def _chunk_text(text: str, max_chars: int = MAX_CHARS_PER_REQUEST) -> list[str]:`
			`"""Split text into chunks within API limit."""`
			`text = text.strip()`
			`if not text:`
			`return []`
			`if len(text) <= max_chars:`
			`return [text]`

			`chunks: list[str] = []`
			`# Split by sentence boundaries first`
			`pattern = r"[。！？.!?\n]+"`
			`parts = re.split(f"({pattern})", text)`
			`current = ""`
			`for i, p in enumerate(parts):`
			`if re.match(pattern, p):`
			`current += p`
			`if current.strip():`
			`chunks.append(current.strip())`
			`current = ""`
			`else:`
			`if len(current) + len(p) <= max_chars:`
			`current += p`
			`else:`
			`if current.strip():`
			`chunks.append(current.strip())`
			`current = ""`
			`# Single part exceeds limit, split by length`
			`while p:`
			`chunk = p[:max_chars]`
			`p = p[max_chars:]`
			`chunks.append(chunk)`
			`if current.strip():`
			`chunks.append(current.strip())`
			`return chunks`


			`class TencentTTSProvider:`
			`def __init__(`
			`self,`
			`secret_id: str,`
			`secret_key: str,`
			`voice_type: int = 1001,`
			`codec: str = "mp3",`
			`):`
			`self._secret_id = secret_id`
			`self._secret_key = secret_key`
			`self._voice_type = voice_type`
			`self._codec = codec`
			`self._client = None`

			`def _get_client(self):`
			`if self._client is not None:`
			`return self._client`
			`try:`
			`from tencentcloud.common import credential`
			`from tencentcloud.common.profile.client_profile import ClientProfile`
			`from tencentcloud.common.profile.http_profile import HttpProfile`
			`from tencentcloud.tts.v20190823 import tts_client`

			`cred = credential.Credential(self._secret_id, self._secret_key)`
			`http_profile = HttpProfile()`
			`http_profile.endpoint = "tts.tencentcloudapi.com"`
			`client_profile = ClientProfile()`
			`client_profile.httpProfile = http_profile`
			`self._client = tts_client.TtsClient(cred, "", client_profile)`
			`return self._client`
			`except Exception as e:`
			`logger.error("Tencent TTS client init failed: %s", e)`
			`return None`

			`def _synthesize_sync(self, text: str, voice_type: int) -> bytes:`
			`client = self._get_client()`
			`if not client:`
			`return b""`
			`try:`
			`from tencentcloud.common.exception.tencent_cloud_sdk_exception import (`
			`TencentCloudSDKException,`
			`)`
			`from tencentcloud.tts.v20190823 import models`

			`req = models.TextToVoiceRequest()`
			`req.Text = text`
			`req.SessionId = uuid.uuid4().hex`
			`req.VoiceType = voice_type`
			`req.PrimaryLanguage = 1`
			`req.SampleRate = 16000`
			`req.Codec = self._codec`

			`resp = client.TextToVoice(req)`
			`if not resp or not resp.Audio:`
			`return b""`
			`return base64.b64decode(resp.Audio)`
			`except TencentCloudSDKException as e:`
			`logger.error("Tencent TTS SDK error: %s", e)`
			`return b""`
			`except Exception as e:`
			`logger.error("Tencent TTS synthesize failed: %s", e)`
			`return b""`

			`async def synthesize(self, text: str, voice: str = "alloy") -> bytes:`
			`if not self._secret_id or not self._secret_key:`
			`logger.error("Tencent TTS credentials not configured")`
			`return b""`

fix/various fixes 2026-03-20 15:15:35 +08:00			`# Default "alloy" aligns with OpenAI TTS naming; Tencent uses VoiceType IDs from settings.`
			`v = voice.lower()`
			`if v == "alloy":`
			`voice_type = self._voice_type`
			`else:`
			`voice_type = VOICE_MAP.get(v, self._voice_type)`
feat/tts (#15) Co-authored-by: Kevin <kevin@brighteng.org> 2026-03-19 09:11:25 +08:00			`chunks = _chunk_text(text)`
			`if not chunks:`
			`return b""`

			`results: list[bytes] = []`
			`for chunk in chunks:`
chore/ 删除无用文件 2026-03-19 14:36:14 +08:00			`audio = await asyncio.to_thread(self._synthesize_sync, chunk, voice_type)`
feat/tts (#15) Co-authored-by: Kevin <kevin@brighteng.org> 2026-03-19 09:11:25 +08:00			`if not audio:`
			`return b""`
			`results.append(audio)`

			`return b"".join(results)`