Files
life-echo/api/app/adapters/tts/tencent_tts.py
Sully 53e0065e3e refactor(api): TOML 配置 SSOT、统一错误契约、Auth/事务加固与可观测性 (#33)
配置 SSOT(TOML + .env)
统一错误契约
Auth 与事务边界
Redis / Celery 可靠性:业务 Redis(DB/0)与 Celery broker/backend(DB/1)显式拆分;连接池、sync client
可观测性(OpenTelemetry + LGTM)
2026-05-22 13:44:50 +08:00

254 lines
9.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Tencent Cloud TTS adapter — implements TTSProvider port."""
import asyncio
import base64
import re
import uuid
from app.core.business_telemetry import business_span
from app.core.logging import get_logger
from app.core.runtime_constants import tts_defaults
logger = get_logger(__name__)
# Lookup table: OpenAI-style voice name -> Tencent Cloud ``VoiceType`` ID.
VOICE_MAP: dict[str, int] = {
    "alloy": 1001,
    "echo": 1002,
    "fable": 1003,
    "onyx": 1004,
    "nova": 1005,
    "shimmer": 1006,
}
# Tencent TTS per-request text limit: <=150 Chinese characters or <=500 letters.
# The English budget is set to 480 to leave some headroom.
MAX_CHARS_PER_REQUEST_ZH = 150
MAX_CHARS_PER_REQUEST_EN = 480
# Tencent PrimaryLanguage: 1 = Chinese (including mixed Chinese/English), 2 = English.
PRIMARY_LANGUAGE_ZH = 1
PRIMARY_LANGUAGE_EN = 2
# Tencent ModelType: 1 = new model (covers the 501xxx large-model voices and the
# newer premium voices).
# Large-model voices (e.g. 501004 "Yuehua") must pass ModelType=1 explicitly,
# otherwise the old model may reject the request and return empty audio.
# Legacy premium voices (e.g. 1001 / 101050) also accept ModelType=1, so setting it
# unconditionally does not break the old path.
# Docs: https://cloud.tencent.com/document/api/1073/37995
MODEL_TYPE_LLM = 1

# Sentence terminators: ideographic full stop, full-width "!" and "?", ASCII ". ! ?"
# and newline. Full-width characters are written as \u escapes so that they cannot be
# silently normalised into their ASCII look-alikes. The capture group makes
# ``split`` keep the terminators.
_SENTENCE_END_RE = re.compile(r"([\u3002\uff01\uff1f.!?\n]+)")
# Preferred cut points inside a sentence that is too long on its own: whitespace,
# ASCII ", ; :" and their full-width counterparts (plus the ideographic comma).
_SOFT_BREAK_RE = re.compile(r"[\s,;:\uff0c\u3001\uff1b\uff1a]")
# Matches text made only of whitespace / sentence terminators (including "").
_NOTHING_TO_SPEAK_RE = re.compile(r"[\s\u3002\uff01\uff1f.!?]*")


def _append_if_speakable(chunks: list[str], raw: str) -> None:
    """Strip ``raw`` and append it to ``chunks`` unless nothing speakable is left."""
    piece = raw.strip()
    if _NOTHING_TO_SPEAK_RE.fullmatch(piece) is None:
        chunks.append(piece)


def _split_oversized(sentence: str, max_chars: int) -> list[str]:
    """Cut a sentence longer than ``max_chars`` into pieces of at most ``max_chars``.

    Each cut is placed right after the last soft break (comma, whitespace, ...)
    found in the second half of the window, so words are not split when avoidable;
    without such a break the cut is a hard one at ``max_chars``. A remainder that
    holds only punctuation/whitespace is dropped instead of being returned.
    """
    pieces: list[str] = []
    rest = sentence
    while len(rest) > max_chars:
        cut = max_chars
        for match in _SOFT_BREAK_RE.finditer(rest, max_chars // 2, max_chars):
            cut = match.end()  # keep the last candidate inside the window
        pieces.append(rest[:cut])
        rest = rest[cut:]
    if _NOTHING_TO_SPEAK_RE.fullmatch(rest.strip()) is None:
        pieces.append(rest)
    return pieces


def _chunk_text(text: str, max_chars: int = MAX_CHARS_PER_REQUEST_ZH) -> list[str]:
    """Split ``text`` into chunks that each fit into one TTS request.

    Text that already fits is returned as a single chunk. Longer text is split at
    sentence terminators and whole sentences are packed greedily into chunks of at
    most ``max_chars`` characters, so the number of requests stays small and tokens
    such as ``3.14`` are only separated when they straddle a chunk boundary. A
    sentence that is too long on its own is cut by ``_split_oversized``.

    Args:
        text: Text to synthesize; surrounding whitespace is ignored.
        max_chars: Maximum number of characters per chunk (must be >= 1 whenever
            the text needs splitting).

    Returns:
        Stripped, non-empty chunks in reading order. When splitting was needed,
        every chunk is at most ``max_chars`` long and never consists solely of
        punctuation. Empty/whitespace-only input yields ``[]``.

    Raises:
        ValueError: If the text needs splitting and ``max_chars`` is < 1.
    """
    text = text.strip()
    if not text:
        return []
    if len(text) <= max_chars:
        return [text]
    if max_chars < 1:
        raise ValueError("max_chars must be at least 1")

    chunks: list[str] = []
    # ``split`` with a capture group yields body, terminators, body, ..., body.
    parts = _SENTENCE_END_RE.split(text)
    current = ""
    for body, terminators in zip(parts[0::2], parts[1::2] + [""]):
        sentence = body + terminators
        if not sentence.strip():
            continue
        candidate = current + sentence
        if len(candidate.strip()) <= max_chars:
            # Still fits: keep packing sentences into the same request.
            current = candidate
            continue
        _append_if_speakable(chunks, current)
        if len(sentence.strip()) <= max_chars:
            current = sentence
            continue
        # The sentence alone exceeds the limit. Its last piece stays open so the
        # trailing punctuation (and following short sentences) can ride along
        # instead of becoming a punctuation-only request.
        *full_pieces, current = _split_oversized(sentence.strip(), max_chars)
        for piece in full_pieces:
            _append_if_speakable(chunks, piece)
    _append_if_speakable(chunks, current)
    return chunks
class TencentTTSProvider:
    """Tencent Cloud text-to-speech provider.

    ``synthesize`` splits the text with ``_chunk_text``, synthesizes every chunk
    through the blocking Tencent SDK in a worker thread and returns the
    concatenated audio bytes. Every failure path logs and returns ``b""`` instead
    of raising.
    """

    def __init__(
        self,
        secret_id: str,
        secret_key: str,
        voice_type: int = 1001,
        codec: str = "mp3",
        voice_type_en: int | None = None,
    ):
        """Store credentials and voice settings; the SDK client is created lazily.

        Args:
            secret_id: Tencent Cloud API secret id.
            secret_key: Tencent Cloud API secret key.
            voice_type: Default ``VoiceType`` for the Chinese path.
            codec: Audio codec passed to the API as ``Codec``.
            voice_type_en: Default ``VoiceType`` for the English path; ``None``
                selects the built-in fallback.
        """
        self._secret_id = secret_id
        self._secret_key = secret_key
        self._voice_type = voice_type
        # When no English voice is configured, fall back to 501004 (Yuehua, a
        # Tencent Cloud large-model voice). 501xxx large-model voices require
        # ModelType=1 (see the Tencent TextToVoice docs).
        self._voice_type_en = voice_type_en if voice_type_en is not None else 501004
        self._codec = codec
        self._client = None  # built on first use by _get_client()

    def _get_client(self):
        """Return the cached SDK client, creating it on first use.

        Returns ``None`` (after logging) when the SDK cannot be imported or the
        client cannot be constructed; a later call will try again.
        """
        if self._client is not None:
            return self._client
        try:
            from tencentcloud.common import credential
            from tencentcloud.common.profile.client_profile import ClientProfile
            from tencentcloud.common.profile.http_profile import HttpProfile
            from tencentcloud.tts.v20190823 import tts_client

            cred = credential.Credential(self._secret_id, self._secret_key)
            http_profile = HttpProfile()
            http_profile.endpoint = "tts.tencentcloudapi.com"
            client_profile = ClientProfile()
            client_profile.httpProfile = http_profile
            self._client = tts_client.TtsClient(cred, "", client_profile)
            return self._client
        except Exception as e:
            logger.error("Tencent TTS client init failed: {}", e)
            return None

    def _synthesize_sync(
        self,
        text: str,
        voice_type: int,
        primary_language: int = PRIMARY_LANGUAGE_ZH,
    ) -> bytes:
        """Synthesize one chunk with a blocking ``TextToVoice`` call.

        Args:
            text: Chunk text; expected to already fit the per-request limit.
            voice_type: Tencent ``VoiceType`` ID.
            primary_language: Tencent ``PrimaryLanguage`` value.

        Returns:
            Decoded audio bytes, or ``b""`` when no client is available, the SDK
            cannot be imported, the call fails or the response has no audio.
        """
        client = self._get_client()
        if not client:
            logger.warning(
                "tencent_tts._synthesize_sync no client provider=tencent voice_type={}",
                voice_type,
            )
            return b""
        # Import outside the request ``try``: its ``except`` clause names
        # TencentCloudSDKException, so a failing import inside that block would
        # surface as UnboundLocalError instead of being handled.
        try:
            from tencentcloud.common.exception.tencent_cloud_sdk_exception import (
                TencentCloudSDKException,
            )
            from tencentcloud.tts.v20190823 import models
        except Exception as e:
            logger.error(
                "Tencent TTS synthesize failed provider=tencent voice_type={} primary_language={}: {}",
                voice_type,
                primary_language,
                e,
            )
            return b""
        try:
            req = models.TextToVoiceRequest()
            req.Text = text
            req.SessionId = uuid.uuid4().hex
            req.VoiceType = voice_type
            req.PrimaryLanguage = primary_language
            req.SampleRate = 16000
            req.Codec = self._codec
            # Explicitly request the new model: large-model voices (501xxx) are
            # rejected by the old model, which then silently returns empty audio.
            req.ModelType = MODEL_TYPE_LLM
            resp = client.TextToVoice(req)
            request_id = getattr(resp, "RequestId", None) if resp is not None else None
            audio_b64 = getattr(resp, "Audio", "") if resp is not None else ""
            if not audio_b64:
                logger.warning(
                    "tencent_tts._synthesize_sync empty audio voice_type={} "
                    "primary_language={} model_type={} request_id={}",
                    voice_type,
                    primary_language,
                    MODEL_TYPE_LLM,
                    request_id,
                )
                return b""
            return base64.b64decode(audio_b64)
        except TencentCloudSDKException as e:
            logger.error(
                "Tencent TTS SDK error provider=tencent voice_type={} primary_language={} "
                "model_type={} code={} message={} request_id={} raw={}",
                voice_type,
                primary_language,
                MODEL_TYPE_LLM,
                getattr(e, "code", None),
                getattr(e, "message", None),
                getattr(e, "requestId", None),
                e,
            )
            return b""
        except Exception as e:
            logger.error(
                "Tencent TTS synthesize failed provider=tencent voice_type={} primary_language={}: {}",
                voice_type,
                primary_language,
                e,
            )
            return b""

    async def synthesize(
        self,
        text: str,
        voice: str = "alloy",
        *,
        language: str = "zh",
    ) -> bytes:
        """Synthesize ``text`` inside a ``tts.synthesize`` business span.

        Args:
            text: Text to speak.
            voice: OpenAI-style voice name; ``"alloy"`` selects the configured
                default voice for the language.
            language: ``"en"`` selects the English path, anything else Chinese.

        Returns:
            The merged audio bytes, or ``b""`` on any failure.
        """
        with business_span("tts.synthesize", provider="tencent"):
            return await self._synthesize_inner(text, voice, language=language)

    async def _synthesize_inner(
        self,
        text: str,
        voice: str = "alloy",
        *,
        language: str = "zh",
    ) -> bytes:
        """Chunk ``text``, synthesize the chunks one by one and join the audio.

        Returns ``b""`` when credentials are missing, the text is empty, or any
        single chunk fails (partial audio is never returned).
        """
        if not self._secret_id or not self._secret_key:
            logger.error(
                "Tencent TTS credentials not configured provider=tencent secret_id_set={} secret_key_set={}",
                bool(self._secret_id),
                bool(self._secret_key),
            )
            return b""
        # ``language`` is resolved by the pipeline from the user's
        # ``language_preference`` (only 'en'; anything else takes the Chinese path)
        # and is independent of the actual language of the assistant text: the
        # product rule is that the TTS primary language follows the user's language.
        is_en = (language or "zh").strip().lower() == "en"
        primary_language = PRIMARY_LANGUAGE_EN if is_en else PRIMARY_LANGUAGE_ZH
        default_voice = self._voice_type_en if is_en else self._voice_type
        max_chars = MAX_CHARS_PER_REQUEST_EN if is_en else MAX_CHARS_PER_REQUEST_ZH
        # The default "alloy" aligns with OpenAI TTS naming. Callers currently never
        # pass a specific voice, so in practice only the default_voice branch is
        # taken, which corresponds to tts_defaults.voice_type / tts_voice_type_en.
        # A missing voice (None/"") is treated like the default instead of crashing.
        voice_key = (voice or "alloy").lower()
        if voice_key == "alloy":
            voice_type = default_voice
        else:
            voice_type = VOICE_MAP.get(voice_key, default_voice)
        chunks = _chunk_text(text, max_chars=max_chars)
        if not chunks:
            return b""
        results: list[bytes] = []
        for idx, chunk in enumerate(chunks):
            # The SDK call is blocking, so run it off the event loop.
            audio = await asyncio.to_thread(
                self._synthesize_sync, chunk, voice_type, primary_language
            )
            if not audio:
                logger.warning(
                    "tencent_tts.synthesize chunk failed chunk_index={} chunk_chars={} "
                    "voice_type={} primary_language={}",
                    idx,
                    len(chunk),
                    voice_type,
                    primary_language,
                )
                return b""
            logger.debug(
                "tencent_tts.synthesize chunk ok chunk_index={} chunk_chars={} audio_bytes_len={}",
                idx,
                len(chunk),
                len(audio),
            )
            results.append(audio)
        # NOTE(review): chunks are joined byte-wise. That presumably suits
        # frame-based output such as mp3/pcm, but a container format like wav
        # would carry one header per chunk — confirm before using codec="wav".
        merged = b"".join(results)
        logger.debug(
            "tencent_tts.synthesize done language={} voice_type={} chunks={} total_bytes={}",
            language,
            voice_type,
            len(chunks),
            len(merged),
        )
        return merged