* add staging ios app build script * feat(api): add OpenTelemetry LGTM stack for local observability Wire OTel traces, metrics, and logs through a collector to Tempo, Prometheus, and Loki, with custom LLM instrumentation, dev compose overlay, Grafana provisioning, env templates, and development.sh auto-start. Co-authored-by: Cursor <cursoragent@cursor.com> * feat: expand observability, harden dev tooling, and fix expo staging UX Add business and LLM Prometheus metrics with Grafana dashboards, alerting, and a metrics verification script. Wire telemetry through adapters and core LLM paths, and document the local LGTM workflow. Fix development.sh for macOS bash 3.2, open Grafana and eval-web in Chrome, and repair eval-web auto-open (unbound EVAL_WEB_BROWSER_SCHEDULED). Merge internal-eval into the main dev script with improved compose handling. Require EXPO_PUBLIC_* at build time, improve iOS HTTP ATS for staging IPs, show memoir empty state instead of load errors when no chapters exist, and add jest env setup plus chapter list response normalization. Co-authored-by: Cursor <cursoragent@cursor.com> * chore: enable Grafana Assistant Cursor plugin Co-authored-by: Cursor <cursoragent@cursor.com> * fix: memoir empty state and repair withdrawn 0020_chapters_book_id stamp Show empty memoir UI when the chapter list succeeds with no items; treat auth/404 as non-fatal. Extend alembic revision repair so local dev DBs stamped with the removed 0020_chapters_book_id migration can roll back and upgrade to 0019. Co-authored-by: Cursor <cursoragent@cursor.com> --------- Co-authored-by: Kevin <kevin@brighteng.org> Co-authored-by: Cursor <cursoragent@cursor.com>
253 lines
9.0 KiB
Python
253 lines
9.0 KiB
Python
"""Tencent Cloud TTS adapter — implements TTSProvider port."""
|
||
|
||
import asyncio
|
||
import base64
|
||
import re
|
||
import uuid
|
||
|
||
from app.core.business_telemetry import business_span
|
||
from app.core.logging import get_logger
|
||
|
||
logger = get_logger(__name__)
|
||
|
||
# Lookup table translating an OpenAI-style voice name into the Tencent
# ``VoiceType`` ID that is sent with the synthesis request.
VOICE_MAP: dict[str, int] = dict(
    alloy=1001,
    echo=1002,
    fable=1003,
    onyx=1004,
    nova=1005,
    shimmer=1006,
)
|
||
|
||
# Tencent TTS API limit per request: <=150 Chinese characters or <=500 Latin
# letters (the English budget is kept at ~480 to leave some headroom).
MAX_CHARS_PER_REQUEST_ZH = 150
MAX_CHARS_PER_REQUEST_EN = 480

# Tencent PrimaryLanguage: 1 = Chinese (including mixed Chinese/English), 2 = English.
PRIMARY_LANGUAGE_ZH = 1
PRIMARY_LANGUAGE_EN = 2

# Tencent ModelType: 1 = new model (covers the 501xxx large-model voices and
# the newer premium voices).
# Large-model voices (e.g. 501004 "Yuehua") must be sent with ModelType=1
# explicitly, otherwise the legacy model may reject them and return empty audio.
# Legacy premium voices (e.g. 1001 / 101050) accept ModelType=1 as well, so
# setting it unconditionally does not break the old path.
# Docs: https://cloud.tencent.com/document/api/1073/37995
MODEL_TYPE_LLM = 1

# Sentence terminators: ideographic full stop (U+3002), full-width "!" (U+FF01)
# and "?" (U+FF1F), their ASCII counterparts, and newlines. The capturing group
# makes ``split`` return the terminators between the sentence bodies.
_SENTENCE_BREAK_RE = re.compile(r"([\u3002\uff01\uff1f.!?\n]+)")


def _chunk_text(text: str, max_chars: int = MAX_CHARS_PER_REQUEST_ZH) -> list[str]:
    """Split ``text`` into request-sized chunks, one sentence per chunk.

    Text that already fits in ``max_chars`` is returned as a single chunk.
    Longer text is cut at sentence terminators; a sentence that is still too
    long is cut every ``max_chars`` characters.

    Guarantees for the multi-chunk path:
      * no chunk is longer than ``max_chars`` (a terminator that would push a
        chunk over the limit is trimmed rather than overflowing);
      * no chunk is empty, whitespace-only, or made of terminators alone.

    Args:
        text: Text to synthesize; surrounding whitespace is ignored.
        max_chars: Maximum number of characters per chunk.

    Returns:
        The chunks in reading order; ``[]`` when ``text`` is blank.

    Raises:
        ValueError: If ``text`` must be split and ``max_chars`` is below 1.
    """
    text = text.strip()
    if not text:
        return []
    if len(text) <= max_chars:
        return [text]
    if max_chars < 1:
        # Slicing by a non-positive width can never consume the text.
        raise ValueError("max_chars must be a positive integer")

    # With one capturing group, split() alternates body, terminator, body, ...
    # and always ends on a body, so pad the terminators to pair them up.
    parts = _SENTENCE_BREAK_RE.split(text)
    bodies = parts[0::2]
    endings = parts[1::2] + [""]

    chunks: list[str] = []
    for body, ending in zip(bodies, endings):
        body = body.strip()
        if not body:
            # Nothing speakable (only terminators/whitespace): skip it.
            continue

        sentence = (body + ending).rstrip()
        if len(sentence) <= max_chars:
            chunks.append(sentence)
            continue

        # The sentence does not fit: cut the body by length, dropping any
        # slice that turns out to be whitespace only.
        pieces = [
            piece
            for start in range(0, len(body), max_chars)
            if (piece := body[start:start + max_chars].strip())
        ]
        # Keep as much of the terminator as still fits on the last piece
        # instead of emitting it as a chunk of its own.
        room = max_chars - len(pieces[-1])
        pieces[-1] = (pieces[-1] + ending[:room]).rstrip()
        chunks.extend(pieces)
    return chunks
|
||
|
||
|
||
class TencentTTSProvider:
    """TTS provider backed by Tencent Cloud's ``TextToVoice`` API.

    The Tencent SDK is imported lazily, on first use. SDK and synthesis
    failures are logged and reported to the caller as empty bytes (``b""``)
    rather than raised.
    """

    def __init__(
        self,
        secret_id: str,
        secret_key: str,
        voice_type: int = 1001,
        codec: str = "mp3",
        voice_type_en: int | None = None,
    ) -> None:
        """Store credentials and voice settings; no network access happens here.

        Args:
            secret_id: Tencent Cloud SecretId.
            secret_key: Tencent Cloud SecretKey.
            voice_type: Default Tencent VoiceType for the Chinese path.
            codec: Audio codec sent as the request's ``Codec`` field.
            voice_type_en: Default VoiceType for the English path; falls back
                to 501004 when not given.
        """
        self._secret_id = secret_id
        self._secret_key = secret_key
        self._voice_type = voice_type
        # When no English voice is configured, fall back to 501004 ("Yuehua",
        # a Tencent large-model voice). The 501xxx large-model voices must be
        # combined with ModelType=1 (see the Tencent TextToVoice docs).
        self._voice_type_en = voice_type_en if voice_type_en is not None else 501004
        self._codec = codec
        self._client = None  # created lazily by _get_client()

    def _get_client(self):
        """Return the cached Tencent TTS client, building it on first use.

        Returns:
            The SDK client, or ``None`` (after logging) when the SDK cannot be
            imported or the client cannot be constructed.
        """
        if self._client is not None:
            return self._client
        try:
            from tencentcloud.common import credential
            from tencentcloud.common.profile.client_profile import ClientProfile
            from tencentcloud.common.profile.http_profile import HttpProfile
            from tencentcloud.tts.v20190823 import tts_client

            cred = credential.Credential(self._secret_id, self._secret_key)
            http_profile = HttpProfile()
            http_profile.endpoint = "tts.tencentcloudapi.com"
            client_profile = ClientProfile()
            client_profile.httpProfile = http_profile
            self._client = tts_client.TtsClient(cred, "", client_profile)
            return self._client
        except Exception as e:
            # Best effort: callers treat a missing client as "no audio".
            logger.error("Tencent TTS client init failed: {}", e)
            return None

    def _synthesize_sync(
        self,
        text: str,
        voice_type: int,
        primary_language: int = PRIMARY_LANGUAGE_ZH,
    ) -> bytes:
        """Synthesize one chunk with a blocking SDK call.

        Args:
            text: Chunk of text, already within the per-request limit.
            voice_type: Tencent VoiceType ID.
            primary_language: Tencent PrimaryLanguage value.

        Returns:
            The decoded audio bytes, or ``b""`` on any failure (no client,
            SDK error, empty response); failures are logged, never raised.
        """
        client = self._get_client()
        if not client:
            logger.warning(
                "tencent_tts._synthesize_sync no client provider=tencent voice_type={}",
                voice_type,
            )
            return b""

        # Import outside the request's try block: the ``except
        # TencentCloudSDKException`` clause below needs this name to be bound,
        # otherwise a failed import would surface as UnboundLocalError while
        # the handler is being matched.
        try:
            from tencentcloud.common.exception.tencent_cloud_sdk_exception import (
                TencentCloudSDKException,
            )
            from tencentcloud.tts.v20190823 import models
        except Exception as e:
            logger.error(
                "Tencent TTS synthesize failed provider=tencent voice_type={} primary_language={}: {}",
                voice_type,
                primary_language,
                e,
            )
            return b""

        try:
            req = models.TextToVoiceRequest()
            req.Text = text
            req.SessionId = uuid.uuid4().hex
            req.VoiceType = voice_type
            req.PrimaryLanguage = primary_language
            req.SampleRate = 16000
            req.Codec = self._codec
            # Explicitly request the new model: large-model voices (501xxx)
            # sent without this field are rejected by the legacy model, which
            # silently returns empty audio.
            req.ModelType = MODEL_TYPE_LLM

            resp = client.TextToVoice(req)
            request_id = getattr(resp, "RequestId", None) if resp is not None else None
            audio_b64 = getattr(resp, "Audio", "") if resp is not None else ""
            if not audio_b64:
                logger.warning(
                    "tencent_tts._synthesize_sync empty audio voice_type={} "
                    "primary_language={} model_type={} request_id={}",
                    voice_type,
                    primary_language,
                    MODEL_TYPE_LLM,
                    request_id,
                )
                return b""
            return base64.b64decode(audio_b64)
        except TencentCloudSDKException as e:
            logger.error(
                "Tencent TTS SDK error provider=tencent voice_type={} primary_language={} "
                "model_type={} code={} message={} request_id={} raw={}",
                voice_type,
                primary_language,
                MODEL_TYPE_LLM,
                getattr(e, "code", None),
                getattr(e, "message", None),
                getattr(e, "requestId", None),
                e,
            )
            return b""
        except Exception as e:
            logger.error(
                "Tencent TTS synthesize failed provider=tencent voice_type={} primary_language={}: {}",
                voice_type,
                primary_language,
                e,
            )
            return b""

    def _merge_audio(self, parts: list[bytes]) -> bytes:
        """Join per-chunk audio into one payload.

        Plain byte concatenation is used for every codec except multi-chunk
        ``wav``: each WAV chunk carries its own header, so concatenating them
        yields a file whose header only covers the first chunk. Those are
        re-muxed with the standard ``wave`` module; if the chunks cannot be
        parsed as WAV, the method falls back to plain concatenation.
        """
        if len(parts) > 1 and (self._codec or "").lower() == "wav":
            import io
            import wave

            try:
                merged = io.BytesIO()
                with wave.open(merged, "wb") as writer:
                    for index, part in enumerate(parts):
                        with wave.open(io.BytesIO(part), "rb") as reader:
                            if index == 0:
                                # Frame count in the header is corrected on close.
                                writer.setparams(reader.getparams())
                            writer.writeframes(reader.readframes(reader.getnframes()))
                return merged.getvalue()
            except (wave.Error, EOFError) as e:
                logger.warning(
                    "tencent_tts.synthesize wav merge failed, using raw concatenation: {}",
                    e,
                )
        return b"".join(parts)

    async def synthesize(
        self,
        text: str,
        voice: str = "alloy",
        *,
        language: str = "zh",
    ) -> bytes:
        """Synthesize ``text`` to audio inside a ``tts.synthesize`` business span.

        Args:
            text: Text to speak.
            voice: OpenAI-style voice name; ``"alloy"`` selects the configured
                default voice for the language.
            language: ``"en"`` selects the English path, anything else Chinese.

        Returns:
            The audio bytes, or ``b""`` when nothing could be synthesized.
        """
        with business_span("tts.synthesize", provider="tencent"):
            return await self._synthesize_inner(text, voice, language=language)

    async def _synthesize_inner(
        self,
        text: str,
        voice: str = "alloy",
        *,
        language: str = "zh",
    ) -> bytes:
        """Chunk ``text``, synthesize each chunk in a worker thread, and merge.

        Chunks are synthesized sequentially; if any chunk yields no audio the
        whole call returns ``b""`` rather than partial audio.
        """
        if not self._secret_id or not self._secret_key:
            logger.error(
                "Tencent TTS credentials not configured provider=tencent secret_id_set={} secret_key_set={}",
                bool(self._secret_id),
                bool(self._secret_key),
            )
            return b""

        # ``language`` is resolved by the pipeline from the user's
        # ``language_preference`` (only 'en'; anything else takes the Chinese
        # path). It is independent of the language of the assistant text: the
        # product rule is that the TTS primary language follows the user's.
        is_en = (language or "zh").strip().lower() == "en"
        primary_language = PRIMARY_LANGUAGE_EN if is_en else PRIMARY_LANGUAGE_ZH
        default_voice = self._voice_type_en if is_en else self._voice_type
        max_chars = MAX_CHARS_PER_REQUEST_EN if is_en else MAX_CHARS_PER_REQUEST_ZH

        # The default "alloy" aligns with OpenAI TTS naming. Callers currently
        # never pass a specific voice, so in practice only the default_voice
        # branch runs (settings.tts_voice_type / tts_voice_type_en).
        # A missing voice is treated like the default instead of crashing.
        v = (voice or "alloy").lower()
        if v == "alloy":
            voice_type = default_voice
        else:
            voice_type = VOICE_MAP.get(v, default_voice)

        chunks = _chunk_text(text, max_chars=max_chars)
        if not chunks:
            return b""

        results: list[bytes] = []
        for idx, chunk in enumerate(chunks):
            # The SDK call blocks, so keep it off the event loop.
            audio = await asyncio.to_thread(
                self._synthesize_sync, chunk, voice_type, primary_language
            )
            if not audio:
                logger.warning(
                    "tencent_tts.synthesize chunk failed chunk_index={} chunk_chars={} "
                    "voice_type={} primary_language={}",
                    idx,
                    len(chunk),
                    voice_type,
                    primary_language,
                )
                return b""
            logger.debug(
                "tencent_tts.synthesize chunk ok chunk_index={} chunk_chars={} audio_bytes_len={}",
                idx,
                len(chunk),
                len(audio),
            )
            results.append(audio)

        merged = self._merge_audio(results)
        logger.debug(
            "tencent_tts.synthesize done language={} voice_type={} chunks={} total_bytes={}",
            language,
            voice_type,
            len(chunks),
            len(merged),
        )
        return merged
|