feat/ 添加app-expo三种环境切换,待测试 调整tts

This commit is contained in:
Kevin
2026-03-19 09:58:02 +08:00
parent faf7607bf9
commit 15512834d2
12 changed files with 187 additions and 18 deletions

View File

@@ -65,6 +65,16 @@ TENCENT_SECRET_ID=your_tencent_asr_secret_id
TENCENT_SECRET_KEY=your_tencent_asr_secret_key
# TENCENT_ASR_APP_ID=
# =============================================================================
# TTS (openai | tencent)
# =============================================================================
TTS_PROVIDER=tencent
# 仅 TTS_PROVIDER=openai 时需要
# OPENAI_API_KEY=your_openai_api_key
# 仅 TTS_PROVIDER=tencent 时生效,与 ASR 共用 TENCENT_SECRET_ID / TENCENT_SECRET_KEY
# 音色 ID 见 https://cloud.tencent.com/document/product/1073/92668
TTS_VOICE_TYPE=603004
# =============================================================================
# WeChat Pay
# =============================================================================

View File

@@ -0,0 +1,79 @@
"""Tencent Cloud TTS adapter — implements TTSProvider port.
API: https://cloud.tencent.com/document/product/1073/37995
"""
import asyncio
import base64
import uuid
from app.core.logging import get_logger
logger = get_logger(__name__)
class TencentTTSProvider:
    """Tencent Cloud text-to-speech adapter (implements the TTSProvider port).

    Wraps the Tencent Cloud ``TextToVoice`` API. The SDK client is created
    lazily on first use, and the blocking SDK call is executed in a worker
    thread so that ``synthesize`` can be awaited.

    All failures are logged and reported to the caller as empty bytes;
    no exception is propagated from ``synthesize``.
    """

    # Weighted text budget. Per the service limits this adapter was written
    # against, one request accepts about 150 Chinese characters or about 500
    # English letters. Charging 10 units per non-ASCII character and 3 units
    # per ASCII character against a budget of 1500 reproduces both limits.
    # NOTE(review): how the service counts mixed-language text is assumed to
    # be roughly linear between those two limits -- confirm against the API
    # documentation.
    _TEXT_BUDGET = 1500
    _ASCII_COST = 3
    _WIDE_COST = 10

    def __init__(
        self,
        secret_id: str,
        secret_key: str,
        voice_type: int = 603004,
        codec: str = "mp3",
        sample_rate: int = 16000,
    ):
        """Store credentials and synthesis options; no network call is made.

        Args:
            secret_id: Tencent Cloud API secret ID.
            secret_key: Tencent Cloud API secret key.
            voice_type: Value sent as ``VoiceType`` in each request.
            codec: Value sent as ``Codec`` in each request.
            sample_rate: Value sent as ``SampleRate`` in each request.
        """
        self._secret_id = secret_id
        self._secret_key = secret_key
        self._voice_type = voice_type
        self._codec = codec
        self._sample_rate = sample_rate
        self._client = None  # created on demand by _get_client()

    def _get_client(self):
        """Return the cached SDK client, creating it on first use.

        Returns:
            The ``TtsClient`` instance, or ``None`` when the SDK cannot be
            imported or the client cannot be constructed (the error is
            logged). A failed attempt is not cached, so the next call retries.
        """
        if self._client is not None:
            return self._client
        try:
            # Imported lazily so the module can be loaded without the SDK.
            from tencentcloud.common import credential
            from tencentcloud.common.profile.client_profile import ClientProfile
            from tencentcloud.common.profile.http_profile import HttpProfile
            from tencentcloud.tts.v20190823 import tts_client

            cred = credential.Credential(self._secret_id, self._secret_key)
            http_profile = HttpProfile()
            http_profile.endpoint = "tts.tencentcloudapi.com"
            client_profile = ClientProfile()
            client_profile.httpProfile = http_profile
            self._client = tts_client.TtsClient(cred, "", client_profile)
            return self._client
        except Exception as e:
            logger.error("Tencent TTS client init failed: %s", e)
            return None

    @classmethod
    def _truncate_text(cls, text: str) -> str:
        """Return the longest prefix of ``text`` that fits the text budget.

        ASCII characters cost ``_ASCII_COST`` units and every other character
        costs ``_WIDE_COST`` units; the prefix ends before the character that
        would push the total above ``_TEXT_BUDGET``.
        """
        spent = 0
        for index, char in enumerate(text):
            spent += cls._ASCII_COST if char.isascii() else cls._WIDE_COST
            if spent > cls._TEXT_BUDGET:
                return text[:index]
        return text

    def _synthesize_sync(self, text: str) -> bytes:
        """Blocking synthesis; meant to be run in a worker thread.

        Args:
            text: Text to synthesize. Text over the budget is truncated
                (with a warning) instead of being sent as-is.

        Returns:
            Decoded audio bytes, or ``b""`` when no client is available or
            the response carries no audio.
        """
        client = self._get_client()
        if not client:
            return b""
        from tencentcloud.tts.v20190823 import models

        payload = self._truncate_text(text)
        if len(payload) < len(text):
            logger.warning(
                "Tencent TTS text truncated from %d to %d characters",
                len(text),
                len(payload),
            )

        req = models.TextToVoiceRequest()
        req.Text = payload
        req.SessionId = f"tts-{uuid.uuid4().hex}"
        req.VoiceType = self._voice_type
        req.Codec = self._codec
        req.SampleRate = self._sample_rate
        req.PrimaryLanguage = 1  # 1 = Chinese
        resp = client.TextToVoice(req)
        if resp.Audio:
            # The API returns the audio as a base64 string.
            return base64.b64decode(resp.Audio)
        return b""

    async def synthesize(self, text: str, voice: str = "alloy") -> bytes:
        """Convert text to speech.

        Args:
            text: Text to synthesize.
            voice: Accepted for interface compatibility and ignored; the
                ``voice_type`` given to the constructor is used instead.

        Returns:
            Audio bytes in the configured codec (mp3 by default), or ``b""``
            when the text is empty, credentials are missing, or synthesis
            fails. Failures are logged, never raised.
        """
        if not text or not self._secret_id or not self._secret_key:
            return b""
        try:
            return await asyncio.to_thread(self._synthesize_sync, text)
        except Exception as e:
            detail = str(e)
            if "PkgExhausted" in detail:
                # Exceptions are swallowed here, so callers cannot detect an
                # exhausted quota themselves; report it distinctly at this level.
                logger.warning(
                    "Tencent TTS skipped: resource package exhausted, "
                    "purchase a package or enable pay-as-you-go: %s",
                    detail[:100],
                )
            else:
                logger.error("Tencent TTS synthesize failed: %s", e)
            return b""

View File

@@ -59,8 +59,10 @@ class Settings(BaseSettings):
tencent_secret_key: str = ""
tencent_asr_app_id: str = ""
# ── OpenAI (TTS) ─────────────────────────────────────────
# ── TTS (openai | tencent) ───────────────────────────────
tts_provider: str = "tencent"
openai_api_key: str = ""
tts_voice_type: int = 603004  # Tencent voice type ID; see https://cloud.tencent.com/document/product/1073/92668
# ── WeChat Pay ───────────────────────────────────────────
wechat_pay_app_id: str = ""

View File

@@ -60,6 +60,15 @@ def get_llm_provider() -> LLMProvider:
@lru_cache
def get_tts_provider() -> TTSProvider:
    """Build the TTS provider selected by ``settings.tts_provider``.

    The result is memoized by ``lru_cache``, so the provider is constructed
    once and the same instance is returned on later calls.

    Returns:
        A ``TencentTTSProvider`` when ``settings.tts_provider`` equals
        ``"tencent"``; an ``OpenAITTSProvider`` for any other value.
    """
    # Adapter modules are imported inside the branch that needs them, so only
    # the selected provider's module is loaded.
    if settings.tts_provider != "tencent":
        from app.adapters.tts.openai_tts import OpenAITTSProvider

        return OpenAITTSProvider(api_key=settings.openai_api_key)

    from app.adapters.tts.tencent_tts import TencentTTSProvider

    # Tencent TTS uses the same secret ID / key settings as the other
    # ``tencent_*`` credentials.
    tencent_options = {
        "secret_id": settings.tencent_secret_id,
        "secret_key": settings.tencent_secret_key,
        "voice_type": settings.tts_voice_type,
        "codec": "mp3",
    }
    return TencentTTSProvider(**tencent_options)

View File

@@ -26,11 +26,41 @@ from app.features.conversation.ws.profile_collector import (
get_missing_profile_fields,
)
from app.features.user.models import User
from app.core.dependencies import get_asr_provider
from app.core.dependencies import get_asr_provider, get_tts_provider
from app.features.memoir.state_service import get_or_create_state
logger = get_logger(__name__)
async def _send_tts_audio(conversation_id: str, text: str) -> None:
    """Synthesize ``text`` and push it to the conversation as a TTS_AUDIO message.

    Args:
        conversation_id: Conversation the audio message is sent to.
        text: Text to convert to speech.

    Nothing is sent when synthesis yields no audio; that case is logged as a
    warning. Any ``Exception`` raised while synthesizing or sending is logged
    instead of being propagated.
    """
    try:
        provider = get_tts_provider()
        speech = await provider.synthesize(text)
        if speech:
            # Audio travels base64-encoded inside the message payload.
            message = {
                "type": MessageType.TTS_AUDIO,
                "conversation_id": conversation_id,
                "data": {
                    "audio_base64": base64.b64encode(speech).decode("utf-8"),
                    "format": "mp3",
                },
                "timestamp": datetime.now(timezone.utc).isoformat(),
            }
            await manager.send_message(conversation_id, message)
        else:
            logger.warning(
                "TTS skipped: synthesize returned empty. Check TTS config in .env"
            )
    except Exception as exc:
        reason = str(exc)
        if "PkgExhausted" not in reason:
            logger.error("TTS synthesize failed: %s", exc)
            return
        # Quota exhaustion is an expected operational condition, so it is
        # logged as a warning with a shortened error text.
        logger.warning(
            "TTS skipped: 腾讯云语音合成资源包已用尽,请在控制台购买或开通后付费: %s",
            reason[:100],
        )
# ── Agent 实例(从 ConnectionManager 移出) ─────────────────────
conversation_agent = ConversationAgent()
memory_agent = MemoryAgent()
@@ -447,6 +477,7 @@ async def process_user_message(
"data": {"text": response_text, "index": i, "total": len(responses)},
"timestamp": datetime.now(timezone.utc).isoformat(),
})
await _send_tts_audio(conversation_id, response_text)
if i < len(responses) - 1:
await asyncio.sleep(0.5)
return
@@ -498,6 +529,7 @@ async def process_user_message(
"data": {"text": response_text, "index": i, "total": len(responses)},
"timestamp": datetime.now(timezone.utc).isoformat(),
})
await _send_tts_audio(conversation_id, response_text)
if i < len(responses) - 1:
await asyncio.sleep(0.5)