feat/ 添加app-expo三种环境切换,待测试 调整tts
This commit is contained in:
9
.github/workflows/app-expo-deploy.yml
vendored
9
.github/workflows/app-expo-deploy.yml
vendored
@@ -92,6 +92,15 @@ jobs:
|
||||
npm run lint
|
||||
npm run test:ci
|
||||
|
||||
- name: Set API environment
|
||||
working-directory: app-expo
|
||||
run: |
|
||||
case "${{ steps.env.outputs.env }}" in
|
||||
prod) node scripts/use-env.js production ;;
|
||||
stage) node scripts/use-env.js staging ;;
|
||||
*) node scripts/use-env.js development ;;
|
||||
esac
|
||||
|
||||
- name: Export web build
|
||||
working-directory: app-expo
|
||||
run: npx expo export -p web
|
||||
|
||||
@@ -65,6 +65,16 @@ TENCENT_SECRET_ID=your_tencent_asr_secret_id
|
||||
TENCENT_SECRET_KEY=your_tencent_asr_secret_key
|
||||
# TENCENT_ASR_APP_ID=
|
||||
|
||||
# =============================================================================
|
||||
# TTS (openai | tencent)
|
||||
# =============================================================================
|
||||
TTS_PROVIDER=tencent
|
||||
# 仅 TTS_PROVIDER=openai 时需要
|
||||
# OPENAI_API_KEY=your_openai_api_key
|
||||
# 仅 TTS_PROVIDER=tencent 时生效,与 ASR 共用 TENCENT_SECRET_ID / TENCENT_SECRET_KEY
|
||||
# 音色 ID 见 https://cloud.tencent.com/document/product/1073/92668
|
||||
TTS_VOICE_TYPE=603004
|
||||
|
||||
# =============================================================================
|
||||
# WeChat Pay
|
||||
# =============================================================================
|
||||
|
||||
79
api/app/adapters/tts/tencent_tts.py
Normal file
79
api/app/adapters/tts/tencent_tts.py
Normal file
@@ -0,0 +1,79 @@
|
||||
"""Tencent Cloud TTS adapter — implements TTSProvider port.
|
||||
|
||||
API: https://cloud.tencent.com/document/product/1073/37995
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import uuid
|
||||
|
||||
from app.core.logging import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class TencentTTSProvider:
    """Text-to-speech adapter that calls Tencent Cloud's ``TextToVoice`` API.

    The Tencent SDK is imported lazily, so this module can be imported even
    when the SDK is not installed. Every failure path (missing credentials,
    SDK import/initialisation error, remote API error) is logged and reported
    to the caller as empty bytes rather than as an exception.
    """

    def __init__(
        self,
        secret_id: str,
        secret_key: str,
        voice_type: int = 603004,
        codec: str = "mp3",
        sample_rate: int = 16000,
        max_text_length: int = 500,
    ):
        """Store credentials and synthesis settings; no network I/O happens here.

        Args:
            secret_id: Tencent Cloud API secret id.
            secret_key: Tencent Cloud API secret key.
            voice_type: Tencent voice id sent as ``VoiceType``.
            codec: Audio container sent as ``Codec``.
            sample_rate: Sample rate in Hz sent as ``SampleRate``.
            max_text_length: Maximum number of characters sent per request;
                longer input is truncated (with a warning) before sending.
        """
        self._secret_id = secret_id
        self._secret_key = secret_key
        self._voice_type = voice_type
        self._codec = codec
        self._sample_rate = sample_rate
        self._max_text_length = max_text_length
        # Created on first use by _get_client() and then reused.
        self._client = None

    def _get_client(self):
        """Return the cached SDK client, creating it on first use.

        Returns:
            The ``TtsClient`` instance, or ``None`` when the SDK cannot be
            imported or the client cannot be constructed (the error is logged).
        """
        if self._client is not None:
            return self._client
        try:
            from tencentcloud.common import credential
            from tencentcloud.common.profile.client_profile import ClientProfile
            from tencentcloud.common.profile.http_profile import HttpProfile
            from tencentcloud.tts.v20190823 import tts_client

            cred = credential.Credential(self._secret_id, self._secret_key)
            http_profile = HttpProfile()
            http_profile.endpoint = "tts.tencentcloudapi.com"
            client_profile = ClientProfile()
            client_profile.httpProfile = http_profile
            # Region is passed as an empty string.
            # NOTE(review): presumably this API ignores the region — confirm.
            self._client = tts_client.TtsClient(cred, "", client_profile)
            return self._client
        except Exception as e:
            logger.error("Tencent TTS client init failed: %s", e)
            return None

    def _clip_text(self, text: str) -> str:
        """Return *text* limited to ``max_text_length`` characters.

        Truncation is logged so that clipped speech can be diagnosed instead
        of happening silently.
        """
        limit = self._max_text_length
        if len(text) <= limit:
            return text
        logger.warning(
            "Tencent TTS text truncated from %d to %d characters",
            len(text),
            limit,
        )
        return text[:limit]

    def _synthesize_sync(self, text: str) -> bytes:
        """Blocking synthesis call; intended to run in a worker thread.

        Returns:
            The decoded audio bytes, or ``b""`` when no client is available
            or the response carries no audio. SDK exceptions propagate to the
            caller.
        """
        client = self._get_client()
        if not client:
            return b""
        from tencentcloud.tts.v20190823 import models

        req = models.TextToVoiceRequest()
        # NOTE(review): the original comment cites limits of roughly 150
        # Chinese characters / 500 English letters. The default cap of 500
        # may therefore still be too long for Chinese input — confirm against
        # the API docs and lower ``max_text_length`` if needed.
        req.Text = self._clip_text(text)
        req.SessionId = f"tts-{uuid.uuid4().hex}"
        req.VoiceType = self._voice_type
        req.Codec = self._codec
        req.SampleRate = self._sample_rate
        req.PrimaryLanguage = 1  # 1 = Chinese

        resp = client.TextToVoice(req)
        if resp.Audio:
            # The response carries the audio base64-encoded.
            return base64.b64decode(resp.Audio)
        return b""

    async def synthesize(self, text: str, voice: str = "alloy") -> bytes:
        """Convert *text* to speech without blocking the event loop.

        Args:
            text: Text to speak. Empty or whitespace-only text is skipped.
            voice: Accepted for signature compatibility but not used here;
                the voice is the ``voice_type`` given to the constructor.

        Returns:
            Audio bytes in the configured codec (mp3 by default), or ``b""``
            when the text is blank, credentials are missing, or synthesis
            fails (the failure is logged).
        """
        # Whitespace-only text has nothing to speak; skip the remote call.
        if not text or not text.strip():
            return b""
        if not self._secret_id or not self._secret_key:
            return b""
        try:
            return await asyncio.to_thread(self._synthesize_sync, text)
        except Exception as e:
            logger.error("Tencent TTS synthesize failed: %s", e)
            return b""
|
||||
@@ -59,8 +59,10 @@ class Settings(BaseSettings):
|
||||
tencent_secret_key: str = ""
|
||||
tencent_asr_app_id: str = ""
|
||||
|
||||
# ── OpenAI (TTS) ─────────────────────────────────────────
|
||||
# ── TTS (openai | tencent) ───────────────────────────────
|
||||
tts_provider: str = "tencent"
|
||||
openai_api_key: str = ""
|
||||
tts_voice_type: int = 603004 # Tencent 音色 ID,见 https://cloud.tencent.com/document/product/1073/92668
|
||||
|
||||
# ── WeChat Pay ───────────────────────────────────────────
|
||||
wechat_pay_app_id: str = ""
|
||||
|
||||
@@ -60,6 +60,15 @@ def get_llm_provider() -> LLMProvider:
|
||||
|
||||
@lru_cache
def get_tts_provider() -> TTSProvider:
    """Build the TTS adapter selected by ``settings.tts_provider``.

    The result is cached by ``lru_cache``, so the same adapter instance is
    returned on every call. Adapter modules are imported lazily so only the
    selected backend is loaded.

    Returns:
        A ``TencentTTSProvider`` when the setting is ``tencent`` (compared
        case-insensitively, ignoring surrounding whitespace); otherwise an
        ``OpenAITTSProvider``.
    """
    # The value comes from the environment; normalise it so "Tencent" or
    # "tencent " do not silently fall through to the OpenAI adapter.
    provider_name = settings.tts_provider.strip().lower()

    if provider_name == "tencent":
        from app.adapters.tts.tencent_tts import TencentTTSProvider

        return TencentTTSProvider(
            secret_id=settings.tencent_secret_id,
            secret_key=settings.tencent_secret_key,
            voice_type=settings.tts_voice_type,
            codec="mp3",
        )

    from app.adapters.tts.openai_tts import OpenAITTSProvider

    return OpenAITTSProvider(api_key=settings.openai_api_key)
|
||||
|
||||
@@ -26,11 +26,41 @@ from app.features.conversation.ws.profile_collector import (
|
||||
get_missing_profile_fields,
|
||||
)
|
||||
from app.features.user.models import User
|
||||
from app.core.dependencies import get_asr_provider
|
||||
from app.core.dependencies import get_asr_provider, get_tts_provider
|
||||
from app.features.memoir.state_service import get_or_create_state
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
async def _send_tts_audio(conversation_id: str, text: str) -> None:
    """Synthesize *text* and push it to the conversation as a TTS_AUDIO message.

    Best-effort: an empty synthesis result is logged and skipped, and any
    exception (from synthesis or from sending) is logged rather than raised.
    """
    try:
        provider = get_tts_provider()
        audio = await provider.synthesize(text)
        if audio:
            payload = {
                "type": MessageType.TTS_AUDIO,
                "conversation_id": conversation_id,
                "data": {
                    # Binary audio is base64-encoded for the message payload.
                    "audio_base64": base64.b64encode(audio).decode("utf-8"),
                    "format": "mp3",
                },
                "timestamp": datetime.now(timezone.utc).isoformat(),
            }
            await manager.send_message(conversation_id, payload)
        else:
            logger.warning(
                "TTS skipped: synthesize returned empty. Check TTS config in .env"
            )
    except Exception as exc:
        message = str(exc)
        if "PkgExhausted" not in message:
            logger.error("TTS synthesize failed: %s", exc)
        else:
            # Quota exhaustion is an account/billing condition, so it is
            # logged as a warning with a shortened message.
            logger.warning(
                "TTS skipped: 腾讯云语音合成资源包已用尽,请在控制台购买或开通后付费: %s",
                message[:100],
            )
|
||||
|
||||
# ── Agent 实例(从 ConnectionManager 移出) ─────────────────────
|
||||
conversation_agent = ConversationAgent()
|
||||
memory_agent = MemoryAgent()
|
||||
@@ -447,6 +477,7 @@ async def process_user_message(
|
||||
"data": {"text": response_text, "index": i, "total": len(responses)},
|
||||
"timestamp": datetime.now(timezone.utc).isoformat(),
|
||||
})
|
||||
await _send_tts_audio(conversation_id, response_text)
|
||||
if i < len(responses) - 1:
|
||||
await asyncio.sleep(0.5)
|
||||
return
|
||||
@@ -498,6 +529,7 @@ async def process_user_message(
|
||||
"data": {"text": response_text, "index": i, "total": len(responses)},
|
||||
"timestamp": datetime.now(timezone.utc).isoformat(),
|
||||
})
|
||||
await _send_tts_audio(conversation_id, response_text)
|
||||
if i < len(responses) - 1:
|
||||
await asyncio.sleep(0.5)
|
||||
|
||||
|
||||
2
app-expo/.gitignore
vendored
2
app-expo/.gitignore
vendored
@@ -32,6 +32,8 @@ yarn-error.*
|
||||
|
||||
# local env files
|
||||
.env*.local
|
||||
# generated .env (from use-env script)
|
||||
.env
|
||||
|
||||
# typescript
|
||||
*.tsbuildinfo
|
||||
|
||||
@@ -3,11 +3,15 @@
|
||||
"main": "expo-router/entry",
|
||||
"version": "1.0.0",
|
||||
"scripts": {
|
||||
"use-env": "node scripts/use-env.js",
|
||||
"prestart": "npm run use-env -- development",
|
||||
"start": "expo start",
|
||||
"start:staging": "npm run use-env -- staging && expo start",
|
||||
"start:prod": "npm run use-env -- production && expo start",
|
||||
"reset-project": "node ./scripts/reset-project.js",
|
||||
"android": "expo run:android",
|
||||
"ios": "expo run:ios",
|
||||
"web": "expo start --web",
|
||||
"android": "npm run use-env -- development && expo run:android",
|
||||
"ios": "npm run use-env -- development && expo run:ios",
|
||||
"web": "npm run use-env -- development && expo start --web",
|
||||
"lint": "expo lint",
|
||||
"test": "jest --watch",
|
||||
"test:changed": "jest --onlyChanged --coverage=false",
|
||||
|
||||
@@ -1,6 +1,14 @@
|
||||
import { Image } from 'expo-image';
|
||||
import { useLocalSearchParams } from 'expo-router';
|
||||
import { Mic, Pause, Play, PlusCircle, Type, X } from 'lucide-react-native';
|
||||
import {
|
||||
Mic,
|
||||
Pause,
|
||||
Play,
|
||||
PlusCircle,
|
||||
Type,
|
||||
Volume2,
|
||||
X,
|
||||
} from 'lucide-react-native';
|
||||
import React, { useCallback, useEffect, useRef, useState } from 'react';
|
||||
import {
|
||||
Alert,
|
||||
@@ -23,6 +31,7 @@ import { useThemeColors } from '@/hooks/use-theme-colors';
|
||||
import { useMessages, useRealtimeSession } from '@/features/conversation/hooks';
|
||||
import type { MessageItem } from '@/features/conversation/types';
|
||||
import { audioFocus } from '@/core/audio/audio-focus';
|
||||
import { usePlayer } from '@/features/voice/hooks/use-player';
|
||||
import { useRecorder } from '@/features/voice/hooks/use-recorder';
|
||||
import { useAudioPlayer, useAudioPlayerStatus } from 'expo-audio';
|
||||
|
||||
@@ -543,10 +552,12 @@ export default function ConversationScreen() {
|
||||
const { t } = useTranslation('conversation');
|
||||
const { t: tApp } = useTranslation('app');
|
||||
const { data: messages } = useMessages(id);
|
||||
const { enqueueTtsAudio, status: playerStatus } = usePlayer();
|
||||
const { connectionState, streamingMessage, sendText, sendVoiceMessage } =
|
||||
useRealtimeSession({
|
||||
conversationId: id,
|
||||
conversationId: id ?? '',
|
||||
enabled: !!id,
|
||||
onTtsAudio: enqueueTtsAudio,
|
||||
});
|
||||
|
||||
const handleRecordingComplete = useCallback(
|
||||
@@ -606,6 +617,14 @@ export default function ConversationScreen() {
|
||||
title={
|
||||
<View style={styles.headerTitleBlock}>
|
||||
<Text style={styles.headerTitle}>{tApp('name')}</Text>
|
||||
{playerStatus === 'playing' && (
|
||||
<Icon
|
||||
as={Volume2}
|
||||
size={18}
|
||||
color={CHAT_COLORS.primary}
|
||||
style={{ marginRight: 6 }}
|
||||
/>
|
||||
)}
|
||||
<View
|
||||
style={[
|
||||
styles.statusBadge,
|
||||
|
||||
@@ -1,14 +1,6 @@
|
||||
const DEV_API_URL = 'http://127.0.0.1:8000';
|
||||
const DEV_WS_URL = 'ws://127.0.0.1:8000';
|
||||
|
||||
const PROD_API_URL = 'https://lifecho.worldsplats.com';
|
||||
const PROD_WS_URL = 'wss://lifecho.worldsplats.com';
|
||||
|
||||
const useProdServer = process.env.EXPO_PUBLIC_USE_PROD_SERVER === 'true';
|
||||
|
||||
export const config = {
|
||||
apiBaseUrl: useProdServer ? PROD_API_URL : DEV_API_URL,
|
||||
wsBaseUrl: useProdServer ? PROD_WS_URL : DEV_WS_URL,
|
||||
apiBaseUrl: process.env.EXPO_PUBLIC_API_URL ?? 'http://192.168.10.178:8000',
|
||||
wsBaseUrl: process.env.EXPO_PUBLIC_WS_URL ?? 'ws://192.168.10.178:8000',
|
||||
isDebugMode: __DEV__,
|
||||
|
||||
api: {
|
||||
|
||||
@@ -112,6 +112,7 @@ export function useEndConversation() {
|
||||
interface UseRealtimeSessionOptions {
|
||||
conversationId: string;
|
||||
enabled?: boolean;
|
||||
onTtsAudio?: (audioBase64: string) => void;
|
||||
}
|
||||
|
||||
const MIN_RECORDING_DURATION_SEC = 1;
|
||||
@@ -136,6 +137,7 @@ interface RealtimeSessionState {
|
||||
export function useRealtimeSession({
|
||||
conversationId,
|
||||
enabled = true,
|
||||
onTtsAudio,
|
||||
}: UseRealtimeSessionOptions): RealtimeSessionState {
|
||||
const queryClient = useQueryClient();
|
||||
const sessionRef = useRef<RealtimeSession | null>(null);
|
||||
@@ -168,6 +170,7 @@ export function useRealtimeSession({
|
||||
conversationId,
|
||||
queryClient,
|
||||
onStreamingText: handleStreamingText,
|
||||
onTtsAudio,
|
||||
onError: handleError,
|
||||
onStateChange: setConnectionState,
|
||||
});
|
||||
@@ -181,7 +184,7 @@ export function useRealtimeSession({
|
||||
setConnectionState('disconnected');
|
||||
setStreamingMessage(null);
|
||||
};
|
||||
}, [conversationId, enabled, queryClient, handleStreamingText, handleError]);
|
||||
}, [conversationId, enabled, queryClient, handleStreamingText, handleError, onTtsAudio]);
|
||||
|
||||
const sendText = useCallback(
|
||||
(text: string) => {
|
||||
|
||||
@@ -18,6 +18,7 @@ interface RealtimeSessionOptions {
|
||||
conversationId: string;
|
||||
queryClient: QueryClient;
|
||||
onStreamingText?: StreamingTextCallback;
|
||||
onTtsAudio?: (audioBase64: string) => void;
|
||||
onError?: ErrorCallback;
|
||||
onStateChange?: WsStateListener;
|
||||
}
|
||||
@@ -38,6 +39,7 @@ export class RealtimeSession {
|
||||
private conversationId: string;
|
||||
private queryClient: QueryClient;
|
||||
private onStreamingText?: StreamingTextCallback;
|
||||
private onTtsAudio?: (audioBase64: string) => void;
|
||||
private onError?: ErrorCallback;
|
||||
private unsubEvent: (() => void) | null = null;
|
||||
private unsubState: (() => void) | null = null;
|
||||
@@ -49,6 +51,7 @@ export class RealtimeSession {
|
||||
this.conversationId = options.conversationId;
|
||||
this.queryClient = options.queryClient;
|
||||
this.onStreamingText = options.onStreamingText;
|
||||
this.onTtsAudio = options.onTtsAudio;
|
||||
this.onError = options.onError;
|
||||
|
||||
this.unsubEvent = this.client.onEvent(this.handleEvent);
|
||||
@@ -117,6 +120,11 @@ export class RealtimeSession {
|
||||
return;
|
||||
}
|
||||
|
||||
if (event.kind === 'tts_audio_received') {
|
||||
this.onTtsAudio?.(event.audioBase64);
|
||||
return;
|
||||
}
|
||||
|
||||
handleWsEvent(this.queryClient, event);
|
||||
|
||||
if (event.kind === 'session_error') {
|
||||
|
||||
Reference in New Issue
Block a user