feat(conversation): TTS 投递与 WebSocket 管线;客户端播放门禁与会话页联动;COS 键与迁移脚本调整
This commit is contained in:
@@ -1,6 +1,15 @@
|
||||
import { Image } from 'expo-image';
|
||||
import { useLocalSearchParams } from 'expo-router';
|
||||
import { Mic, Pause, Play, PlusCircle, Type, X } from 'lucide-react-native';
|
||||
import {
|
||||
Mic,
|
||||
Pause,
|
||||
Play,
|
||||
PlusCircle,
|
||||
Square,
|
||||
Type,
|
||||
Volume2,
|
||||
X,
|
||||
} from 'lucide-react-native';
|
||||
import React, { useCallback, useEffect, useRef, useState } from 'react';
|
||||
import type {
|
||||
LayoutChangeEvent,
|
||||
@@ -23,18 +32,23 @@ import {
|
||||
import { KeyboardAvoidingView as KeyboardControllerAvoidingView } from 'react-native-keyboard-controller';
|
||||
import { useSafeAreaInsets } from 'react-native-safe-area-context';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import { useQueryClient } from '@tanstack/react-query';
|
||||
|
||||
import { Icon } from '@/components/ui/icon';
|
||||
import { Text } from '@/components/ui/text';
|
||||
import { ScreenHeader } from '@/components/screen-header';
|
||||
import { useThemeColors } from '@/hooks/use-theme-colors';
|
||||
import { useMessages, useRealtimeSession } from '@/features/conversation/hooks';
|
||||
import type { TtsSegmentPayload } from '@/features/conversation/realtime-session';
|
||||
import { conversationKeys } from '@/features/conversation/query-keys';
|
||||
import {
|
||||
splitMessageParts,
|
||||
splitStreamingSegments,
|
||||
} from '@/features/conversation/message-split';
|
||||
import type { MessageItem } from '@/features/conversation/types';
|
||||
import { isVoiceMessage } from '@/features/conversation/types';
|
||||
import type { PlaybackItem } from '@/features/voice/types';
|
||||
import { createTtsPlaybackGate } from '@/features/voice/tts-playback-gate';
|
||||
import { usePlayer } from '@/features/voice/hooks/use-player';
|
||||
import { useRecorder } from '@/features/voice/hooks/use-recorder';
|
||||
|
||||
@@ -65,6 +79,14 @@ const USER_AVATAR =
|
||||
|
||||
type InputMode = 'text' | 'voice';
|
||||
|
||||
/**
 * `PlaybackItem.messageRef.listKey` value shared by the streaming assistant area and
 * auto-played TTS, so that tapping that area can stop the read-aloud.
 * Used as the list key for an incoming TTS segment that carries no `assistantMessageId`.
 */
|
||||
const TTS_STREAMING_LIST_KEY = '__tts_streaming__';
|
||||
|
||||
/**
 * Tells whether a list row is the first part of an assistant message.
 *
 * After a message is split into several parts, only the first part shows the
 * "read aloud" control, because the whole message shares one `ttsAudioUrls` list.
 *
 * @param listKey - List key of the row being rendered.
 * @param messageId - Id of the message the row belongs to.
 * @returns `true` when `listKey` equals `messageId` or `${messageId}_part_0`.
 */
|
||||
function isFirstAssistantTextPart(listKey: string, messageId: string): boolean {
|
||||
// Matches either the bare message id or the `_part_0` suffixed key.
return listKey === messageId || listKey === `${messageId}_part_0`;
|
||||
}
|
||||
|
||||
/** 展平消息列表:assistant 消息按 [SPLIT] 边界拆成多条,每条一个 listKey */
|
||||
function flattenMessagesForList(
|
||||
messages: MessageItem[],
|
||||
@@ -96,23 +118,119 @@ function flattenMessagesForList(
|
||||
|
||||
function MessageBubble({
|
||||
item,
|
||||
listKey,
|
||||
agentName,
|
||||
meLabel,
|
||||
currentPlaybackUri,
|
||||
currentPlaybackItem,
|
||||
playbackIsPlaying,
|
||||
onPlayVoiceExclusive,
|
||||
onPausePlayback,
|
||||
onInterruptAssistantTts,
|
||||
onReplayAssistantTts,
|
||||
}: {
|
||||
item: MessageItem;
|
||||
listKey: string;
|
||||
agentName: string;
|
||||
meLabel: string;
|
||||
currentPlaybackUri: string | null;
|
||||
currentPlaybackItem: PlaybackItem | null;
|
||||
playbackIsPlaying: boolean;
|
||||
onPlayVoiceExclusive: (uri: string) => void;
|
||||
onPausePlayback: () => void;
|
||||
onInterruptAssistantTts: () => void;
|
||||
onReplayAssistantTts: (messageId: string, urls: string[]) => void;
|
||||
}) {
|
||||
const { t } = useTranslation('conversation');
|
||||
const isUser = item.senderType === 'user';
|
||||
const isVoice = isVoiceMessage(item);
|
||||
const ttsUrls =
|
||||
Array.isArray(item.ttsAudioUrls) && item.ttsAudioUrls.length > 0
|
||||
? item.ttsAudioUrls.filter(
|
||||
(u): u is string => typeof u === 'string' && u.trim().length > 0,
|
||||
)
|
||||
: [];
|
||||
|
||||
const isAssistantTextFirstPart =
|
||||
!isUser && !isVoice && isFirstAssistantTextPart(listKey, item.id);
|
||||
|
||||
const isThisBubbleTtsTarget =
|
||||
!isUser &&
|
||||
!isVoice &&
|
||||
playbackIsPlaying &&
|
||||
currentPlaybackItem?.kind !== 'voice' &&
|
||||
currentPlaybackItem?.messageRef?.listKey === item.id;
|
||||
|
||||
const isAssistantTtsHighlight = isThisBubbleTtsTarget;
|
||||
|
||||
const assistantTextBubbleBody = (
|
||||
<View
|
||||
style={[
|
||||
styles.bubble,
|
||||
styles.bubbleAgent,
|
||||
isAssistantTtsHighlight && styles.bubbleAgentTtsActive,
|
||||
]}
|
||||
>
|
||||
<Text selectable style={[styles.bubbleText, styles.bubbleTextAgent]}>
|
||||
{item.content}
|
||||
</Text>
|
||||
{isAssistantTtsHighlight ? (
|
||||
<Text style={styles.readingAloudCaption}>{t('readingAloud')}</Text>
|
||||
) : null}
|
||||
{isAssistantTextFirstPart ? (
|
||||
<View style={styles.readAloudRow}>
|
||||
{isThisBubbleTtsTarget ? (
|
||||
<View
|
||||
style={styles.readAloudButtonInner}
|
||||
accessibilityElementsHidden
|
||||
importantForAccessibility="no-hide-descendants"
|
||||
>
|
||||
<Icon as={Square} size={16} color={CHAT_COLORS.primary} />
|
||||
<Text style={styles.readAloudButtonLabel}>
|
||||
{t('stopReadingAloud')}
|
||||
</Text>
|
||||
</View>
|
||||
) : (
|
||||
<Pressable
|
||||
onPress={() => {
|
||||
if (ttsUrls.length) {
|
||||
onReplayAssistantTts(item.id, ttsUrls);
|
||||
}
|
||||
}}
|
||||
disabled={!ttsUrls.length}
|
||||
style={({ pressed }) => [
|
||||
styles.readAloudButton,
|
||||
!ttsUrls.length && styles.readAloudButtonDisabled,
|
||||
pressed && ttsUrls.length && { opacity: 0.85 },
|
||||
]}
|
||||
accessibilityRole="button"
|
||||
accessibilityLabel={
|
||||
ttsUrls.length ? t('readAloudAgain') : t('cannotReadAloud')
|
||||
}
|
||||
>
|
||||
<Icon
|
||||
as={Volume2}
|
||||
size={16}
|
||||
color={
|
||||
ttsUrls.length
|
||||
? CHAT_COLORS.primary
|
||||
: CHAT_COLORS.onSurfaceVariant
|
||||
}
|
||||
/>
|
||||
<Text
|
||||
style={[
|
||||
styles.readAloudButtonLabel,
|
||||
!ttsUrls.length && styles.readAloudButtonLabelDisabled,
|
||||
]}
|
||||
>
|
||||
{ttsUrls.length ? t('readAloudAgain') : t('cannotReadAloud')}
|
||||
</Text>
|
||||
</Pressable>
|
||||
)}
|
||||
</View>
|
||||
) : null}
|
||||
</View>
|
||||
);
|
||||
|
||||
return (
|
||||
<View style={[styles.messageRow, isUser && styles.messageRowReverse]}>
|
||||
@@ -157,23 +275,23 @@ function MessageBubble({
|
||||
}}
|
||||
/>
|
||||
</View>
|
||||
) : (
|
||||
<View
|
||||
style={[
|
||||
styles.bubble,
|
||||
isUser ? styles.bubbleUser : styles.bubbleAgent,
|
||||
]}
|
||||
>
|
||||
<Text
|
||||
selectable
|
||||
style={[
|
||||
styles.bubbleText,
|
||||
isUser ? styles.bubbleTextUser : styles.bubbleTextAgent,
|
||||
]}
|
||||
>
|
||||
) : isUser ? (
|
||||
<View style={[styles.bubble, styles.bubbleUser]}>
|
||||
<Text selectable style={[styles.bubbleText, styles.bubbleTextUser]}>
|
||||
{item.content}
|
||||
</Text>
|
||||
</View>
|
||||
) : isThisBubbleTtsTarget ? (
|
||||
<Pressable
|
||||
onPress={onInterruptAssistantTts}
|
||||
style={({ pressed }) => [pressed && { opacity: 0.92 }]}
|
||||
accessibilityRole="button"
|
||||
accessibilityLabel={t('stopReadingAloud')}
|
||||
>
|
||||
{assistantTextBubbleBody}
|
||||
</Pressable>
|
||||
) : (
|
||||
assistantTextBubbleBody
|
||||
)}
|
||||
</View>
|
||||
</View>
|
||||
@@ -184,11 +302,16 @@ function StreamingBubbles({
|
||||
streamingText,
|
||||
isComplete,
|
||||
agentName,
|
||||
streamingTtsActive,
|
||||
onStreamingPress,
|
||||
}: {
|
||||
streamingText: string;
|
||||
isComplete: boolean;
|
||||
agentName: string;
|
||||
streamingTtsActive?: boolean;
|
||||
onStreamingPress?: () => void;
|
||||
}) {
|
||||
const { t } = useTranslation('conversation');
|
||||
const segments = splitStreamingSegments(streamingText);
|
||||
const completedParts =
|
||||
segments.length > 1
|
||||
@@ -197,8 +320,8 @@ function StreamingBubbles({
|
||||
const streamingPart =
|
||||
segments.length > 0 ? segments[segments.length - 1]! : streamingText;
|
||||
|
||||
return (
|
||||
<View>
|
||||
const inner = (
|
||||
<>
|
||||
{completedParts.map((part, i) => (
|
||||
<View
|
||||
key={`streaming_complete_${i}`}
|
||||
@@ -214,7 +337,13 @@ function StreamingBubbles({
|
||||
/>
|
||||
</View>
|
||||
<View style={[styles.bubbleColumn]}>
|
||||
<View style={[styles.bubble, styles.bubbleAgent]}>
|
||||
<View
|
||||
style={[
|
||||
styles.bubble,
|
||||
styles.bubbleAgent,
|
||||
streamingTtsActive && styles.bubbleAgentTtsActive,
|
||||
]}
|
||||
>
|
||||
<Text
|
||||
selectable
|
||||
style={[styles.bubbleText, styles.bubbleTextAgent]}
|
||||
@@ -236,7 +365,13 @@ function StreamingBubbles({
|
||||
/>
|
||||
</View>
|
||||
<View style={[styles.bubbleColumn]}>
|
||||
<View style={[styles.bubble, styles.bubbleAgent]}>
|
||||
<View
|
||||
style={[
|
||||
styles.bubble,
|
||||
styles.bubbleAgent,
|
||||
streamingTtsActive && styles.bubbleAgentTtsActive,
|
||||
]}
|
||||
>
|
||||
<Text
|
||||
selectable
|
||||
style={[styles.bubbleText, styles.bubbleTextAgent]}
|
||||
@@ -247,8 +382,27 @@ function StreamingBubbles({
|
||||
</View>
|
||||
</View>
|
||||
</View>
|
||||
</View>
|
||||
{streamingTtsActive ? (
|
||||
<View style={styles.streamingTtsCaptionRow}>
|
||||
<Text style={styles.readingAloudCaption}>{t('readingAloud')}</Text>
|
||||
</View>
|
||||
) : null}
|
||||
</>
|
||||
);
|
||||
|
||||
if (streamingTtsActive && onStreamingPress) {
|
||||
return (
|
||||
<Pressable
|
||||
onPress={onStreamingPress}
|
||||
accessibilityRole="button"
|
||||
accessibilityLabel={t('stopReadingAloud')}
|
||||
>
|
||||
{inner}
|
||||
</Pressable>
|
||||
);
|
||||
}
|
||||
|
||||
return <View>{inner}</View>;
|
||||
}
|
||||
|
||||
function formatRecordingDuration(seconds: number): string {
|
||||
@@ -606,35 +760,95 @@ function ChatInputBar({
|
||||
|
||||
export default function ConversationScreen() {
|
||||
const { id } = useLocalSearchParams<{ id: string }>();
|
||||
const queryClient = useQueryClient();
|
||||
const insets = useSafeAreaInsets();
|
||||
const { t } = useTranslation('conversation');
|
||||
const { t: tApp } = useTranslation('app');
|
||||
const { data: messages } = useMessages(id);
|
||||
const ttsGate = useRef(createTtsPlaybackGate());
|
||||
const {
|
||||
enqueue,
|
||||
enqueueExclusive,
|
||||
stop,
|
||||
status: playerStatus,
|
||||
currentSource,
|
||||
currentPlaybackItem,
|
||||
} = usePlayer();
|
||||
|
||||
// Tells the TTS gate that the user has sent a new message, so incoming TTS
// segments are accepted again after an earlier interrupt.
const handleTtsPlaybackResume = useCallback(() => {
|
||||
ttsGate.current.onUserMessageSent();
|
||||
}, []);
|
||||
|
||||
const handleTtsSegment = useCallback(
|
||||
(p: { audioBase64?: string; audioUrl?: string }) => {
|
||||
(p: TtsSegmentPayload) => {
|
||||
if (!ttsGate.current.shouldAcceptIncomingTts()) return;
|
||||
const convId = id ?? '';
|
||||
const cosUrl = p.audioUrl?.trim();
|
||||
/**
|
||||
* 播放走 WS,但「再读」依赖 MessageItem.ttsAudioUrls。乐观提交的消息没有 URL,
|
||||
* 服务端 attach 要等整轮结束;收到 COS URL 时写入缓存,按钮才能用。
|
||||
*/
|
||||
if (cosUrl && convId) {
|
||||
queryClient.setQueryData<MessageItem[]>(
|
||||
conversationKeys.messages(convId),
|
||||
(old) => {
|
||||
if (!old?.length) return old;
|
||||
let idx = -1;
|
||||
if (p.assistantMessageId) {
|
||||
idx = old.findIndex((m) => m.id === p.assistantMessageId);
|
||||
}
|
||||
if (idx < 0) {
|
||||
for (let i = old.length - 1; i >= 0; i--) {
|
||||
const row = old[i]!;
|
||||
if (row.senderType === 'assistant' && !isVoiceMessage(row)) {
|
||||
idx = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (idx < 0) return old;
|
||||
const target = old[idx]!;
|
||||
const prevUrls = target.ttsAudioUrls ?? [];
|
||||
if (prevUrls.includes(cosUrl)) return old;
|
||||
const nextUrls = [...prevUrls, cosUrl];
|
||||
const nextId =
|
||||
p.assistantMessageId &&
|
||||
(target.id.startsWith(`${convId}_agent_`) ||
|
||||
target.id.startsWith('pending'))
|
||||
? p.assistantMessageId
|
||||
: target.id;
|
||||
const next = [...old];
|
||||
next[idx] = {
|
||||
...target,
|
||||
id: nextId,
|
||||
ttsAudioUrls: nextUrls,
|
||||
};
|
||||
return next;
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
const listKey = p.assistantMessageId ?? TTS_STREAMING_LIST_KEY;
|
||||
const shared = {
|
||||
kind: 'tts_auto' as const,
|
||||
label: 'TTS',
|
||||
messageRef: { listKey },
|
||||
};
|
||||
if (p.audioBase64) {
|
||||
void enqueue({
|
||||
...shared,
|
||||
uri: `data:audio/mp3;base64,${p.audioBase64}`,
|
||||
label: 'TTS',
|
||||
});
|
||||
} else if (p.audioUrl) {
|
||||
void enqueue({ uri: p.audioUrl, label: 'TTS' });
|
||||
void enqueue({ ...shared, uri: p.audioUrl });
|
||||
}
|
||||
},
|
||||
[enqueue],
|
||||
[enqueue, id, queryClient],
|
||||
);
|
||||
|
||||
const handlePlayVoiceExclusive = useCallback(
|
||||
(uri: string) => {
|
||||
void enqueueExclusive({ uri, label: 'voice' });
|
||||
void enqueueExclusive({ uri, label: 'voice', kind: 'voice' });
|
||||
},
|
||||
[enqueueExclusive],
|
||||
);
|
||||
@@ -643,12 +857,42 @@ export default function ConversationScreen() {
|
||||
void stop();
|
||||
}, [stop]);
|
||||
|
||||
const { connectionState, streamingMessage, sendText, sendVoiceMessage } =
|
||||
useRealtimeSession({
|
||||
conversationId: id ?? '',
|
||||
enabled: !!id,
|
||||
onTtsSegment: handleTtsSegment,
|
||||
});
|
||||
/**
 * Replays the stored TTS audio of one assistant message.
 *
 * Does nothing when `urls` is empty. Otherwise stops the current playback, then
 * enqueues every URL in order as a `tts_repeat` item whose `messageRef.listKey`
 * is `messageId`.
 */
const handleReplayAssistantTts = useCallback(
|
||||
(messageId: string, urls: string[]) => {
|
||||
if (!urls.length) return;
|
||||
// Fire-and-forget: the promise is discarded with `void` and there is no catch,
// so a rejection from `stop` or `enqueue` is not handled here.
void (async () => {
|
||||
// Clear whatever is playing before queuing the replay.
await stop();
|
||||
for (const uri of urls) {
|
||||
await enqueue({
|
||||
uri,
|
||||
kind: 'tts_repeat',
|
||||
label: 'TTS',
|
||||
messageRef: { listKey: messageId },
|
||||
});
|
||||
}
|
||||
})();
|
||||
},
|
||||
[enqueue, stop],
|
||||
);
|
||||
|
||||
const {
|
||||
connectionState,
|
||||
streamingMessage,
|
||||
sendText,
|
||||
sendVoiceMessage,
|
||||
sendTtsCancel,
|
||||
} = useRealtimeSession({
|
||||
conversationId: id ?? '',
|
||||
enabled: !!id,
|
||||
onTtsSegment: handleTtsSegment,
|
||||
onTtsPlaybackResume: handleTtsPlaybackResume,
|
||||
});
|
||||
|
||||
// Interrupts assistant read-aloud in three steps: sends a TTS cancel over the
// realtime session, marks the gate as interrupted, then stops local playback.
const handleInterruptAssistantTts = useCallback(() => {
|
||||
sendTtsCancel();
|
||||
ttsGate.current.interrupt();
|
||||
// The promise returned by `stop` is intentionally not awaited.
void stop();
|
||||
}, [sendTtsCancel, stop]);
|
||||
|
||||
const handleRecordingComplete = useCallback(
|
||||
(uri: string, durationMs: number) => {
|
||||
@@ -697,8 +941,12 @@ export default function ConversationScreen() {
|
||||
const ok = await startRecording();
|
||||
if (!ok) {
|
||||
Alert.alert(t('recordingPermissionDenied'));
|
||||
return;
|
||||
}
|
||||
}, [startRecording, t]);
|
||||
sendTtsCancel();
|
||||
ttsGate.current.interrupt();
|
||||
void stop();
|
||||
}, [sendTtsCancel, startRecording, stop, t]);
|
||||
|
||||
const scrollListToEndAfterComposerLayout = useCallback(() => {
|
||||
InteractionManager.runAfterInteractions(() => {
|
||||
@@ -872,12 +1120,16 @@ export default function ConversationScreen() {
|
||||
renderItem={({ item }) => (
|
||||
<MessageBubble
|
||||
item={item}
|
||||
listKey={item.listKey}
|
||||
agentName={t('agentName')}
|
||||
meLabel={t('me')}
|
||||
currentPlaybackUri={currentSource}
|
||||
currentPlaybackItem={currentPlaybackItem}
|
||||
playbackIsPlaying={playerStatus === 'playing'}
|
||||
onPlayVoiceExclusive={handlePlayVoiceExclusive}
|
||||
onPausePlayback={handlePausePlayback}
|
||||
onInterruptAssistantTts={handleInterruptAssistantTts}
|
||||
onReplayAssistantTts={handleReplayAssistantTts}
|
||||
/>
|
||||
)}
|
||||
onContentSizeChange={() =>
|
||||
@@ -891,6 +1143,12 @@ export default function ConversationScreen() {
|
||||
streamingText={streamingMessage.text}
|
||||
isComplete={streamingMessage.isComplete}
|
||||
agentName={t('agentName')}
|
||||
streamingTtsActive={
|
||||
!!streamingMessage &&
|
||||
playerStatus === 'playing' &&
|
||||
currentPlaybackItem?.kind === 'tts_auto'
|
||||
}
|
||||
onStreamingPress={handleInterruptAssistantTts}
|
||||
/>
|
||||
) : null
|
||||
}
|
||||
@@ -1075,6 +1333,53 @@ const styles = StyleSheet.create({
|
||||
borderBottomRightRadius: 12,
|
||||
borderBottomLeftRadius: 4,
|
||||
},
|
||||
bubbleAgentTtsActive: {
|
||||
borderWidth: 1.5,
|
||||
borderColor: 'rgba(129, 119, 166, 0.5)',
|
||||
backgroundColor: 'rgba(231, 222, 255, 0.45)',
|
||||
},
|
||||
readingAloudCaption: {
|
||||
fontSize: 12,
|
||||
lineHeight: 16,
|
||||
marginTop: 6,
|
||||
color: CHAT_COLORS.primary,
|
||||
fontWeight: '500',
|
||||
},
|
||||
streamingTtsCaptionRow: {
|
||||
paddingLeft: 50,
|
||||
marginTop: 4,
|
||||
marginBottom: 8,
|
||||
},
|
||||
readAloudRow: {
|
||||
marginTop: 8,
|
||||
paddingTop: 8,
|
||||
borderTopWidth: StyleSheet.hairlineWidth,
|
||||
borderTopColor: 'rgba(0, 0, 0, 0.08)',
|
||||
},
|
||||
readAloudButton: {
|
||||
flexDirection: 'row',
|
||||
alignItems: 'center',
|
||||
gap: 6,
|
||||
alignSelf: 'flex-start',
|
||||
},
|
||||
readAloudButtonInner: {
|
||||
flexDirection: 'row',
|
||||
alignItems: 'center',
|
||||
gap: 6,
|
||||
alignSelf: 'flex-start',
|
||||
},
|
||||
readAloudButtonLabel: {
|
||||
fontSize: 14,
|
||||
fontWeight: '600',
|
||||
color: CHAT_COLORS.primary,
|
||||
},
|
||||
readAloudButtonLabelDisabled: {
|
||||
color: CHAT_COLORS.onSurfaceVariant,
|
||||
fontWeight: '500',
|
||||
},
|
||||
readAloudButtonDisabled: {
|
||||
opacity: 0.72,
|
||||
},
|
||||
bubbleUser: {
|
||||
backgroundColor: CHAT_COLORS.primary,
|
||||
borderTopLeftRadius: 12,
|
||||
|
||||
@@ -49,6 +49,7 @@ function mapServerMessage(raw: RawServerMessage): WsEvent | null {
|
||||
audioUrl: d.audio_url as string | undefined,
|
||||
index: d.index as number | undefined,
|
||||
total: d.total as number | undefined,
|
||||
assistantMessageId: d.assistant_message_id as string | undefined,
|
||||
};
|
||||
|
||||
case 'end_conversation':
|
||||
@@ -166,6 +167,10 @@ export class WsClient {
|
||||
return this.send({ type: 'text', data: { text } });
|
||||
}
|
||||
|
||||
/**
 * Sends a `tts_cancel` client message with an empty `data` object.
 *
 * @returns The boolean returned by `send` (presumably whether the message was
 * handed to the socket — confirm against `send`).
 */
sendTtsCancel(): boolean {
|
||||
return this.send({ type: 'tts_cancel', data: {} });
|
||||
}
|
||||
|
||||
sendEndConversation(): boolean {
|
||||
return this.send({ type: 'end_conversation', data: {} });
|
||||
}
|
||||
|
||||
@@ -14,6 +14,7 @@ export type ClientMessageType =
|
||||
| 'audio_segment'
|
||||
| 'audio_message'
|
||||
| 'transcribe_only'
|
||||
| 'tts_cancel'
|
||||
| 'end_conversation';
|
||||
|
||||
export interface RawServerMessage {
|
||||
@@ -63,6 +64,8 @@ export interface TtsAudioReceivedEvent {
|
||||
audioUrl?: string;
|
||||
index?: number;
|
||||
total?: number;
|
||||
/** 持久化后的助手消息 id(与 REST `messages` 中 `id` 对齐) */
|
||||
assistantMessageId?: string;
|
||||
}
|
||||
|
||||
export interface ConversationEndedEvent {
|
||||
|
||||
@@ -117,6 +117,8 @@ interface UseRealtimeSessionOptions {
|
||||
conversationId: string;
|
||||
enabled?: boolean;
|
||||
onTtsSegment?: (payload: TtsSegmentPayload) => void;
|
||||
/** 用户发出下一条文本/语音成功后调用,用于恢复接受 TTS 片段(打断后丢弃迟到片段) */
|
||||
onTtsPlaybackResume?: () => void;
|
||||
}
|
||||
|
||||
const MIN_RECORDING_DURATION_SEC = 1;
|
||||
@@ -136,12 +138,14 @@ interface RealtimeSessionState {
|
||||
sendText: (text: string) => void;
|
||||
sendVoiceMessage: (uri: string, durationMs: number) => Promise<boolean>;
|
||||
sendEndConversation: () => void;
|
||||
sendTtsCancel: () => void;
|
||||
}
|
||||
|
||||
export function useRealtimeSession({
|
||||
conversationId,
|
||||
enabled = true,
|
||||
onTtsSegment,
|
||||
onTtsPlaybackResume,
|
||||
}: UseRealtimeSessionOptions): RealtimeSessionState {
|
||||
const queryClient = useQueryClient();
|
||||
const sessionRef = useRef<RealtimeSession | null>(null);
|
||||
@@ -207,6 +211,8 @@ export function useRealtimeSession({
|
||||
return;
|
||||
}
|
||||
|
||||
onTtsPlaybackResume?.();
|
||||
|
||||
const localId = `pending_${Date.now()}`;
|
||||
|
||||
queryClient.setQueryData<MessageItem[]>(
|
||||
@@ -224,7 +230,7 @@ export function useRealtimeSession({
|
||||
},
|
||||
);
|
||||
},
|
||||
[conversationId, queryClient],
|
||||
[conversationId, queryClient, onTtsPlaybackResume],
|
||||
);
|
||||
|
||||
const sendVoiceMessage = useCallback(
|
||||
@@ -276,19 +282,24 @@ export function useRealtimeSession({
|
||||
return [...(old ?? []), msg];
|
||||
},
|
||||
);
|
||||
onTtsPlaybackResume?.();
|
||||
return true;
|
||||
} catch {
|
||||
setError('语音文件读取失败');
|
||||
return false;
|
||||
}
|
||||
},
|
||||
[conversationId, queryClient],
|
||||
[conversationId, queryClient, onTtsPlaybackResume],
|
||||
);
|
||||
|
||||
const sendEndConversation = useCallback(() => {
|
||||
sessionRef.current?.sendEndConversation();
|
||||
}, []);
|
||||
|
||||
// Forwards a TTS cancel to the current session, if one exists; the session's
// boolean result is discarded.
const sendTtsCancel = useCallback(() => {
|
||||
sessionRef.current?.sendTtsCancel();
|
||||
}, []);
|
||||
|
||||
return {
|
||||
connectionState,
|
||||
streamingMessage,
|
||||
@@ -296,5 +307,6 @@ export function useRealtimeSession({
|
||||
sendText,
|
||||
sendVoiceMessage,
|
||||
sendEndConversation,
|
||||
sendTtsCancel,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -19,6 +19,10 @@ export type ErrorCallback = (message: string, code?: string) => void;
|
||||
/** Payload passed to the `onTtsSegment` callback for one received TTS audio segment. */
export type TtsSegmentPayload = {
|
||||
/** Base64-encoded audio data, when the segment is delivered inline. */
audioBase64?: string;
|
||||
/** URL of the audio, when the segment is delivered by reference. */
audioUrl?: string;
|
||||
/** Copied from the received event; presumably the segment's position — confirm with the server contract. */
index?: number;
|
||||
/** Copied from the received event; presumably the number of segments — confirm with the server contract. */
total?: number;
|
||||
/** Assistant message id after server-side persistence; used to align with the bubble list key / message id. */
|
||||
assistantMessageId?: string;
|
||||
};
|
||||
|
||||
interface RealtimeSessionOptions {
|
||||
@@ -116,6 +120,11 @@ export class RealtimeSession {
|
||||
return this.client.sendEndConversation();
|
||||
}
|
||||
|
||||
/**
 * Notifies the server to stop synthesizing and delivering further TTS for the
 * current turn. Meant to be used together with stopping the client-side queue.
 *
 * @returns The boolean returned by the underlying client's `sendTtsCancel`.
 */
|
||||
sendTtsCancel(): boolean {
|
||||
return this.client.sendTtsCancel();
|
||||
}
|
||||
|
||||
getConnectionState(): WsConnectionState {
|
||||
return this.client.getState();
|
||||
}
|
||||
@@ -135,6 +144,9 @@ export class RealtimeSession {
|
||||
this.onTtsSegment?.({
|
||||
audioBase64: b64 || undefined,
|
||||
audioUrl: url || undefined,
|
||||
index: event.index,
|
||||
total: event.total,
|
||||
assistantMessageId: event.assistantMessageId,
|
||||
});
|
||||
}
|
||||
return;
|
||||
|
||||
@@ -66,6 +66,8 @@ export interface MessageItem {
|
||||
durationSeconds?: number;
|
||||
/** 语音文件本地 URI,用于回放,仅本地乐观语音条有值 */
|
||||
audioUri?: string;
|
||||
/** 助手 TTS 已上传的 COS URL 列表(与后端 `ttsAudioUrls` 一致),用于不重合成重复朗读 */
|
||||
ttsAudioUrls?: string[];
|
||||
}
|
||||
|
||||
export interface OrganizeResponse {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { useAudioPlayer, useAudioPlayerStatus } from 'expo-audio';
|
||||
import { useCallback, useEffect, useRef, useState } from 'react';
|
||||
import { useCallback, useEffect, useMemo, useRef, useState } from 'react';
|
||||
|
||||
import { audioFocus } from '@/core/audio/audio-focus';
|
||||
|
||||
@@ -10,6 +10,8 @@ interface UsePlayerResult {
|
||||
queueLength: number;
|
||||
/** Current playback source URI (file, https, or data URL). */
|
||||
currentSource: string | null;
|
||||
/** 当前正在播放的队列项(含 kind / messageRef),队列为空或未开始为 null */
|
||||
currentPlaybackItem: PlaybackItem | null;
|
||||
enqueue: (item: PlaybackItem) => void;
|
||||
/** Replace queue and play this item (e.g. user voice bubble vs other sources). */
|
||||
enqueueExclusive: (item: PlaybackItem) => Promise<void>;
|
||||
@@ -29,6 +31,8 @@ export function usePlayer(): UsePlayerResult {
|
||||
const [status, setStatus] = useState<PlayerStatus>('idle');
|
||||
const [queueLength, setQueueLength] = useState(0);
|
||||
const [currentSource, setCurrentSource] = useState<string | null>(null);
|
||||
const [currentPlaybackItem, setCurrentPlaybackItem] =
|
||||
useState<PlaybackItem | null>(null);
|
||||
const isPlayingRef = useRef(false);
|
||||
const wasBlockedByRecorderRef = useRef(false);
|
||||
const isPlayNextInProgressRef = useRef(false);
|
||||
@@ -37,16 +41,30 @@ export function usePlayer(): UsePlayerResult {
|
||||
/** 当前 source 是否已进入过 playing=true,避免换源瞬间 playerStatus 仍带上一首的 duration 而误判「已播完」。 */
|
||||
const trackHasPlayedRef = useRef(false);
|
||||
|
||||
const player = useAudioPlayer(currentSource, { downloadFirst: false });
|
||||
/**
 * Remote HTTP(S) sources must be downloaded before decoding; otherwise a replay
 * (URL only, no base64) may be silent. Local and data-URL sources keep
 * `downloadFirst` as false.
 */
|
||||
const playerOptions = useMemo(() => {
|
||||
// A source counts as remote when it is a string starting with `https://` or `http://`.
const remote =
|
||||
typeof currentSource === 'string' &&
|
||||
(currentSource.startsWith('https://') ||
|
||||
currentSource.startsWith('http://'));
|
||||
return { downloadFirst: remote };
|
||||
}, [currentSource]);
|
||||
|
||||
const player = useAudioPlayer(currentSource, playerOptions);
|
||||
const playerStatus = useAudioPlayerStatus(player);
|
||||
|
||||
// Start playback when a new source is set
|
||||
/**
|
||||
* 必须在 `isLoaded` 之后再 `play()`。
|
||||
* expo-audio 在 `downloadFirst: true` 时先用 null 建 player,再在内部 effect 里异步
|
||||
* `resolveSourceWithDownload` 后 `replace()`(见 node_modules/expo-audio/build/ExpoAudio.js)。
|
||||
* 若仅在 `currentSource` 变化时立刻 `play()`,会在 replace 完成前播放 → 远程 URL(再读)无声。
|
||||
*/
|
||||
useEffect(() => {
|
||||
if (currentSource && player) {
|
||||
player.play();
|
||||
isPlayingRef.current = true;
|
||||
}
|
||||
}, [currentSource, player]);
|
||||
if (!currentSource || !player) return;
|
||||
if (!playerStatus.isLoaded) return;
|
||||
player.play();
|
||||
isPlayingRef.current = true;
|
||||
}, [currentSource, player, playerStatus.isLoaded]);
|
||||
|
||||
const playNext = useCallback(async () => {
|
||||
if (isPlayNextInProgressRef.current) return;
|
||||
@@ -54,6 +72,7 @@ export function usePlayer(): UsePlayerResult {
|
||||
try {
|
||||
if (queueRef.current.length === 0) {
|
||||
playbackActiveUriRef.current = null;
|
||||
setCurrentPlaybackItem(null);
|
||||
setCurrentSource(null);
|
||||
setStatus('idle');
|
||||
setQueueLength(0);
|
||||
@@ -74,6 +93,7 @@ export function usePlayer(): UsePlayerResult {
|
||||
setStatus('playing');
|
||||
trackHasPlayedRef.current = false;
|
||||
playbackActiveUriRef.current = next.uri;
|
||||
setCurrentPlaybackItem(next);
|
||||
setCurrentSource(next.uri);
|
||||
} finally {
|
||||
isPlayNextInProgressRef.current = false;
|
||||
@@ -147,6 +167,7 @@ export function usePlayer(): UsePlayerResult {
|
||||
player.pause();
|
||||
}
|
||||
playbackActiveUriRef.current = null;
|
||||
setCurrentPlaybackItem(null);
|
||||
setCurrentSource(null);
|
||||
setStatus('idle');
|
||||
await audioFocus.release();
|
||||
@@ -165,6 +186,7 @@ export function usePlayer(): UsePlayerResult {
|
||||
}
|
||||
|
||||
playbackActiveUriRef.current = null;
|
||||
setCurrentPlaybackItem(null);
|
||||
setCurrentSource(null);
|
||||
setStatus('idle');
|
||||
await audioFocus.release();
|
||||
@@ -174,6 +196,7 @@ export function usePlayer(): UsePlayerResult {
|
||||
status,
|
||||
queueLength,
|
||||
currentSource,
|
||||
currentPlaybackItem,
|
||||
enqueue,
|
||||
enqueueExclusive,
|
||||
stop,
|
||||
|
||||
17
app-expo/src/features/voice/tts-playback-gate.ts
Normal file
17
app-expo/src/features/voice/tts-playback-gate.ts
Normal file
@@ -0,0 +1,17 @@
|
||||
/**
|
||||
 * Creates a gate that decides whether incoming TTS segments should be accepted.
 *
 * After TTS is interrupted the server may still push late `tts_audio` messages;
 * those segments are dropped until a new conversation round resumes.
|
||||
 * `interrupt` is called when recording starts or when a bubble is tapped to stop;
 * `onUserMessageSent` is called after the user's next text/voice message has been
 * sent successfully.
|
||||
 *
 * @returns An object with `interrupt`, `onUserMessageSent` and
 * `shouldAcceptIncomingTts`, all sharing one private flag.
 */
|
||||
export function createTtsPlaybackGate() {
|
||||
// True while late segments must be discarded; starts open (false).
let dropLateSegments = false;
|
||||
|
||||
return {
|
||||
// Close the gate: segments arriving from now on are rejected.
interrupt: () => {
|
||||
dropLateSegments = true;
|
||||
},
|
||||
// Re-open the gate for the next round.
onUserMessageSent: () => {
|
||||
dropLateSegments = false;
|
||||
},
|
||||
// True when the gate is open.
shouldAcceptIncomingTts: () => !dropLateSegments,
|
||||
};
|
||||
}
|
||||
@@ -31,7 +31,12 @@ export interface SegmentOutboxEntry {
|
||||
|
||||
export type PlayerStatus = 'idle' | 'loading' | 'playing' | 'paused' | 'error';
|
||||
|
||||
/**
 * Origin of a playback queue item: `tts_auto` for TTS segments played as they
 * arrive, `tts_repeat` for a user-requested replay of stored TTS audio, and
 * `voice` for a voice message.
 */
export type PlaybackItemKind = 'tts_auto' | 'tts_repeat' | 'voice';
|
||||
|
||||
/** One entry in the audio playback queue. */
export interface PlaybackItem {
|
||||
/** Source to play. */
uri: string;
|
||||
/** Optional label for the item. */
label?: string;
|
||||
/** Optional origin of the item; see `PlaybackItemKind`. */
kind?: PlaybackItemKind;
|
||||
/** Aligned with the `listKey` from `flattenMessagesForList`; used to highlight the bubble being read aloud and to stop on bubble tap. */
|
||||
messageRef?: { listKey: string };
|
||||
}
|
||||
|
||||
@@ -65,6 +65,7 @@ interface Resources {
|
||||
agentName: 'Life Echo';
|
||||
cancel: 'Cancel';
|
||||
cancelRecording: 'Cancel recording';
|
||||
cannotReadAloud: 'Read unavailable';
|
||||
chatQueueSendTimeout: 'Connection timed out. Check your network and try again.';
|
||||
chatTitle: 'Conversation';
|
||||
chatUnavailableConnecting: 'Reconnecting now. You can keep typing and send once the connection is back.';
|
||||
@@ -83,10 +84,13 @@ interface Resources {
|
||||
inputPlaceholder: 'Type a message...';
|
||||
inputPlaceholderVoice: 'Type here or hold the mic to speak...';
|
||||
me: 'Me';
|
||||
readAloudAgain: 'Play again';
|
||||
readingAloud: 'Reading aloud…';
|
||||
recentChats: 'Recent Chats';
|
||||
recordingPermissionDenied: 'Microphone permission is required to record';
|
||||
send: 'Send';
|
||||
startNewSubtitle: 'Capture a new memory or share your thoughts with your companion.';
|
||||
stopReadingAloud: 'Stop reading aloud';
|
||||
switchToText: 'Switch to text input';
|
||||
switchToVoice: 'Switch to voice input';
|
||||
tapToEndRecording: 'Tap to end';
|
||||
|
||||
@@ -22,6 +22,10 @@
|
||||
"inputPlaceholderVoice": "Type here or hold the mic to speak...",
|
||||
"me": "Me",
|
||||
"recentChats": "Recent Chats",
|
||||
"stopReadingAloud": "Stop reading aloud",
|
||||
"readAloudAgain": "Play again",
|
||||
"cannotReadAloud": "Read unavailable",
|
||||
"readingAloud": "Reading aloud…",
|
||||
"recordingPermissionDenied": "Microphone permission is required to record",
|
||||
"send": "Send",
|
||||
"startNewSubtitle": "Capture a new memory or share your thoughts with your companion.",
|
||||
|
||||
@@ -22,6 +22,10 @@
|
||||
"inputPlaceholderVoice": "点击这里输入,或者按住左边说话...",
|
||||
"me": "我",
|
||||
"recentChats": "最近对话",
|
||||
"stopReadingAloud": "停止朗读",
|
||||
"readAloudAgain": "再读",
|
||||
"cannotReadAloud": "暂无法朗读",
|
||||
"readingAloud": "朗读中…",
|
||||
"recordingPermissionDenied": "需要麦克风权限才能录音",
|
||||
"send": "发送",
|
||||
"startNewSubtitle": "记录新回忆,或与岁月知己分享你的想法。",
|
||||
|
||||
Reference in New Issue
Block a user