fix(conversation): 离屏不丢回复、列表预热 WS 与非阻塞进入聊天
- 后端:文本/转写后 AI 生成改为独立任务,避免断连取消整轮;按需 TTS 等与 WS 改动
- 前端:RealtimeSession 重绑 UI 时恢复流式 buffer;列表 onPressIn/挂载预热、已有会话立即 push
- 同步会话相关类型、i18n、测试与 env/资源等累计改动

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
import type { QueryClient } from '@tanstack/react-query';
|
||||
|
||||
import { acquireBackgroundConversationWs } from './conversation-ws-background-pool';
|
||||
import { conversationMessagesRepository } from './conversation-messages-repository';
|
||||
import { conversationKeys } from './query-keys';
|
||||
import { registerPreparedRealtimeSession } from './prepared-session-registry';
|
||||
@@ -51,6 +52,54 @@ export async function prefetchConversationMessages(
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Placeholder UI callbacks for a session that has no mounted chat screen.
 * Every handler ignores its event and returns nothing.
 */
const offscreenUiCallbacks = {
  onStreamingText(): void {},
  onTtsSegment(): void {},
  onError(): void {},
  onStateChange(): void {},
};

/** Conversation ids whose prewarm-triggered message prefetch has not settled yet. */
const inflightPrewarms: Set<string> = new Set();
|
||||
|
||||
/**
 * Prewarm used by the conversation list (mount / card press-in): keeps a
 * background WS connection for the conversation and kicks off filling the
 * message cache.
 *
 * Unlike `warmupConversationOpening`, this neither waits for the opening
 * message nor blocks the caller; it is only meant for conversations that
 * already have messages.
 *
 * @param queryClient React Query client whose message cache is read and filled.
 * @param conversationId Conversation to prewarm; an empty id makes this a no-op.
 */
export function prewarmConversationSession(
  queryClient: QueryClient,
  conversationId: string,
): void {
  if (!conversationId) return;

  const session = acquireBackgroundConversationWs(
    conversationId,
    queryClient,
    null,
  );
  // No UI is mounted while prewarming, so park no-op callbacks for now; the
  // chat screen re-attaches its own callbacks when it mounts.
  session.attachUiCallbacks(offscreenUiCallbacks);

  // A prefetch started by an earlier prewarm is still running: don't start another.
  if (inflightPrewarms.has(conversationId)) return;

  const cached = queryClient.getQueryData<MessageItem[]>(
    conversationKeys.messages(conversationId),
  );
  // With messages already cached, leave refreshing to React Query's staleTime;
  // only a first entry gets a background prefetch.
  if (cached && cached.length > 0) return;

  inflightPrewarms.add(conversationId);
  void prefetchConversationMessages(queryClient, conversationId)
    // Fire-and-forget and best-effort: a failed prefetch must not surface as
    // an unhandled promise rejection.
    .catch(() => undefined)
    .finally(() => {
      inflightPrewarms.delete(conversationId);
    });
}
|
||||
|
||||
/**
 * Fetches the conversation's messages through React Query with `staleTime: 0`
 * and resolves once that fetch resolves. The fetched data is not returned;
 * a rejected fetch propagates to the caller.
 *
 * @param queryClient React Query client that runs the fetch.
 * @param conversationId Conversation whose messages are loaded.
 */
async function refreshConversationMessagesForWarmup(
  queryClient: QueryClient,
  conversationId: string,
): Promise<void> {
  const messagesKey = conversationKeys.messages(conversationId);
  const loadFromRepository = () =>
    conversationMessagesRepository.loadMessages(conversationId);

  await queryClient.fetchQuery({
    queryKey: messagesKey,
    queryFn: loadFromRepository,
    staleTime: 0,
  });
}
|
||||
|
||||
/**
|
||||
* 在会话列表阶段连接 WS 并等待首条助手开场写入 React Query;成功后挂起会话供聊天页接棒。
|
||||
* 超时或失败则 dispose,由聊天页自行重连(服务端若已写入 history 不会重复开场)。
|
||||
@@ -59,8 +108,13 @@ export async function warmupConversationOpening(
|
||||
queryClient: QueryClient,
|
||||
conversationId: string,
|
||||
): Promise<void> {
|
||||
/**
|
||||
* 先走 REST 历史预取:若 access token 已过期,API client 会在这里刷新 token;
|
||||
* 也避免 Redis/DB 已有开场白但本地缓存仍为空时继续等 WS。
|
||||
*/
|
||||
await refreshConversationMessagesForWarmup(queryClient, conversationId);
|
||||
|
||||
if (cacheHasAssistantMessage(queryClient, conversationId)) {
|
||||
await prefetchConversationMessages(queryClient, conversationId);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -174,6 +174,8 @@ interface UseRealtimeSessionOptions {
|
||||
onTtsSegment?: (payload: TtsSegmentPayload) => void;
|
||||
/** 用户发出下一条文本/语音成功后调用,用于恢复接受 TTS 片段(打断后丢弃迟到片段) */
|
||||
onTtsPlaybackResume?: () => void;
|
||||
/** 本条发送是否请求了「本轮助手朗读」,用于仅在该轮自动播放 WS TTS */
|
||||
onUserSendTtsPreference?: (requestedTts: boolean) => void;
|
||||
}
|
||||
|
||||
const MIN_RECORDING_DURATION_SEC = 1;
|
||||
@@ -192,10 +194,19 @@ interface RealtimeSessionState {
|
||||
/** 已发出用户消息,尚未收到助手首段流式文本(用于「正在回复」气泡) */
|
||||
awaitingAssistantReply: boolean;
|
||||
error: string | null;
|
||||
sendText: (text: string) => void;
|
||||
sendVoiceMessage: (uri: string, durationMs: number) => Promise<boolean>;
|
||||
sendText: (text: string, options?: { ttsThisTurn?: boolean }) => void;
|
||||
sendVoiceMessage: (
|
||||
uri: string,
|
||||
durationMs: number,
|
||||
options?: { ttsThisTurn?: boolean },
|
||||
) => Promise<boolean>;
|
||||
sendEndConversation: () => void;
|
||||
sendTtsCancel: () => void;
|
||||
requestAssistantSegmentTts: (body: {
|
||||
assistantMessageId: string;
|
||||
segmentIndex: number;
|
||||
segmentText?: string;
|
||||
}) => boolean;
|
||||
}
|
||||
|
||||
export function useRealtimeSession({
|
||||
@@ -203,6 +214,7 @@ export function useRealtimeSession({
|
||||
enabled = true,
|
||||
onTtsSegment,
|
||||
onTtsPlaybackResume,
|
||||
onUserSendTtsPreference,
|
||||
}: UseRealtimeSessionOptions): RealtimeSessionState {
|
||||
const queryClient = useQueryClient();
|
||||
const sessionRef = useRef<RealtimeSession | null>(null);
|
||||
@@ -301,15 +313,17 @@ export function useRealtimeSession({
|
||||
}, [conversationId, enabled, queryClient, foregroundResumeGeneration]);
|
||||
|
||||
const sendText = useCallback(
|
||||
(text: string) => {
|
||||
(text: string, options?: { ttsThisTurn?: boolean }) => {
|
||||
if (!sessionRef.current) return;
|
||||
|
||||
const sent = sessionRef.current.sendText(text);
|
||||
const sent = sessionRef.current.sendText(text, options);
|
||||
if (!sent) {
|
||||
setError('消息发送失败,连接未就绪');
|
||||
return;
|
||||
}
|
||||
|
||||
onUserSendTtsPreference?.(options?.ttsThisTurn === true);
|
||||
|
||||
setAwaitingAssistantReply(true);
|
||||
onTtsPlaybackResume?.();
|
||||
|
||||
@@ -342,11 +356,15 @@ export function useRealtimeSession({
|
||||
},
|
||||
);
|
||||
},
|
||||
[conversationId, queryClient, onTtsPlaybackResume],
|
||||
[conversationId, queryClient, onTtsPlaybackResume, onUserSendTtsPreference],
|
||||
);
|
||||
|
||||
const sendVoiceMessage = useCallback(
|
||||
async (uri: string, durationMs: number): Promise<boolean> => {
|
||||
async (
|
||||
uri: string,
|
||||
durationMs: number,
|
||||
options?: { ttsThisTurn?: boolean },
|
||||
): Promise<boolean> => {
|
||||
const session = sessionRef.current;
|
||||
if (!session) return false;
|
||||
|
||||
@@ -363,12 +381,15 @@ export function useRealtimeSession({
|
||||
clientSegmentId: `${voiceSessionId}-0`,
|
||||
isLast: true,
|
||||
duration: durationSec,
|
||||
ttsThisTurn: options?.ttsThisTurn,
|
||||
});
|
||||
if (!sent) {
|
||||
setError('语音发送失败,连接未就绪');
|
||||
return false;
|
||||
}
|
||||
|
||||
onUserSendTtsPreference?.(options?.ttsThisTurn === true);
|
||||
|
||||
setAwaitingAssistantReply(true);
|
||||
const localId = `pending_voice_${Date.now()}`;
|
||||
await voiceSegmentStore.recordSentSegment({
|
||||
@@ -413,7 +434,7 @@ export function useRealtimeSession({
|
||||
return false;
|
||||
}
|
||||
},
|
||||
[conversationId, queryClient, onTtsPlaybackResume],
|
||||
[conversationId, queryClient, onTtsPlaybackResume, onUserSendTtsPreference],
|
||||
);
|
||||
|
||||
const sendEndConversation = useCallback(() => {
|
||||
@@ -424,6 +445,15 @@ export function useRealtimeSession({
|
||||
sessionRef.current?.sendTtsCancel();
|
||||
}, []);
|
||||
|
||||
const requestAssistantSegmentTts = useCallback(
|
||||
(body: {
|
||||
assistantMessageId: string;
|
||||
segmentIndex: number;
|
||||
segmentText?: string;
|
||||
}) => sessionRef.current?.requestAssistantSegmentTts(body) ?? false,
|
||||
[],
|
||||
);
|
||||
|
||||
return {
|
||||
connectionState,
|
||||
streamingMessage,
|
||||
@@ -433,5 +463,6 @@ export function useRealtimeSession({
|
||||
sendVoiceMessage,
|
||||
sendEndConversation,
|
||||
sendTtsCancel,
|
||||
requestAssistantSegmentTts,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -12,6 +12,13 @@ import { assistantSegmentMessageId, lastSegmentPreview } from './message-split';
|
||||
import { conversationKeys } from './query-keys';
|
||||
import type { ConversationListItem, MessageItem } from './types';
|
||||
|
||||
/**
 * Reports whether `id` has the canonical 8-4-4-4-12 hexadecimal UUID shape
 * (case-insensitive, whole string).
 *
 * Kept in line with the UUID check applied to persisted assistant message ids
 * and with the conversation screen's `durableAssistantIdForBubble`.
 */
function looksLikeUuidAssistantMessageId(id: string): boolean {
  const uuidShape =
    /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
  return uuidShape.test(id);
}
|
||||
|
||||
export type StreamingTextCallback = (text: string, isComplete: boolean) => void;
|
||||
export type ErrorCallback = (message: string, code?: string) => void;
|
||||
|
||||
@@ -23,6 +30,8 @@ export type TtsSegmentPayload = {
|
||||
total?: number;
|
||||
/** 服务端持久化后的助手消息 id,用于与气泡 listKey / 消息 id 对齐 */
|
||||
assistantMessageId?: string;
|
||||
/** 用户点喇叭按需下发时为 true,应加入播放队列(即使未开「本轮朗读」) */
|
||||
manual?: boolean;
|
||||
};
|
||||
|
||||
interface RealtimeSessionOptions {
|
||||
@@ -62,6 +71,17 @@ export class RealtimeSession {
|
||||
private pendingAssistantMessageId: string | null = null;
|
||||
private destroyed = false;
|
||||
|
||||
/** 本条用户消息是否请求「先 TTS 再出字」的助手轮次 */
|
||||
private assistantTurnTtsSync = false;
|
||||
private pendingTtsByKey = new Map<string, TtsSegmentPayload>();
|
||||
|
||||
/**
 * Builds the key used in `pendingTtsByKey`: `<assistantMessageId>:<index>`,
 * with `_` standing in when the assistant message id is null or undefined.
 */
private static bufferedTtsKey(
  assistantMessageId: string | undefined,
  index: number,
): string {
  const idPart = assistantMessageId ?? '_';
  return `${idPart}:${index}`;
}
|
||||
|
||||
constructor(options: RealtimeSessionOptions) {
|
||||
this.client = new WsClient(options.conversationId);
|
||||
this.conversationId = options.conversationId;
|
||||
@@ -99,6 +119,9 @@ export class RealtimeSession {
|
||||
this.uiStateListener = options.onStateChange;
|
||||
options.onStateChange(this.client.getState());
|
||||
}
|
||||
if (!this.assistantTurnTtsSync && this.streamingBuffer.trim().length > 0) {
|
||||
this.onStreamingText?.(this.streamingBuffer, false);
|
||||
}
|
||||
}
|
||||
|
||||
async connect(): Promise<void> {
|
||||
@@ -113,14 +136,20 @@ export class RealtimeSession {
|
||||
if (this.destroyed) return;
|
||||
this.destroyed = true;
|
||||
this.flushStreamingBufferIfPending();
|
||||
this.resetAssistantTtsSyncState();
|
||||
this.unsubEvent?.();
|
||||
this.unsubState?.();
|
||||
this.client.dispose();
|
||||
}
|
||||
|
||||
/**
 * Sends a user text message and, when it goes out, records whether this turn
 * asked for TTS so the assistant reply is handled in TTS-sync mode.
 *
 * @param text Message text to send.
 * @param options `ttsThisTurn` requests TTS for the assistant reply of this turn.
 * @returns true if the message was sent over the socket.
 */
sendText(
  text: string,
  options?: { ttsThisTurn?: boolean },
): boolean {
  const tts = !!options?.ttsThisTurn;
  const sent = this.client.sendText(text, { ttsThisTurn: tts });
  // Only a message that actually went out starts a new assistant turn. A
  // failed send must leave the sync mode of any turn still in flight untouched.
  if (sent) {
    this.assistantTurnTtsSync = tts;
  }
  return sent;
}
|
||||
|
||||
sendAudioSegment(
|
||||
@@ -131,8 +160,11 @@ export class RealtimeSession {
|
||||
clientSegmentId?: string;
|
||||
isLast?: boolean;
|
||||
duration?: number;
|
||||
ttsThisTurn?: boolean;
|
||||
},
|
||||
): boolean {
|
||||
const tts = !!options?.ttsThisTurn;
|
||||
this.assistantTurnTtsSync = tts;
|
||||
return this.client.send({
|
||||
type: 'audio_segment',
|
||||
data: {
|
||||
@@ -142,6 +174,7 @@ export class RealtimeSession {
|
||||
client_segment_id: options?.clientSegmentId,
|
||||
is_last: options?.isLast,
|
||||
duration: options?.duration,
|
||||
...(options?.ttsThisTurn === true ? { tts_this_turn: true } : {}),
|
||||
},
|
||||
});
|
||||
}
|
||||
@@ -152,6 +185,7 @@ export class RealtimeSession {
|
||||
|
||||
/**
 * Tells the server to stop synthesizing and sending further TTS for the
 * current turn (used together with the client stopping its playback queue).
 * The local TTS-sync state is reset before the cancel is sent.
 *
 * @returns the result reported by the WS client's `sendTtsCancel`.
 */
sendTtsCancel(): boolean {
  this.resetAssistantTtsSyncState();
  const sent = this.client.sendTtsCancel();
  return sent;
}
|
||||
|
||||
@@ -159,8 +193,40 @@ export class RealtimeSession {
|
||||
return this.client.getState();
|
||||
}
|
||||
|
||||
/**
 * Forwards a TTS request for one assistant message segment to the WS client.
 *
 * @param body Identifies the segment (`assistantMessageId`, `segmentIndex`)
 *   and optionally carries its text.
 * @returns the result reported by the WS client's `sendTtsRequest`.
 */
requestAssistantSegmentTts(body: {
  assistantMessageId: string;
  segmentIndex: number;
  segmentText?: string;
}): boolean {
  const accepted = this.client.sendTtsRequest(body);
  return accepted;
}
|
||||
|
||||
// ─── Internal ───
|
||||
|
||||
/** Leaves TTS-sync mode: drops every buffered TTS payload and clears the sync flag. */
private resetAssistantTtsSyncState(): void {
  this.pendingTtsByKey.clear();
  this.assistantTurnTtsSync = false;
}
|
||||
|
||||
/**
 * In TTS-sync mode, hands the TTS payload buffered for the given assistant
 * message / segment index to `onTtsSegment` and removes it from the buffer.
 * Does nothing outside sync mode or when no payload is buffered under that key.
 */
private flushBufferedTtsIfSync(
  assistantMessageId: string | undefined,
  index: number,
): void {
  if (this.assistantTurnTtsSync) {
    const bufferKey = RealtimeSession.bufferedTtsKey(assistantMessageId, index);
    const buffered = this.pendingTtsByKey.get(bufferKey);
    if (!buffered) return;

    // Remove before emitting so the payload is delivered at most once.
    this.pendingTtsByKey.delete(bufferKey);
    this.onTtsSegment?.(buffered);
  }
}
|
||||
|
||||
/**
 * Resets the TTS-sync state once the final segment of the assistant turn has
 * been handled, i.e. when `index` is at or past `total - 1`.
 */
private finishAssistantTurnIfLastSegment(index: number, total: number): void {
  const isLastSegment = index >= total - 1;
  if (!isLastSegment) return;
  this.resetAssistantTtsSyncState();
}
|
||||
|
||||
private handleEvent: WsEventListener = (event: WsEvent) => {
|
||||
if (event.kind === 'agent_response') {
|
||||
this.handleAgentChunk(event);
|
||||
@@ -170,14 +236,23 @@ export class RealtimeSession {
|
||||
if (event.kind === 'tts_audio_received') {
|
||||
const b64 = event.audioBase64?.trim();
|
||||
const url = event.audioUrl?.trim();
|
||||
if (b64 || url) {
|
||||
this.onTtsSegment?.({
|
||||
audioBase64: b64 || undefined,
|
||||
audioUrl: url || undefined,
|
||||
index: event.index,
|
||||
total: event.total,
|
||||
assistantMessageId: event.assistantMessageId,
|
||||
});
|
||||
if (!b64 && !url) {
|
||||
return;
|
||||
}
|
||||
const payload: TtsSegmentPayload = {
|
||||
audioBase64: b64 || undefined,
|
||||
audioUrl: url || undefined,
|
||||
index: event.index,
|
||||
total: event.total,
|
||||
assistantMessageId: event.assistantMessageId,
|
||||
manual: event.manual,
|
||||
};
|
||||
if (this.assistantTurnTtsSync && !payload.manual) {
|
||||
const idx = event.index ?? 0;
|
||||
const key = RealtimeSession.bufferedTtsKey(event.assistantMessageId, idx);
|
||||
this.pendingTtsByKey.set(key, payload);
|
||||
} else {
|
||||
this.onTtsSegment?.(payload);
|
||||
}
|
||||
return;
|
||||
}
|
||||
@@ -185,6 +260,7 @@ export class RealtimeSession {
|
||||
handleWsEvent(this.queryClient, event);
|
||||
|
||||
if (event.kind === 'session_error') {
|
||||
this.resetAssistantTtsSyncState();
|
||||
this.onError?.(event.message, event.code);
|
||||
}
|
||||
};
|
||||
@@ -208,14 +284,19 @@ export class RealtimeSession {
|
||||
|
||||
const total = event.total ?? 1;
|
||||
const index = event.index ?? 0;
|
||||
const sync = this.assistantTurnTtsSync;
|
||||
|
||||
if (total > 1) {
|
||||
const id =
|
||||
event.assistantMessageId != null
|
||||
? assistantSegmentMessageId(event.assistantMessageId, index)
|
||||
: `${this.conversationId}_agent_${Date.now()}_${index}`;
|
||||
if (sync) {
|
||||
this.flushBufferedTtsIfSync(event.assistantMessageId, index);
|
||||
}
|
||||
this.commitOneAssistantMessage(event.text, id);
|
||||
this.onStreamingText?.(event.text, true);
|
||||
this.finishAssistantTurnIfLastSegment(index, total);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -224,18 +305,30 @@ export class RealtimeSession {
|
||||
}
|
||||
|
||||
this.streamingBuffer += event.text;
|
||||
|
||||
// 与 coerced index/total 对齐:若服务端只带 text、省略 index/total,旧逻辑会 isComplete=false,永远不落库
|
||||
const isComplete = index >= total - 1;
|
||||
|
||||
this.onStreamingText?.(this.streamingBuffer, isComplete);
|
||||
if (!sync) {
|
||||
this.onStreamingText?.(this.streamingBuffer, isComplete);
|
||||
}
|
||||
|
||||
if (isComplete) {
|
||||
const assistantId =
|
||||
event.assistantMessageId ?? this.pendingAssistantMessageId;
|
||||
const id =
|
||||
this.pendingAssistantMessageId ??
|
||||
`${this.conversationId}_agent_${Date.now()}`;
|
||||
this.commitStreamingBufferWithId(id);
|
||||
if (sync) {
|
||||
this.flushBufferedTtsIfSync(assistantId ?? undefined, 0);
|
||||
this.commitStreamingBufferWithId(id);
|
||||
const visible =
|
||||
this.streamingBuffer.trim().length > 0 ? this.streamingBuffer : '…';
|
||||
this.onStreamingText?.(visible, true);
|
||||
} else {
|
||||
this.commitStreamingBufferWithId(id);
|
||||
}
|
||||
this.streamingBuffer = '';
|
||||
this.pendingAssistantMessageId = null;
|
||||
this.finishAssistantTurnIfLastSegment(0, 1);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -249,6 +342,9 @@ export class RealtimeSession {
|
||||
senderType: 'assistant',
|
||||
timestamp: Date.now(),
|
||||
messageType: 'text',
|
||||
...(looksLikeUuidAssistantMessageId(id)
|
||||
? { durableMessageId: id }
|
||||
: {}),
|
||||
};
|
||||
return [...(old ?? []), message];
|
||||
});
|
||||
@@ -262,7 +358,6 @@ export class RealtimeSession {
|
||||
}
|
||||
|
||||
const fullText = this.streamingBuffer;
|
||||
this.streamingBuffer = '';
|
||||
const content = fullText.trim().length > 0 ? fullText : '…';
|
||||
|
||||
const messagesKey = conversationKeys.messages(this.conversationId);
|
||||
@@ -274,6 +369,9 @@ export class RealtimeSession {
|
||||
senderType: 'assistant',
|
||||
timestamp: Date.now(),
|
||||
messageType: 'text',
|
||||
...(looksLikeUuidAssistantMessageId(messageId)
|
||||
? { durableMessageId: messageId }
|
||||
: {}),
|
||||
};
|
||||
return [...(old ?? []), message];
|
||||
});
|
||||
@@ -305,6 +403,7 @@ export class RealtimeSession {
|
||||
this.pendingAssistantMessageId ??
|
||||
`${this.conversationId}_agent_${Date.now()}`;
|
||||
this.commitStreamingBufferWithId(id);
|
||||
this.streamingBuffer = '';
|
||||
this.pendingAssistantMessageId = null;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -72,6 +72,8 @@ export interface MessageItem {
|
||||
audioUri?: string;
|
||||
/** 助手 TTS 已上传的 COS URL 列表(与后端 `ttsAudioUrls` 一致),用于不重合成重复朗读 */
|
||||
ttsAudioUrls?: string[];
|
||||
/** 落库后的助手消息 id(REST 历史同步),用于按需 TTS 请求 */
|
||||
durableMessageId?: string;
|
||||
}
|
||||
|
||||
export interface OrganizeResponse {
|
||||
|
||||
Reference in New Issue
Block a user