fix(conversation): 离屏不丢回复、列表预热 WS 与非阻塞进入聊天

- 后端:文本/转写后 AI 生成改为独立任务,避免断连取消整轮;按需 TTS 等与 WS 改动
- 前端:RealtimeSession 重绑 UI 时恢复流式 buffer;列表 onPressIn/挂载预热、已有会话立即 push
- 同步会话相关类型、i18n、测试与 env/资源等累计改动

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Kevin
2026-05-08 17:28:31 +08:00
parent 5dac3efd52
commit d0c26242db
44 changed files with 1209 additions and 212 deletions

View File

@@ -1,5 +1,6 @@
import type { QueryClient } from '@tanstack/react-query';
import { acquireBackgroundConversationWs } from './conversation-ws-background-pool';
import { conversationMessagesRepository } from './conversation-messages-repository';
import { conversationKeys } from './query-keys';
import { registerPreparedRealtimeSession } from './prepared-session-registry';
@@ -51,6 +52,54 @@ export async function prefetchConversationMessages(
});
}
/** Do-nothing handler used for every UI event that arrives while no chat screen is mounted. */
const ignoreOffscreenEvent = (): void => {};

/**
 * Placeholder UI callbacks handed to a session while it is being prewarmed,
 * i.e. before any chat screen exists to receive streaming text, TTS segments,
 * errors or connection-state changes.
 */
const offscreenUiCallbacks = {
  onStreamingText: ignoreOffscreenEvent,
  onTtsSegment: ignoreOffscreenEvent,
  onError: ignoreOffscreenEvent,
  onStateChange: ignoreOffscreenEvent,
};

/** Ids of conversations whose prewarm message prefetch has been started and not yet settled. */
const inflightPrewarms: Set<string> = new Set();
/**
 * Prewarm for the list page / card press: keeps a background WS connection for the
 * conversation and triggers a fill of the message cache.
 *
 * Unlike `warmupConversationOpening`, this does not wait for the opening message and
 * never blocks the caller; it is only meant for conversations that already have messages.
 *
 * @param queryClient React Query client whose message cache is inspected and prefetched into.
 * @param conversationId Conversation to prewarm; an empty id makes the call a no-op.
 */
export function prewarmConversationSession(
  queryClient: QueryClient,
  conversationId: string,
): void {
  if (!conversationId) return;

  const session = acquireBackgroundConversationWs(conversationId, queryClient, null);
  // No UI is mounted during prewarm, so attach no-op callbacks as placeholders;
  // the chat page re-attaches its own callbacks when it mounts.
  // NOTE(review): this runs on every call, including repeated presses — confirm that
  // re-attaching placeholders cannot replace callbacks of an already-mounted chat page.
  session.attachUiCallbacks(offscreenUiCallbacks);

  // A prefetch for this conversation is already running; do not start a second one.
  if (inflightPrewarms.has(conversationId)) return;

  const cached = queryClient.getQueryData<MessageItem[]>(
    conversationKeys.messages(conversationId),
  );
  // With a non-empty cache, leave refreshing to React Query's staleTime;
  // only the very first entry gets a background prefetch.
  if (cached && cached.length > 0) return;

  inflightPrewarms.add(conversationId);
  void prefetchConversationMessages(queryClient, conversationId)
    .catch(() => {
      // Prewarm is best-effort and has no caller to report to. Swallow the failure
      // here so a rejected prefetch does not become an unhandled promise rejection.
    })
    .finally(() => {
      inflightPrewarms.delete(conversationId);
    });
}
/**
 * Fetches the conversation's messages through React Query with `staleTime: 0`,
 * loading them via `conversationMessagesRepository.loadMessages`.
 *
 * @param queryClient React Query client that owns the message cache.
 * @param conversationId Conversation whose messages are fetched.
 * @returns A promise that settles when the fetch settles; the fetched data is discarded.
 */
async function refreshConversationMessagesForWarmup(
  queryClient: QueryClient,
  conversationId: string,
): Promise<void> {
  const messagesKey = conversationKeys.messages(conversationId);
  const loadFromRepository = () =>
    conversationMessagesRepository.loadMessages(conversationId);

  await queryClient.fetchQuery({
    queryKey: messagesKey,
    queryFn: loadFromRepository,
    staleTime: 0,
  });
}
/**
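* While still on the conversation list, connect the WS and wait for the first assistant opening message to be written into React Query; on success, park the session so the chat page can take over.
* On timeout or failure, dispose the session; the chat page reconnects on its own (the server will not repeat the opening if it has already been written to history).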
@@ -59,8 +108,13 @@ export async function warmupConversationOpening(
queryClient: QueryClient,
conversationId: string,
): Promise<void> {
/**
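* Fetch history over REST first: if the access token has expired, the API client refreshes it here;
* this also avoids waiting on the WS when Redis/DB already holds the opening message but the local cache is still empty.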
*/
await refreshConversationMessagesForWarmup(queryClient, conversationId);
if (cacheHasAssistantMessage(queryClient, conversationId)) {
await prefetchConversationMessages(queryClient, conversationId);
return;
}

View File

@@ -174,6 +174,8 @@ interface UseRealtimeSessionOptions {
onTtsSegment?: (payload: TtsSegmentPayload) => void;
/** 用户发出下一条文本/语音成功后调用,用于恢复接受 TTS 片段(打断后丢弃迟到片段) */
onTtsPlaybackResume?: () => void;
/** 本条发送是否请求了「本轮助手朗读」,用于仅在该轮自动播放 WS TTS */
onUserSendTtsPreference?: (requestedTts: boolean) => void;
}
const MIN_RECORDING_DURATION_SEC = 1;
@@ -192,10 +194,19 @@ interface RealtimeSessionState {
/** 已发出用户消息,尚未收到助手首段流式文本(用于「正在回复」气泡) */
awaitingAssistantReply: boolean;
error: string | null;
sendText: (text: string) => void;
sendVoiceMessage: (uri: string, durationMs: number) => Promise<boolean>;
sendText: (text: string, options?: { ttsThisTurn?: boolean }) => void;
sendVoiceMessage: (
uri: string,
durationMs: number,
options?: { ttsThisTurn?: boolean },
) => Promise<boolean>;
sendEndConversation: () => void;
sendTtsCancel: () => void;
requestAssistantSegmentTts: (body: {
assistantMessageId: string;
segmentIndex: number;
segmentText?: string;
}) => boolean;
}
export function useRealtimeSession({
@@ -203,6 +214,7 @@ export function useRealtimeSession({
enabled = true,
onTtsSegment,
onTtsPlaybackResume,
onUserSendTtsPreference,
}: UseRealtimeSessionOptions): RealtimeSessionState {
const queryClient = useQueryClient();
const sessionRef = useRef<RealtimeSession | null>(null);
@@ -301,15 +313,17 @@ export function useRealtimeSession({
}, [conversationId, enabled, queryClient, foregroundResumeGeneration]);
const sendText = useCallback(
(text: string) => {
(text: string, options?: { ttsThisTurn?: boolean }) => {
if (!sessionRef.current) return;
const sent = sessionRef.current.sendText(text);
const sent = sessionRef.current.sendText(text, options);
if (!sent) {
setError('消息发送失败,连接未就绪');
return;
}
onUserSendTtsPreference?.(options?.ttsThisTurn === true);
setAwaitingAssistantReply(true);
onTtsPlaybackResume?.();
@@ -342,11 +356,15 @@ export function useRealtimeSession({
},
);
},
[conversationId, queryClient, onTtsPlaybackResume],
[conversationId, queryClient, onTtsPlaybackResume, onUserSendTtsPreference],
);
const sendVoiceMessage = useCallback(
async (uri: string, durationMs: number): Promise<boolean> => {
async (
uri: string,
durationMs: number,
options?: { ttsThisTurn?: boolean },
): Promise<boolean> => {
const session = sessionRef.current;
if (!session) return false;
@@ -363,12 +381,15 @@ export function useRealtimeSession({
clientSegmentId: `${voiceSessionId}-0`,
isLast: true,
duration: durationSec,
ttsThisTurn: options?.ttsThisTurn,
});
if (!sent) {
setError('语音发送失败,连接未就绪');
return false;
}
onUserSendTtsPreference?.(options?.ttsThisTurn === true);
setAwaitingAssistantReply(true);
const localId = `pending_voice_${Date.now()}`;
await voiceSegmentStore.recordSentSegment({
@@ -413,7 +434,7 @@ export function useRealtimeSession({
return false;
}
},
[conversationId, queryClient, onTtsPlaybackResume],
[conversationId, queryClient, onTtsPlaybackResume, onUserSendTtsPreference],
);
const sendEndConversation = useCallback(() => {
@@ -424,6 +445,15 @@ export function useRealtimeSession({
sessionRef.current?.sendTtsCancel();
}, []);
// Stable callback that asks the current session for on-demand TTS of one assistant segment.
// Yields false when no session is attached; otherwise yields the session's own result.
const requestAssistantSegmentTts = useCallback(
  (request: {
    assistantMessageId: string;
    segmentIndex: number;
    segmentText?: string;
  }): boolean => {
    const activeSession = sessionRef.current;
    if (activeSession == null) {
      return false;
    }
    return activeSession.requestAssistantSegmentTts(request);
  },
  [],
);
return {
connectionState,
streamingMessage,
@@ -433,5 +463,6 @@ export function useRealtimeSession({
sendVoiceMessage,
sendEndConversation,
sendTtsCancel,
requestAssistantSegmentTts,
};
}

View File

@@ -12,6 +12,13 @@ import { assistantSegmentMessageId, lastSegmentPreview } from './message-split';
import { conversationKeys } from './query-keys';
import type { ConversationListItem, MessageItem } from './types';
/** Case-insensitive 8-4-4-4-12 hexadecimal UUID shape, anchored at both ends. */
const UUID_ASSISTANT_MESSAGE_ID_PATTERN =
  /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;

/**
 * Tells whether `id` has the UUID shape. Intended to stay consistent with the
 * UUID check for persisted assistant message ids and with the chat page's
 * `durableAssistantIdForBubble`.
 *
 * @param id Candidate message id.
 * @returns `true` only when the whole string is a UUID-shaped value.
 */
function looksLikeUuidAssistantMessageId(id: string): boolean {
  return UUID_ASSISTANT_MESSAGE_ID_PATTERN.test(id);
}
export type StreamingTextCallback = (text: string, isComplete: boolean) => void;
export type ErrorCallback = (message: string, code?: string) => void;
@@ -23,6 +30,8 @@ export type TtsSegmentPayload = {
total?: number;
/** Assistant message id after server-side persistence; used to align with the bubble listKey / message id. */
assistantMessageId?: string;
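/** True when delivered on demand after the user taps the speaker icon; it should be added to the playback queue even if "read this turn aloud" is not enabled. */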
manual?: boolean;
};
interface RealtimeSessionOptions {
@@ -62,6 +71,17 @@ export class RealtimeSession {
private pendingAssistantMessageId: string | null = null;
private destroyed = false;
/** 本条用户消息是否请求「先 TTS 再出字」的助手轮次 */
private assistantTurnTtsSync = false;
private pendingTtsByKey = new Map<string, TtsSegmentPayload>();
/**
 * Builds the lookup key for a buffered TTS payload from the assistant message id
 * and segment index, joined by `:`. `'_'` stands in for a missing message id.
 */
private static bufferedTtsKey(
  assistantMessageId: string | undefined,
  index: number,
): string {
  const idPart = assistantMessageId ?? '_';
  return idPart + ':' + String(index);
}
constructor(options: RealtimeSessionOptions) {
this.client = new WsClient(options.conversationId);
this.conversationId = options.conversationId;
@@ -99,6 +119,9 @@ export class RealtimeSession {
this.uiStateListener = options.onStateChange;
options.onStateChange(this.client.getState());
}
if (!this.assistantTurnTtsSync && this.streamingBuffer.trim().length > 0) {
this.onStreamingText?.(this.streamingBuffer, false);
}
}
async connect(): Promise<void> {
@@ -113,14 +136,20 @@ export class RealtimeSession {
if (this.destroyed) return;
this.destroyed = true;
this.flushStreamingBufferIfPending();
this.resetAssistantTtsSyncState();
this.unsubEvent?.();
this.unsubState?.();
this.client.dispose();
}
/** Returns true if the message was sent over the socket. */
sendText(text: string): boolean {
return this.client.sendText(text);
sendText(
text: string,
options?: { ttsThisTurn?: boolean },
): boolean {
const tts = !!options?.ttsThisTurn;
this.assistantTurnTtsSync = tts;
return this.client.sendText(text, { ttsThisTurn: tts });
}
sendAudioSegment(
@@ -131,8 +160,11 @@ export class RealtimeSession {
clientSegmentId?: string;
isLast?: boolean;
duration?: number;
ttsThisTurn?: boolean;
},
): boolean {
const tts = !!options?.ttsThisTurn;
this.assistantTurnTtsSync = tts;
return this.client.send({
type: 'audio_segment',
data: {
@@ -142,6 +174,7 @@ export class RealtimeSession {
client_segment_id: options?.clientSegmentId,
is_last: options?.isLast,
duration: options?.duration,
...(options?.ttsThisTurn === true ? { tts_this_turn: true } : {}),
},
});
}
@@ -152,6 +185,7 @@ export class RealtimeSession {
/** 通知服务端停止当前轮次后续 TTS 合成与下发(与客户端 stop 队列配合) */
sendTtsCancel(): boolean {
this.resetAssistantTtsSyncState();
return this.client.sendTtsCancel();
}
@@ -159,8 +193,40 @@ export class RealtimeSession {
return this.client.getState();
}
/**
 * Forwards an on-demand TTS request for one assistant message segment to the
 * underlying WS client via `sendTtsRequest`.
 *
 * @param body Target assistant message id, segment index and optional segment text.
 * @returns The boolean reported by the WS client (presumably whether the request
 *          was sent — confirm against `WsClient.sendTtsRequest`).
 */
requestAssistantSegmentTts(body: {
assistantMessageId: string;
segmentIndex: number;
segmentText?: string;
}): boolean {
return this.client.sendTtsRequest(body);
}
// ─── Internal ───
/** Turns off the per-turn TTS sync flag and discards every buffered TTS payload. */
private resetAssistantTtsSyncState(): void {
this.assistantTurnTtsSync = false;
this.pendingTtsByKey.clear();
}
/**
 * While the current turn is in TTS-sync mode, hands the buffered TTS payload for
 * the given message/segment (if one exists) to `onTtsSegment` and removes it from
 * the buffer. Does nothing outside sync mode or when nothing is buffered for the key.
 */
private flushBufferedTtsIfSync(
  assistantMessageId: string | undefined,
  index: number,
): void {
  if (this.assistantTurnTtsSync) {
    const bufferKey = RealtimeSession.bufferedTtsKey(assistantMessageId, index);
    const buffered = this.pendingTtsByKey.get(bufferKey);
    if (!buffered) {
      return;
    }
    // Remove before delivering so the payload cannot be flushed a second time.
    this.pendingTtsByKey.delete(bufferKey);
    this.onTtsSegment?.(buffered);
  }
}
/**
 * Resets the TTS sync state once the segment at `index` is the last of `total`
 * segments (i.e. `index >= total - 1`); otherwise leaves the state untouched.
 */
private finishAssistantTurnIfLastSegment(index: number, total: number): void {
  const lastIndex = total - 1;
  const reachedLastSegment = index >= lastIndex;
  if (!reachedLastSegment) {
    return;
  }
  this.resetAssistantTtsSyncState();
}
private handleEvent: WsEventListener = (event: WsEvent) => {
if (event.kind === 'agent_response') {
this.handleAgentChunk(event);
@@ -170,14 +236,23 @@ export class RealtimeSession {
if (event.kind === 'tts_audio_received') {
const b64 = event.audioBase64?.trim();
const url = event.audioUrl?.trim();
if (b64 || url) {
this.onTtsSegment?.({
audioBase64: b64 || undefined,
audioUrl: url || undefined,
index: event.index,
total: event.total,
assistantMessageId: event.assistantMessageId,
});
if (!b64 && !url) {
return;
}
const payload: TtsSegmentPayload = {
audioBase64: b64 || undefined,
audioUrl: url || undefined,
index: event.index,
total: event.total,
assistantMessageId: event.assistantMessageId,
manual: event.manual,
};
if (this.assistantTurnTtsSync && !payload.manual) {
const idx = event.index ?? 0;
const key = RealtimeSession.bufferedTtsKey(event.assistantMessageId, idx);
this.pendingTtsByKey.set(key, payload);
} else {
this.onTtsSegment?.(payload);
}
return;
}
@@ -185,6 +260,7 @@ export class RealtimeSession {
handleWsEvent(this.queryClient, event);
if (event.kind === 'session_error') {
this.resetAssistantTtsSyncState();
this.onError?.(event.message, event.code);
}
};
@@ -208,14 +284,19 @@ export class RealtimeSession {
const total = event.total ?? 1;
const index = event.index ?? 0;
const sync = this.assistantTurnTtsSync;
if (total > 1) {
const id =
event.assistantMessageId != null
? assistantSegmentMessageId(event.assistantMessageId, index)
: `${this.conversationId}_agent_${Date.now()}_${index}`;
if (sync) {
this.flushBufferedTtsIfSync(event.assistantMessageId, index);
}
this.commitOneAssistantMessage(event.text, id);
this.onStreamingText?.(event.text, true);
this.finishAssistantTurnIfLastSegment(index, total);
return;
}
@@ -224,18 +305,30 @@ export class RealtimeSession {
}
this.streamingBuffer += event.text;
// Use the coerced index/total: if the server sends only text and omits index/total, the old logic left isComplete=false and the message was never committed.
const isComplete = index >= total - 1;
this.onStreamingText?.(this.streamingBuffer, isComplete);
if (!sync) {
this.onStreamingText?.(this.streamingBuffer, isComplete);
}
if (isComplete) {
const assistantId =
event.assistantMessageId ?? this.pendingAssistantMessageId;
const id =
this.pendingAssistantMessageId ??
`${this.conversationId}_agent_${Date.now()}`;
this.commitStreamingBufferWithId(id);
if (sync) {
this.flushBufferedTtsIfSync(assistantId ?? undefined, 0);
this.commitStreamingBufferWithId(id);
const visible =
this.streamingBuffer.trim().length > 0 ? this.streamingBuffer : '…';
this.onStreamingText?.(visible, true);
} else {
this.commitStreamingBufferWithId(id);
}
this.streamingBuffer = '';
this.pendingAssistantMessageId = null;
this.finishAssistantTurnIfLastSegment(0, 1);
}
}
@@ -249,6 +342,9 @@ export class RealtimeSession {
senderType: 'assistant',
timestamp: Date.now(),
messageType: 'text',
...(looksLikeUuidAssistantMessageId(id)
? { durableMessageId: id }
: {}),
};
return [...(old ?? []), message];
});
@@ -262,7 +358,6 @@ export class RealtimeSession {
}
const fullText = this.streamingBuffer;
this.streamingBuffer = '';
const content = fullText.trim().length > 0 ? fullText : '…';
const messagesKey = conversationKeys.messages(this.conversationId);
@@ -274,6 +369,9 @@ export class RealtimeSession {
senderType: 'assistant',
timestamp: Date.now(),
messageType: 'text',
...(looksLikeUuidAssistantMessageId(messageId)
? { durableMessageId: messageId }
: {}),
};
return [...(old ?? []), message];
});
@@ -305,6 +403,7 @@ export class RealtimeSession {
this.pendingAssistantMessageId ??
`${this.conversationId}_agent_${Date.now()}`;
this.commitStreamingBufferWithId(id);
this.streamingBuffer = '';
this.pendingAssistantMessageId = null;
}
}

View File

@@ -72,6 +72,8 @@ export interface MessageItem {
audioUri?: string;
/** 助手 TTS 已上传的 COS URL 列表(与后端 `ttsAudioUrls` 一致),用于不重合成重复朗读 */
ttsAudioUrls?: string[];
/** Persisted assistant message id (synced from REST history); used for on-demand TTS requests. */
durableMessageId?: string;
}
export interface OrganizeResponse {