fix(conversation): 修复实时会话 TTS/回复被离屏 WS 抢占
- 列表预热仅预取消息缓存,避免后台 WebSocket 覆盖服务端连接
- RealtimeSession UI 回调按 owner 独占,防止 offscreen 覆盖聊天页
- 列表页聚焦时再 prewarm,会话页 TTS 入队优先 base64
- 管线下发 TTS 同时带 audio_base64 与 audio_url;协议说明同步
- 移除 TTS 排查用前后端调试日志,保留错误/告警
- 补充 WS / RealtimeSession / entry-warmup / 播放器相关单测

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
import { QueryClient } from '@tanstack/react-query';
|
||||
|
||||
import {
|
||||
prewarmConversationSession,
|
||||
prefetchConversationMessages,
|
||||
warmupConversationOpening,
|
||||
} from '@/features/conversation/entry-warmup';
|
||||
@@ -90,6 +91,20 @@ describe('conversation entry warmup', () => {
|
||||
).resolves.toBeUndefined();
|
||||
});
|
||||
|
||||
// Prewarming an existing conversation must only fill the message cache:
// history is loaded for the conversation and no realtime session is created.
test('prewarms existing conversations without opening an offscreen websocket', async () => {
  // Arrange: the history loader yields a single assistant message.
  const cachedMessage = assistantMessage();
  mockLoadMessages.mockResolvedValueOnce([cachedMessage]);

  // Act: the call is not awaited, so yield one macrotask to let its async work finish.
  prewarmConversationSession(queryClient, 'conv-1');
  await new Promise<void>((resume) => {
    setImmediate(resume);
  });

  // Assert: history was requested, no session was recorded, and the cache holds the history.
  expect(mockLoadMessages).toHaveBeenCalledWith('conv-1');
  expect(mockSessions).toHaveLength(0);

  const cachedList = queryClient.getQueryData(conversationKeys.messages('conv-1'));
  expect(cachedList).toEqual([cachedMessage]);
});
|
||||
|
||||
test('uses refreshed history and skips websocket when opening is already cached', async () => {
|
||||
const existing = assistantMessage();
|
||||
mockLoadMessages.mockResolvedValueOnce([existing]);
|
||||
|
||||
@@ -0,0 +1,253 @@
|
||||
import { QueryClient } from '@tanstack/react-query';
|
||||
|
||||
import { RealtimeSession } from '@/features/conversation/realtime-session';
|
||||
import { conversationKeys } from '@/features/conversation/query-keys';
|
||||
import type { MessageItem } from '@/features/conversation/types';
|
||||
|
||||
// Replace the token manager with a stub whose getAccessToken always resolves to a
// fixed token (presumably consumed by RealtimeSession while connecting — confirm).
jest.mock('@/core/auth/token-manager', () => {
  const getAccessToken = jest.fn().mockResolvedValue('test-token');
  return { tokenManager: { getAccessToken } };
});
|
||||
|
||||
// Replace runtime config with fixed WebSocket settings for the tests.
// NOTE(review): the 10-minute heartbeat interval presumably keeps heartbeats from
// firing during a test run — confirm against the session implementation.
jest.mock('@/core/config', () => {
  const ws = {
    reconnectMaxRetries: 3,
    reconnectBaseDelayMs: 10,
    reconnectMaxDelayMs: 100,
    heartbeatIntervalMs: 600000,
  };
  return { config: { wsBaseUrl: 'ws://localhost:8000/', ws } };
});
|
||||
|
||||
/**
 * Minimal in-memory stand-in for the global `WebSocket` used by these tests.
 *
 * - Every constructed socket is appended to `MockWebSocket.instances` so a test
 *   can grab the socket the code under test created.
 * - The socket reports `OPEN` immediately and fires `onopen` on the next
 *   microtask, unless it was closed in the meantime (a real WebSocket never
 *   dispatches `open` after `close()`).
 * - Outbound payloads are recorded in `sent` instead of being dropped, so tests
 *   can inspect what was written to the socket.
 * - `simulateMessage` delivers a JSON-encoded server frame to `onmessage`.
 */
class MockWebSocket {
  static OPEN = 1;
  static CLOSED = 3;
  /** All sockets constructed so far, in creation order. */
  static instances: MockWebSocket[] = [];

  readyState = MockWebSocket.OPEN;
  onopen: (() => void) | null = null;
  onmessage: ((event: { data: string }) => void) | null = null;
  onclose: (() => void) | null = null;
  onerror: (() => void) | null = null;
  /** Payloads passed to `send`, oldest first. */
  readonly sent: unknown[] = [];

  constructor(public url: string) {
    MockWebSocket.instances.push(this);
    // Defer the open notification so the creator can assign `onopen` first.
    queueMicrotask(() => {
      if (this.readyState === MockWebSocket.OPEN) {
        this.onopen?.();
      }
    });
  }

  /** Records the outbound payload; calling with no argument remains a no-op. */
  send(data?: unknown): void {
    if (data !== undefined) {
      this.sent.push(data);
    }
  }

  /** Marks the socket closed. `onclose` is intentionally not invoked. */
  close(): void {
    this.readyState = MockWebSocket.CLOSED;
  }

  /** Delivers `data` to `onmessage` as a JSON string, like a server text frame. */
  simulateMessage(data: Record<string, unknown>): void {
    this.onmessage?.({ data: JSON.stringify(data) });
  }
}
|
||||
|
||||
// Install the mock as the global WebSocket constructor. The tests below read the
// created socket from MockWebSocket.instances after session.connect().
(global as Record<string, unknown>).WebSocket = MockWebSocket;
|
||||
|
||||
/**
 * Reads the cached message list for a conversation.
 *
 * @param qc  Query client whose cache is inspected.
 * @param cid Conversation id used to build the messages query key.
 * @returns The cached messages, or an empty array when nothing is cached.
 */
function msgs(qc: QueryClient, cid: string): MessageItem[] {
  const cached = qc.getQueryData<MessageItem[]>(conversationKeys.messages(cid));
  return cached ?? [];
}
|
||||
|
||||
describe('RealtimeSession sync TTS / agent ordering', () => {
|
||||
// Query client shared by the tests in this suite; recreated before each test.
let qc: QueryClient;

beforeEach(() => {
  // Start every test with no recorded sockets, cleared mock call history,
  // and an empty message list cached for 'conv-x'.
  MockWebSocket.instances = [];
  jest.clearAllMocks();

  qc = new QueryClient();
  qc.setQueryData(conversationKeys.messages('conv-x'), []);
});

afterEach(async () => {
  // Yield one macrotask before the next test (presumably to let work queued
  // by the finished test settle — confirm).
  await new Promise<void>((resume) => {
    setImmediate(resume);
  });
});
|
||||
|
||||
// Single-segment TTS turn: an agent_response on its own must not add an assistant
// message to the cache; the message appears once the matching tts_audio arrives,
// and it is already in the cache when the TTS callback runs.
it('defers assistant commit when agent_response arrives before tts_audio (single segment)', async () => {
  const aid = 'aa11aa11-aaaa-aaaa-aaaa-aaaaaaaaaaaa';

  // Record what the cache looked like at each TTS callback and assert on it in
  // the test body. An expect() thrown inside the callback could be swallowed if
  // the session's message dispatch catches handler errors.
  const committedAtTts: boolean[] = [];
  const onTts = jest.fn(() => {
    committedAtTts.push(msgs(qc, 'conv-x').some((m) => m.id === aid));
  });
  const onStream = jest.fn();
  const session = new RealtimeSession({
    conversationId: 'conv-x',
    queryClient: qc,
    onStreamingText: onStream,
    onTtsSegment: onTts,
  });

  try {
    await session.connect();
    await new Promise((r) => setImmediate(r));

    const ws = MockWebSocket.instances[0]!;
    expect(session.sendText('hi', { ttsThisTurn: true })).toBe(true);

    ws.simulateMessage({
      type: 'agent_response',
      conversation_id: 'conv-x',
      data: {
        text: 'Hello segment',
        index: 0,
        total: 1,
        assistant_message_id: aid,
      },
      timestamp: new Date().toISOString(),
    });

    // Text alone must not be committed yet.
    const afterAgentOnly = msgs(qc, 'conv-x').filter(
      (m) => m.senderType === 'assistant',
    );
    expect(afterAgentOnly).toHaveLength(0);

    ws.simulateMessage({
      type: 'tts_audio',
      conversation_id: 'conv-x',
      data: {
        audio_url: 'https://example.com/tts-a.mp3',
        index: 0,
        total: 1,
        assistant_message_id: aid,
      },
      timestamp: new Date().toISOString(),
    });

    expect(onTts).toHaveBeenCalledTimes(1);
    // The assistant message was already in the cache when the TTS callback ran.
    expect(committedAtTts).toEqual([true]);

    const committed = msgs(qc, 'conv-x').filter(
      (m) => m.senderType === 'assistant',
    );
    expect(committed).toHaveLength(1);
    expect(committed[0]!.content).toContain('Hello segment');
  } finally {
    // Always release the session, even when an assertion above fails.
    session.dispose();
  }
});
|
||||
|
||||
// Multi-segment TTS turn where tts_audio for segment 0 arrives before its
// agent_response: the streaming callback is cleared with ('', true) and never
// receives the segment text, and the segment message (`<id>_seg_0`) is in the
// cache whenever the TTS callback runs.
it('multi-segment sync clears pending UI without streaming footer text', async () => {
  const aid = 'bb22bb22-bbbb-bbbb-bbbb-bbbbbbbbbbbb';

  // Record the cache state at each TTS callback and assert on it in the test
  // body. An expect() thrown inside the callback could be swallowed if the
  // session's message dispatch catches handler errors.
  const segmentCommittedAtTts: boolean[] = [];
  const onTts = jest.fn(() => {
    segmentCommittedAtTts.push(
      msgs(qc, 'conv-x').some((m) => m.id === `${aid}_seg_0`),
    );
  });
  const onStream = jest.fn();
  const session = new RealtimeSession({
    conversationId: 'conv-x',
    queryClient: qc,
    onStreamingText: onStream,
    onTtsSegment: onTts,
  });

  try {
    await session.connect();
    await new Promise((r) => setImmediate(r));

    const ws = MockWebSocket.instances[0]!;
    // Checked here as in the single-segment test, so a rejected send is reported directly.
    expect(session.sendText('hi', { ttsThisTurn: true })).toBe(true);

    ws.simulateMessage({
      type: 'tts_audio',
      conversation_id: 'conv-x',
      data: {
        audio_url: 'https://example.com/tts-b.mp3',
        index: 0,
        total: 2,
        assistant_message_id: aid,
      },
      timestamp: new Date().toISOString(),
    });

    ws.simulateMessage({
      type: 'agent_response',
      conversation_id: 'conv-x',
      data: {
        text: 'Part A',
        index: 0,
        total: 2,
        assistant_message_id: aid,
      },
      timestamp: new Date().toISOString(),
    });

    expect(onStream).toHaveBeenCalledWith('', true);
    expect(onStream).not.toHaveBeenCalledWith('Part A', true);
    expect(onTts).toHaveBeenCalled();
    // Every TTS callback saw the segment message already committed.
    expect(segmentCommittedAtTts).not.toContain(false);
  } finally {
    // Always release the session, even when an assertion above fails.
    session.dispose();
  }
});
|
||||
|
||||
// Callbacks attached with an owner token must survive a later attach that passes
// no owner: the TTS segment is delivered to the owned callback only.
it('keeps active screen TTS callback when stale offscreen attach runs later', async () => {
  const aid = 'cc33cc33-cccc-cccc-cccc-cccccccccccc';
  const screenOnTts = jest.fn();
  const offscreenOnTts = jest.fn();
  const session = new RealtimeSession({
    conversationId: 'conv-x',
    queryClient: qc,
  });
  const owner = Symbol('screen-owner');

  try {
    // Owned attach first, then an ownerless attach that must not replace it.
    session.attachUiCallbacks({ onTtsSegment: screenOnTts }, owner);
    session.attachUiCallbacks({ onTtsSegment: offscreenOnTts });

    await session.connect();
    await new Promise((r) => setImmediate(r));

    const ws = MockWebSocket.instances[0]!;
    ws.simulateMessage({
      type: 'tts_audio',
      conversation_id: 'conv-x',
      data: {
        audio_base64: 'ZmFrZS1tcDM=',
        audio_url: 'https://example.com/tts-c.mp3',
        index: 0,
        total: 1,
        assistant_message_id: aid,
        manual: true,
      },
      timestamp: new Date().toISOString(),
    });

    expect(screenOnTts).toHaveBeenCalledTimes(1);
    expect(offscreenOnTts).not.toHaveBeenCalled();
  } finally {
    // Always release the session, even when an assertion above fails.
    session.dispose();
  }
});
|
||||
|
||||
// Once one owner has attached its callbacks, a later attach under a different
// owner token must not take over: the TTS segment goes to the first owner only.
it('keeps active screen TTS callback when a stale screen owner attaches later', async () => {
  const aid = 'dd44dd44-dddd-dddd-dddd-dddddddddddd';
  const screenOnTts = jest.fn();
  const staleScreenOnTts = jest.fn();
  const session = new RealtimeSession({
    conversationId: 'conv-x',
    queryClient: qc,
  });
  const activeOwner = Symbol('active-screen-owner');
  const staleOwner = Symbol('stale-screen-owner');

  try {
    // First owner attaches, then a different owner tries to attach afterwards.
    session.attachUiCallbacks({ onTtsSegment: screenOnTts }, activeOwner);
    session.attachUiCallbacks({ onTtsSegment: staleScreenOnTts }, staleOwner);

    await session.connect();
    await new Promise((r) => setImmediate(r));

    const ws = MockWebSocket.instances[0]!;
    ws.simulateMessage({
      type: 'tts_audio',
      conversation_id: 'conv-x',
      data: {
        audio_base64: 'ZmFrZS1tcDM=',
        audio_url: 'https://example.com/tts-d.mp3',
        index: 0,
        total: 1,
        assistant_message_id: aid,
        manual: true,
      },
      timestamp: new Date().toISOString(),
    });

    expect(screenOnTts).toHaveBeenCalledTimes(1);
    expect(staleScreenOnTts).not.toHaveBeenCalled();
  } finally {
    // Always release the session, even when an assertion above fails.
    session.dispose();
  }
});
|
||||
});
|
||||
@@ -37,6 +37,7 @@ describe('usePlayer', () => {
|
||||
});
|
||||
jest.mocked(audioFocus.acquireForPlayback).mockResolvedValue(true);
|
||||
jest.mocked(audioFocus.releaseIfOwnedBy).mockResolvedValue(undefined);
|
||||
jest.mocked(audioFocus.onOwnerChange).mockImplementation(() => jest.fn());
|
||||
});
|
||||
|
||||
test('keeps the native audio session active while app-level audio focus owns teardown', () => {
|
||||
@@ -127,4 +128,43 @@ describe('usePlayer', () => {
|
||||
expect(pause).not.toHaveBeenCalled();
|
||||
expect(result.current.status).toBe('idle');
|
||||
});
|
||||
|
||||
// When the first focus acquisition is refused, the queued item stays idle; once
// the audio-focus owner changes to null, acquisition is retried and playback starts.
test('retries queued audio after acquire fails once then audio focus frees', async () => {
  const acquire = jest.mocked(audioFocus.acquireForPlayback);
  // First attempt is refused, every later attempt succeeds.
  acquire.mockResolvedValueOnce(false).mockResolvedValue(true);

  // Capture the owner-change listener the hook registers so the test can fire it.
  let ownerListener: ((owner: null | string) => void) | undefined;
  jest.mocked(audioFocus.onOwnerChange).mockImplementation((cb) => {
    ownerListener = cb as (owner: null | string) => void;
    return jest.fn();
  });

  mockUseAudioPlayerStatus.mockReturnValue({
    isLoaded: true,
    playing: false,
    currentTime: 0,
    duration: 10,
  });
  const play = jest.fn();
  mockUseAudioPlayer.mockReturnValue({ pause: jest.fn(), play });

  const { result } = renderHook(() => usePlayer());

  await act(async () => {
    await result.current.enqueue({
      uri: 'file:///queued.mp3',
      kind: 'tts_auto',
    });
  });

  expect(acquire).toHaveBeenCalledTimes(1);
  expect(result.current.status).toBe('idle');

  // Fail clearly if the hook never subscribed; otherwise the optional call below
  // would silently do nothing and the failure would surface as a confusing call count.
  expect(ownerListener).toBeDefined();

  await act(async () => {
    ownerListener?.(null);
  });

  expect(acquire).toHaveBeenCalledTimes(2);
  expect(play).toHaveBeenCalled();
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user