fix: 优化长语音上传交互并修复输入框高度跳变

This commit is contained in:
Kevin
2026-03-10 11:34:17 +08:00
parent 6ffe96d7a9
commit 8b9ccd4926
16 changed files with 1148 additions and 351 deletions

View File

@@ -314,6 +314,7 @@ async def _process_audio_segment_async(
"data": {
"text": transcript_text or "",
"audio_duration": audio_duration,
"voice_session_id": voice_session_id,
"segment_index": segment_index,
"is_last": is_last,
},
@@ -383,18 +384,6 @@ async def _process_audio_segment_async(
user=user,
)
if is_last:
await manager.send_message(conversation_id, {
"type": MessageType.AGENT_RESPONSE,
"conversation_id": conversation_id,
"data": {
"text": "最后一段语音已收到,我会继续完善这一轮总结。",
"transition": True,
"is_last": True,
"segment_index": segment_index,
},
"timestamp": datetime.now(timezone.utc).isoformat(),
})
break
except Exception as e:

View File

@@ -463,6 +463,7 @@ class WebSocketBaselineTest(unittest.IsolatedAsyncioTestCase):
"type": "audio_segment",
"data": {
"audio_base64": "seg-1",
"voice_session_id": "voice-session-1",
"segment_index": 1,
"duration": 12,
"is_last": False,
@@ -472,6 +473,7 @@ class WebSocketBaselineTest(unittest.IsolatedAsyncioTestCase):
"type": "audio_segment",
"data": {
"audio_base64": "seg-0",
"voice_session_id": "voice-session-1",
"segment_index": 0,
"duration": 10,
"is_last": False,
@@ -523,6 +525,15 @@ class WebSocketBaselineTest(unittest.IsolatedAsyncioTestCase):
]
self.assertEqual(ordered_messages, ["这是第 0 段", "这是第 1 段"])
self.assertEqual(len([obj for obj in fake_db.added if isinstance(obj, Segment)]), 2)
transcript_msgs = [
item["message"]
for item in fake_manager.sent_messages
if item["message"]["type"] == ws_router.MessageType.TRANSCRIPT
]
self.assertEqual(
[msg["data"]["voice_session_id"] for msg in transcript_msgs],
["voice-session-1", "voice-session-1"],
)
async def test_audio_segment_duplicate_index_is_idempotent(self):
user = _make_user()
@@ -727,6 +738,68 @@ class WebSocketBaselineTest(unittest.IsolatedAsyncioTestCase):
]
self.assertGreaterEqual(len(transition_msgs), 1)
async def test_audio_segment_last_segment_does_not_emit_terminal_transition(self):
    # Scenario: the client sends exactly one audio segment flagged
    # ``is_last`` and then disconnects. The test expects exactly one
    # AGENT_RESPONSE message marked ``transition`` to be sent, and that
    # message must not carry an ``is_last`` value.
    user = _make_user()
    conversation = Conversation(id="conv-1", user_id=user.id, status="active")
    fake_db = _FakeAsyncDB(user=user, conversation=conversation)
    fake_manager = _FakeManager()

    final_segment_frame = {
        "type": "audio_segment",
        "data": {
            "audio_base64": "last-seg-0",
            "voice_session_id": "voice-session-last",
            "client_segment_id": "voice-session-last-0",
            "segment_index": 0,
            "duration": 15,
            "is_last": True,
        },
    }
    # The trailing WebSocketDisconnect entry is the last item the fake
    # socket is given after the single audio frame.
    fake_websocket = _FakeWebSocket(
        messages=[final_segment_frame, WebSocketDisconnect()]
    )

    process_user_message_mock = AsyncMock()
    transcribe_mock = AsyncMock(return_value="最后一段转写")

    with ExitStack() as stack:
        activate = stack.enter_context

        # Auth, DB access and the connection manager are replaced by fakes.
        activate(
            patch.object(
                ws_router,
                "verify_token",
                return_value={"type": "access", "sub": user.id},
            )
        )
        activate(patch.object(ws_router, "get_async_db", _db_provider(fake_db)))
        activate(patch.object(ws_router, "manager", fake_manager))

        # Quota lookups are stubbed: zero segments counted, sending allowed.
        activate(
            patch("routers.quota.get_segment_count", new=AsyncMock(return_value=0))
        )
        activate(
            patch("routers.quota.check_can_send_message", return_value=(True, ""))
        )

        # Downstream message processing and ASR are mocked out.
        activate(
            patch.object(ws_router, "process_user_message", process_user_message_mock)
        )
        activate(patch.object(ws_router.asr_service, "transcribe", transcribe_mock))

        await ws_router.websocket_endpoint(fake_websocket, "conv-1")
        # NOTE(review): presumably gives asynchronously scheduled segment
        # processing time to finish while the patches are active - confirm.
        await asyncio.sleep(0.05)

    # Keep only AGENT_RESPONSE payloads whose data has ``transition`` set
    # to the literal True.
    transition_msgs = []
    for record in fake_manager.sent_messages:
        payload = record["message"]
        if payload["type"] != ws_router.MessageType.AGENT_RESPONSE:
            continue
        if payload.get("data", {}).get("transition") is True:
            transition_msgs.append(payload)

    self.assertEqual(len(transition_msgs), 1)
    only_transition = transition_msgs[0]
    self.assertIsNone(only_transition["data"].get("is_last"))
async def test_audio_segment_continues_after_reconnect_with_existing_previous_segment(self):
user = _make_user()
conversation = Conversation(id="conv-1", user_id=user.id, status="active")