feat: 支持长语音分段上传与断线补传

2026-03-09 15:30:18 +08:00
parent 440f5be07f
commit 6ffe96d7a9
13 changed files with 1451 additions and 19 deletions
--- a/app-android/app/src/main/java/com/huaga/life_echo/config/AppConfig.kt
+++ b/app-android/app/src/main/java/com/huaga/life_echo/config/AppConfig.kt
@@ -19,8 +19,8 @@ object AppConfig {
    // ==================== 服务器地址配置 ====================
    
    // 开发环境（物理机测试使用内网IP）
-    private const val DEV_API_URL = "http://192.168.10.120:8000"
-    private const val DEV_WS_URL = "ws://192.168.10.120:8000"
+    // NOTE(review): this looks like the LAN address of one developer's machine — confirm it is meant to be committed.
+    private const val DEV_API_URL = "http://192.168.10.178:8000"
+    private const val DEV_WS_URL = "ws://192.168.10.178:8000"
    
    // 生产环境（公网地址）
    private const val PROD_API_URL = "https://lifecho.worldsplats.com"
--- a/app-android/app/src/main/java/com/huaga/life_echo/feature/voice/PendingVoiceSegmentStore.kt
+++ b/app-android/app/src/main/java/com/huaga/life_echo/feature/voice/PendingVoiceSegmentStore.kt
@@ -0,0 +1,106 @@
+package com.huaga.life_echo.feature.voice
+
+import kotlinx.serialization.Serializable
+import kotlinx.serialization.decodeFromString
+import kotlinx.serialization.encodeToString
+import kotlinx.serialization.json.Json
+import java.io.File
+
+/**
+ * A recorded voice segment that is waiting to be uploaded.
+ *
+ * [audioBytes] is an array, and the `equals`/`hashCode` a data class generates compare
+ * arrays by reference. Both are therefore overridden to compare the audio by content, so
+ * two segments holding the same data are equal.
+ *
+ * @property clientSegmentId client-generated id of this segment.
+ * @property conversationId conversation the segment is uploaded to.
+ * @property voiceSessionId id shared by all segments of one recording.
+ * @property segmentIndex position of the segment inside its voice session.
+ * @property durationSeconds length of the segment in seconds.
+ * @property isLast whether this is the final segment of its voice session.
+ * @property audioBytes raw audio payload.
+ * @property createdAtMillis creation timestamp; defaults to the current time.
+ */
+data class PendingVoiceSegment(
+    val clientSegmentId: String,
+    val conversationId: String,
+    val voiceSessionId: String,
+    val segmentIndex: Int,
+    val durationSeconds: Int,
+    val isLast: Boolean,
+    val audioBytes: ByteArray,
+    val createdAtMillis: Long = System.currentTimeMillis(),
+) {
+    override fun equals(other: Any?): Boolean {
+        if (this === other) return true
+        if (other !is PendingVoiceSegment) return false
+        return clientSegmentId == other.clientSegmentId &&
+            conversationId == other.conversationId &&
+            voiceSessionId == other.voiceSessionId &&
+            segmentIndex == other.segmentIndex &&
+            durationSeconds == other.durationSeconds &&
+            isLast == other.isLast &&
+            createdAtMillis == other.createdAtMillis &&
+            audioBytes.contentEquals(other.audioBytes)
+    }
+
+    override fun hashCode(): Int {
+        var result = clientSegmentId.hashCode()
+        result = 31 * result + conversationId.hashCode()
+        result = 31 * result + voiceSessionId.hashCode()
+        result = 31 * result + segmentIndex
+        result = 31 * result + durationSeconds
+        result = 31 * result + isLast.hashCode()
+        result = 31 * result + audioBytes.contentHashCode()
+        result = 31 * result + createdAtMillis.hashCode()
+        return result
+    }
+}
+
+/**
+ * Serializable form of a [PendingVoiceSegment] without its audio payload.
+ *
+ * [PendingVoiceSegmentStore] encodes this to the segment's `.json` file and keeps the audio
+ * bytes in a separate `.bin` file.
+ */
+@Serializable
+private data class PendingVoiceSegmentMetadata(
+    val clientSegmentId: String,
+    val conversationId: String,
+    val voiceSessionId: String,
+    val segmentIndex: Int,
+    val durationSeconds: Int,
+    val isLast: Boolean,
+    val createdAtMillis: Long,
+)
+
+/**
+ * File-backed store of voice segments that still have to be uploaded.
+ *
+ * Every segment lives in [rootDir] as two files named after its client segment id:
+ * `<id>.json` holds the metadata and `<id>.bin` holds the raw audio bytes. Segments are
+ * discovered through their metadata file.
+ *
+ * @param rootDir directory that holds the segment files; created by [upsert] when missing.
+ * @param json serializer used for the metadata files.
+ */
+class PendingVoiceSegmentStore(
+    private val rootDir: File,
+    private val json: Json = Json {
+        ignoreUnknownKeys = true
+        encodeDefaults = true
+    },
+) {
+    /**
+     * Stores [segment], overwriting the files of any segment with the same client segment id.
+     */
+    fun upsert(segment: PendingVoiceSegment) {
+        rootDir.mkdirs()
+
+        // Audio first, metadata last: a first-time write that is interrupted then leaves no
+        // metadata file pointing at missing or partially written audio.
+        audioFile(segment.clientSegmentId).writeBytes(segment.audioBytes)
+        metadataFile(segment.clientSegmentId).writeText(
+            json.encodeToString(
+                PendingVoiceSegmentMetadata(
+                    clientSegmentId = segment.clientSegmentId,
+                    conversationId = segment.conversationId,
+                    voiceSessionId = segment.voiceSessionId,
+                    segmentIndex = segment.segmentIndex,
+                    durationSeconds = segment.durationSeconds,
+                    isLast = segment.isLast,
+                    createdAtMillis = segment.createdAtMillis,
+                )
+            )
+        )
+    }
+
+    /**
+     * Returns the stored segments of [conversationId], ordered like [listAll].
+     *
+     * Only the audio of matching segments is read from disk.
+     */
+    fun listByConversation(conversationId: String): List<PendingVoiceSegment> =
+        loadSegments { it.conversationId == conversationId }
+
+    /**
+     * Returns every stored segment, ordered by creation time and then by segment index.
+     *
+     * Segments whose metadata cannot be decoded or whose audio file is missing are skipped.
+     */
+    fun listAll(): List<PendingVoiceSegment> = loadSegments { true }
+
+    /** Deletes the metadata file and then the audio file of [clientSegmentId]. */
+    fun remove(clientSegmentId: String) {
+        metadataFile(clientSegmentId).delete()
+        audioFile(clientSegmentId).delete()
+    }
+
+    /** Loads the segments whose metadata satisfies [accept]; the filter runs before any audio is read. */
+    private fun loadSegments(accept: (PendingVoiceSegmentMetadata) -> Boolean): List<PendingVoiceSegment> {
+        // listFiles returns null when rootDir does not exist (or cannot be listed).
+        val metadataFiles = rootDir.listFiles { file -> file.extension == "json" } ?: return emptyList()
+        return metadataFiles
+            .mapNotNull { readMetadata(it) }
+            .filter(accept)
+            .mapNotNull { readSegment(it) }
+            .sortedWith(compareBy<PendingVoiceSegment> { it.createdAtMillis }.thenBy { it.segmentIndex })
+    }
+
+    /** Decodes one metadata file; returns null when it cannot be read or parsed. */
+    private fun readMetadata(file: File): PendingVoiceSegmentMetadata? =
+        runCatching {
+            json.decodeFromString<PendingVoiceSegmentMetadata>(file.readText())
+        }.getOrNull()
+
+    /** Joins [meta] with its audio file; returns null when the audio is missing or unreadable. */
+    private fun readSegment(meta: PendingVoiceSegmentMetadata): PendingVoiceSegment? {
+        val audioBytes = runCatching {
+            audioFile(meta.clientSegmentId).takeIf { it.exists() }?.readBytes()
+        }.getOrNull() ?: return null
+        return PendingVoiceSegment(
+            clientSegmentId = meta.clientSegmentId,
+            conversationId = meta.conversationId,
+            voiceSessionId = meta.voiceSessionId,
+            segmentIndex = meta.segmentIndex,
+            durationSeconds = meta.durationSeconds,
+            isLast = meta.isLast,
+            audioBytes = audioBytes,
+            createdAtMillis = meta.createdAtMillis,
+        )
+    }
+
+    private fun metadataFile(clientSegmentId: String): File = File(rootDir, "$clientSegmentId.json")
+
+    private fun audioFile(clientSegmentId: String): File = File(rootDir, "$clientSegmentId.bin")
+}
--- a/app-android/app/src/main/java/com/huaga/life_echo/feature/voice/SegmentedRecordingDuration.kt
+++ b/app-android/app/src/main/java/com/huaga/life_echo/feature/voice/SegmentedRecordingDuration.kt
@@ -0,0 +1,11 @@
+package com.huaga.life_echo.feature.voice
+
+/**
+ * Duration arithmetic for a recording that is split into consecutive segments.
+ */
+object SegmentedRecordingDuration {
+    /**
+     * Seconds to show for the whole recording: the finished segments plus the segment
+     * currently being recorded. A negative input counts as zero.
+     */
+    fun displaySeconds(completedSeconds: Int, currentSegmentSeconds: Int): Int =
+        maxOf(completedSeconds, 0) + maxOf(currentSegmentSeconds, 0)
+
+    /**
+     * Total of finished seconds once a segment of [finishedSegmentSeconds] has ended.
+     * A negative input counts as zero.
+     */
+    fun nextCompletedSeconds(completedSeconds: Int, finishedSegmentSeconds: Int): Int =
+        maxOf(completedSeconds, 0) + maxOf(finishedSegmentSeconds, 0)
+}
--- a/app-android/app/src/main/java/com/huaga/life_echo/network/WebSocketClient.kt
+++ b/app-android/app/src/main/java/com/huaga/life_echo/network/WebSocketClient.kt
@@ -208,6 +208,45 @@ class WebSocketClient {
            data = buildJsonObject { put("audio_base64", JsonPrimitive(base64Audio)) }
        ))
    }
+
+    /**
+     * Sends one segment of a long voice recording as an `audio_segment` message.
+     *
+     * The audio is Base64-encoded without line wraps and sent with the voice session id,
+     * segment index, duration, last-segment flag and the fixed format `m4a`.
+     * `client_segment_id` is included only when [clientSegmentId] is not null.
+     *
+     * @throws Exception when the WebSocket is not connected.
+     */
+    suspend fun sendAudioSegment(
+        audioBytes: ByteArray,
+        conversationId: String,
+        voiceSessionId: String,
+        segmentIndex: Int,
+        duration: Int,
+        isLast: Boolean,
+        clientSegmentId: String? = null
+    ) {
+        Log.d(
+            TAG,
+            "准备发送分段语音, idx=$segmentIndex, size=${audioBytes.size}, duration=${duration}s, isLast=$isLast"
+        )
+        if (!isConnected) {
+            Log.w(TAG, "WebSocket未连接，无法发送分段语音")
+            throw Exception("WebSocket未连接，请先建立连接")
+        }
+
+        val encodedAudio = android.util.Base64.encodeToString(audioBytes, android.util.Base64.NO_WRAP)
+        val payload = buildJsonObject {
+            put("audio_base64", JsonPrimitive(encodedAudio))
+            put("voice_session_id", JsonPrimitive(voiceSessionId))
+            put("segment_index", JsonPrimitive(segmentIndex))
+            put("duration", JsonPrimitive(duration))
+            put("is_last", JsonPrimitive(isLast))
+            put("format", JsonPrimitive("m4a"))
+            clientSegmentId?.let { put("client_segment_id", JsonPrimitive(it)) }
+        }
+        sendMessage(
+            WebSocketMessage(
+                type = MessageType.audio_segment,
+                conversation_id = conversationId,
+                data = payload
+            )
+        )
+    }
    
    /**
     * 发送完整的音频消息（类似微信语音消息）
@@ -331,4 +370,3 @@ class WebSocketClient {
    
    fun isConnected(): Boolean = isConnected
 }
-
--- a/app-android/app/src/main/java/com/huaga/life_echo/network/WebSocketMessage.kt
+++ b/app-android/app/src/main/java/com/huaga/life_echo/network/WebSocketMessage.kt
@@ -9,6 +9,7 @@ import kotlinx.serialization.json.buildJsonObject
 enum class MessageType {
    connect,
    audio_chunk,
+    audio_segment,        // 分段语音（长语音边录边传）
    audio_message,        // 完整音频消息（类似微信语音）
    transcribe_only,      // 仅转写，不落库不触发 Agent，用于「转文字」发送
    text,  // 文本消息
@@ -82,4 +83,3 @@ data class WebSocketMessage(
        }
    }
 }
-
--- a/app-android/app/src/main/java/com/huaga/life_echo/ui/viewmodel/CreateMemoryViewModel.kt
+++ b/app-android/app/src/main/java/com/huaga/life_echo/ui/viewmodel/CreateMemoryViewModel.kt
@@ -1,7 +1,6 @@
 package com.huaga.life_echo.ui.viewmodel

 import android.content.Context
-import android.os.Build
 import android.util.Log
 import androidx.lifecycle.ViewModel
 import androidx.lifecycle.viewModelScope
@@ -10,7 +9,11 @@ import com.huaga.life_echo.data.repository.ConversationRepository
 import com.huaga.life_echo.data.repository.ChapterRepository
 import com.huaga.life_echo.data.repository.MessageRepository
 import com.huaga.life_echo.feature.voice.AudioPlayer
+import com.huaga.life_echo.feature.voice.PendingVoiceSegment
+import com.huaga.life_echo.feature.voice.PendingVoiceSegmentStore
 import com.huaga.life_echo.feature.voice.PlaybackInfo
+import com.huaga.life_echo.feature.voice.RecordingResult
+import com.huaga.life_echo.feature.voice.SegmentedRecordingDuration
 import com.huaga.life_echo.feature.voice.VoiceRecorder
 import com.huaga.life_echo.network.WebSocketClient
 import com.huaga.life_echo.network.WebSocketMessage
@@ -21,11 +24,21 @@ import com.huaga.life_echo.network.models.MessageDto
 import com.huaga.life_echo.data.database.Chapter
 import kotlinx.coroutines.channels.Channel
 import kotlinx.coroutines.flow.MutableStateFlow
+import kotlinx.coroutines.flow.SharingStarted
 import kotlinx.coroutines.flow.StateFlow
 import kotlinx.coroutines.flow.asStateFlow
+import kotlinx.coroutines.flow.combine
+import kotlinx.coroutines.flow.filterNotNull
+import kotlinx.coroutines.flow.first
+import kotlinx.coroutines.flow.stateIn
+import kotlinx.coroutines.Job
 import kotlinx.coroutines.launch
 import kotlinx.coroutines.delay
 import kotlinx.coroutines.withTimeoutOrNull
+import kotlinx.coroutines.sync.Mutex
+import kotlinx.coroutines.sync.withLock
+import java.io.File
+import java.util.UUID

 class CreateMemoryViewModel(
    private val conversationRepository: ConversationRepository,
@@ -37,21 +50,40 @@ class CreateMemoryViewModel(
    companion object {
        private const val TAG = "CreateMemoryViewModel"
        private const val MIN_RECORDING_DURATION = 1  // 最小录音时长（秒）
+        private const val VOICE_SEGMENT_DURATION_SECONDS = 30  // Length in seconds of each automatically cut recording segment
+        // Number of send attempts made for a segment within one dispatch.
+        private const val SEGMENT_RETRY_MAX_ATTEMPTS = 3
+        // Base back-off between attempts; the actual delay is this value times the attempt number.
+        private const val SEGMENT_RETRY_BASE_DELAY_MS = 800L
+        // Timeout for waiting on a conversation id and for waiting on the connection.
+        private const val SEGMENT_WAIT_TIMEOUT_MS = 10_000L
+        // Delay before stored segments are retried after a dispatch has failed.
+        private const val PENDING_SEGMENT_RETRY_DELAY_MS = 5_000L
+        // Placeholder conversation id stored with a segment enqueued while no conversation id exists.
+        private const val UNASSIGNED_PENDING_CONVERSATION_ID = "__pending_voice_conversation__"
    }
    
    private val webSocketClient = WebSocketClient()
    private val apiService = ApiService(TokenManager, AuthService())
+    // Segments awaiting upload, persisted in "pending-voice-segments" under the app's files directory.
+    private val pendingVoiceSegmentStore = PendingVoiceSegmentStore(
+        File(context.filesDir, "pending-voice-segments")
+    )
+    // Lets only one dispatch at a time prepare the conversation and connection.
+    private val voiceUploadPreparationMutex = Mutex()
+    // Guards pendingDispatchInFlight.
+    private val pendingDispatchLock = Any()
+    // Client segment ids that currently have a dispatch running.
+    private val pendingDispatchInFlight = mutableSetOf<String>()
+    // The delayed retry job; a new one is scheduled only when this one is not active.
+    private var pendingSegmentRetryJob: Job? = null
    
    // 语音录制器
    private val voiceRecorder = VoiceRecorder(context).apply {
-        maxDuration = 60  // 最大录音60秒
+        maxDuration = VOICE_SEGMENT_DURATION_SECONDS
        onMaxDurationReached = {
-            // 达到最大时长时自动停止并发送
+            // 达到切片时长后自动滚动到下一段，不打断长语音输入
            viewModelScope.launch {
-                stopAndSendRecording()
+                rolloverRecordingSegment()
            }
        }
    }
+
+    // True while recording should roll over into a new segment when the segment length is reached.
+    @Volatile
+    private var keepSegmentedRecording = false
+    // Id of the voice session in progress, or null when there is none.
+    private var currentVoiceSessionId: String? = null
+    // Index that the next enqueued segment will receive.
+    private var nextVoiceSegmentIndex: Int = 0
+    // Guards currentVoiceSessionId and nextVoiceSegmentIndex.
+    private val segmentStateLock = Any()
    
    // 音频播放器
    private val audioPlayer = AudioPlayer(context)
@@ -86,7 +118,17 @@ class CreateMemoryViewModel(
    
    // 语音录制相关状态
    val isVoiceRecording: StateFlow<Boolean> = voiceRecorder.isRecording
-    val recordingDuration: StateFlow<Int> = voiceRecorder.recordingDuration
+    // Seconds contributed by segments already cut from the recording in progress.
+    private val completedRecordingDuration = MutableStateFlow(0)
+    // Duration shown to the user: finished segments plus the segment being recorded.
+    val recordingDuration: StateFlow<Int> =
+        completedRecordingDuration
+            .combine(voiceRecorder.recordingDuration) { finished, running ->
+                SegmentedRecordingDuration.displaySeconds(finished, running)
+            }
+            .stateIn(viewModelScope, SharingStarted.Eagerly, 0)
    
    // 音频播放相关状态
    val playbackInfo: StateFlow<PlaybackInfo> = audioPlayer.playbackInfo
@@ -232,6 +274,8 @@ class CreateMemoryViewModel(
    
    fun endConversation() {
        viewModelScope.launch {
+            keepSegmentedRecording = false
+            resetVoiceSessionState()
            conversationId.value?.let { id ->
                webSocketClient.sendEndConversation(id)
                webSocketClient.disconnect()
@@ -251,13 +295,187 @@ class CreateMemoryViewModel(
    }
    
    // ==================== 语音录制功能 ====================
+
+    /** Starts a voice session (new random id, segment index 0) unless one is already active. */
+    private fun ensureVoiceSessionStarted() {
+        synchronized(segmentStateLock) {
+            if (currentVoiceSessionId != null) return
+            currentVoiceSessionId = UUID.randomUUID().toString()
+            nextVoiceSegmentIndex = 0
+        }
+    }
+
+    /**
+     * Returns the id of the active voice session, creating a random one when none exists.
+     *
+     * The segment index is left untouched.
+     */
+    private fun currentVoiceSessionIdOrCreate(): String = synchronized(segmentStateLock) {
+        currentVoiceSessionId ?: UUID.randomUUID().toString().also { currentVoiceSessionId = it }
+    }
+
+    /** Returns the index for the next segment and advances the counter by one. */
+    private fun consumeNextVoiceSegmentIndex(): Int = synchronized(segmentStateLock) {
+        nextVoiceSegmentIndex++
+    }
+
+    /** Clears the voice session id, the segment index and the accumulated recording duration. */
+    private fun resetVoiceSessionState() {
+        synchronized(segmentStateLock) {
+            nextVoiceSegmentIndex = 0
+            currentVoiceSessionId = null
+        }
+        completedRecordingDuration.value = 0
+    }
+
+    /**
+     * Marks [clientSegmentId] as being dispatched.
+     *
+     * @return false when a dispatch for the same id is already in flight.
+     */
+    private fun tryAcquirePendingDispatch(clientSegmentId: String): Boolean =
+        synchronized(pendingDispatchLock) {
+            // MutableSet.add reports whether the id was newly inserted.
+            pendingDispatchInFlight.add(clientSegmentId)
+        }
+
+    /** Removes the in-flight mark of [clientSegmentId]. */
+    private fun releasePendingDispatch(clientSegmentId: String) {
+        synchronized(pendingDispatchLock) { pendingDispatchInFlight -= clientSegmentId }
+    }
+
+    /**
+     * Schedules a delayed retry of the stored segments of [conversationId].
+     *
+     * While a retry job is still active the request is ignored, whichever conversation it names.
+     */
+    private fun schedulePendingSegmentRetry(conversationId: String) {
+        val alreadyScheduled = pendingSegmentRetryJob?.isActive ?: false
+        if (!alreadyScheduled) {
+            pendingSegmentRetryJob = viewModelScope.launch {
+                delay(PENDING_SEGMENT_RETRY_DELAY_MS)
+                retryPendingVoiceSegmentsForConversation(conversationId)
+            }
+        }
+    }
+
+    /** Launches one dispatch coroutine per segment stored for [conversationId]. */
+    private fun retryPendingVoiceSegmentsForConversation(conversationId: String) {
+        for (storedSegment in pendingVoiceSegmentStore.listByConversation(conversationId)) {
+            viewModelScope.launch { dispatchPendingVoiceSegment(storedSegment) }
+        }
+    }
+
+    /**
+     * Makes sure a conversation id exists and the WebSocket is connected before a segment is sent.
+     *
+     * The whole body runs under [voiceUploadPreparationMutex], so concurrent callers prepare
+     * one at a time. Failures are reported through [connectionStatus].
+     *
+     * @return the conversation id to upload to, or null when no conversation id or connection
+     * became available within [SEGMENT_WAIT_TIMEOUT_MS], or when the reconnect call threw.
+     */
+    private suspend fun ensureConversationReadyForSegmentUpload(): String? = voiceUploadPreparationMutex.withLock {
+        // No conversation yet: request one; its id is awaited just below.
+        if (conversationId.value == null) {
+            Log.d(TAG, "分段发送前创建新对话")
+            startConversation()
+        }
+
+        // Bounded wait for conversationId to hold a non-null value.
+        val readyConversationId = withTimeoutOrNull(SEGMENT_WAIT_TIMEOUT_MS) {
+            conversationId.filterNotNull().first()
+        } ?: run {
+            connectionStatus.value = "创建对话超时，请重试"
+            return@withLock null
+        }
+
+        // Not connected: start a reconnect for this conversation with the current access token.
+        if (!webSocketClient.isConnected()) {
+            Log.w(TAG, "分段发送前 WebSocket 未连接，开始等待重连")
+            connectionStatus.value = "未连接，正在重连..."
+            wsIsConnected.value = false
+            val token = TokenManager.getAccessToken()
+            try {
+                webSocketClient.connect(
+                    readyConversationId,
+                    token,
+                    onMessage = { message -> handleWebSocketMessage(message) },
+                    onError = { errorMsg -> connectionStatus.value = "错误: $errorMsg" }
+                )
+            } catch (e: Exception) {
+                connectionStatus.value = "重连失败: ${e.message}"
+                return@withLock null
+            }
+        }
+
+        // Bounded wait until the client reports connected or wsIsConnected turns true.
+        val connected = withTimeoutOrNull(SEGMENT_WAIT_TIMEOUT_MS) {
+            if (webSocketClient.isConnected()) {
+                true
+            } else {
+                wsIsConnected.first { it }
+            }
+        } ?: false
+
+        if (!connected) {
+            connectionStatus.value = "连接超时，请重试"
+            return@withLock null
+        }
+
+        readyConversationId
+    }
+
+    /**
+     * Persists a finished recording as a pending segment and launches its upload.
+     *
+     * The client segment id is `<voiceSessionId>-<segmentIndex>`. When no conversation id
+     * exists yet, the segment is stored under [UNASSIGNED_PENDING_CONVERSATION_ID].
+     *
+     * @param result the finished recording.
+     * @param isLast whether this is the final segment of the voice session.
+     */
+    private fun enqueueSegmentUpload(result: RecordingResult, isLast: Boolean) {
+        // The index is taken before the session id is resolved, as in the original ordering.
+        val index = consumeNextVoiceSegmentIndex()
+        val voiceSession = currentVoiceSessionIdOrCreate()
+        val targetConversation = conversationId.value ?: UNASSIGNED_PENDING_CONVERSATION_ID
+
+        val queued = PendingVoiceSegment(
+            clientSegmentId = "$voiceSession-$index",
+            conversationId = targetConversation,
+            voiceSessionId = voiceSession,
+            segmentIndex = index,
+            durationSeconds = result.durationSeconds,
+            isLast = isLast,
+            audioBytes = result.audioBytes,
+        )
+        pendingVoiceSegmentStore.upsert(queued)
+
+        viewModelScope.launch { dispatchPendingVoiceSegment(queued) }
+    }
+
+    /**
+     * Cuts the running recording into a segment and immediately starts the next one.
+     *
+     * Does nothing unless segmented recording is active. A segment shorter than
+     * [MIN_RECORDING_DURATION] is not uploaded. When the recorder cannot be restarted, the
+     * voice session is reset and the failure is shown in [connectionStatus].
+     */
+    private suspend fun rolloverRecordingSegment() {
+        if (!keepSegmentedRecording) return
+
+        val finished = voiceRecorder.stopRecording()
+        if (finished == null) {
+            Log.w(TAG, "自动切片失败：当前录音结果为空")
+            return
+        }
+
+        if (finished.durationSeconds < MIN_RECORDING_DURATION) {
+            Log.w(TAG, "自动切片时长过短，跳过上传: ${finished.durationSeconds}s")
+        } else {
+            completedRecordingDuration.value = SegmentedRecordingDuration.nextCompletedSeconds(
+                completedSeconds = completedRecordingDuration.value,
+                finishedSegmentSeconds = finished.durationSeconds,
+            )
+            enqueueSegmentUpload(finished, isLast = false)
+        }
+
+        // Recording may have been stopped or cancelled while the segment was being handled.
+        if (!keepSegmentedRecording) return
+        if (voiceRecorder.startRecording() != null) return
+
+        keepSegmentedRecording = false
+        resetVoiceSessionState()
+        connectionStatus.value = "录音切片续录失败，请重试"
+        Log.e(TAG, "自动切片后重启录音失败")
+    }
    
    /**
     * 开始录音（需 API 26+ 与录音权限）
     */
    fun startRecordingVoice() {
        Log.d(TAG, "开始录音")
-        voiceRecorder.startRecording()
+        keepSegmentedRecording = true
+        ensureVoiceSessionStarted()
+        val file = voiceRecorder.startRecording()
+        if (file == null) {
+            keepSegmentedRecording = false
+            resetVoiceSessionState()
+            connectionStatus.value = "录音启动失败，请检查权限后重试"
+        }
    }
    
    /**
@@ -266,16 +484,20 @@ class CreateMemoryViewModel(
    fun stopAndSendRecording() {
        viewModelScope.launch {
            Log.d(TAG, "停止录音并发送")
+            keepSegmentedRecording = false
            val result = voiceRecorder.stopRecording()
            if (result == null) {
                Log.e(TAG, "录音失败，result 为空")
+                resetVoiceSessionState()
                return@launch
            }
            if (result.durationSeconds < MIN_RECORDING_DURATION) {
                Log.w(TAG, "录音时间太短: ${result.durationSeconds}s")
+                resetVoiceSessionState()
                return@launch
            }
-            sendAudioMessage(result.audioBytes, result.filePath, result.durationSeconds)
+            enqueueSegmentUpload(result, isLast = true)
+            resetVoiceSessionState()
        }
    }
    
@@ -284,7 +506,9 @@ class CreateMemoryViewModel(
     */
    fun cancelRecordingVoice() {
        Log.d(TAG, "取消录音")
+        keepSegmentedRecording = false
        voiceRecorder.cancelRecording()
+        resetVoiceSessionState()
    }
    
    /**
@@ -293,6 +517,8 @@ class CreateMemoryViewModel(
    fun stopAndSendRecordingAsText() {
        viewModelScope.launch {
            Log.d(TAG, "停止录音并转文字发送")
+            keepSegmentedRecording = false
+            resetVoiceSessionState()
            val result = voiceRecorder.stopRecording() ?: run {
                Log.e(TAG, "录音结果为空")
                return@launch
@@ -345,6 +571,77 @@ class CreateMemoryViewModel(
        }
    }
    
+    /**
+     * Uploads one persisted voice segment, with in-place retries and a delayed retry on failure.
+     *
+     * Steps:
+     * 1. Return immediately when a dispatch for the same client segment id is in flight.
+     * 2. Wait for a ready conversation and connection; when that fails, schedule a delayed
+     *    retry and return.
+     * 3. When the ready conversation id differs from the stored one, re-save the segment
+     *    under the ready id.
+     * 4. Try to send up to [SEGMENT_RETRY_MAX_ATTEMPTS] times, waiting
+     *    [SEGMENT_RETRY_BASE_DELAY_MS] times the attempt number between attempts. On success
+     *    the segment is removed from the store.
+     * 5. When every attempt fails, report the error, schedule a delayed retry and, for the
+     *    last segment of a session, turn the typing indicator off.
+     *
+     * Cancellation of the calling coroutine is propagated, not treated as a send failure.
+     * The in-flight mark is always released.
+     */
+    private suspend fun dispatchPendingVoiceSegment(pendingSegment: PendingVoiceSegment) {
+        if (!tryAcquirePendingDispatch(pendingSegment.clientSegmentId)) {
+            return
+        }
+
+        Log.d(
+            TAG,
+            "准备发送分段语音: session=${pendingSegment.voiceSessionId}, idx=${pendingSegment.segmentIndex}, duration=${pendingSegment.durationSeconds}s, isLast=${pendingSegment.isLast}"
+        )
+
+        try {
+            val readyConversationId = ensureConversationReadyForSegmentUpload() ?: run {
+                schedulePendingSegmentRetry(pendingSegment.conversationId)
+                return
+            }
+
+            val segmentToSend = if (pendingSegment.conversationId == readyConversationId) {
+                pendingSegment
+            } else {
+                pendingSegment.copy(conversationId = readyConversationId)
+            }
+
+            // Identity check: a copy was made only when the conversation id had to change.
+            if (segmentToSend !== pendingSegment) {
+                pendingVoiceSegmentStore.upsert(segmentToSend)
+            }
+
+            isTyping.value = true
+
+            var lastError: Exception? = null
+            for (attempt in 1..SEGMENT_RETRY_MAX_ATTEMPTS) {
+                try {
+                    webSocketClient.sendAudioSegment(
+                        audioBytes = segmentToSend.audioBytes,
+                        conversationId = segmentToSend.conversationId,
+                        voiceSessionId = segmentToSend.voiceSessionId,
+                        segmentIndex = segmentToSend.segmentIndex,
+                        duration = segmentToSend.durationSeconds,
+                        isLast = segmentToSend.isLast,
+                        clientSegmentId = segmentToSend.clientSegmentId,
+                    )
+                    pendingVoiceSegmentStore.remove(segmentToSend.clientSegmentId)
+                    Log.d(TAG, "分段语音发送成功: idx=${segmentToSend.segmentIndex}, attempt=$attempt")
+                    return
+                } catch (e: kotlin.coroutines.cancellation.CancellationException) {
+                    // A cancelled coroutine must stop here; it is not a failed send and must
+                    // not be reported or retried.
+                    throw e
+                } catch (e: Exception) {
+                    lastError = e
+                    Log.e(
+                        TAG,
+                        "分段语音发送失败: idx=${segmentToSend.segmentIndex}, attempt=$attempt, err=${e.message}"
+                    )
+                    if (attempt < SEGMENT_RETRY_MAX_ATTEMPTS) {
+                        delay(SEGMENT_RETRY_BASE_DELAY_MS * attempt)
+                    }
+                }
+            }
+
+            connectionStatus.value = "分段发送失败: ${lastError?.message ?: "未知错误"}"
+            // Keep only the ten most recent error messages.
+            errorMessages.value = (
+                errorMessages.value + "分段发送失败(idx=${segmentToSend.segmentIndex}): ${lastError?.message ?: "未知错误"}"
+            ).takeLast(10)
+            schedulePendingSegmentRetry(segmentToSend.conversationId)
+            if (segmentToSend.isLast) {
+                isTyping.value = false
+            }
+        } finally {
+            releasePendingDispatch(pendingSegment.clientSegmentId)
+        }
+    }
+
    /**
     * 发送音频消息
     */
@@ -543,9 +840,15 @@ class CreateMemoryViewModel(
            MessageType.agent_response -> {
                // 处理Agent回复（可能有多条消息，每条作为单独气泡显示）
                val text = message.getString("text") ?: ""
+                val isTransition = message.getBoolean("transition") == true
                val index = message.getInt("index") ?: 0
                val total = message.getInt("total") ?: 1
-                
+
+                if (isTransition) {
+                    isTyping.value = true
+                    return
+                }
+
                // 收到第一条回复时，隐藏打字指示器
                if (index == 0) {
                    isTyping.value = false
@@ -627,6 +930,9 @@ class CreateMemoryViewModel(
                Log.d(TAG, "收到连接确认消息，设置状态为已连接")
                connectionStatus.value = "已连接"
                wsIsConnected.value = true
+                conversationId.value?.let { id ->
+                    retryPendingVoiceSegmentsForConversation(id)
+                }
            }
            MessageType.end_conversation -> {
                connectionStatus.value = "对话已结束"
@@ -768,6 +1074,9 @@ class CreateMemoryViewModel(
    
    override fun onCleared() {
        super.onCleared()
+        keepSegmentedRecording = false
+        resetVoiceSessionState()
+        pendingSegmentRetryJob?.cancel()
        viewModelScope.launch {
            webSocketClient.disconnect()
        }
@@ -776,4 +1085,3 @@ class CreateMemoryViewModel(
        audioPlayer.release()
    }
 }
-
--- a/app-android/app/src/test/java/com/huaga/life_echo/feature/voice/PendingVoiceSegmentStoreTest.kt
+++ b/app-android/app/src/test/java/com/huaga/life_echo/feature/voice/PendingVoiceSegmentStoreTest.kt
@@ -0,0 +1,62 @@
+package com.huaga.life_echo.feature.voice
+
+import org.junit.Assert.assertArrayEquals
+import org.junit.Assert.assertEquals
+import org.junit.Test
+import java.nio.file.Files
+
+/** Tests of [PendingVoiceSegmentStore] against a temporary directory. */
+class PendingVoiceSegmentStoreTest {
+
+    @Test
+    fun upsert_and_listByConversation_persists_audio_bytes() {
+        withTempStore { store ->
+            store.upsert(
+                PendingVoiceSegment(
+                    clientSegmentId = "voice-session-1-0",
+                    conversationId = "conv-1",
+                    voiceSessionId = "voice-session-1",
+                    segmentIndex = 0,
+                    durationSeconds = 30,
+                    isLast = false,
+                    audioBytes = byteArrayOf(1, 2, 3, 4),
+                    createdAtMillis = 100L,
+                )
+            )
+
+            val stored = store.listByConversation("conv-1")
+            assertEquals(1, stored.size)
+            assertEquals("voice-session-1", stored.single().voiceSessionId)
+            assertArrayEquals(byteArrayOf(1, 2, 3, 4), stored.single().audioBytes)
+        }
+    }
+
+    @Test
+    fun remove_deletes_pending_segment_from_store() {
+        withTempStore { store ->
+            store.upsert(
+                PendingVoiceSegment(
+                    clientSegmentId = "voice-session-1-1",
+                    conversationId = "conv-1",
+                    voiceSessionId = "voice-session-1",
+                    segmentIndex = 1,
+                    durationSeconds = 25,
+                    isLast = true,
+                    audioBytes = byteArrayOf(9, 8, 7),
+                    createdAtMillis = 200L,
+                )
+            )
+
+            store.remove("voice-session-1-1")
+
+            assertEquals(emptyList<PendingVoiceSegment>(), store.listByConversation("conv-1"))
+        }
+    }
+
+    /** Runs [body] with a store rooted in a fresh temp directory, which is deleted afterwards. */
+    private fun withTempStore(body: (PendingVoiceSegmentStore) -> Unit) {
+        val rootDir = Files.createTempDirectory("pending-voice-segments").toFile()
+        try {
+            body(PendingVoiceSegmentStore(rootDir))
+        } finally {
+            rootDir.deleteRecursively()
+        }
+    }
+}
--- a/app-android/app/src/test/java/com/huaga/life_echo/feature/voice/SegmentedRecordingDurationTest.kt
+++ b/app-android/app/src/test/java/com/huaga/life_echo/feature/voice/SegmentedRecordingDurationTest.kt
@@ -0,0 +1,17 @@
+package com.huaga.life_echo.feature.voice
+
+import org.junit.Assert.assertEquals
+import org.junit.Test
+
+/** Tests of the [SegmentedRecordingDuration] arithmetic. */
+class SegmentedRecordingDurationTest {
+
+    @Test
+    fun displaySeconds_accumulates_completed_segments_and_current_segment() {
+        val shown = SegmentedRecordingDuration.displaySeconds(60, 5)
+
+        assertEquals(65, shown)
+    }
+
+    @Test
+    fun nextCompletedSeconds_adds_finished_segment_duration() {
+        val completed = SegmentedRecordingDuration.nextCompletedSeconds(30, 30)
+
+        assertEquals(60, completed)
+    }
+}