feat: 客户端语音回放 长录音客户端切片,并行转录

This commit is contained in:
Kevin
2026-03-12 15:57:45 +08:00
parent ec7107cbf0
commit f15046c1c1
2 changed files with 84 additions and 35 deletions

View File

@@ -10,8 +10,11 @@ import com.huaga.life_echo.data.repository.ChapterRepository
import com.huaga.life_echo.data.repository.MessageRepository import com.huaga.life_echo.data.repository.MessageRepository
import com.huaga.life_echo.feature.conversation.ports.ConversationApiPort import com.huaga.life_echo.feature.conversation.ports.ConversationApiPort
import com.huaga.life_echo.feature.conversation.ports.ConversationRealtimePort import com.huaga.life_echo.feature.conversation.ports.ConversationRealtimePort
import com.huaga.life_echo.feature.conversation.ports.AudioSegmentRequest
import com.huaga.life_echo.feature.voice.AudioPlayer import com.huaga.life_echo.feature.voice.AudioPlayer
import com.huaga.life_echo.feature.voice.AudioSegmenter
import com.huaga.life_echo.feature.voice.PlaybackInfo import com.huaga.life_echo.feature.voice.PlaybackInfo
import com.huaga.life_echo.feature.voice.PendingVoiceSegmentBatchBuilder
import com.huaga.life_echo.feature.voice.RecordingCoordinator import com.huaga.life_echo.feature.voice.RecordingCoordinator
import com.huaga.life_echo.feature.voice.RecordingFinishResult import com.huaga.life_echo.feature.voice.RecordingFinishResult
import com.huaga.life_echo.feature.voice.RecordingStartResult import com.huaga.life_echo.feature.voice.RecordingStartResult
@@ -20,7 +23,10 @@ import com.huaga.life_echo.network.MessageType
import com.huaga.life_echo.model.MessageDto import com.huaga.life_echo.model.MessageDto
import com.huaga.life_echo.data.database.Chapter import com.huaga.life_echo.data.database.Chapter
import kotlinx.coroutines.Job import kotlinx.coroutines.Job
import kotlinx.coroutines.async
import kotlinx.coroutines.awaitAll
import kotlinx.coroutines.channels.Channel import kotlinx.coroutines.channels.Channel
import kotlinx.coroutines.coroutineScope
import kotlinx.coroutines.flow.MutableStateFlow import kotlinx.coroutines.flow.MutableStateFlow
import kotlinx.coroutines.flow.StateFlow import kotlinx.coroutines.flow.StateFlow
import kotlinx.coroutines.flow.asStateFlow import kotlinx.coroutines.flow.asStateFlow
@@ -44,6 +50,7 @@ class CreateMemoryViewModel(
companion object { companion object {
private const val TAG = "CreateMemoryViewModel" private const val TAG = "CreateMemoryViewModel"
private const val MIN_RECORDING_DURATION = 1 private const val MIN_RECORDING_DURATION = 1
private const val SEGMENT_DURATION_SECONDS = 60
} }
private val audioPlayer = AudioPlayer(context) private val audioPlayer = AudioPlayer(context)
@@ -272,13 +279,11 @@ class CreateMemoryViewModel(
Log.w(TAG, "录音时间太短: ${capture.durationSeconds}s") Log.w(TAG, "录音时间太短: ${capture.durationSeconds}s")
return@launch return@launch
} }
val audioBytes = try { sendRecordingSegments(
File(capture.session.filePath).readBytes() filePath = capture.session.filePath,
} catch (e: Exception) { voiceSessionId = capture.session.voiceSessionId,
Log.e(TAG, "读取录音文件失败: ${e.message}") durationSeconds = capture.durationSeconds,
return@launch )
}
sendAudioMessage(audioBytes, capture.session.filePath, capture.durationSeconds)
} }
is RecordingFinishResult.Failed -> { is RecordingFinishResult.Failed -> {
Log.e(TAG, "停止录音失败: ${result.capture.cause.message}") Log.e(TAG, "停止录音失败: ${result.capture.cause.message}")
@@ -345,8 +350,16 @@ class CreateMemoryViewModel(
} }
} }
private suspend fun sendAudioMessage(audioBytes: ByteArray, filePath: String, durationSeconds: Int) { /**
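Log.d(TAG, "准备发送音频消息,大小: ${audioBytes.size}, 时长: ${durationSeconds}s") * Splits the recording file into [SEGMENT_DURATION_SECONDS]-second segments and sends all segments in parallel via [sendAudioSegment].
* Short recordings (<= the segment duration) are not actually split; the whole file is copied as a single segment.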
*/
private suspend fun sendRecordingSegments(
filePath: String,
voiceSessionId: String,
durationSeconds: Int,
) {
Log.d(TAG, "准备切片并发送,时长: ${durationSeconds}s, 段长: ${SEGMENT_DURATION_SECONDS}s")
if (conversationId.value == null) { if (conversationId.value == null) {
Log.d(TAG, "对话ID为空开始创建新对话") Log.d(TAG, "对话ID为空开始创建新对话")
@@ -358,9 +371,9 @@ class CreateMemoryViewModel(
return return
} }
conversationId.value?.let { id -> val id = conversationId.value ?: return
val tempMessageId = "audio_user_${System.currentTimeMillis()}"
val tempMessageId = "audio_user_${System.currentTimeMillis()}"
val tempMessage = MessageDto( val tempMessage = MessageDto(
id = tempMessageId, id = tempMessageId,
conversationId = id, conversationId = id,
@@ -370,23 +383,59 @@ class CreateMemoryViewModel(
messageType = "audio" messageType = "audio"
) )
historyMessages.value = historyMessages.value + tempMessage historyMessages.value = historyMessages.value + tempMessage
_audioFilePaths.value = _audioFilePaths.value + (tempMessageId to filePath) _audioFilePaths.value = _audioFilePaths.value + (tempMessageId to filePath)
_audioDurations.value = _audioDurations.value + (tempMessageId to durationSeconds) _audioDurations.value = _audioDurations.value + (tempMessageId to durationSeconds)
val segmentFiles = try {
AudioSegmenter.split(
inputPath = filePath,
segmentDurationSeconds = SEGMENT_DURATION_SECONDS,
cacheDir = context.cacheDir,
)
} catch (e: Exception) {
Log.e(TAG, "音频切片失败: ${e.message}", e)
connectionStatus.value = "音频处理失败: ${e.message}"
return
}
try { try {
val segments = PendingVoiceSegmentBatchBuilder.build(
segmentFiles = segmentFiles,
conversationId = id,
voiceSessionId = voiceSessionId,
)
isTyping.value = true isTyping.value = true
conversationRealtime.sendAudioMessage(audioBytes, id, durationSeconds) Log.d(TAG, "并行发送 ${segments.size} 个音频段,服务端按 segmentIndex 排序拼接")
Log.d(TAG, "音频消息发送成功") coroutineScope {
segments.map { segment ->
async {
conversationRealtime.sendAudioSegment(
AudioSegmentRequest(
audioBytes = segment.audioBytes,
conversationId = segment.conversationId,
voiceSessionId = segment.voiceSessionId,
segmentIndex = segment.segmentIndex,
duration = segment.durationSeconds,
isLast = segment.isLast,
clientSegmentId = segment.clientSegmentId,
)
)
Log.d(TAG, "已发送段 ${segment.segmentIndex}/${segments.size - 1}, last=${segment.isLast}")
}
}.awaitAll()
}
Log.d(TAG, "全部音频段发送完成")
} catch (e: Exception) { } catch (e: Exception) {
isTyping.value = false isTyping.value = false
Log.e(TAG, "音频消息发送失败: ${e.message}", e) Log.e(TAG, "音频发送失败: ${e.message}", e)
connectionStatus.value = "发送失败: ${e.message}" connectionStatus.value = "发送失败: ${e.message}"
errorMessages.value = (errorMessages.value + "发送失败: ${e.message}").takeLast(10) errorMessages.value = (errorMessages.value + "发送失败: ${e.message}").takeLast(10)
historyMessages.value = historyMessages.value.filter { it.id != tempMessageId } historyMessages.value = historyMessages.value.filter { it.id != tempMessageId }
_audioFilePaths.value = _audioFilePaths.value - tempMessageId _audioFilePaths.value = _audioFilePaths.value - tempMessageId
_audioDurations.value = _audioDurations.value - tempMessageId _audioDurations.value = _audioDurations.value - tempMessageId
} } finally {
segmentFiles.forEach { it.file.delete() }
} }
} }

View File

@@ -24,7 +24,7 @@ class ViewModelFactory(private val context: Context) : ViewModelProvider.Factory
override fun <T : ViewModel> create(modelClass: Class<T>): T { override fun <T : ViewModel> create(modelClass: Class<T>): T {
return when { return when {
modelClass.isAssignableFrom(CreateMemoryViewModel::class.java) -> { modelClass.isAssignableFrom(CreateMemoryViewModel::class.java) -> {
val recorder = VoiceRecorder(context).apply { recordingLimit = 60 } val recorder = VoiceRecorder(context).apply { recordingLimit = 600 }
CreateMemoryViewModel( CreateMemoryViewModel(
conversationRepository = container.conversationRepository, conversationRepository = container.conversationRepository,
chapterRepository = container.chapterRepository, chapterRepository = container.chapterRepository,