"""Resolve pending consumable confirmation from uploaded WAV: MinIO + Baidu ASR + parse.""" from __future__ import annotations import json from dataclasses import dataclass from fastapi.concurrency import run_in_threadpool from loguru import logger from app.config import Settings from app.database import AsyncSessionLocal from app.repositories.voice_audits import VoiceAuditRepository from app.services.audio_wav import WavDecodeError, wav_bytes_to_pcm16k_mono_s16le from app.services.baidu_speech import BaiduSpeechNotConfiguredError, BaiduSpeechService from app.services.minio_audio_storage import MinioAudioStorageService, StoredAudio from app.services.video.session_manager import CameraSessionManager from app.services.voice_confirm import is_rejection_phrase, parse_voice_choice from app.surgery_errors import SurgeryPipelineError @dataclass(frozen=True) class VoiceResolveResult: resolved_label: str | None rejected: bool asr_text: str | None audio_object_key: str | None message: str class VoiceConfirmationService: """Upload audio to MinIO, run Baidu ASR, parse choice, resolve pending queue entry.""" def __init__( self, settings: Settings, sessions: CameraSessionManager, baidu: BaiduSpeechService, minio: MinioAudioStorageService, audits: VoiceAuditRepository, ) -> None: self._s = settings self._sessions = sessions self._baidu = baidu self._minio = minio self._audits = audits async def resolve_from_wav( self, *, surgery_id: str, confirmation_id: str, wav_bytes: bytes, filename: str, content_type: str | None, ) -> VoiceResolveResult: _ = filename # reserved for logging / future MIME sniff if len(wav_bytes) > self._s.voice_upload_max_bytes: await self._persist_audit( surgery_id=surgery_id, confirmation_id=confirmation_id, status="invalid_audio", audio_object_key=None, audio_content_type=content_type, audio_size_bytes=len(wav_bytes), audio_sha256=None, asr_text=None, resolved_label=None, options_snapshot_json=None, error_message="音频超过大小限制", ) raise SurgeryPipelineError( "VOICE_AUDIO_INVALID", f"音频大小超过限制(最大 {self._s.voice_upload_max_bytes} 字节)。", ) if not self._minio.configured: raise SurgeryPipelineError( "MINIO_NOT_CONFIGURED", "服务端未配置 MinIO,无法保存语音追溯文件。", ) if not self._baidu.configured: raise SurgeryPipelineError( "BAIDU_NOT_CONFIGURED", "服务端未配置百度语音,无法进行语音识别。", ) pending = self._sessions.get_pending_confirmation_by_id( surgery_id, confirmation_id ) if pending is None: raise SurgeryPipelineError( "CONFIRMATION_NOT_FOUND", "未找到该待确认项或已处理。", ) option_labels = [a.strip() for a, _ in pending.options if a.strip()] options_snapshot = json.dumps( [{"label": a, "confidence": b} for a, b in pending.options], ensure_ascii=False, ) stored: StoredAudio | None = None try: await run_in_threadpool(self._minio.ensure_bucket) stored = await run_in_threadpool( lambda: self._minio.upload_voice_wav( surgery_id=surgery_id, confirmation_id=confirmation_id, data=wav_bytes, content_type=content_type, ) ) except Exception as exc: logger.warning("MinIO upload failed: {}", exc) await self._persist_audit( surgery_id=surgery_id, confirmation_id=confirmation_id, status="upload_failed", audio_object_key=None, audio_content_type=content_type, audio_size_bytes=len(wav_bytes), audio_sha256=None, asr_text=None, resolved_label=None, options_snapshot_json=options_snapshot, error_message=str(exc), ) self._sessions.record_voice_trace(surgery_id, asr_text=None, error=str(exc)) raise SurgeryPipelineError( "MINIO_UPLOAD_FAILED", f"语音文件上传失败:{exc}", ) from exc try: pcm = await run_in_threadpool(wav_bytes_to_pcm16k_mono_s16le, wav_bytes) except WavDecodeError as exc: await self._persist_audit( surgery_id=surgery_id, confirmation_id=confirmation_id, status="invalid_audio", audio_object_key=stored.object_key, audio_content_type=content_type, audio_size_bytes=stored.size_bytes, audio_sha256=stored.sha256_hex, asr_text=None, resolved_label=None, options_snapshot_json=options_snapshot, error_message=str(exc), ) self._sessions.record_voice_trace(surgery_id, asr_text=None, error=str(exc)) raise SurgeryPipelineError( "VOICE_AUDIO_INVALID", f"无法解析 WAV 音频:{exc}", ) from exc try: asr_payload = await run_in_threadpool( self._baidu.asr, pcm, "pcm", 16000, None ) except BaiduSpeechNotConfiguredError as exc: raise SurgeryPipelineError( "BAIDU_NOT_CONFIGURED", str(exc), ) from exc except Exception as exc: await self._persist_audit( surgery_id=surgery_id, confirmation_id=confirmation_id, status="asr_failed", audio_object_key=stored.object_key, audio_content_type=content_type, audio_size_bytes=stored.size_bytes, audio_sha256=stored.sha256_hex, asr_text=None, resolved_label=None, options_snapshot_json=options_snapshot, error_message=str(exc), ) self._sessions.record_voice_trace(surgery_id, asr_text=None, error=str(exc)) raise SurgeryPipelineError( "VOICE_ASR_FAILED", f"语音识别调用失败:{exc}", ) from exc if not isinstance(asr_payload, dict): msg = "ASR 返回格式异常" await self._persist_audit( surgery_id=surgery_id, confirmation_id=confirmation_id, status="asr_failed", audio_object_key=stored.object_key, audio_content_type=content_type, audio_size_bytes=stored.size_bytes, audio_sha256=stored.sha256_hex, asr_text=None, resolved_label=None, options_snapshot_json=options_snapshot, error_message=msg, ) self._sessions.record_voice_trace(surgery_id, asr_text=None, error=msg) raise SurgeryPipelineError("VOICE_ASR_FAILED", msg) if asr_payload.get("err_no") != 0: msg = ( f"asr_err_{asr_payload.get('err_no')}: " f"{asr_payload.get('err_msg')}" ) await self._persist_audit( surgery_id=surgery_id, confirmation_id=confirmation_id, status="asr_failed", audio_object_key=stored.object_key, audio_content_type=content_type, audio_size_bytes=stored.size_bytes, audio_sha256=stored.sha256_hex, asr_text=None, resolved_label=None, options_snapshot_json=options_snapshot, error_message=msg, ) self._sessions.record_voice_trace(surgery_id, asr_text=None, error=msg) raise SurgeryPipelineError("VOICE_ASR_FAILED", msg) results = asr_payload.get("result") text: str | None = None if isinstance(results, list) and results: text = str(results[0]) elif isinstance(results, str): text = results text = (text or "").strip() if not text: msg = "语音识别结果为空" await self._persist_audit( surgery_id=surgery_id, confirmation_id=confirmation_id, status="asr_failed", audio_object_key=stored.object_key, audio_content_type=content_type, audio_size_bytes=stored.size_bytes, audio_sha256=stored.sha256_hex, asr_text=None, resolved_label=None, options_snapshot_json=options_snapshot, error_message=msg, ) self._sessions.record_voice_trace(surgery_id, asr_text=None, error=msg) raise SurgeryPipelineError("VOICE_ASR_FAILED", msg) self._sessions.record_voice_trace(surgery_id, asr_text=text, error=None) rejected = is_rejection_phrase(text) chosen: str | None = None if not rejected: chosen = parse_voice_choice(text, option_labels) if not rejected and not chosen: msg = "无法从语音中匹配候选项,请重试或说「不是」否认全部" await self._persist_audit( surgery_id=surgery_id, confirmation_id=confirmation_id, status="parse_failed", audio_object_key=stored.object_key, audio_content_type=content_type, audio_size_bytes=stored.size_bytes, audio_sha256=stored.sha256_hex, asr_text=text, resolved_label=None, options_snapshot_json=options_snapshot, error_message=msg, ) self._sessions.record_voice_trace(surgery_id, asr_text=text, error=msg) raise SurgeryPipelineError("VOICE_PARSE_FAILED", msg) await self._sessions.resolve_pending_confirmation( surgery_id, confirmation_id, chosen_label=chosen, rejected=rejected, ) final_status = "rejected" if rejected else "recognized" await self._persist_audit( surgery_id=surgery_id, confirmation_id=confirmation_id, status=final_status, audio_object_key=stored.object_key, audio_content_type=content_type, audio_size_bytes=stored.size_bytes, audio_sha256=stored.sha256_hex, asr_text=text, resolved_label=chosen if not rejected else None, options_snapshot_json=options_snapshot, error_message=None, ) if rejected: return VoiceResolveResult( resolved_label=None, rejected=True, asr_text=text, audio_object_key=stored.object_key, message="已否认全部候选,未记消耗。", ) return VoiceResolveResult( resolved_label=chosen, rejected=False, asr_text=text, audio_object_key=stored.object_key, message="已确认并记一条消耗。", ) async def _persist_audit( self, *, surgery_id: str, confirmation_id: str, status: str, audio_object_key: str | None, audio_content_type: str | None, audio_size_bytes: int | None, audio_sha256: str | None, asr_text: str | None, resolved_label: str | None, options_snapshot_json: str | None, error_message: str | None, ) -> None: try: async with AsyncSessionLocal() as session: async with session.begin(): await self._audits.save_audit( session, surgery_id=surgery_id, confirmation_id=confirmation_id, status=status, audio_object_key=audio_object_key, audio_content_type=audio_content_type, audio_size_bytes=audio_size_bytes, audio_sha256=audio_sha256, asr_text=asr_text, resolved_label=resolved_label, options_snapshot_json=options_snapshot_json, error_message=error_message, ) except Exception as exc: logger.error("Persist voice audit failed: {}", exc)