Files
operating-room-monitor-server/app/services/voice_resolution.py

696 lines
26 KiB
Python
Raw Normal View History

"""Resolve pending consumable confirmation from uploaded WAV: MinIO + Baidu ASR + parse."""
from __future__ import annotations
import json
from dataclasses import dataclass
from fastapi.concurrency import run_in_threadpool
from loguru import logger
from app.config import Settings
from app.services.voice_file_log import emit_voice_event
from app.database import AsyncSessionLocal
from app.repositories.voice_audits import VoiceAuditRepository
from app.services.audio_wav import WavDecodeError, wav_bytes_to_pcm16k_mono_s16le
from app.services.baidu_speech import BaiduSpeechNotConfiguredError, BaiduSpeechService
from app.services.minio_audio_storage import MinioAudioStorageService, StoredAudio
from app.services.video.session_manager import CameraSessionManager
from app.services.voice_confirm import (
is_rejection_phrase,
match_voice_choice_against_candidates,
parse_voice_choice,
)
from app.surgery_errors import SurgeryPipelineError
@dataclass(frozen=True)
class VoiceResolveResult:
    """Immutable outcome of one successfully resolved voice confirmation.

    Attributes:
        resolved_label: Label that was confirmed; ``None`` when the speaker
            rejected the candidates.
        rejected: ``True`` when the speaker rejected the candidates.
        asr_text: Recognized text the decision was based on.
        audio_object_key: Object key of the stored recording; ``None`` when
            no audio was stored for this attempt.
        message: Human-readable summary of what was done.
    """

    # Field order is part of the positional constructor signature; keep it.
    resolved_label: str | None
    rejected: bool
    asr_text: str | None
    audio_object_key: str | None
    message: str
class VoiceConfirmationService:
"""Upload audio to MinIO, run Baidu ASR, parse choice, resolve pending queue entry."""
def __init__(
self,
settings: Settings,
sessions: CameraSessionManager,
baidu: BaiduSpeechService,
minio: MinioAudioStorageService,
audits: VoiceAuditRepository,
) -> None:
self._s = settings
self._sessions = sessions
self._baidu = baidu
self._minio = minio
self._audits = audits
def _emit_voice_trace(
    self,
    *,
    source: str,
    status: str,
    surgery_id: str,
    confirmation_id: str,
    asr_text: str | None = None,
    resolved_label: str | None = None,
    rejected: bool | str | None = None,
    error_message: str | None = None,
    audio_object_key: str | None = None,
) -> None:
    """Forward one voice-flow event to ``emit_voice_event``.

    All arguments are passed through unchanged as keyword arguments,
    together with the service settings as the first positional argument.

    Args:
        source: Origin of the attempt; callers in this class use ``"wav"``
            or ``"text"``.
        status: Status tag for the event (for example ``"asr_failed"``).
        surgery_id: Surgery the confirmation belongs to.
        confirmation_id: Pending confirmation being processed.
        asr_text: Recognized text, when available.
        resolved_label: Label that was confirmed, when available.
        rejected: Rejection flag, when known.
        error_message: Failure description, when the event is an error.
        audio_object_key: Object key of the stored recording, if any.
    """
    event_fields = {
        "surgery_id": surgery_id,
        "source": source,
        "status": status,
        "confirmation_id": confirmation_id,
        "asr_text": asr_text,
        "resolved_label": resolved_label,
        "rejected": rejected,
        "error_message": error_message,
        "audio_object_key": audio_object_key,
    }
    emit_voice_event(self._s, **event_fields)
def synthesize_prompt_to_mp3(self, text: str) -> bytes:
    """Synthesize *text* with Baidu online TTS and return the audio payload.

    The audio is intended for direct playback in the browser. Per the
    original author's note, the synthesis parameters are the same as the
    ones used by ``voice_confirm._synthesize_to_temp_mp3``.

    Args:
        text: Prompt to speak; surrounding whitespace is stripped.

    Returns:
        Whatever non-dict value the Baidu client returns for the request.

    Raises:
        SurgeryPipelineError: ``TTS_TEXT_EMPTY`` when *text* is empty or
            whitespace only; ``BAIDU_NOT_CONFIGURED`` when the client raises
            ``BaiduSpeechNotConfiguredError``; ``TTS_ERROR`` when the client
            returns a dict instead of audio.
    """
    prompt = (text or "").strip()
    if prompt == "":
        raise SurgeryPipelineError("TTS_TEXT_EMPTY", "提示文本为空。")

    voice_options = {"spd": 5, "pit": 5, "vol": 9, "per": 0}
    try:
        outcome = self._baidu.synthesis(prompt, "zh", 1, voice_options)
    except BaiduSpeechNotConfiguredError as exc:
        raise SurgeryPipelineError(
            "BAIDU_NOT_CONFIGURED",
            "服务端未配置百度语音,无法合成播报音频。",
        ) from exc

    # A dict result is treated as a failure report rather than audio.
    if not isinstance(outcome, dict):
        return outcome
    raise SurgeryPipelineError("TTS_ERROR", f"百度 TTS 失败: {outcome!r}")
async def resolve_from_wav(
    self,
    *,
    surgery_id: str,
    confirmation_id: str,
    wav_bytes: bytes,
    filename: str,
    content_type: str | None,
) -> VoiceResolveResult:
    """Resolve a pending confirmation from an uploaded WAV recording.

    Steps, in order: enforce the upload size limit, require MinIO and Baidu
    to be configured, look up the pending confirmation, upload the audio to
    MinIO, decode it to 16 kHz PCM, run Baidu ASR, then interpret the
    recognized text as a rejection or a choice and resolve the pending
    entry. The audio is uploaded before it is decoded, so a recording that
    fails to decode is still referenced by its audit row.

    Most failures are written to the audit table and the trace logs before
    the error is raised; configuration errors and a missing confirmation
    only emit a trace event.

    Args:
        surgery_id: Surgery the confirmation belongs to.
        confirmation_id: Pending confirmation to resolve.
        wav_bytes: Raw bytes of the uploaded recording.
        filename: Client file name; currently unused.
        content_type: Content type reported for the upload, if any.

    Returns:
        The resolution outcome (confirmed label or rejection).

    Raises:
        SurgeryPipelineError: With one of the codes ``VOICE_AUDIO_INVALID``,
            ``MINIO_NOT_CONFIGURED``, ``BAIDU_NOT_CONFIGURED``,
            ``CONFIRMATION_NOT_FOUND``, ``MINIO_UPLOAD_FAILED``,
            ``VOICE_ASR_FAILED`` or ``VOICE_PARSE_FAILED``. The last one
            carries ``confirmation_id`` and ``retry_remaining`` in ``extra``.
    """
    _ = filename  # reserved for logging / future MIME sniff

    ids = {"surgery_id": surgery_id, "confirmation_id": confirmation_id}
    failure_ctx = {**ids, "content_type": content_type}

    size_limit = self._s.voice_upload_max_bytes
    if len(wav_bytes) > size_limit:
        # Oversized uploads are audited but not recorded on the session.
        await self._report_wav_failure(
            status="invalid_audio",
            error_message="音频超过大小限制",
            stored=None,
            fallback_size_bytes=len(wav_bytes),
            options_snapshot=None,
            record_session_trace=False,
            **failure_ctx,
        )
        raise SurgeryPipelineError(
            "VOICE_AUDIO_INVALID",
            f"音频大小超过限制(最大 {size_limit} 字节)。",
        )

    if not self._minio.configured:
        minio_msg = "服务端未配置 MinIO无法保存语音追溯文件。"
        self._emit_voice_trace(
            source="wav",
            status="minio_not_configured",
            error_message=minio_msg,
            **ids,
        )
        raise SurgeryPipelineError("MINIO_NOT_CONFIGURED", minio_msg)

    if not self._baidu.configured:
        baidu_msg = "服务端未配置百度语音,无法进行语音识别。"
        self._emit_voice_trace(
            source="wav",
            status="baidu_not_configured",
            error_message=baidu_msg,
            **ids,
        )
        raise SurgeryPipelineError("BAIDU_NOT_CONFIGURED", baidu_msg)

    pending = self._sessions.get_pending_confirmation_by_id(
        surgery_id, confirmation_id
    )
    if pending is None:
        missing_msg = "未找到该待确认项或已处理。"
        self._emit_voice_trace(
            source="wav",
            status="confirmation_not_found",
            error_message=missing_msg,
            **ids,
        )
        raise SurgeryPipelineError("CONFIRMATION_NOT_FOUND", missing_msg)

    option_labels = [a.strip() for a, _ in pending.options if a.strip()]
    # Snapshot of the offered options, stored with every audit row below.
    options_snapshot = json.dumps(
        [{"label": a, "confidence": b} for a, b in pending.options],
        ensure_ascii=False,
    )

    try:
        await run_in_threadpool(self._minio.ensure_bucket)
        stored: StoredAudio = await run_in_threadpool(
            lambda: self._minio.upload_voice_wav(
                surgery_id=surgery_id,
                confirmation_id=confirmation_id,
                data=wav_bytes,
                content_type=content_type,
            )
        )
    except Exception as exc:
        logger.warning("MinIO upload failed: {}", exc)
        await self._report_wav_failure(
            status="upload_failed",
            error_message=str(exc),
            stored=None,
            fallback_size_bytes=len(wav_bytes),
            options_snapshot=options_snapshot,
            **failure_ctx,
        )
        raise SurgeryPipelineError(
            "MINIO_UPLOAD_FAILED",
            f"语音文件上传失败:{exc}",
        ) from exc

    try:
        pcm = await run_in_threadpool(wav_bytes_to_pcm16k_mono_s16le, wav_bytes)
    except WavDecodeError as exc:
        await self._report_wav_failure(
            status="invalid_audio",
            error_message=str(exc),
            stored=stored,
            options_snapshot=options_snapshot,
            **failure_ctx,
        )
        raise SurgeryPipelineError(
            "VOICE_AUDIO_INVALID",
            f"无法解析 WAV 音频:{exc}",
        ) from exc

    try:
        asr_payload = await run_in_threadpool(
            self._baidu.asr, pcm, "pcm", 16000, None
        )
    except BaiduSpeechNotConfiguredError as exc:
        # Configuration errors only emit a trace event; no audit row is
        # written, matching the pre-upload configuration checks above.
        self._emit_voice_trace(
            source="wav",
            status="baidu_not_configured",
            error_message=str(exc),
            audio_object_key=stored.object_key,
            **ids,
        )
        raise SurgeryPipelineError("BAIDU_NOT_CONFIGURED", str(exc)) from exc
    except Exception as exc:
        await self._report_wav_failure(
            status="asr_failed",
            error_message=str(exc),
            stored=stored,
            options_snapshot=options_snapshot,
            **failure_ctx,
        )
        raise SurgeryPipelineError(
            "VOICE_ASR_FAILED",
            f"语音识别调用失败:{exc}",
        ) from exc

    text, asr_error = self._asr_text_or_error(asr_payload)
    if asr_error is not None:
        await self._report_wav_failure(
            status="asr_failed",
            error_message=asr_error,
            stored=stored,
            options_snapshot=options_snapshot,
            **failure_ctx,
        )
        raise SurgeryPipelineError("VOICE_ASR_FAILED", asr_error)

    self._sessions.record_voice_trace(surgery_id, asr_text=text, error=None)

    rejected = is_rejection_phrase(text)
    chosen: str | None = None
    if not rejected:
        chosen = parse_voice_choice(text, option_labels)
        if chosen is None:
            # Fall back to the surgery-wide candidate list.
            surgery_candidates = self._sessions.get_surgery_candidate_consumables(
                surgery_id
            )
            chosen = match_voice_choice_against_candidates(
                text, surgery_candidates
            )

    if not rejected and not chosen:
        _, retry_remaining = await self._sessions.record_voice_parse_failure(
            surgery_id, confirmation_id
        )
        base = (
            "无法从语音中匹配候选项或本台手术候选清单中的耗材名称,"
            "请重试或说「不是」否认全部。"
        )
        if retry_remaining > 0:
            msg = (
                f"{base} 本次未听清或未能解析,"
                f"您还可重试 {retry_remaining} 次,"
                "请说「第一个」「第二个」等序号或候选项全名。"
            )
        else:
            msg = (
                f"{base} 本轮重试机会已用完,"
                "请再清晰地说序号/全名,或说「不是」否认全部。"
            )
        await self._report_wav_failure(
            status="parse_failed",
            error_message=msg,
            stored=stored,
            options_snapshot=options_snapshot,
            asr_text=text,
            **failure_ctx,
        )
        raise SurgeryPipelineError(
            "VOICE_PARSE_FAILED",
            msg,
            extra={
                "confirmation_id": confirmation_id,
                "retry_remaining": retry_remaining,
            },
        )

    await self._sessions.resolve_pending_confirmation(
        surgery_id,
        confirmation_id,
        chosen_label=chosen,
        rejected=rejected,
    )

    final_status = "rejected" if rejected else "recognized"
    final_label = chosen if not rejected else None
    await self._persist_audit(
        status=final_status,
        audio_object_key=stored.object_key,
        audio_content_type=content_type,
        audio_size_bytes=stored.size_bytes,
        audio_sha256=stored.sha256_hex,
        asr_text=text,
        resolved_label=final_label,
        options_snapshot_json=options_snapshot,
        error_message=None,
        **ids,
    )
    self._emit_voice_trace(
        source="wav",
        status=final_status,
        asr_text=text,
        resolved_label=final_label,
        rejected=rejected,
        audio_object_key=stored.object_key,
        **ids,
    )

    if rejected:
        return VoiceResolveResult(
            resolved_label=None,
            rejected=True,
            asr_text=text,
            audio_object_key=stored.object_key,
            message="已否认全部候选,未记消耗。",
        )
    return VoiceResolveResult(
        resolved_label=chosen,
        rejected=False,
        asr_text=text,
        audio_object_key=stored.object_key,
        message="已确认并记一条消耗。",
    )


async def _report_wav_failure(
    self,
    *,
    surgery_id: str,
    confirmation_id: str,
    status: str,
    error_message: str,
    content_type: str | None,
    stored: StoredAudio | None,
    options_snapshot: str | None,
    fallback_size_bytes: int | None = None,
    asr_text: str | None = None,
    record_session_trace: bool = True,
) -> None:
    """Record one failed WAV attempt: audit row, session trace, file trace.

    When *stored* is given, its object key, size and SHA-256 go into the
    audit row; otherwise the key and hash are ``None`` and the size is
    *fallback_size_bytes*. The caller is responsible for raising afterwards.
    """
    has_audio = stored is not None
    object_key = stored.object_key if has_audio else None
    await self._persist_audit(
        surgery_id=surgery_id,
        confirmation_id=confirmation_id,
        status=status,
        audio_object_key=object_key,
        audio_content_type=content_type,
        audio_size_bytes=stored.size_bytes if has_audio else fallback_size_bytes,
        audio_sha256=stored.sha256_hex if has_audio else None,
        asr_text=asr_text,
        resolved_label=None,
        options_snapshot_json=options_snapshot,
        error_message=error_message,
    )
    if record_session_trace:
        self._sessions.record_voice_trace(
            surgery_id, asr_text=asr_text, error=error_message
        )
    self._emit_voice_trace(
        source="wav",
        status=status,
        surgery_id=surgery_id,
        confirmation_id=confirmation_id,
        asr_text=asr_text,
        error_message=error_message,
        audio_object_key=object_key,
    )


@staticmethod
def _asr_text_or_error(asr_payload: object) -> tuple[str, str | None]:
    """Extract the recognized text from an ASR response.

    Returns ``(text, None)`` on success, or ``("", message)`` when the
    response is not a dict, has a non-zero ``err_no``, or yields no text.
    """
    if not isinstance(asr_payload, dict):
        return "", "ASR 返回格式异常"
    if asr_payload.get("err_no") != 0:
        return "", (
            f"asr_err_{asr_payload.get('err_no')}: "
            f"{asr_payload.get('err_msg')}"
        )

    results = asr_payload.get("result")
    if isinstance(results, list) and results:
        # Only the first entry of the result list is used.
        text = str(results[0])
    elif isinstance(results, str):
        text = results
    else:
        text = ""
    text = text.strip()
    if not text:
        return "", "语音识别结果为空"
    return text, None
async def resolve_from_recognized_text(
    self,
    *,
    surgery_id: str,
    confirmation_id: str,
    recognized_text: str,
) -> VoiceResolveResult:
    """Resolve a pending confirmation from text recognized on the client.

    Intended for text produced by client-side recognition such as the
    browser Web Speech API. Neither MinIO nor Baidu ASR is involved; the
    text is interpreted with the same rules as in ``resolve_from_wav``.

    Args:
        surgery_id: Surgery the confirmation belongs to.
        confirmation_id: Pending confirmation to resolve.
        recognized_text: Text recognized by the client.

    Returns:
        The resolution outcome; ``audio_object_key`` is always ``None``.

    Raises:
        SurgeryPipelineError: ``CONFIRMATION_NOT_FOUND`` when there is no
            such pending confirmation, ``VOICE_TEXT_EMPTY`` when the text is
            empty or whitespace only, ``VOICE_PARSE_FAILED`` when the text
            matches neither a rejection nor a candidate.
    """
    ids = {"surgery_id": surgery_id, "confirmation_id": confirmation_id}
    # This path never stores audio, so every audit row gets empty audio columns.
    no_audio = {
        "audio_object_key": None,
        "audio_content_type": None,
        "audio_size_bytes": None,
        "audio_sha256": None,
    }

    pending = self._sessions.get_pending_confirmation_by_id(
        surgery_id, confirmation_id
    )
    if pending is None:
        self._emit_voice_trace(
            source="text",
            status="confirmation_not_found",
            error_message="未找到该待确认项或已处理。",
            **ids,
        )
        raise SurgeryPipelineError(
            "CONFIRMATION_NOT_FOUND",
            "未找到该待确认项或已处理。",
        )

    labels = []
    for raw_label, _confidence in pending.options:
        cleaned = raw_label.strip()
        if cleaned:
            labels.append(cleaned)
    snapshot = json.dumps(
        [
            {"label": raw_label, "confidence": confidence}
            for raw_label, confidence in pending.options
        ],
        ensure_ascii=False,
    )

    text = (recognized_text or "").strip()
    if not text:
        await self._persist_audit(
            status="client_stt_empty",
            asr_text=None,
            resolved_label=None,
            options_snapshot_json=snapshot,
            error_message="客户端识别文本为空",
            **ids,
            **no_audio,
        )
        self._sessions.record_voice_trace(
            surgery_id, asr_text=None, error="empty text"
        )
        self._emit_voice_trace(
            source="text",
            status="client_stt_empty",
            error_message="客户端识别文本为空",
            **ids,
        )
        raise SurgeryPipelineError("VOICE_TEXT_EMPTY", "recognized_text 为空。")

    self._sessions.record_voice_trace(surgery_id, asr_text=text, error=None)

    rejected = is_rejection_phrase(text)
    if rejected:
        chosen: str | None = None
    else:
        chosen = parse_voice_choice(text, labels)
        if chosen is None:
            # Fall back to the surgery-wide candidate list.
            chosen = match_voice_choice_against_candidates(
                text,
                self._sessions.get_surgery_candidate_consumables(surgery_id),
            )
        if not chosen:
            _, retry_remaining = await self._sessions.record_voice_parse_failure(
                surgery_id, confirmation_id
            )
            base = (
                "无法从文本中匹配候选项或本台手术候选清单中的耗材名称,"
                "请重试或说「不是」否认全部。"
            )
            if retry_remaining > 0:
                hint = (
                    " 本次未能解析,"
                    f"您还可重试 {retry_remaining} 次,"
                    "请输入「第一个」「第二个」等或候选项全名。"
                )
            else:
                hint = (
                    " 本轮重试机会已用完,"
                    "请再输入序号/全名,或说「不是」否认全部。"
                )
            failure_msg = base + hint
            await self._persist_audit(
                status="client_stt_parse_failed",
                asr_text=text,
                resolved_label=None,
                options_snapshot_json=snapshot,
                error_message=failure_msg,
                **ids,
                **no_audio,
            )
            self._sessions.record_voice_trace(
                surgery_id, asr_text=text, error=failure_msg
            )
            self._emit_voice_trace(
                source="text",
                status="client_stt_parse_failed",
                asr_text=text,
                error_message=failure_msg,
                **ids,
            )
            raise SurgeryPipelineError(
                "VOICE_PARSE_FAILED",
                failure_msg,
                extra={
                    "confirmation_id": confirmation_id,
                    "retry_remaining": retry_remaining,
                },
            )

    await self._sessions.resolve_pending_confirmation(
        surgery_id,
        confirmation_id,
        chosen_label=chosen,
        rejected=rejected,
    )

    outcome_status = "rejected" if rejected else "recognized"
    outcome_label = None if rejected else chosen
    await self._persist_audit(
        status=outcome_status,
        asr_text=text,
        resolved_label=outcome_label,
        options_snapshot_json=snapshot,
        error_message=None,
        **ids,
        **no_audio,
    )
    self._emit_voice_trace(
        source="text",
        status=outcome_status,
        asr_text=text,
        resolved_label=outcome_label,
        rejected=rejected,
        **ids,
    )

    if not rejected:
        return VoiceResolveResult(
            resolved_label=chosen,
            rejected=False,
            asr_text=text,
            audio_object_key=None,
            message="已确认并记一条消耗。",
        )
    return VoiceResolveResult(
        resolved_label=None,
        rejected=True,
        asr_text=text,
        audio_object_key=None,
        message="已否认全部候选,未记消耗。",
    )
async def _persist_audit(
    self,
    *,
    surgery_id: str,
    confirmation_id: str,
    status: str,
    audio_object_key: str | None,
    audio_content_type: str | None,
    audio_size_bytes: int | None,
    audio_sha256: str | None,
    asr_text: str | None,
    resolved_label: str | None,
    options_snapshot_json: str | None,
    error_message: str | None,
) -> None:
    """Save one voice audit row in its own session and transaction.

    This is best-effort: any exception raised while saving is logged and
    swallowed, so a failing audit write never changes the outcome of the
    voice flow that called it.

    Args:
        surgery_id: Surgery the attempt belongs to.
        confirmation_id: Pending confirmation the attempt targeted.
        status: Outcome tag for the attempt.
        audio_object_key: Object key of the stored recording, if any.
        audio_content_type: Content type reported for the upload, if any.
        audio_size_bytes: Size of the recording in bytes, if known.
        audio_sha256: SHA-256 hex digest of the recording, if known.
        asr_text: Recognized text, if any.
        resolved_label: Confirmed label, if any.
        options_snapshot_json: JSON snapshot of the offered options, if any.
        error_message: Failure description, or ``None`` on success.
    """
    try:
        async with AsyncSessionLocal() as session, session.begin():
            await self._audits.save_audit(
                session,
                surgery_id=surgery_id,
                confirmation_id=confirmation_id,
                status=status,
                audio_object_key=audio_object_key,
                audio_content_type=audio_content_type,
                audio_size_bytes=audio_size_bytes,
                audio_sha256=audio_sha256,
                asr_text=asr_text,
                resolved_label=resolved_label,
                options_snapshot_json=options_snapshot_json,
                error_message=error_message,
            )
    except Exception as exc:
        # Deliberately swallowed (see docstring). Log with the traceback so
        # a lost audit row can still be diagnosed from the logs.
        logger.exception("Persist voice audit failed: {}", exc)