Files
operating-room-monitor-server/app/services/voice_resolution.py
Kevin 132702aea9 refactor: 统一耗材视觉算法并扩展语音确认至全量候选清单
- 以 ConsumableVisionAlgorithmService 替代 consumable_classifier 与 tear_action;
  可选手部检测权重,未配置时全帧分类;时间窗众数与 Excel 白名单配置。
- 语音待确认:ASR 先匹配 pending topk,再匹配本台 candidate_consumables;
  记账 item_id 与 vision 一致使用 name_to_code。
- 更新 config、Compose、.env.example、依赖(pandas/openpyxl)与测试。

Made-with: Cursor
2026-04-22 16:31:12 +08:00

364 lines
13 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Resolve pending consumable confirmation from uploaded WAV: MinIO + Baidu ASR + parse."""
from __future__ import annotations
import json
from dataclasses import dataclass
from fastapi.concurrency import run_in_threadpool
from loguru import logger
from app.config import Settings
from app.database import AsyncSessionLocal
from app.repositories.voice_audits import VoiceAuditRepository
from app.services.audio_wav import WavDecodeError, wav_bytes_to_pcm16k_mono_s16le
from app.services.baidu_speech import BaiduSpeechNotConfiguredError, BaiduSpeechService
from app.services.minio_audio_storage import MinioAudioStorageService, StoredAudio
from app.services.video.session_manager import CameraSessionManager
from app.services.voice_confirm import (
is_rejection_phrase,
match_voice_choice_against_candidates,
parse_voice_choice,
)
from app.surgery_errors import SurgeryPipelineError
@dataclass(frozen=True)
class VoiceResolveResult:
    """Immutable outcome of one voice-confirmation attempt.

    Instances are frozen, so fields cannot be reassigned after construction.
    """

    # Label of the option that was confirmed, or None when nothing was chosen.
    resolved_label: str | None
    # True when the utterance denied the candidates instead of picking one.
    rejected: bool
    # Text recognized from the audio, if any.
    asr_text: str | None
    # Storage key of the uploaded audio object, if one was stored.
    audio_object_key: str | None
    # Human-readable summary of the outcome.
    message: str
class VoiceConfirmationService:
    """Upload audio to MinIO, run Baidu ASR, parse choice, resolve pending queue entry."""

    def __init__(
        self,
        settings: Settings,
        sessions: CameraSessionManager,
        baidu: BaiduSpeechService,
        minio: MinioAudioStorageService,
        audits: VoiceAuditRepository,
    ) -> None:
        """Keep references to the collaborators; performs no I/O."""
        self._s = settings
        self._sessions = sessions
        self._baidu = baidu
        self._minio = minio
        self._audits = audits

    async def resolve_from_wav(
        self,
        *,
        surgery_id: str,
        confirmation_id: str,
        wav_bytes: bytes,
        filename: str,
        content_type: str | None,
    ) -> VoiceResolveResult:
        """Resolve one pending confirmation from an uploaded WAV recording.

        Steps, in order: size check, MinIO/Baidu configuration checks, lookup
        of the pending confirmation, upload of the raw audio, WAV decoding to
        PCM, ASR, then matching the recognized text first against the pending
        options and, failing that, against the surgery's candidate
        consumables. A rejection phrase resolves the confirmation as rejected.

        Args:
            surgery_id: Surgery whose pending queue holds the confirmation.
            confirmation_id: Identifier of the pending confirmation.
            wav_bytes: Raw bytes of the uploaded audio file.
            filename: Currently unused; reserved for logging / MIME sniffing.
            content_type: Content type reported for the upload, if any.

        Returns:
            A ``VoiceResolveResult`` describing the confirmed label or the
            rejection.

        Raises:
            SurgeryPipelineError: with one of the codes
                ``VOICE_AUDIO_INVALID``, ``MINIO_NOT_CONFIGURED``,
                ``BAIDU_NOT_CONFIGURED``, ``CONFIRMATION_NOT_FOUND``,
                ``MINIO_UPLOAD_FAILED``, ``VOICE_ASR_FAILED`` or
                ``VOICE_PARSE_FAILED``.
        """
        _ = filename  # reserved for logging / future MIME sniff

        if len(wav_bytes) > self._s.voice_upload_max_bytes:
            # Nothing has been uploaded yet, so the audit carries no object
            # key, checksum or options snapshot.
            await self._persist_audit(
                surgery_id=surgery_id,
                confirmation_id=confirmation_id,
                status="invalid_audio",
                audio_object_key=None,
                audio_content_type=content_type,
                audio_size_bytes=len(wav_bytes),
                audio_sha256=None,
                asr_text=None,
                resolved_label=None,
                options_snapshot_json=None,
                error_message="音频超过大小限制",
            )
            raise SurgeryPipelineError(
                "VOICE_AUDIO_INVALID",
                f"音频大小超过限制(最大 {self._s.voice_upload_max_bytes} 字节)。",
            )
        if not self._minio.configured:
            raise SurgeryPipelineError(
                "MINIO_NOT_CONFIGURED",
                "服务端未配置 MinIO无法保存语音追溯文件。",
            )
        if not self._baidu.configured:
            raise SurgeryPipelineError(
                "BAIDU_NOT_CONFIGURED",
                "服务端未配置百度语音,无法进行语音识别。",
            )

        pending = self._sessions.get_pending_confirmation_by_id(
            surgery_id, confirmation_id
        )
        if pending is None:
            raise SurgeryPipelineError(
                "CONFIRMATION_NOT_FOUND",
                "未找到该待确认项或已处理。",
            )

        # pending.options is iterated as (label, confidence) pairs.
        option_labels = [a.strip() for a, _ in pending.options if a.strip()]
        options_snapshot = json.dumps(
            [{"label": a, "confidence": b} for a, b in pending.options],
            ensure_ascii=False,
        )

        stored: StoredAudio | None = None

        async def record_failure(
            status: str, error: str, *, asr_text: str | None = None
        ) -> None:
            """Write a failure audit row, then mirror the error into the voice trace."""
            # `stored` is read at call time: it is still None while the upload
            # itself is failing, and set for every later failure.
            uploaded = stored
            await self._persist_audit(
                surgery_id=surgery_id,
                confirmation_id=confirmation_id,
                status=status,
                audio_object_key=None if uploaded is None else uploaded.object_key,
                audio_content_type=content_type,
                audio_size_bytes=(
                    len(wav_bytes) if uploaded is None else uploaded.size_bytes
                ),
                audio_sha256=None if uploaded is None else uploaded.sha256_hex,
                asr_text=asr_text,
                resolved_label=None,
                options_snapshot_json=options_snapshot,
                error_message=error,
            )
            self._sessions.record_voice_trace(
                surgery_id, asr_text=asr_text, error=error
            )

        # 1) Store the raw upload before decoding so later failures can still
        #    reference the stored object in their audit row.
        try:
            await run_in_threadpool(self._minio.ensure_bucket)
            stored = await run_in_threadpool(
                lambda: self._minio.upload_voice_wav(
                    surgery_id=surgery_id,
                    confirmation_id=confirmation_id,
                    data=wav_bytes,
                    content_type=content_type,
                )
            )
        except Exception as exc:
            logger.warning("MinIO upload failed: {}", exc)
            await record_failure("upload_failed", str(exc))
            raise SurgeryPipelineError(
                "MINIO_UPLOAD_FAILED",
                f"语音文件上传失败:{exc}",
            ) from exc

        # 2) Decode the WAV container into the PCM payload handed to ASR.
        try:
            pcm = await run_in_threadpool(wav_bytes_to_pcm16k_mono_s16le, wav_bytes)
        except WavDecodeError as exc:
            await record_failure("invalid_audio", str(exc))
            raise SurgeryPipelineError(
                "VOICE_AUDIO_INVALID",
                f"无法解析 WAV 音频:{exc}",
            ) from exc

        # 3) Speech recognition.
        try:
            asr_payload = await run_in_threadpool(
                self._baidu.asr, pcm, "pcm", 16000, None
            )
        except BaiduSpeechNotConfiguredError as exc:
            # Configuration problem rather than a recognition failure: no
            # audit row or trace entry is written for it.
            raise SurgeryPipelineError(
                "BAIDU_NOT_CONFIGURED",
                str(exc),
            ) from exc
        except Exception as exc:
            await record_failure("asr_failed", str(exc))
            raise SurgeryPipelineError(
                "VOICE_ASR_FAILED",
                f"语音识别调用失败:{exc}",
            ) from exc

        if not isinstance(asr_payload, dict):
            msg = "ASR 返回格式异常"
            await record_failure("asr_failed", msg)
            raise SurgeryPipelineError("VOICE_ASR_FAILED", msg)

        err_no = asr_payload.get("err_no")
        if err_no != 0:
            # A missing err_no (None) is also treated as a failure.
            msg = (
                f"asr_err_{err_no}: "
                f"{asr_payload.get('err_msg')}"
            )
            await record_failure("asr_failed", msg)
            raise SurgeryPipelineError("VOICE_ASR_FAILED", msg)

        # "result" is accepted either as a non-empty list (first entry wins)
        # or as a bare string.
        results = asr_payload.get("result")
        if isinstance(results, list) and results:
            first = results[0]
            # A null entry must not be stringified into the literal "None"
            # and then parsed as if it had been spoken.
            text = "" if first is None else str(first).strip()
        elif isinstance(results, str):
            text = results.strip()
        else:
            text = ""
        if not text:
            msg = "语音识别结果为空"
            await record_failure("asr_failed", msg)
            raise SurgeryPipelineError("VOICE_ASR_FAILED", msg)

        self._sessions.record_voice_trace(surgery_id, asr_text=text, error=None)

        # 4) Interpret the utterance: rejection first, then the pending
        #    options, then the surgery-wide candidate list as a fallback.
        rejected = is_rejection_phrase(text)
        chosen: str | None = None
        if not rejected:
            chosen = parse_voice_choice(text, option_labels)
            if chosen is None:
                surgery_candidates = self._sessions.get_surgery_candidate_consumables(
                    surgery_id
                )
                chosen = match_voice_choice_against_candidates(
                    text, surgery_candidates
                )
        if not rejected and not chosen:
            msg = (
                "无法从语音中匹配候选项或本台手术候选清单中的耗材名称,"
                "请重试或说「不是」否认全部"
            )
            await record_failure("parse_failed", msg, asr_text=text)
            raise SurgeryPipelineError("VOICE_PARSE_FAILED", msg)

        # 5) Apply the decision, then audit the successful outcome.
        # NOTE(review): if resolve_pending_confirmation raises (e.g. the entry
        # was resolved elsewhere in the meantime), no audit row is written for
        # the already-uploaded audio - confirm whether that is intended.
        await self._sessions.resolve_pending_confirmation(
            surgery_id,
            confirmation_id,
            chosen_label=chosen,
            rejected=rejected,
        )
        await self._persist_audit(
            surgery_id=surgery_id,
            confirmation_id=confirmation_id,
            status="rejected" if rejected else "recognized",
            audio_object_key=stored.object_key,
            audio_content_type=content_type,
            audio_size_bytes=stored.size_bytes,
            audio_sha256=stored.sha256_hex,
            asr_text=text,
            resolved_label=chosen if not rejected else None,
            options_snapshot_json=options_snapshot,
            error_message=None,
        )
        if rejected:
            return VoiceResolveResult(
                resolved_label=None,
                rejected=True,
                asr_text=text,
                audio_object_key=stored.object_key,
                message="已否认全部候选,未记消耗。",
            )
        return VoiceResolveResult(
            resolved_label=chosen,
            rejected=False,
            asr_text=text,
            audio_object_key=stored.object_key,
            message="已确认并记一条消耗。",
        )

    async def _persist_audit(
        self,
        *,
        surgery_id: str,
        confirmation_id: str,
        status: str,
        audio_object_key: str | None,
        audio_content_type: str | None,
        audio_size_bytes: int | None,
        audio_sha256: str | None,
        asr_text: str | None,
        resolved_label: str | None,
        options_snapshot_json: str | None,
        error_message: str | None,
    ) -> None:
        """Save one voice-audit row in its own session and transaction.

        Best-effort: any exception raised while saving is logged and
        swallowed, so an audit failure never replaces the outcome of the
        request being audited.
        """
        try:
            async with AsyncSessionLocal() as session:
                async with session.begin():
                    await self._audits.save_audit(
                        session,
                        surgery_id=surgery_id,
                        confirmation_id=confirmation_id,
                        status=status,
                        audio_object_key=audio_object_key,
                        audio_content_type=audio_content_type,
                        audio_size_bytes=audio_size_bytes,
                        audio_sha256=audio_sha256,
                        asr_text=asr_text,
                        resolved_label=resolved_label,
                        options_snapshot_json=options_snapshot_json,
                        error_message=error_message,
                    )
        except Exception as exc:
            # logger.exception keeps the traceback, which a plain error-level
            # message would drop.
            logger.exception("Persist voice audit failed: {}", exc)