feat: surgery pipeline API, video inference, voice confirm, and tests
- Add FastAPI routes for surgery start/end, results, pending confirmation (WAV upload), and health checks.
- Implement RTSP/Hikvision capture, consumable classification, session manager, MinIO/Baidu voice resolution, and DB persistence.
- Add documentation (client API, video backends, staging checklist) and sample camera/RTSP config.
- Add pytest suite (API contract, session manager, voice, repositories, pipeline persistence) and httpx dev dependency.
- Replace deprecated HTTP_422_UNPROCESSABLE_ENTITY with HTTP_422_UNPROCESSABLE_CONTENT.
- Fix SurgeryPipeline DB reads to use an explicit transaction with autobegin disabled.

Made-with: Cursor
This commit is contained in:
349
app/services/voice_resolution.py
Normal file
349
app/services/voice_resolution.py
Normal file
@@ -0,0 +1,349 @@
|
||||
"""Resolve pending consumable confirmation from uploaded WAV: MinIO + Baidu ASR + parse."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from dataclasses import dataclass
|
||||
|
||||
from fastapi.concurrency import run_in_threadpool
|
||||
from loguru import logger
|
||||
|
||||
from app.config import Settings
|
||||
from app.database import AsyncSessionLocal
|
||||
from app.repositories.voice_audits import VoiceAuditRepository
|
||||
from app.services.audio_wav import WavDecodeError, wav_bytes_to_pcm16k_mono_s16le
|
||||
from app.services.baidu_speech import BaiduSpeechNotConfiguredError, BaiduSpeechService
|
||||
from app.services.minio_audio_storage import MinioAudioStorageService, StoredAudio
|
||||
from app.services.video.session_manager import CameraSessionManager
|
||||
from app.services.voice_confirm import is_rejection_phrase, parse_voice_choice
|
||||
from app.surgery_errors import SurgeryPipelineError
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class VoiceResolveResult:
    """Immutable outcome of resolving one pending confirmation from a voice clip.

    Instances are only built on the success paths of the voice flow: either a
    candidate label was matched, or the speaker rejected every candidate.
    """

    # Label matched from the recognised speech; None when the speaker rejected
    # all candidates.
    resolved_label: str | None
    # True when the recognised speech was a rejection phrase.
    rejected: bool
    # Text returned by speech recognition, stripped of surrounding whitespace.
    asr_text: str | None
    # Object key under which the uploaded audio was stored.
    audio_object_key: str | None
    # Human-readable summary of the outcome for the caller.
    message: str
|
||||
|
||||
|
||||
@dataclass
class _AuditContext:
    """Audit-row fields shared by every record written for one upload attempt.

    The context starts with what is known on entry (ids, content type, raw
    byte count) and is filled in as the request progresses: the options
    snapshot once the pending confirmation is found, and the storage details
    once the upload succeeds.
    """

    surgery_id: str
    confirmation_id: str
    content_type: str | None
    # len(wav_bytes) until the upload succeeds, then the size reported by storage.
    size_bytes: int | None
    options_snapshot: str | None = None
    object_key: str | None = None
    sha256_hex: str | None = None


class VoiceConfirmationService:
    """Upload audio to MinIO, run Baidu ASR, parse choice, resolve pending queue entry.

    Every failure after the size check that is tied to a concrete upload is
    written to the voice audit table (best effort) before the error is raised,
    so that stored audio can always be traced back to an audit row.
    """

    def __init__(
        self,
        settings: Settings,
        sessions: CameraSessionManager,
        baidu: BaiduSpeechService,
        minio: MinioAudioStorageService,
        audits: VoiceAuditRepository,
    ) -> None:
        """Store the collaborators; no I/O is performed here."""
        self._s = settings
        self._sessions = sessions
        self._baidu = baidu
        self._minio = minio
        self._audits = audits

    async def resolve_from_wav(
        self,
        *,
        surgery_id: str,
        confirmation_id: str,
        wav_bytes: bytes,
        filename: str,
        content_type: str | None,
    ) -> VoiceResolveResult:
        """Resolve a pending confirmation from an uploaded WAV recording.

        Steps, in order: size check, configuration checks, pending-entry
        lookup, upload to MinIO, WAV decode, Baidu ASR, rejection/choice
        parsing, resolution of the pending entry, final audit row.

        Args:
            surgery_id: Surgery the pending confirmation belongs to.
            confirmation_id: Identifier of the pending confirmation.
            wav_bytes: Raw bytes of the uploaded audio file.
            filename: Client-supplied file name; currently unused.
            content_type: Client-supplied MIME type, stored with the audit.

        Returns:
            A ``VoiceResolveResult`` describing either the confirmed label or
            a rejection of all candidates.

        Raises:
            SurgeryPipelineError: with one of the codes
                ``VOICE_AUDIO_INVALID``, ``MINIO_NOT_CONFIGURED``,
                ``BAIDU_NOT_CONFIGURED``, ``CONFIRMATION_NOT_FOUND``,
                ``MINIO_UPLOAD_FAILED``, ``VOICE_ASR_FAILED`` or
                ``VOICE_PARSE_FAILED``.
        """
        _ = filename  # reserved for logging / future MIME sniff

        ctx = _AuditContext(
            surgery_id=surgery_id,
            confirmation_id=confirmation_id,
            content_type=content_type,
            size_bytes=len(wav_bytes),
        )

        max_bytes = self._s.voice_upload_max_bytes
        if len(wav_bytes) > max_bytes:
            # Oversize uploads are audited but never reach the session trace.
            raise await self._audited_error(
                ctx,
                "VOICE_AUDIO_INVALID",
                f"音频大小超过限制(最大 {max_bytes} 字节)。",
                status="invalid_audio",
                audit_error="音频超过大小限制",
                trace=False,
            )

        if not self._minio.configured:
            raise SurgeryPipelineError(
                "MINIO_NOT_CONFIGURED",
                "服务端未配置 MinIO,无法保存语音追溯文件。",
            )

        if not self._baidu.configured:
            raise SurgeryPipelineError(
                "BAIDU_NOT_CONFIGURED",
                "服务端未配置百度语音,无法进行语音识别。",
            )

        pending = self._sessions.get_pending_confirmation_by_id(
            surgery_id, confirmation_id
        )
        if pending is None:
            raise SurgeryPipelineError(
                "CONFIRMATION_NOT_FOUND",
                "未找到该待确认项或已处理。",
            )

        # Labels used for matching are stripped and non-empty; the audit
        # snapshot keeps the options exactly as they were offered.
        option_labels = [
            label.strip() for label, _ in pending.options if label.strip()
        ]
        ctx.options_snapshot = json.dumps(
            [
                {"label": label, "confidence": confidence}
                for label, confidence in pending.options
            ],
            ensure_ascii=False,
        )

        # Store the raw upload first so that later failures can reference it.
        try:
            await run_in_threadpool(self._minio.ensure_bucket)
            stored = await run_in_threadpool(
                lambda: self._minio.upload_voice_wav(
                    surgery_id=surgery_id,
                    confirmation_id=confirmation_id,
                    data=wav_bytes,
                    content_type=content_type,
                )
            )
        except Exception as exc:
            logger.warning("MinIO upload failed: {}", exc)
            raise await self._audited_error(
                ctx,
                "MINIO_UPLOAD_FAILED",
                f"语音文件上传失败:{exc}",
                status="upload_failed",
                audit_error=str(exc),
            ) from exc

        # From here on every audit row points at the stored object.
        ctx.object_key = stored.object_key
        ctx.size_bytes = stored.size_bytes
        ctx.sha256_hex = stored.sha256_hex

        try:
            pcm = await run_in_threadpool(wav_bytes_to_pcm16k_mono_s16le, wav_bytes)
        except WavDecodeError as exc:
            raise await self._audited_error(
                ctx,
                "VOICE_AUDIO_INVALID",
                f"无法解析 WAV 音频:{exc}",
                status="invalid_audio",
                audit_error=str(exc),
            ) from exc

        try:
            # NOTE(review): "pcm" / 16000 presumably mirror the decoder's
            # output format (see its name) - confirm against BaiduSpeechService.
            asr_payload = await run_in_threadpool(
                self._baidu.asr, pcm, "pcm", 16000, None
            )
        except BaiduSpeechNotConfiguredError as exc:
            # The audio is already stored at this point, so this branch is
            # audited like every other post-upload failure; the client still
            # sees the BAIDU_NOT_CONFIGURED code.
            raise await self._audited_error(
                ctx,
                "BAIDU_NOT_CONFIGURED",
                str(exc),
                status="asr_failed",
                audit_error=str(exc),
            ) from exc
        except Exception as exc:
            raise await self._audited_error(
                ctx,
                "VOICE_ASR_FAILED",
                f"语音识别调用失败:{exc}",
                status="asr_failed",
                audit_error=str(exc),
            ) from exc

        if not isinstance(asr_payload, dict):
            msg = "ASR 返回格式异常"
            raise await self._audited_error(
                ctx, "VOICE_ASR_FAILED", msg, status="asr_failed", audit_error=msg
            )

        # A missing "err_no" is treated as a failure, same as a non-zero one.
        if asr_payload.get("err_no") != 0:
            msg = (
                f"asr_err_{asr_payload.get('err_no')}: "
                f"{asr_payload.get('err_msg')}"
            )
            raise await self._audited_error(
                ctx, "VOICE_ASR_FAILED", msg, status="asr_failed", audit_error=msg
            )

        text = self._extract_asr_text(asr_payload)
        if not text:
            msg = "语音识别结果为空"
            raise await self._audited_error(
                ctx, "VOICE_ASR_FAILED", msg, status="asr_failed", audit_error=msg
            )

        self._sessions.record_voice_trace(surgery_id, asr_text=text, error=None)

        # A rejection phrase wins over any candidate match.
        rejected = is_rejection_phrase(text)
        chosen: str | None = None
        if not rejected:
            chosen = parse_voice_choice(text, option_labels)

        if not rejected and not chosen:
            msg = "无法从语音中匹配候选项,请重试或说「不是」否认全部"
            raise await self._audited_error(
                ctx,
                "VOICE_PARSE_FAILED",
                msg,
                status="parse_failed",
                audit_error=msg,
                asr_text=text,
            )

        await self._sessions.resolve_pending_confirmation(
            surgery_id,
            confirmation_id,
            chosen_label=chosen,
            rejected=rejected,
        )

        await self._persist_audit(
            surgery_id=surgery_id,
            confirmation_id=confirmation_id,
            status="rejected" if rejected else "recognized",
            audio_object_key=ctx.object_key,
            audio_content_type=content_type,
            audio_size_bytes=ctx.size_bytes,
            audio_sha256=ctx.sha256_hex,
            asr_text=text,
            resolved_label=chosen if not rejected else None,
            options_snapshot_json=ctx.options_snapshot,
            error_message=None,
        )

        if rejected:
            return VoiceResolveResult(
                resolved_label=None,
                rejected=True,
                asr_text=text,
                audio_object_key=stored.object_key,
                message="已否认全部候选,未记消耗。",
            )
        return VoiceResolveResult(
            resolved_label=chosen,
            rejected=False,
            asr_text=text,
            audio_object_key=stored.object_key,
            message="已确认并记一条消耗。",
        )

    @staticmethod
    def _extract_asr_text(asr_payload: dict) -> str:
        """Return the stripped recognised text from an ASR payload, or ``""``.

        ``result`` may be a non-empty list (its first element is used) or a
        plain string; any other shape yields an empty string.
        """
        results = asr_payload.get("result")
        if isinstance(results, list) and results:
            return str(results[0]).strip()
        if isinstance(results, str):
            return results.strip()
        return ""

    async def _audited_error(
        self,
        ctx: _AuditContext,
        code: str,
        message: str,
        *,
        status: str,
        audit_error: str,
        asr_text: str | None = None,
        trace: bool = True,
    ) -> SurgeryPipelineError:
        """Record a failed attempt and build the error for the caller to raise.

        Writes an audit row (best effort), optionally records the failure on
        the session's voice trace, and returns the exception instead of
        raising it so call sites can keep ``raise ... from exc`` explicit.

        Args:
            ctx: Per-request audit fields collected so far.
            code: Error code for the returned ``SurgeryPipelineError``.
            message: Client-facing message for the returned error.
            status: Value stored in the audit row's status column.
            audit_error: Error text stored in the audit row and voice trace.
            asr_text: Recognised text, when the failure happened after ASR.
            trace: Whether to also record the failure on the voice trace.
        """
        await self._persist_audit(
            surgery_id=ctx.surgery_id,
            confirmation_id=ctx.confirmation_id,
            status=status,
            audio_object_key=ctx.object_key,
            audio_content_type=ctx.content_type,
            audio_size_bytes=ctx.size_bytes,
            audio_sha256=ctx.sha256_hex,
            asr_text=asr_text,
            resolved_label=None,
            options_snapshot_json=ctx.options_snapshot,
            error_message=audit_error,
        )
        if trace:
            self._sessions.record_voice_trace(
                ctx.surgery_id, asr_text=asr_text, error=audit_error
            )
        return SurgeryPipelineError(code, message)

    async def _persist_audit(
        self,
        *,
        surgery_id: str,
        confirmation_id: str,
        status: str,
        audio_object_key: str | None,
        audio_content_type: str | None,
        audio_size_bytes: int | None,
        audio_sha256: str | None,
        asr_text: str | None,
        resolved_label: str | None,
        options_snapshot_json: str | None,
        error_message: str | None,
    ) -> None:
        """Write one voice audit row in its own session and transaction.

        Best effort by design: a failure to persist the audit is logged and
        swallowed so it never masks the outcome of the voice flow itself.
        """
        try:
            async with AsyncSessionLocal() as session:
                async with session.begin():
                    await self._audits.save_audit(
                        session,
                        surgery_id=surgery_id,
                        confirmation_id=confirmation_id,
                        status=status,
                        audio_object_key=audio_object_key,
                        audio_content_type=audio_content_type,
                        audio_size_bytes=audio_size_bytes,
                        audio_sha256=audio_sha256,
                        asr_text=asr_text,
                        resolved_label=resolved_label,
                        options_snapshot_json=options_snapshot_json,
                        error_message=error_message,
                    )
        except Exception as exc:
            # logger.exception keeps the traceback, which a lost audit row
            # needs for diagnosis; the error is still deliberately swallowed.
            logger.exception("Persist voice audit failed: {}", exc)
|
||||
Reference in New Issue
Block a user