feat: surgery pipeline API, video inference, voice confirm, and tests

- Add FastAPI routes for surgery start/end, results, pending confirmation (WAV upload), and health checks.
- Implement RTSP/Hikvision capture, consumable classification, session manager, MinIO/Baidu voice resolution, and DB persistence.
- Add documentation (client API, video backends, staging checklist) and sample camera/RTSP config.
- Add pytest suite (API contract, session manager, voice, repositories, pipeline persistence) and httpx dev dependency.
- Replace deprecated HTTP_422_UNPROCESSABLE_ENTITY with HTTP_422_UNPROCESSABLE_CONTENT.
- Fix SurgeryPipeline DB reads to use an explicit transaction with autobegin disabled.

Made-with: Cursor
2026-04-21 18:33:54 +08:00
parent d1a3d029ec
commit 04866559db
56 changed files with 7196 additions and 43 deletions
--- a/app/services/baidu_speech.py
+++ b/app/services/baidu_speech.py
@@ -0,0 +1,66 @@
+from __future__ import annotations
+
+from threading import Lock
+from typing import Any
+
+from aip import AipSpeech
+
+from app.config import settings
+
+
+class BaiduSpeechNotConfiguredError(RuntimeError):
+    """Raised when an API is called without BAIDU_SPEECH_APP_ID / API_KEY / SECRET_KEY configured."""
+
+
+class BaiduSpeechService:
+    """Baidu short-speech recognition (asr) and online speech synthesis (synthesis).
+
+    Built on `AipSpeech` from `baidu-aip`. The underlying client is created on
+    first use and then reused for every later call on this instance.
+    """
+
+    def __init__(self) -> None:
+        # The lock guards creation of the cached client in `_client_or_raise`.
+        self._lock = Lock()
+        self._client: AipSpeech | None = None
+
+    @property
+    def configured(self) -> bool:
+        """Return `settings.baidu_speech_configured`."""
+        return settings.baidu_speech_configured
+
+    def _client_or_raise(self) -> AipSpeech:
+        """Return the cached `AipSpeech` client, building it on the first call.
+
+        Raises:
+            BaiduSpeechNotConfiguredError: if `configured` is falsy.
+        """
+        if not self.configured:
+            message = (
+                "百度语音未配置：请设置 BAIDU_SPEECH_APP_ID、BAIDU_SPEECH_API_KEY、"
+                "BAIDU_SPEECH_SECRET_KEY"
+            )
+            raise BaiduSpeechNotConfiguredError(message)
+        with self._lock:
+            # Already built by an earlier call: hand back the same instance.
+            if self._client is not None:
+                return self._client
+            speech_client = AipSpeech(
+                settings.baidu_speech_app_id,
+                settings.baidu_speech_api_key,
+                settings.baidu_speech_secret_key,
+            )
+            # Timeouts are optional; a value of None leaves the client untouched.
+            connect_timeout_ms = settings.baidu_speech_connection_timeout_ms
+            if connect_timeout_ms is not None:
+                speech_client.setConnectionTimeoutInMillis(connect_timeout_ms)
+            socket_timeout_ms = settings.baidu_speech_socket_timeout_ms
+            if socket_timeout_ms is not None:
+                speech_client.setSocketTimeoutInMillis(socket_timeout_ms)
+            self._client = speech_client
+            return speech_client
+
+    def asr(
+        self,
+        speech: bytes | None = None,
+        format: str = "pcm",
+        rate: int = 16000,
+        options: dict[str, Any] | None = None,
+    ) -> dict[str, Any]:
+        """Short speech recognition.
+
+        Arguments are forwarded unchanged to `AipSpeech.asr`. Returns Baidu's
+        JSON response (including `err_no`, `result`, etc.).
+        """
+        recognizer = self._client_or_raise()
+        return recognizer.asr(speech, format, rate, options)
+
+    def synthesis(
+        self,
+        text: str,
+        lang: str = "zh",
+        ctp: int = 1,
+        options: dict[str, Any] | None = None,
+    ) -> bytes | dict[str, Any]:
+        """Online speech synthesis.
+
+        Arguments are forwarded unchanged to `AipSpeech.synthesis`. Returns the
+        audio as bytes on success, or an error-information dict on failure.
+        """
+        synthesizer = self._client_or_raise()
+        return synthesizer.synthesis(text, lang, ctp, options)