feat: surgery pipeline API, video inference, voice confirm, and tests

- Add FastAPI routes for surgery start/end, results, pending confirmation (WAV upload), and health checks. - Implement RTSP/Hikvision capture, consumable classification, session manager, MinIO/Baidu voice resolution, and DB persistence. - Add documentation (client API, video backends, staging checklist) and sample camera/RTSP config. - Add pytest suite (API contract, session manager, voice, repositories, pipeline persistence) and httpx dev dependency. - Replace deprecated HTTP_422_UNPROCESSABLE_ENTITY with HTTP_422_UNPROCESSABLE_CONTENT. - Fix SurgeryPipeline DB reads to use an explicit transaction with autobegin disabled. Made-with: Cursor
2026-04-21 18:33:54 +08:00
parent d1a3d029ec
commit 04866559db
56 changed files with 7196 additions and 43 deletions
--- a/app/config.py
+++ b/app/config.py
@@ -1,12 +1,149 @@
+import json
+from pathlib import Path
+from urllib.parse import quote_plus
+from typing import Any, Literal
+
+from pydantic import Field, field_validator
 from pydantic_settings import BaseSettings, SettingsConfigDict

+_PACKAGE_DIR = Path(__file__).resolve().parent  # Absolute directory of this module; default resource paths are built under it.
+
+
+def _default_consumable_classifier_weights() -> str:
+    """Return the bundled consumable recognition/classification (YOLO-cls) weights path: `app/resources/consumable_classifier.pt`."""
+    return str(_PACKAGE_DIR / "resources" / "consumable_classifier.pt")
+
+
+def _default_tear_action_weights() -> str:
+    """Return the bundled weights path for tear-open (consumable packaging) action recognition: `app/resources/tear_action.pt`."""
+    return str(_PACKAGE_DIR / "resources" / "tear_action.pt")
+
+
+def _default_camera_rtsp_urls_sample_path() -> str:
+    """Return the sample mapping path (copy it to your own `camera_rtsp_urls.json`, then reference that file via an environment variable)."""
+    return str(_PACKAGE_DIR / "resources" / "camera_rtsp_urls.sample.json")
+

 class Settings(BaseSettings):
    """Application configuration loaded from environment / .env."""

-    database_url: str = (
-        "postgresql+asyncpg://postgres:postgres@localhost:5432/operation_room"
+    database_url: str | None = None
+    postgres_user: str = "postgres"
+    postgres_password: str = "postgres"
+    postgres_db: str = "operation_room"
+    postgres_host: str = "localhost"
+    postgres_port: int = 35432
+    consumable_classifier_weights: str | None = None
+    consumable_classifier_imgsz: int = 224
+    #: Explicit Ultralytics device (e.g. cpu, mps, cuda:0). Empty -> macOS prefers MPS; Linux prefers CUDA if available.
+    consumable_classifier_device: str = ""
+    consumable_classifier_topk: int = 5
+    tear_action_weights: str | None = None
+    tear_action_imgsz: int = 224
+    tear_action_device: str = ""
+    tear_action_topk: int = 5
+    #: Maximum number of attempts (including the first) when invoking the recording pipeline at surgery start/end.
+    surgery_recording_max_attempts: int = Field(default=3, ge=1, le=20)
+    #: Seconds to wait between two attempts.
+    surgery_recording_retry_delay_seconds: float = Field(default=1.0, ge=0.0, le=60.0)
+
+    # --- Video: dual backend, RTSP / Hikvision SDK ---
+    #: Default backend: `rtsp` | `hikvision_sdk` | `auto` (auto: prefer the SDK when its shared library is available and HIKVISION_SDK_ENABLED is set).
+    video_default_backend: Literal["rtsp", "hikvision_sdk", "auto"] = "rtsp"
+    #: Per-camera backend overrides as a JSON object, e.g. `{"or-cam-01":"rtsp","or-cam-02":"hikvision_sdk"}`.
+    video_camera_backend_overrides_json: str = ""
+    #: Single URL template, e.g. `rtsp://user:pass@192.168.1.64:554/Streaming/Channels/101` (`{camera_id}` may be used).
+    video_rtsp_url_template: str = ""
+    #: Full RTSP URL per camera as a JSON object; when merged with `video_rtsp_urls_json_file`, **this field wins on duplicate keys**.
+    video_rtsp_urls_json: str = ""
+    #: Load camera_id -> rtsp_url from a file (UTF-8 JSON object). See app/resources/camera_rtsp_urls.sample.json for an example.
+    video_rtsp_urls_json_file: str = ""
+    #: Timeout (seconds) for opening the RTSP stream and reading the first frame.
+    video_open_timeout_sec: float = Field(default=15.0, ge=1.0, le=120.0)
+    #: After this many consecutive frame-read failures, release the connection and try to reconnect.
+    video_read_failure_reconnect_threshold: int = Field(default=15, ge=1, le=500)
+    #: Seconds to wait before reconnecting (also used as back-off after a failed open).
+    video_reconnect_backoff_seconds: float = Field(default=1.0, ge=0.1, le=60.0)
+    #: Frame sampling interval for inference (seconds).
+    video_inference_interval_sec: float = Field(default=2.0, ge=0.2, le=60.0)
+    #: Classification confidence threshold (kept for legacy logic): frames below `video_voice_confirm_min_confidence` take part in neither auto-confirmation nor voice follow-up.
+    video_inference_confidence_threshold: float = Field(
+        default=0.35, ge=0.0, le=1.0
     )
+    #: At or above this confidence, one consumable usage record is logged automatically (subject to candidate-list validation).
+    video_auto_confirm_confidence: float = Field(default=0.55, ge=0.0, le=1.0)
+    #: When confidence is in [this value, video_auto_confirm_confidence), attempt a voice follow-up (requires an announceable top candidate).
+    video_voice_confirm_min_confidence: float = Field(default=0.35, ge=0.0, le=1.0)
+    #: Whether to enable manual confirmation at low confidence (the client pulls pending items and posts results back; no server-side microphone/speaker is needed).
+    voice_confirmation_enabled: bool = True
+    #: doctor_id recorded for voice-confirmed entries.
+    video_voice_confirm_doctor_id: str = "voice"
+    #: (Deprecated) Server-side local recording duration in seconds; voice is now captured by the client, so this is kept only for compatibility with old configs.
+    voice_record_seconds: float = Field(default=5.0, ge=1.0, le=30.0)
+    #: (Deprecated) Server-side ffmpeg audio input; the current flow does not rely on server-side recording.
+    voice_ffmpeg_input: str = ""
+    #: Interval (seconds) between background retries when persisting the archive to the DB fails after surgery end.
+    archive_persist_retry_interval_seconds: float = Field(
+        default=30.0, ge=5.0, le=3600.0
+    )
+    #: Minimum interval (seconds) before the same item may be logged as consumed again.
+    video_detail_cooldown_sec: float = Field(default=15.0, ge=0.0, le=3600.0)
+    #: JPEG quality of frames sent to the model.
+    video_jpeg_quality: int = Field(default=85, ge=40, le=100)
+    #: doctor_id written to consumption detail rows (placeholder when no external doctor ID source exists).
+    video_result_doctor_id: str = "vision"
+
+    #: Directory containing the Hikvision SDK `.so` files (can be mounted at `/opt/hikvision/lib` inside a container).
+    hikvision_lib_dir: str = "/opt/hikvision/lib"
+    #: Only when true does `auto` mode prefer the SDK; also one of the prerequisites for SDK login.
+    hikvision_sdk_enabled: bool = False
+    hikvision_device_ip: str = ""
+    hikvision_device_port: int = Field(default=8000, ge=1, le=65535)
+    hikvision_user: str = ""
+    hikvision_password: str = ""
+    #: Channel number etc. used in the preview URL template (e.g. main stream 101 is commonly built by concatenating the channel).
+    hikvision_channel: int = Field(default=1, ge=1, le=512)
+    #: RTSP template used to pull the stream after a successful SDK login; placeholders such as `{ip} {user} {password} {channel} {camera_id}`.
+    hikvision_preview_rtsp_template: str = ""
+    #: Like VIDEO_RTSP_URLS_JSON: per-camera_id preview RTSP URLs for the SDK path.
+    hikvision_camera_rtsp_urls_json: str = ""
+    #: Whether to still try pulling via the generic RTSP mapping when SDK login fails (only if an RTSP URL can be resolved).
+    hikvision_sdk_fallback_to_rtsp: bool = True
+    #: Baidu Speech (`baidu-aip` AipSpeech: short-speech recognition + online synthesis). Fill in after creating an app in the console.
+    baidu_speech_app_id: str = ""
+    baidu_speech_api_key: str = ""
+    baidu_speech_secret_key: str = ""
+    #: Connection timeout (milliseconds). The SDK default is used when unset.
+    baidu_speech_connection_timeout_ms: int | None = None
+    #: Data transfer timeout (milliseconds). The SDK default is used when unset.
+    baidu_speech_socket_timeout_ms: int | None = None
+
+    # --- MinIO: traceability storage of the raw voice-confirmation WAV files ---
+    #: Empty means MinIO is not configured; the voice-confirmation endpoint will then return a business error (must be configured for integration testing).
+    minio_endpoint: str = ""
+    minio_access_key: str = ""
+    minio_secret_key: str = ""
+    minio_bucket: str = "operation-room-voice"
+    #: Whether to use HTTPS (commonly false for MinIO: plaintext on port 9000, or TLS).
+    minio_secure: bool = False
+    #: Optional region (required by some S3-compatible implementations).
+    minio_region: str = ""
+    #: Maximum size in bytes of an uploaded doctor voice WAV (default 10MB).
+    voice_upload_max_bytes: int = Field(default=10 * 1024 * 1024, ge=64, le=50 * 1024 * 1024)
+
+    @field_validator("consumable_classifier_weights", mode="before")
+    @classmethod
+    def consumable_classifier_weights_default(cls, value: object) -> str:
+        if value is None or value == "":  # unset or empty -> fall back to the weights file under this package's resources/
+            return _default_consumable_classifier_weights()
+        return str(value)  # any other raw value is coerced to str
+
+    @field_validator("tear_action_weights", mode="before")
+    @classmethod
+    def tear_action_weights_default(cls, value: object) -> str:
+        if value is None or value == "":  # unset or empty -> fall back to the weights file under this package's resources/
+            return _default_tear_action_weights()
+        return str(value)  # any other raw value is coerced to str

    model_config = SettingsConfigDict(
        env_file=".env",
@@ -14,5 +151,92 @@ class Settings(BaseSettings):
        extra="ignore",
    )

+    @property
+    def sqlalchemy_database_url(self) -> str:  # Effective DB URL: assembled from the postgres_* fields, else the explicit database_url.
+        component_values = (
+            self.postgres_user,
+            self.postgres_password,
+            self.postgres_db,
+            self.postgres_host,
+            self.postgres_port,
+        )
+        default_component_values = (  # NOTE(review): compared by value, so a component explicitly set to its default counts as "unchanged"
+            "postgres",
+            "postgres",
+            "operation_room",
+            "localhost",
+            35432,
+        )
+
+        if component_values != default_component_values or not self.database_url:  # components win if any differs from its default, or if database_url is None/empty
+            user = quote_plus(self.postgres_user)  # quote_plus escapes characters such as '@', ':' and '/' in the credentials
+            password = quote_plus(self.postgres_password)
+            database = quote_plus(self.postgres_db)
+            return (
+                "postgresql+asyncpg://"
+                f"{user}:{password}@{self.postgres_host}:{self.postgres_port}/{database}"  # host and port are inserted as-is (not escaped)
+            )
+
+        return self.database_url  # reached only when every component equals its default and database_url is non-empty
+
+    @property
+    def baidu_speech_configured(self) -> bool:  # True only when app id, API key and secret key are all non-blank.
+        return bool(
+            self.baidu_speech_app_id.strip()  # strip() makes whitespace-only values count as missing
+            and self.baidu_speech_api_key.strip()
+            and self.baidu_speech_secret_key.strip()
+        )
+
+    @property
+    def minio_configured(self) -> bool:  # True only when endpoint, access key, secret key and bucket are all non-blank.
+        return bool(
+            self.minio_endpoint.strip()  # strip() makes whitespace-only values count as missing
+            and self.minio_access_key.strip()
+            and self.minio_secret_key.strip()
+            and self.minio_bucket.strip()
+        )
+
+    @staticmethod
+    def _parse_rtsp_urls_object(raw: str) -> dict[str, str]:  # Parse a JSON-object string into a str -> str mapping; raises ValueError on bad input.
+        raw = (raw or "").strip()  # tolerate None and surrounding whitespace
+        if not raw:
+            return {}  # blank input means "no inline mapping"
+        try:
+            data: Any = json.loads(raw)
+        except json.JSONDecodeError as exc:
+            raise ValueError(f"Invalid VIDEO_RTSP_URLS_JSON: {exc}") from exc
+        if not isinstance(data, dict):  # valid JSON that is not an object (list, number, ...) is rejected
+            raise ValueError("VIDEO_RTSP_URLS_JSON must be a JSON object")
+        return {str(k): str(v) for k, v in data.items()}  # keys and values are coerced to str
+
+    def video_rtsp_url_map(self) -> dict[str, str]:
+        """Merge the file mapping with the inline JSON; inline keys override the file. Raises ValueError on a missing file or invalid JSON."""
+        merged: dict[str, str] = {}
+        path_raw = (self.video_rtsp_urls_json_file or "").strip()
+        if path_raw:  # the file is consulted only when a path is configured
+            path = Path(path_raw).expanduser()  # allow a leading ~ in the configured path
+            if not path.is_file():
+                raise ValueError(
+                    f"VIDEO_RTSP_URLS_JSON_FILE is set but file not found: {path}"
+                )
+            try:
+                file_obj: Any = json.loads(path.read_text(encoding="utf-8"))
+            except json.JSONDecodeError as exc:
+                raise ValueError(
+                    f"Invalid JSON in VIDEO_RTSP_URLS_JSON_FILE {path}: {exc}"
+                ) from exc
+            if not isinstance(file_obj, dict):
+                raise ValueError(
+                    f"VIDEO_RTSP_URLS_JSON_FILE must contain a JSON object: {path}"
+                )
+            merged = {str(k): str(v) for k, v in file_obj.items()}  # keys and values are coerced to str
+        merged.update(self._parse_rtsp_urls_object(self.video_rtsp_urls_json))  # inline JSON is applied last, so it wins on duplicate keys
+        return merged
+
+    @property
+    def camera_rtsp_urls_sample_path(self) -> str:
+        """Path of the sample mapping shipped in the repository (for reference in docs and integration testing)."""
+        return _default_camera_rtsp_urls_sample_path()
+

 settings = Settings()