refactor: 统一耗材视觉算法并扩展语音确认至全量候选清单

- 以 ConsumableVisionAlgorithmService 替代 consumable_classifier 与 tear_action；可选手部检测权重，未配置时全帧分类；时间窗众数与 Excel 白名单配置。
- 语音待确认：ASR 先匹配 pending topk，再匹配本台 candidate_consumables；记账 item_id 与 vision 一致使用 name_to_code。
- 更新 config、Compose、.env.example、依赖（pandas/openpyxl）与测试。

Made-with: Cursor
2026-04-22 16:31:12 +08:00
parent 4c4550d58b
commit 132702aea9
18 changed files with 791 additions and 476 deletions
--- a/app/config.py
+++ b/app/config.py
@@ -14,11 +14,6 @@ def _default_consumable_classifier_weights() -> str:
    return str(_PACKAGE_DIR / "resources" / "consumable_classifier.pt")


-def _default_tear_action_weights() -> str:
-    """撕扯耗材动作识别：`app/resources/tear_action.pt`。"""
-    return str(_PACKAGE_DIR / "resources" / "tear_action.pt")
-
-
 def _default_camera_rtsp_urls_sample_path() -> str:
    """示例映射路径（可复制为自有 `camera_rtsp_urls.json` 后在环境变量中引用）。"""
    return str(_PACKAGE_DIR / "resources" / "camera_rtsp_urls.sample.json")
@@ -38,10 +33,19 @@ class Settings(BaseSettings):
    #: Explicit Ultralytics device (e.g. cpu, mps, cuda:0). Empty -> macOS prefers MPS; Linux prefers CUDA if available.
    consumable_classifier_device: str = ""
    consumable_classifier_topk: int = 5
-    tear_action_weights: str | None = None
-    tear_action_imgsz: int = 224
-    tear_action_device: str = ""
-    tear_action_topk: int = 5
+    #: 耗材分类 top1 最低置信度（手部 ROI 或全帧送入分类器后的门槛）。
+    consumable_min_cls_confidence: float = Field(default=0.5, ge=0.0, le=1.0)
+    #: 可选：`视频中的商品信息表.xlsx`（含「商品名称」「产品编码」）；空则物品 id 用名称本身。
+    consumable_catalog_xlsx_path: str = ""
+    #: 与离线脚本一致的时间窗（秒）；窗内多次推理取众数后再走自动记账 / 语音追问逻辑。
+    consumable_vision_window_sec: float = Field(default=15.0, ge=0.5, le=600.0)
+    #: 手部检测 YOLO 权重；空或文件不存在时退化为「全帧送分类器」（兼容仅有关分类权重的环境）。
+    hand_detection_weights: str = ""
+    hand_detection_imgsz: int = Field(default=640, ge=32, le=4096)
+    hand_detection_conf: float = Field(default=0.25, ge=0.0, le=1.0)
+    hand_detection_pad_ratio: float = Field(default=0.30, ge=0.0, le=2.0)
+    hand_detection_min_crop_px: int = Field(default=64, ge=8, le=4096)
+    hand_detection_device: str = ""
    #: 开始/结束手术时调用录制流水线的最大尝试次数（含首次）。
    surgery_recording_max_attempts: int = Field(default=3, ge=1, le=20)
    #: 两次尝试之间的等待秒数。
@@ -138,13 +142,6 @@ class Settings(BaseSettings):
            return _default_consumable_classifier_weights()
        return str(value)

-    @field_validator("tear_action_weights", mode="before")
-    @classmethod
-    def tear_action_weights_default(cls, value: object) -> str:
-        if value is None or value == "":
-            return _default_tear_action_weights()
-        return str(value)
-
    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",