Update consumable pipeline, client API docs, and deployment config

- Refine effective candidate consumables and classifier labels
- Adjust vision algorithm, TSV logging, and video session wiring
- Refresh client surgery HTTP contract doc and staging/video docs
- Update settings, docker-compose prod, tests, and uv.lock

Made-with: Cursor
2026-04-24 11:05:17 +08:00
parent 3d7bd70355
commit 557fcee803
15 changed files with 529 additions and 636 deletions
--- a/app/services/consumable_vision_algorithm.py
+++ b/app/services/consumable_vision_algorithm.py
@@ -5,7 +5,7 @@

 from __future__ import annotations

-import math
+from typing import Any
 import os
 import sys
 from collections import Counter
@@ -14,8 +14,8 @@ from pathlib import Path
 from threading import Lock

 import numpy as np
+import yaml
 from loguru import logger
-from openpyxl import load_workbook
 from ultralytics import YOLO

 from app.config import Settings, settings
@@ -79,22 +79,6 @@ class ClsTop3:
    t3_pid: str


-def _find_col_idx(headers: list[object], want: str) -> int | None:
-    want = want.strip()
-    for i, h in enumerate(headers):
-        if str(h).strip() == want:
-            return i
-    return None
-
-
-def _cell_empty(value: object) -> bool:
-    if value is None:
-        return True
-    if isinstance(value, float) and math.isnan(value):
-        return True
-    return False
-
-
 def _norm_product_name(name: str) -> str:
    s = (name or "").strip()
    if s == "一次性医用垫单":
@@ -102,49 +86,73 @@ def _norm_product_name(name: str) -> str:
    return s


-def load_name_to_product_code(xlsx: Path) -> dict[str, str]:
-    """商品名称 -> 产品编码（白名单键为归一化后的名称）。"""
-    wb = load_workbook(filename=str(xlsx), read_only=True, data_only=True)
+def load_name_to_label_id_from_yaml(path: Path) -> dict[str, str]:
+    """从 ``consumable_classifier_labels.yaml`` 得到：归一化商品名 -> 业务 label_id（可与 ``names`` 下标一一对应；多规格为 ``a/b/...``）。"""
    try:
-        ws = wb.worksheets[0]
-        rows = ws.iter_rows(values_only=True)
-        header = next(rows, None)
-        if header is None:
-            raise ValueError("Excel 为空")
-        headers = list(header)
-        i_code = _find_col_idx(headers, "产品编码")
-        i_name = _find_col_idx(headers, "商品名称")
-        if i_code is None or i_name is None:
-            raise ValueError("Excel 缺少「产品编码」或「商品名称」列")
+        raw = path.read_text(encoding="utf-8")
+    except OSError as exc:
+        logger.warning("无法读取耗材 label YAML {}: {}", path, exc)
+        return {}
+    try:
+        data: Any
+        data = yaml.safe_load(raw)
+    except yaml.YAMLError as exc:
+        logger.warning("解析耗材 label YAML 失败 {}: {}", path, exc)
+        return {}
+    if not isinstance(data, dict):
+        return {}
+    names_raw = data.get("names")
+    label_raw = data.get("label_id")
+    if not isinstance(names_raw, dict) or not isinstance(label_raw, dict):
+        return {}
+    out: dict[str, str] = {}
+    for k, v in names_raw.items():
+        try:
+            i = int(k)
+        except (TypeError, ValueError):
+            continue
+        name = str(v).strip() if v is not None else ""
+        if not name:
+            continue
+        lid: Any = None
+        if i in label_raw:
+            lid = label_raw[i]
+        elif str(i) in label_raw:
+            lid = label_raw[str(i)]
+        if lid is None or (isinstance(lid, str) and not str(lid).strip()):
+            continue
+        id_str = str(lid).strip()
+        out[_norm_product_name(name)] = id_str
+    return out

-        m: dict[str, str] = {}
-        dups: set[str] = set()
-        for row in rows:
-            if not row:
-                continue
-            raw = row[i_name] if i_name < len(row) else None
-            if _cell_empty(raw):
-                continue
-            n = _norm_product_name(str(raw).strip())
-            if not n:
-                continue
-            code = row[i_code] if i_code < len(row) else None
-            if _cell_empty(code):
-                continue
-            sc = str(code).strip()
-            if n in m and m[n] != sc:
-                dups.add(n)
-                continue
-            if n not in m:
-                m[n] = sc
-    finally:
-        wb.close()
-    if dups:
-        logger.warning(
-            "Excel 中以下商品名称对应多组产品编码，已保留首次映射: {}",
-            "；".join(sorted(dups)[:12]) + (" …" if len(dups) > 12 else ""),
-        )
-    return m
+
+def list_sorted_class_names_from_yaml(path: Path) -> list[str]:
+    """自 ``names`` 段按类索引升序取类名字符串（与训练/权重一致）。"""
+    try:
+        raw = path.read_text(encoding="utf-8")
+    except OSError:
+        return []
+    try:
+        data: Any = yaml.safe_load(raw)
+    except yaml.YAMLError:
+        return []
+    if not isinstance(data, dict):
+        return []
+    names_raw = data.get("names")
+    if not isinstance(names_raw, dict):
+        return []
+    items: list[tuple[int, str]] = []
+    for k, v in names_raw.items():
+        try:
+            i = int(k)
+        except (TypeError, ValueError):
+            continue
+        s = str(v).strip() if v is not None else ""
+        if not s:
+            continue
+        items.append((i, _norm_product_name(s)))
+    items.sort(key=lambda t: t[0])
+    return [n for _, n in items]


 def collect_hand_boxes(model: YOLO, boxes) -> list[tuple[float, float, float, float]]:
@@ -317,7 +325,7 @@ class ConsumableVisionAlgorithmService:
        self._cls_lock = Lock()

    def effective_candidate_consumables(self, requested: list[str]) -> list[str]:
-        """请求体中的耗材子集；未提供（缺省或仅空白）时用目录 Excel 全部商品名，无目录则用分类模型全部类名。"""
+        """请求体中的耗材子集；未提供（缺省或仅空白）时先用 ``consumable_classifier_labels.yaml`` 的 ``names``，无有效 YAML 则分类模型类名。"""
        out: list[str] = []
        seen: set[str] = set()
        for c in requested:
@@ -329,23 +337,12 @@ class ConsumableVisionAlgorithmService:
        if out:
            return out

-        xlsx_raw = (self._s.consumable_catalog_xlsx_path or "").strip()
-        if xlsx_raw:
-            path = Path(xlsx_raw).expanduser()
-            if path.is_file():
-                try:
-                    full = load_name_to_product_code(path)
-                except Exception as exc:
-                    logger.warning("读取耗材目录 Excel 失败，回退到模型类名: {}", exc)
-                else:
-                    if full:
-                        return sorted(full.keys())
-                    logger.warning("耗材目录 Excel 无有效行，回退到模型类名")
-            else:
-                logger.warning(
-                    "耗材目录 Excel 路径已配置但文件不存在: {}，回退到模型类名",
-                    path,
-                )
+        yaml_path = Path(self._s.consumable_classifier_labels_yaml_path).expanduser()
+        if yaml_path.is_file():
+            ylist = list_sorted_class_names_from_yaml(yaml_path)
+            if ylist:
+                return ylist
+            logger.warning("耗材 label YAML 中无有效 names: {}", yaml_path)

        cls_model = self._get_cls()
        labels = sorted(
@@ -356,21 +353,26 @@ class ConsumableVisionAlgorithmService:
    def build_name_mapping(
        self, candidate_consumables: list[str]
    ) -> dict[str, str]:
-        """分类标签 -> 业务物品 id（Excel 产品编码；无表时用名称自身）。"""
+        """分类类名(归一化) -> 业务 id：仅 ``consumable_classifier_labels.yaml`` 的 ``label_id``；无映射时用语义类名作 id。"""
        stripped = [_norm_product_name(c.strip()) for c in candidate_consumables if c.strip()]
        candidates_norm = {n: n for n in stripped}
-        xlsx_raw = (self._s.consumable_catalog_xlsx_path or "").strip()
-        if xlsx_raw:
-            path = Path(xlsx_raw).expanduser()
-            if path.is_file():
-                full = load_name_to_product_code(path)
-                out: dict[str, str] = {}
-                for norm in candidates_norm:
-                    if norm in full:
-                        out[norm] = full[norm]
-                return out
-            logger.warning("耗材目录 Excel 路径已配置但文件不存在: {}", path)
-        return {n: n for n in candidates_norm}
+        if not candidates_norm:
+            return {}
+
+        yaml_path = Path(self._s.consumable_classifier_labels_yaml_path).expanduser()
+        yaml_map: dict[str, str] = {}
+        if yaml_path.is_file():
+            try:
+                yaml_map = load_name_to_label_id_from_yaml(yaml_path)
+            except Exception as exc:  # noqa: BLE001
+                logger.warning("加载耗材 label YAML 失败 {}: {}", yaml_path, exc)
+        else:
+            logger.debug("耗材 label YAML 不存在: {}", yaml_path)
+
+        out: dict[str, str] = {}
+        for norm in candidates_norm:
+            out[norm] = yaml_map.get(norm) or norm
+        return out

    def _det_weights(self) -> Path | None:
        raw = (self._s.hand_detection_weights or "").strip()
--- a/app/services/consumption_tsv_log.py
+++ b/app/services/consumption_tsv_log.py
@@ -20,7 +20,7 @@ from app.services.consumable_vision_algorithm import ClsTop3, _norm_product_name
 from app.terminal_markdown import print_markdown_stderr

 # 制表符分隔；时间范围用 U+2013 连接；本窗消耗数量恒为 1。
-# top2/top3 为模型原始排序（未按手术候选重排）；item_id 仅写产品编码，无编码时留空。
+# top2/top3 为模型原始排序（未按手术候选重排）；item_id 只写与展示名不同的业务 id（label_id），与名称相同时留空。
 HEADER = (
    "item_id\titem_name\tqty\tdoctor_id\ttimestamp\t"
    "top2_name\ttop2_conf\ttop3_name\ttop3_conf\n"
@@ -97,7 +97,7 @@ def resolve_consumption_ids(
 ) -> tuple[str, str]:
    """TSV 第一列 item_id 与内存汇总键。

-    - ``tsv_item_id``：仅产品编码（或模型侧 t1_pid）；与展示名相同则视为无独立编码，留空。
+    - ``tsv_item_id``：业务 id（或模型侧 t1_pid）；与展示名相同则视为无独立 id，留空。
    - ``totals_key``：汇总用稳定键；无编码时用归一化名称，避免多行空 id 碰撞。
    """
    n = (t1_name or "").strip()
--- a/app/services/video/session_manager.py
+++ b/app/services/video/session_manager.py
@@ -140,7 +140,7 @@ class CameraSessionManager:
        if not resolved:
            raise SurgeryPipelineError(
                "RECORDING_CANNOT_START",
-                "耗材候选为空：请在请求中传入 candidate_consumables，或配置耗材目录 Excel / 分类模型。",
+                "耗材候选为空：请在请求中传入 candidate_consumables，或提供有效的 consumable_classifier_labels.yaml / 分类模型。",
            )
        if not any(str(x).strip() for x in candidate_consumables):
            logger.info(
--- a/app/services/video/session_registry.py
+++ b/app/services/video/session_registry.py
@@ -59,7 +59,7 @@ class CameraStreamInferState:
@dataclass
 class SurgerySessionState:
    candidate_consumables: list[str]
-    #: 分类类名（归一化） -> 业务物品 id（Excel 产品编码或名称）。
+    #: 分类类名（归一化） -> 业务物品 id（YAML label_id 或类名）
    name_to_code: dict[str, str] = field(default_factory=dict)
    camera_infer: dict[str, CameraStreamInferState] = field(default_factory=dict)
    details: list[SurgeryConsumptionStored] = field(default_factory=list)