refactor: 统一耗材视觉算法并扩展语音确认至全量候选清单

- 以 ConsumableVisionAlgorithmService 替代 consumable_classifier 与 tear_action;
  可选手部检测权重,未配置时全帧分类;时间窗众数与 Excel 白名单配置。
- 语音待确认:ASR 先匹配 pending topk,再匹配本台 candidate_consumables;
  记账 item_id 与 vision 一致使用 name_to_code。
- 更新 config、Compose、.env.example、依赖(pandas/openpyxl)与测试。

Made-with: Cursor
This commit is contained in:
Kevin
2026-04-22 16:31:12 +08:00
parent 4c4550d58b
commit 132702aea9
18 changed files with 791 additions and 476 deletions

View File

@@ -13,14 +13,16 @@ from app.config import Settings
from app.database import AsyncSessionLocal
from app.repositories.surgery_results import SurgeryResultRepository
from app.schemas import SurgeryConsumptionDetail
from app.services.consumable_classifier import (
ConsumableClassifierService,
from app.services.consumable_vision_algorithm import (
ClsTop3,
ConsumableVisionAlgorithmService,
PredictionCandidate,
PredictionResult,
_norm_product_name,
cls_top3_to_prediction_result,
window_bucket_to_best_snap,
)
from app.services.tear_action import TearActionService
from app.services.video.backend_resolver import BackendResolver
from app.services.video.frame_encode import frame_to_jpeg_bytes
from app.services.video.hikvision_runtime import HikvisionInitRefCount, HikvisionRuntime
from app.services.video.rtsp_capture import RtspCapture
from app.services.video.types import VideoBackendKind
@@ -41,9 +43,21 @@ class PendingConsumableConfirmation:
model_top1_confidence: float
@dataclass
class CameraStreamInferState:
    """Sliding time-window voting state for one video stream (kept consistent with the offline algorithm)."""

    #: Pending votes not yet assigned to a closed window bucket, as
    #: (seconds since stream_t0, top-1 class name, full top-3 snapshot).
    votes: list[tuple[float, str, ClsTop3]] = field(default_factory=list)
    #: Monotonic-clock timestamp of the first inference on this stream;
    #: None until the first frame has been inferred.
    stream_t0: float | None = None
    #: Index of the next window bucket to close and run majority voting on.
    next_bucket: int = 0
@dataclass
class SurgerySessionState:
    """Shared mutable state for one active surgery session."""

    #: Consumable names allowed for this surgery; voice confirmation may
    #: choose any item from this list, not only the model's top-k options.
    candidate_consumables: list[str]
    #: Normalized classifier class name -> business item id (Excel product code or name).
    name_to_code: dict[str, str] = field(default_factory=dict)
    #: Per-camera time-window voting state, keyed by camera id.
    camera_infer: dict[str, CameraStreamInferState] = field(default_factory=dict)
    #: Confirmed consumption details accumulated during the session.
    details: list[SurgeryConsumptionDetail] = field(default_factory=list)
    #: Guards concurrent mutation of this state across camera tasks.
    lock: asyncio.Lock = field(default_factory=asyncio.Lock)
    #: Readiness signal for the session — set elsewhere in the manager (not shown here).
    ready: asyncio.Event = field(default_factory=asyncio.Event)
@@ -94,14 +108,12 @@ class CameraSessionManager:
self,
*,
settings: Settings,
consumable_classifier: ConsumableClassifierService,
tear_action: TearActionService,
vision_algorithm: ConsumableVisionAlgorithmService,
hikvision_runtime: HikvisionRuntime | None,
result_repository: SurgeryResultRepository | None = None,
) -> None:
self._s = settings
self._classifier = consumable_classifier
self._tear = tear_action
self._vision = vision_algorithm
self._hik = hikvision_runtime
self._repo = result_repository
self._resolver = BackendResolver(settings, hikvision_runtime=hikvision_runtime)
@@ -221,8 +233,10 @@ class CameraSessionManager:
"该手术号存在尚未写入数据库的历史结果,请修复数据库或等待自动重试成功后再开始。",
)
name_to_code = self._vision.build_name_mapping(candidate_consumables)
state = SurgerySessionState(
candidate_consumables=list(candidate_consumables),
name_to_code=name_to_code,
)
stop_event = asyncio.Event()
readies = [asyncio.Event() for _ in camera_ids]
@@ -388,6 +402,12 @@ class CameraSessionManager:
return None
return p
def get_surgery_candidate_consumables(self, surgery_id: str) -> list[str]:
    """Return the consumable candidate list supplied when this surgery started.

    Voice confirmation may pick any item from this list, not only the
    model's top-k suggestions. An empty list is returned when the surgery
    id has no active session; the result is always a fresh copy, so
    callers may mutate it freely.
    """
    active_sessions = self._active
    if surgery_id in active_sessions:
        return list(active_sessions[surgery_id].state.candidate_consumables)
    return []
def next_pending_confirmation(
self, surgery_id: str
) -> PendingConsumableConfirmation | None:
@@ -436,20 +456,23 @@ class CameraSessionManager:
"CONFIRMATION_INVALID",
"请提供 chosen_label 或设置 rejected=true。",
)
allowed = {lbl.strip() for lbl, _ in pending.options if lbl.strip()}
allowed_pending = {lbl.strip() for lbl, _ in pending.options if lbl.strip()}
allowed_surgery = {c.strip() for c in st.candidate_consumables if c.strip()}
if rejected:
pending.status = "rejected"
else:
label = chosen_label.strip() if chosen_label else ""
if label not in allowed:
if label not in allowed_pending and label not in allowed_surgery:
raise SurgeryPipelineError(
"CONFIRMATION_INVALID",
f"所选耗材不在候选列表中:{chosen_label!r}",
f"所选耗材不在本台手术候选清单或本次追问选项中:{chosen_label!r}",
)
pending.status = "confirmed"
norm = _norm_product_name(label)
item_id = st.name_to_code.get(norm, label)
self._append_confirmed_detail_locked(
state=st,
item_id=label,
item_id=item_id,
item_name=label,
doctor_id=self._s.video_voice_confirm_doctor_id,
source="voice",
@@ -582,13 +605,11 @@ class CameraSessionManager:
continue
last_infer = now
try:
jpeg = await asyncio.to_thread(
frame_to_jpeg_bytes,
snap = await asyncio.to_thread(
self._vision.infer_frame_bgr,
frame,
quality=self._s.video_jpeg_quality,
state.name_to_code,
)
cls_res = await self._classifier.predict_image_bytes(jpeg)
tear_res = await self._tear.predict_image_bytes(jpeg)
except Exception as exc:
logger.debug(
"Inference skip camera={} surgery={}: {}",
@@ -598,11 +619,45 @@ class CameraSessionManager:
)
continue
await self._handle_classification_result(
state=state,
cls_res=cls_res,
tear_label=tear_res.label,
)
if snap is None:
continue
wsec = self._s.consumable_vision_window_sec
pending_preds: list[PredictionResult] = []
async with state.lock:
cis = state.camera_infer.setdefault(
camera_id, CameraStreamInferState()
)
if cis.stream_t0 is None:
cis.stream_t0 = time.monotonic()
t_rel = time.monotonic() - cis.stream_t0
cis.votes.append((t_rel, snap.t1_name, snap))
current_b = int(t_rel // wsec)
while cis.next_bucket < current_b:
b = cis.next_bucket
cis.next_bucket += 1
lo, hi = b * wsec, (b + 1) * wsec
bucket_pts = [
(p, sn) for (t, p, sn) in cis.votes if lo <= t < hi
]
cis.votes = [
(t, p, sn)
for (t, p, sn) in cis.votes
if not (lo <= t < hi)
]
if not bucket_pts:
continue
best = window_bucket_to_best_snap(bucket_pts)
if best is not None:
pending_preds.append(
cls_top3_to_prediction_result(best)
)
for cls_res in pending_preds:
await self._handle_classification_result(
state=state,
cls_res=cls_res,
)
finally:
if cap is not None:
await asyncio.to_thread(cap.release)
@@ -616,11 +671,10 @@ class CameraSessionManager:
*,
state: SurgerySessionState,
cls_res: PredictionResult,
tear_label: str,
) -> None:
_ = tear_label
conf = cls_res.confidence
label = (cls_res.label or "").strip()
item_id = state.name_to_code.get(label, label)
voice_floor = self._s.video_voice_confirm_min_confidence
if conf < voice_floor:
return
@@ -639,7 +693,7 @@ class CameraSessionManager:
if conf >= auto_th and in_allowed(label):
await self._append_confirmed_detail(
state=state,
item_id=label or "unknown",
item_id=item_id or label or "unknown",
item_name=label or "unknown",
doctor_id=self._s.video_result_doctor_id,
source="vision",