From 975a2198a55fc51a324d1ded29cdeda1a8e4d88d Mon Sep 17 00:00:00 2001
From: hsz <2091085305@qq.com>
Date: Fri, 5 Jun 2026 15:12:15 +0800
Subject: [PATCH] 6.5: visualize_pipeline uses the configurable hand backend (MediaPipe by default)

---
 README.md                     |  4 ++-
 scripts/visualize_pipeline.py | 63 +++++++++++++++++++++++------------
 2 files changed, 44 insertions(+), 23 deletions(-)

diff --git a/README.md b/README.md
index cd4cb59..e74928c 100644
--- a/README.md
+++ b/README.md
@@ -112,7 +112,7 @@ python scripts/visualize_pipeline.py \
 | 叠加层 | 说明 |
 |--------|------|
 | 青色虚线框 | 篮子 ROI（需 `--basket-roi`，与 `--save-basket-roi` 配套） |
-| 绿色框 | 段内手部检测（`hand_detect.pt`） |
+| 绿色框 | 段内手部检测（与 `configs/default_config.yaml` 的 `hand.backend` 一致；默认 **MediaPipe** `weights/hand_landmarker.task`，标签为「手 mp」） |
 | 黄色粗框 | 双手 union ROI（与 Phase2 一致） |
 | 顶部信息条 | TSV 该段时间段的 rank、Top3 或失败原因 |
 | 片头 | 视频/TSV 路径 + 离线 `医生信息：` 汇总 |
@@ -121,6 +121,8 @@ python scripts/visualize_pipeline.py \
 
 **中文显示**：叠加文字使用 Pillow + 系统 CJK 字体（默认 `NotoSansCJK-Regular.ttc`）。若出现方框/乱码，请安装 `fonts-noto-cjk`，或通过 `--font /path/to/font.ttc` / 环境变量 `VIS_CJK_FONT` 指定字体。
 
+**手部后端**：与 `main_basket` 共用 `hand` 配置段。默认 `hand.backend: mediapipe` + `hand.mediapipe_task: weights/hand_landmarker.task`（每帧最多 2 只手）。对比 YOLO 旧行为：`--hand-backend yolo`。
+
 **篮筐附近手框与 ROI**：提供 `--basket-roi` 时，默认只绘制靠近篮子的手（篮子框外扩 20% 后 IoU > `contact_iou_on`），**黄色 ROI** 由其中与篮子 IoU 最高的两只手合并。背景手不再绘制。关闭过滤用 `--no-hand-basket-filter`；贴边漏检可试 `--basket-expand-frac 0.3` 或略降 `--hand-basket-min-iou 0.02`。
 
 **本地 smoke**（无真实手术视频时）：
diff --git a/scripts/visualize_pipeline.py b/scripts/visualize_pipeline.py
index b64bdcc..e3bb65f 100644
--- a/scripts/visualize_pipeline.py
+++ b/scripts/visualize_pipeline.py
@@ -15,7 +15,6 @@ from typing import Any
 
 import cv2
 import numpy as np
-from ultralytics import YOLO
 
 PACK_ROOT = Path(__file__).resolve().parent.parent
 _SCRIPTS = Path(__file__).resolve().parent
@@ -28,11 +27,13 @@ ensure_code_on_path(PACK_ROOT)
 
 from basket_segmenter import load_basket_roi_json  # noqa: E402
 from config import load_run_config  # noqa: E402
-from pipeline.hand_roi_merge import bbox_iou_xyxy, two_largest_hands, union_xyxy  # noqa: E402
-from run_segments_consumable_vote import (  # noqa: E402
-    collect_hand_boxes,
-    pad_box_bottom_only,
+from hand_detector import (  # noqa: E402
+    create_hand_detector,
+    detect_hands_xyxy,
+    validate_hand_assets,
 )
+from pipeline.hand_roi_merge import bbox_iou_xyxy, two_largest_hands, union_xyxy  # noqa: E402
+from run_segments_consumable_vote import pad_box_bottom_only  # noqa: E402
 from vis_text import CjkTextRenderer  # noqa: E402
 from visualize_tsv import (  # noqa: E402
     SegmentVis,
@@ -232,7 +233,7 @@ def _scale_basket_xyxy(
 
 
 def detect_hands_and_union(
-    det_model: YOLO,
+    det: Any,
     frame: np.ndarray,
     *,
     det_conf: float,
@@ -249,17 +250,16 @@ def detect_hands_and_union(
     有篮子时默认：仅保留靠近篮子的手，黄 ROI 由其中 IoU 最高的两只合并。
     """
     h, w = frame.shape[:2]
-    r = det_model.predict(
-        frame, imgsz=imgsz_det, conf=det_conf, verbose=False, **predict_kw
-    )[0]
-    hand_confs: list[tuple[list[float], float]] = []
-    if r.boxes is not None:
-        names = det_model.names
-        for box in r.boxes:
-            cid = int(box.cls[0])
-            if names.get(cid, "") == "hand":
-                conf = float(box.conf[0]) if box.conf is not None else 0.0
-                hand_confs.append((box.xyxy[0].tolist(), conf))
+    hands = detect_hands_xyxy(
+        det,
+        frame,
+        det_conf=det_conf,
+        imgsz_det=imgsz_det,
+        predict_kw=predict_kw,
+    )
+    hand_confs: list[tuple[list[float], float]] = [
+        (xyxy, 1.0) for xyxy in hands
+    ]
 
     if (
         basket_xyxy is not None
@@ -346,9 +346,17 @@ def run_visualize(args: argparse.Namespace, cfg: Any) -> int:
     if not tsv_path.is_file():
         print(f"[vis] TSV 不存在: {tsv_path}", file=sys.stderr)
         return 1
-    if not Path(cfg.hand_model).is_file():
-        print(f"[vis] 缺少手部权重: {cfg.hand_model}", file=sys.stderr)
+
+    # Normalize the backend name once so the error branch and the label logic agree.
+    hand_is_mediapipe = str(getattr(cfg, "hand_backend", "yolo")).lower() == "mediapipe"
+    ok, hand_lab = validate_hand_assets(cfg)
+    if not ok:
+        if hand_is_mediapipe:
+            print(f"[vis] 缺少 MediaPipe 手部模型: {cfg.hand_mediapipe_task}", file=sys.stderr)
+        else:
+            print(f"[vis] 缺少手部权重: {cfg.hand_model}", file=sys.stderr)
         return 1
+    print(f"[vis] 手部检测: {hand_lab}")
 
     segments, doctor_summary = parse_result_tsv(tsv_path)
     if not segments:
@@ -394,7 +402,7 @@ def run_visualize(args: argparse.Namespace, cfg: Any) -> int:
     if cfg.half:
         predict_kw["half"] = True
 
-    det_model = YOLO(str(cfg.hand_model))
+    det = create_hand_detector(cfg)
     cap = cv2.VideoCapture(str(video_path))
     if not cap.isOpened():
         print(f"[vis] 无法打开视频: {video_path}", file=sys.stderr)
@@ -469,7 +477,7 @@ def run_visualize(args: argparse.Namespace, cfg: Any) -> int:
             if basket_roi is not None:
                 basket_for_det = _scale_basket_xyxy(basket_roi, sx, sy)
             cached_union, cached_hand_confs = detect_hands_and_union(
-                det_model,
+                det,
                 frame,
                 det_conf=float(cfg.det_conf),
                 imgsz_det=int(cfg.imgsz_det),
@@ -485,8 +493,9 @@ def run_visualize(args: argparse.Namespace, cfg: Any) -> int:
         if in_segment:
             for hxyxy, conf in cached_hand_confs:
                 x1, y1, x2, y2 = (int(round(v)) for v in hxyxy[:4])
+                hand_lbl = "手 mp" if hand_is_mediapipe else f"手 {conf:.2f}"
                 draw_labeled_box(
-                    vis, x1, y1, x2, y2, (0, 220, 0), f"手 {conf:.2f}",
+                    vis, x1, y1, x2, y2, (0, 220, 0), hand_lbl,
                     thickness=lw,
                     text=cjk,
                 )
@@ -526,6 +535,8 @@ def run_visualize(args: argparse.Namespace, cfg: Any) -> int:
             print(f"[vis] 进度 {frame_idx}/{total_frames or '?'} 帧, 手检次数={det_calls}")
 
     cap.release()
+    if hasattr(det, "close"):
+        det.close()
     if proc.stdin:
         proc.stdin.close()
     rc = proc.wait()
@@ -585,9 +596,17 @@ def main() -> int:
         default=0.2,
         help="判定靠近篮子时外扩 ROI 比例（默认 0.2）",
     )
+    ap.add_argument(
+        "--hand-backend",
+        choices=("mediapipe", "yolo"),
+        default=None,
+        help="覆盖 yaml hand.backend（默认 mediapipe + hand_landmarker.task）",
+    )
     args = ap.parse_args()
 
     cfg = load_run_config(PACK_ROOT, args.config.resolve())
+    if args.hand_backend is not None:
+        cfg.hand_backend = args.hand_backend
     return run_visualize(args, cfg)