From 975a2198a55fc51a324d1ded29cdeda1a8e4d88d Mon Sep 17 00:00:00 2001 From: hsz <2091085305@qq.com> Date: Fri, 5 Jun 2026 15:12:15 +0800 Subject: [PATCH] 6.5 --- README.md | 4 ++- scripts/visualize_pipeline.py | 63 +++++++++++++++++++++++------------ 2 files changed, 44 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index cd4cb59..e74928c 100644 --- a/README.md +++ b/README.md @@ -112,7 +112,7 @@ python scripts/visualize_pipeline.py \ | 叠加层 | 说明 | |--------|------| | 青色虚线框 | 篮子 ROI(需 `--basket-roi`,与 `--save-basket-roi` 配套) | -| 绿色框 | 段内手部检测(`hand_detect.pt`) | +| 绿色框 | 段内手部检测(与 `configs/default_config.yaml` 的 `hand.backend` 一致;默认 **MediaPipe** `weights/hand_landmarker.task`,标签为「手 mp」) | | 黄色粗框 | 双手 union ROI(与 Phase2 一致) | | 顶部信息条 | TSV 该段时间段的 rank、Top3 或失败原因 | | 片头 | 视频/TSV 路径 + 离线 `医生信息:` 汇总 | @@ -121,6 +121,8 @@ python scripts/visualize_pipeline.py \ **中文显示**:叠加文字使用 Pillow + 系统 CJK 字体(默认 `NotoSansCJK-Regular.ttc`)。若出现方框/乱码,请安装 `fonts-noto-cjk`,或通过 `--font /path/to/font.ttc` / 环境变量 `VIS_CJK_FONT` 指定字体。 +**手部后端**:与 `main_basket` 共用 `hand` 配置段。默认 `hand.backend: mediapipe` + `hand.mediapipe_task: weights/hand_landmarker.task`(每帧最多 2 只手)。对比 YOLO 旧行为:`--hand-backend yolo`。 + **篮筐附近手框与 ROI**:提供 `--basket-roi` 时,默认只绘制靠近篮子的手(篮子框外扩 20% 后 IoU > `contact_iou_on`),**黄色 ROI** 由其中与篮子 IoU 最高的两只手合并。背景手不再绘制。关闭过滤用 `--no-hand-basket-filter`;贴边漏检可试 `--basket-expand-frac 0.3` 或略降 `--hand-basket-min-iou 0.02`。 **本地 smoke**(无真实手术视频时): diff --git a/scripts/visualize_pipeline.py b/scripts/visualize_pipeline.py index b64bdcc..e3bb65f 100644 --- a/scripts/visualize_pipeline.py +++ b/scripts/visualize_pipeline.py @@ -15,7 +15,6 @@ from typing import Any import cv2 import numpy as np -from ultralytics import YOLO PACK_ROOT = Path(__file__).resolve().parent.parent _SCRIPTS = Path(__file__).resolve().parent @@ -28,11 +27,13 @@ ensure_code_on_path(PACK_ROOT) from basket_segmenter import load_basket_roi_json # noqa: E402 from config import load_run_config # noqa: E402 -from pipeline.hand_roi_merge import bbox_iou_xyxy, two_largest_hands, union_xyxy # noqa: E402 -from run_segments_consumable_vote import ( # noqa: E402 - collect_hand_boxes, - pad_box_bottom_only, +from hand_detector import ( # noqa: E402 + create_hand_detector, + detect_hands_xyxy, + validate_hand_assets, ) +from pipeline.hand_roi_merge import bbox_iou_xyxy, two_largest_hands, union_xyxy # noqa: E402 +from run_segments_consumable_vote import pad_box_bottom_only # noqa: E402 from vis_text import CjkTextRenderer # noqa: E402 from visualize_tsv import ( # noqa: E402 SegmentVis, @@ -232,7 +233,7 @@ def _scale_basket_xyxy( def detect_hands_and_union( - det_model: YOLO, + det: Any, frame: np.ndarray, *, det_conf: float, @@ -249,17 +250,16 @@ def detect_hands_and_union( 有篮子时默认:仅保留靠近篮子的手,黄 ROI 由其中 IoU 最高的两只合并。 """ h, w = frame.shape[:2] - r = det_model.predict( - frame, imgsz=imgsz_det, conf=det_conf, verbose=False, **predict_kw - )[0] - hand_confs: list[tuple[list[float], float]] = [] - if r.boxes is not None: - names = det_model.names - for box in r.boxes: - cid = int(box.cls[0]) - if names.get(cid, "") == "hand": - conf = float(box.conf[0]) if box.conf is not None else 0.0 - hand_confs.append((box.xyxy[0].tolist(), conf)) + hands = detect_hands_xyxy( + det, + frame, + det_conf=det_conf, + imgsz_det=imgsz_det, + predict_kw=predict_kw, + ) + hand_confs: list[tuple[list[float], float]] = [ + (xyxy, 1.0) for xyxy in hands + ] if ( basket_xyxy is not None @@ -346,9 +346,17 @@ def run_visualize(args: argparse.Namespace, cfg: Any) -> int: if not tsv_path.is_file(): print(f"[vis] TSV 不存在: {tsv_path}", file=sys.stderr) return 1 - if not Path(cfg.hand_model).is_file(): - print(f"[vis] 缺少手部权重: {cfg.hand_model}", file=sys.stderr) + + ok, hand_lab = validate_hand_assets(cfg) + if not ok: + backend = str(getattr(cfg, "hand_backend", "yolo")) + if backend == "mediapipe": + print(f"[vis] 缺少 MediaPipe 手部模型: {cfg.hand_mediapipe_task}", file=sys.stderr) + else: + print(f"[vis] 缺少手部权重: {cfg.hand_model}", file=sys.stderr) return 1 + hand_is_mediapipe = str(getattr(cfg, "hand_backend", "yolo")).lower() == "mediapipe" + print(f"[vis] 手部检测: {hand_lab}") segments, doctor_summary = parse_result_tsv(tsv_path) if not segments: @@ -394,7 +402,7 @@ def run_visualize(args: argparse.Namespace, cfg: Any) -> int: if cfg.half: predict_kw["half"] = True - det_model = YOLO(str(cfg.hand_model)) + det = create_hand_detector(cfg) cap = cv2.VideoCapture(str(video_path)) if not cap.isOpened(): print(f"[vis] 无法打开视频: {video_path}", file=sys.stderr) @@ -469,7 +477,7 @@ def run_visualize(args: argparse.Namespace, cfg: Any) -> int: if basket_roi is not None: basket_for_det = _scale_basket_xyxy(basket_roi, sx, sy) cached_union, cached_hand_confs = detect_hands_and_union( - det_model, + det, frame, det_conf=float(cfg.det_conf), imgsz_det=int(cfg.imgsz_det), @@ -485,8 +493,9 @@ def run_visualize(args: argparse.Namespace, cfg: Any) -> int: if in_segment: for hxyxy, conf in cached_hand_confs: x1, y1, x2, y2 = (int(round(v)) for v in hxyxy[:4]) + hand_lbl = "手 mp" if hand_is_mediapipe else f"手 {conf:.2f}" draw_labeled_box( - vis, x1, y1, x2, y2, (0, 220, 0), f"手 {conf:.2f}", + vis, x1, y1, x2, y2, (0, 220, 0), hand_lbl, thickness=lw, text=cjk, ) @@ -526,6 +535,8 @@ def run_visualize(args: argparse.Namespace, cfg: Any) -> int: print(f"[vis] 进度 {frame_idx}/{total_frames or '?'} 帧, 手检次数={det_calls}") cap.release() + if hasattr(det, "close"): + det.close() if proc.stdin: proc.stdin.close() rc = proc.wait() @@ -585,9 +596,17 @@ def main() -> int: default=0.2, help="判定靠近篮子时外扩 ROI 比例(默认 0.2)", ) + ap.add_argument( + "--hand-backend", + choices=("mediapipe", "yolo"), + default=None, + help="覆盖 yaml hand.backend(默认 mediapipe + hand_landmarker.task)", + ) args = ap.parse_args() cfg = load_run_config(PACK_ROOT, args.config.resolve()) + if args.hand_backend is not None: + cfg.hand_backend = args.hand_backend return run_visualize(args, cfg)