commit 07816bd18a67b19f4c05b3838e2ef77c7883f640 Author: hsz <2091085305@qq.com> Date: Tue Jun 2 16:59:42 2026 +0800 6.2 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..62502ef --- /dev/null +++ b/.gitignore @@ -0,0 +1,38 @@ +# 推理结果(保留空目录占位) +/output/* +!/output/.gitkeep + +# 用户放入的测试视频 / 转码产物(保留商品表 Excel) +/input/*.mp4 +/input/*.avi +/input/*.mkv +/input/*.mov +/input/remuxed/ + +# 运行期 ROI / 日志 +/output/*.json +/output/*.txt +/output/*.log + +# Python 环境与缓存 +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +.venv/ +venv/ +.env +*.egg-info/ +dist/ +build/ + +# Jupyter / 临时文件 +.ipynb_checkpoints/ +*.swp +*~ + +# IDE / 系统 +.DS_Store +.idea/ +.vscode/ diff --git a/README.md b/README.md new file mode 100644 index 0000000..ce8510e --- /dev/null +++ b/README.md @@ -0,0 +1,113 @@ +# 手术室耗材篮子识别包(离线 + 推流) + +段内流程:**手检(≥2 手 union)→ 好坏帧门控 → 耗材分类**;离线另含**医生识别**。 + +与 `configs/default_config.yaml` 当前参数一致(`imgsz_det: 1920`、`contact+1~+6` 等)。 + +## 环境要求 + +- Python 3.10+(推荐 conda 环境 `yolo`) +- NVIDIA GPU + CUDA +- `python3-tk`(每次运行首帧弹窗框选篮子 ROI) +- `ffmpeg` / `ffprobe`(HEVC 视频建议先转 H.264,见下文) + +## 快速安装 + +```bash +cd /path/to/6.1 +bash setup.sh + +# 若用 conda(推荐) +conda activate yolo +pip install -r requirements.txt +``` + +## 三个入口 + +| 脚本 | 用途 | +|------|------| +| `main_basket.py` | **离线**:全片篮子接触分段 → Phase2 → gap 合并 → 医生识别 | +| `main_basket_stream.py` | **推流/本地 MP4 模拟推流**:逐帧触发 → 段内识别 → 实时写 TSV | +| `main_segments_offline.py` | 按 TSV 时间段对离线 MP4 重跑段内识别(校验用) | + +## 1. 离线跑视频 + +```bash +conda activate yolo +cd /path/to/6.1 + +python main_basket.py \ + --video /path/to/your.mp4 \ + --excel input/视频中的商品信息表.xlsx \ + --out output/result_offline.txt \ + --save-basket-roi output/basket_roi.json \ + --config configs/default_config.yaml +``` + +运行后**弹窗框选篮子 ROI**,然后自动全片扫描 + 段内识别。 + +## 2. 
推流(或本地 MP4 测试) + +```bash +python main_basket_stream.py \ + --rtsp /path/to/your.mp4 \ + --excel input/视频中的商品信息表.xlsx \ + --out output/result_stream.txt \ + --save-basket-roi output/basket_roi_stream.json \ + --config configs/default_config.yaml +``` + +- 本地 MP4:`stream.infer_source: file` → 段内**回源 4K**(与离线一致) +- 真 RTSP:无法 seek 时回退 JPEG 缓存(`cache_max_width: 1920`) + +## 3. HEVC 视频 + +4K HEVC 可能导致 OpenCV 解码不稳定,建议先转码: + +```bash +bash scripts/remux_hevc.sh /path/to/source.mp4 +# 输出: input/remuxed/_h264.mp4 +``` + +## 配置说明(`configs/default_config.yaml`) + +| 段 | 关键参数 | +|----|----------| +| `phase2` | `imgsz_det: 1920`,`pad_bottom_ratio: 0.5`,`det_conf: 0.6` | +| `classification` | 好帧 0.8,耗材 0.8,重试 0.6 / 0.5 | +| `basket` | `iou_on: 0.03`,`confirm: 0.1`,`cooldown: 3`,窗口 contact+1~+6 | +| `stream` | 段窗口与 basket 一致;`infer_source: file` | +| `io` | `use_whitelist: false`(全 41 类) | + +## 模型文件(`weights/`) + +- `hand_detect.pt` — 手部检测 +- `goodbad_frame.pt` — 好坏帧门控 +- `haocai_classify.pt` — 耗材分类 + +## 输出格式 + +12 列 TSV + 离线末尾一行 `医生信息:...`(推流无医生行)。 + +## 目录结构 + +``` +6.1/ +├── main_basket.py # 离线入口 +├── main_basket_stream.py # 推流入口 +├── main_segments_offline.py # TSV 段内重跑 +├── configs/default_config.yaml +├── weights/ # 3 个 YOLO 权重 +├── input/视频中的商品信息表.xlsx +├── doctor_identity_package/ # 医生识别(仅离线) +├── src/ code/ # 编排与算法 +├── output/ # 结果输出目录 +├── setup.sh requirements.txt +└── README.md +``` + +## 常见问题 + +1. **CUDA OOM**:勿将 `imgsz_det` 设为 3840;当前 1920 在 8GB 显卡可用。 +2. **无 GUI**:无法弹窗框选 ROI,需另备 ROI JSON(当前入口每次仍弹窗)。 +3. 
@dataclass
class ImageRecord:
    """Metadata describing one extracted frame image.

    Instances are created by ``_record_saved`` after a frame has been written
    to disk; the bbox fields stay ``None`` unless a detector produced a box.
    """

    name: str  # image file name
    path: str  # path of the written image (``_record_saved`` stores the resolved path)
    label_category: str  # product name
    size: str  # specification
    sample: bool = False  # True for every N-th saved image (see --sample-every)
    # YOLO-format box [x_center, y_center, w, h], normalized to 0-1;
    # None when detection is disabled or nothing was detected.
    bbox_xywhn: Optional[list[float]] = None
    detection_score: Optional[float] = None  # score of the reported box, if any
HEVC/VFR 更可靠)。""" + + width: int + height: int + fps: float + duration_sec: float + frame_count: int = 0 + + +def _parse_fraction(s: str) -> float: + s = (s or "").strip() + if not s or s == "0/0": + return 0.0 + if "/" in s: + a, b = s.split("/", 1) + try: + den = float(b) + return float(a) / den if den else 0.0 + except ValueError: + return 0.0 + try: + return float(s) + except ValueError: + return 0.0 + + +def _ffprobe_video_meta(path: Path, ffprobe_bin: str) -> Optional[VideoMeta]: + if not shutil.which(ffprobe_bin): + return None + cmd = [ + ffprobe_bin, + "-v", + "error", + "-select_streams", + "v:0", + "-show_entries", + "stream=width,height,avg_frame_rate,r_frame_rate,nb_frames,duration", + "-show_entries", + "format=duration", + "-of", + "json", + str(path), + ] + try: + p = subprocess.run( + cmd, capture_output=True, text=True, timeout=60, check=False + ) + except (subprocess.TimeoutExpired, OSError): + return None + if p.returncode != 0 or not p.stdout: + return None + try: + data = json.loads(p.stdout) + except json.JSONDecodeError: + return None + streams = data.get("streams") or [] + if not streams: + return None + st = streams[0] + w = int(st.get("width") or 0) + h = int(st.get("height") or 0) + if w < 2 or h < 2: + return None + fps = _parse_fraction(str(st.get("avg_frame_rate") or "")) + if fps <= 0: + fps = _parse_fraction(str(st.get("r_frame_rate") or "")) + dur_s = float(st.get("duration") or 0.0) + fmt = data.get("format") or {} + if dur_s <= 0: + dur_s = float(fmt.get("duration") or 0.0) + nbf = st.get("nb_frames") + frame_count = 0 + if nbf is not None and str(nbf).strip() and str(nbf).upper() != "N/A": + try: + frame_count = int(nbf) + except (TypeError, ValueError): + frame_count = 0 + if frame_count <= 0 and dur_s > 0 and fps > 0: + frame_count = int(round(dur_s * fps)) + if fps <= 0 and dur_s > 0 and frame_count > 0: + fps = frame_count / dur_s + if fps <= 0: + fps = 25.0 + return VideoMeta( + width=w, + height=h, + fps=float(fps), + 
duration_sec=float(dur_s), + frame_count=frame_count, + ) + + +def _opencv_video_meta(path: Path) -> VideoMeta: + cap = cv2.VideoCapture(str(path), cv2.CAP_FFMPEG) + if not cap.isOpened(): + return VideoMeta(0, 0, 25.0, 0.0, 0) + try: + w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH) or 0) + h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) or 0) + fps = float(cap.get(cv2.CAP_PROP_FPS) or 25.0) + frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0) + duration = ( + (frame_count / fps) if fps > 0 and frame_count > 0 else 0.0 + ) + return VideoMeta( + width=w, height=h, fps=fps, duration_sec=duration, frame_count=frame_count + ) + finally: + cap.release() + + +# 同一视频在一张表里会抽多次帧;缓存 ffprobe 结果,避免每个时间点都跑一遍 ffprobe。 +_VIDEO_META_CACHE: dict[tuple[str, str], VideoMeta] = {} + + +def get_video_meta(path: Path, ffprobe_bin: str = "ffprobe") -> VideoMeta: + key = (str(Path(path).resolve()), ffprobe_bin) + if key in _VIDEO_META_CACHE: + return _VIDEO_META_CACHE[key] + m = _ffprobe_video_meta(path, ffprobe_bin) + if m is not None: + _VIDEO_META_CACHE[key] = m + return m + m = _opencv_video_meta(path) + _VIDEO_META_CACHE[key] = m + return m + + +def _clamp_time_sec(t_sec: float, meta: VideoMeta) -> float: + if meta.duration_sec > 0: + margin = 1.0 / max(meta.fps, 1.0) + return float( + min(max(0.0, t_sec), max(0.0, meta.duration_sec - margin)) + ) + return max(0.0, t_sec) + + +def _time_to_frame_index(t_sec: float, meta: VideoMeta) -> int: + fps = meta.fps if meta.fps > 0 else 25.0 + t = _clamp_time_sec(t_sec, meta) + idx = int(round(t * fps)) + if meta.frame_count > 0: + idx = min(idx, meta.frame_count - 1) + return max(0, idx) + + +def _expand_root(p: str | Path) -> Path: + return Path(p).expanduser().resolve() + + +def _is_real_xlsx(path: Path) -> bool: + if path.suffix.lower() not in (".xlsx", ".xls"): + return False + name = path.name + if name.startswith("~$") or name.startswith(".~"): + return False + if _IGNORE_XLSX.search(name): + return False + return True + + +def 
_is_real_mp4(path: Path) -> bool: + if path.suffix.lower() != ".mp4": + return False + if ".crdownload" in path.name.lower(): + return False + return True + + +def _dir_has_mp4_recursive(d: Path) -> bool: + if not d.is_dir(): + return False + try: + for p in d.rglob("*.mp4"): + if _is_real_mp4(p): + return True + except OSError: + pass + return False + + +def iter_leaf_session_dirs(root: Path) -> Iterator[Path]: + """叶子目录:直接包含至少一个有效 mp4 与 xlsx,且其子目录内不再出现 mp4。""" + import os + + root = root.resolve() + if not root.is_dir(): + return + + for dirpath, dirnames, filenames in os.walk(root, topdown=True): + p = Path(dirpath) + mp4s = [p / f for f in filenames if _is_real_mp4(p / f)] + xlsxs = [p / f for f in filenames if _is_real_xlsx(p / f)] + if not mp4s or not xlsxs: + continue + sub_has_mp4 = False + for sub in dirnames: + if _dir_has_mp4_recursive(p / sub): + sub_has_mp4 = True + break + if sub_has_mp4: + continue + yield p + + +def _video_sort_key(path: Path) -> tuple: + stem = path.stem + m = re.search(r"(\d+)", stem) + n = int(m.group(1)) if m else 10**9 + return (n, stem.lower()) + + +def list_videos(session_dir: Path) -> list[Path]: + vids = [p for p in session_dir.iterdir() if p.is_file() and _is_real_mp4(p)] + return sorted(vids, key=_video_sort_key) + + +def list_excels(session_dir: Path) -> list[Path]: + xs = [p for p in session_dir.iterdir() if p.is_file() and _is_real_xlsx(p)] + return sorted(xs, key=lambda p: p.name.lower()) + + +def _excel_pair_key(path: Path) -> tuple: + m = re.search(r"(\d+)", path.stem) + n = int(m.group(1)) if m else 10**9 + return (n, path.name.lower()) + + +def _normalize_header(s: Any) -> str: + if s is None or (isinstance(s, float) and pd.isna(s)): + return "" + return str(s).strip() + + +def _find_col(df: pd.DataFrame, *candidates: str) -> str | None: + cols = [str(c).strip() for c in df.columns] + for want in candidates: + for c in df.columns: + h = _normalize_header(c) + if h == want or want in h: + return c + return None + + 
+def normalize_haocai_class_name(name: str) -> str: + """ + 与 build_haocai_dataset_hand_crops.row_product 保持一致的类名归一。 + Excel 与训练类名在个别耗材上同物异名,此处合并为同一条目。 + """ + s = (name or "").strip() + if s == "一次性使用灭菌棉签": + return "一次性医用灭菌棉签" + if s in ( + "一次性使用手术衣", + "一次性使用手术单(一次性医用垫单)", + "一次性医用垫单", + ): + return "一次性使用手术单" + return s + + +def parse_time_range(text: Any) -> tuple[float, float] | None: + """ + 支持: + - 1.23-2.23 → 1 分 23 秒 到 2 分 23 秒 + - 0.05-0.11 → 0 分 5 秒 到 0 分 11 秒(点后为两位秒) + - 00:10-00:16 / 00:10-00:16 → mm:ss + """ + if text is None or (isinstance(text, float) and pd.isna(text)): + return None + s = str(text).strip() + if not s or s.lower() == "nan": + return None + + # 全角冒号 + s = s.replace(":", ":") + + # mm:ss - mm:ss + m = re.match( + r"^\s*(\d{1,2}):(\d{2})\s*[-–—~~]\s*(\d{1,2}):(\d{2})\s*$", + s, + ) + if m: + h1, m1, h2, m2 = m.groups() + a = int(h1) * 60 + int(m1) + b = int(h2) * 60 + int(m2) + return (float(min(a, b)), float(max(a, b))) + + # M.SS - M.SS(分.秒,秒为 1~2 位时按两位秒理解) + m = re.match( + r"^\s*(\d+)\s*\.\s*(\d{1,2})\s*[-–—~~]\s*(\d+)\s*\.\s*(\d{1,2})\s*$", + s, + ) + if m: + mm1, ss1, mm2, ss2 = m.groups() + ss1 = ss1.zfill(2)[:2] + ss2 = ss2.zfill(2)[:2] + a = int(mm1) * 60 + int(ss1) + b = int(mm2) * 60 + int(ss2) + return (float(min(a, b)), float(max(a, b))) + + return None + + +def _midpoint_seconds(start: float, end: float) -> float: + return max(0.0, (start + end) / 2.0) + + +def _sample_time_in_tear_segment( + start: float, + end: float, + *, + mode: str = "tear_first_half", +) -> float: + """ + 在 Excel 标注的「撕」时间段 [start, end] 内选取抽帧时刻。 + + - tear_first_half(默认):落在区间**前半段**,取该半段内 3/4 分位 + t = start + 0.375 * (end - start),与「后半段 3/4」对称。 + - tear_second_half:整段的后 3/4 分位 t = start + 0.75 * (end - start)。 + - midpoint:取 (start+end)/2。 + """ + if end <= start: + return max(0.0, start) + span = end - start + if mode == "midpoint": + return _midpoint_seconds(start, end) + if mode == "tear_second_half": + return max(0.0, start + 0.75 * span) + # 
def resize_frame_to_max(
    frame: Any,
    max_width: int,
    max_height: int,
) -> Any:
    """Shrink ``frame`` to fit inside ``max_width`` x ``max_height``.

    The aspect ratio is preserved and frames are never enlarged. A limit of
    0 (or less) disables that dimension; with both disabled the frame is
    returned untouched. ``None`` is passed through as ``None``.
    """
    if frame is None:
        return None
    cap_width = max_width > 0
    cap_height = max_height > 0
    if not (cap_width or cap_height):
        return frame

    src_h, src_w = frame.shape[:2]
    ratio = 1.0
    if cap_width:
        ratio = min(ratio, max_width / src_w)
    if cap_height:
        ratio = min(ratio, max_height / src_h)
    if ratio >= 1.0:
        # Already within bounds: shrink only, never enlarge.
        return frame

    target = (
        max(1, int(round(src_w * ratio))),
        max(1, int(round(src_h * ratio))),
    )
    return cv2.resize(frame, target, interpolation=cv2.INTER_AREA)
class GroundingDinoDetector:
    """
    Open-vocabulary detector backed by Grounding DINO (HuggingFace transformers).

    ``detect`` reports only the highest-scoring box, as a normalized YOLO
    ``[cx, cy, w, h]`` list together with its score.
    """

    def __init__(
        self,
        model_id: str = "IDEA-Research/grounding-dino-base",
        prompt: str = "person .",
        box_threshold: float = 0.30,
        text_threshold: float = 0.25,
    ) -> None:
        # Heavy optional dependencies are imported lazily so the module can be
        # used without them when detection is not requested.
        import torch
        from PIL import Image as _PILImage  # noqa: F401
        from transformers import AutoModelForZeroShotObjectDetection, AutoProcessor

        self.prompt = prompt
        self.box_threshold = box_threshold
        self.text_threshold = text_threshold

        self._torch = torch
        self._PILImage = _PILImage
        self._device = "cuda" if torch.cuda.is_available() else "cpu"
        self._processor = AutoProcessor.from_pretrained(model_id)
        model = AutoModelForZeroShotObjectDetection.from_pretrained(model_id)
        self._model = model.to(self._device)
        self._model.eval()
        _log(f"GroundingDinoDetector loaded: {model_id} on {self._device}")

    def detect(self, img_bgr: np.ndarray) -> tuple[
        Optional[list[float]],
        Optional[float],
    ]:
        """Run the prompt on a BGR image and return ``(bbox_xywhn, score)``.

        Returns ``(None, None)`` when the image is smaller than 2 px in either
        dimension or when nothing passes the thresholds.
        """
        height, width = img_bgr.shape[:2]
        if width < 2 or height < 2:
            return None, None

        pil_img = self._PILImage.fromarray(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))

        with self._torch.no_grad():
            inputs = self._processor(
                images=pil_img, text=self.prompt, return_tensors="pt"
            ).to(self._device)
            outputs = self._model(**inputs)

        post_process = self._processor.post_process_grounded_object_detection
        shared_kwargs = {
            "text_threshold": self.text_threshold,
            "target_sizes": self._torch.tensor([[height, width]], device=self._device),
        }
        try:
            results = post_process(
                outputs,
                inputs.input_ids,
                threshold=self.box_threshold,
                **shared_kwargs,
            )[0]
        except TypeError:
            # Retry with the alternative keyword name for the box threshold.
            results = post_process(
                outputs,
                inputs.input_ids,
                box_threshold=self.box_threshold,
                **shared_kwargs,
            )[0]

        if results is None or len(results["boxes"]) == 0:
            return None, None

        scores = results["scores"]
        top = int(scores.argmax().item())
        box = results["boxes"][top].tolist()
        top_score = float(scores[top].item())
        clipped = _clip_xyxy_xyxy([float(box[i]) for i in range(4)], width, height)
        return _xyxy_to_xywhn(clipped, width, height), top_score
_is_degenerate_gray_frame(img: np.ndarray) -> bool: + """ffmpeg 快 seek 在部分 HEVC 码流上可能输出近似中性灰、几乎无纹理的无效帧。""" + if img is None or img.size == 0: + return True + m = float(np.mean(img)) + s = float(np.std(img)) + return 118.0 <= m <= 138.0 and s < 8.0 + + +def extract_frame_ffmpeg( + video_path: Path, + t_sec: float, + *, + ffmpeg_bin: str = "ffmpeg", + ffprobe_bin: str = "ffprobe", + accurate_seek: bool = True, + timeout_sec: float = 600.0, +) -> np.ndarray | None: + """ + 使用 ffmpeg 解码单帧。时间戳 clamp 优先用 ffprobe,避免 OpenCV 对 HEVC 的 fps/时长偏差。 + + accurate_seek=True(默认):-ss 在 -i 之后,解码正确,长视频较慢。 + accurate_seek=False:-ss 在 -i 之前,快,少数文件仍可能异常。 + """ + if not shutil.which(ffmpeg_bin): + return None + meta = get_video_meta(video_path, ffprobe_bin) + if meta.width < 2 or meta.height < 2: + return None + t_clamped = _clamp_time_sec(t_sec, meta) + w, h = meta.width, meta.height + expected_raw = w * h * 3 + + def _run_ffmpeg(cmd: list[str]) -> tuple[Optional[bytes], Optional[str]]: + try: + p = subprocess.run( + cmd, + capture_output=True, + timeout=timeout_sec, + check=False, + ) + except subprocess.TimeoutExpired: + return None, "timeout" + err = (p.stderr or b"").decode("utf-8", errors="replace")[:800] + if p.returncode != 0: + return None, err or f"exit {p.returncode}" + if not p.stdout: + return None, err or "empty stdout" + return p.stdout, None + + def _decode_png(data: bytes) -> Optional[np.ndarray]: + arr = np.frombuffer(data, dtype=np.uint8) + img = cv2.imdecode(arr, cv2.IMREAD_COLOR) + return img + + # 1) 精确 seek + PNG(通用) + if accurate_seek: + cmd_png = [ + ffmpeg_bin, + "-hide_banner", + "-loglevel", + "error", + "-i", + str(video_path), + "-ss", + f"{t_clamped:.6f}", + "-frames:v", + "1", + "-an", + "-f", + "image2pipe", + "-vcodec", + "png", + "-", + ] + else: + cmd_png = [ + ffmpeg_bin, + "-hide_banner", + "-loglevel", + "error", + "-ss", + f"{t_clamped:.6f}", + "-i", + str(video_path), + "-frames:v", + "1", + "-an", + "-f", + "image2pipe", + "-vcodec", + "png", + 
"-", + ] + out, err = _run_ffmpeg(cmd_png) + if out is not None: + img = _decode_png(out) + if img is not None and img.size > 0: + if not accurate_seek and _is_degenerate_gray_frame(img): + _log( + f"快 seek 输出疑似灰帧,改用精确 seek: {video_path.name} t={t_clamped:.2f}s" + ) + return extract_frame_ffmpeg( + video_path, + t_sec, + ffmpeg_bin=ffmpeg_bin, + ffprobe_bin=ffprobe_bin, + accurate_seek=True, + timeout_sec=timeout_sec, + ) + return img + if err and err != "timeout": + _log(f"ffmpeg PNG 解码失败: {video_path.name}: {err[:200]}") + + # 2) 精确 seek + raw BGR(避免 PNG 编解码;尺寸来自 ffprobe) + cmd_raw = [ + ffmpeg_bin, + "-hide_banner", + "-loglevel", + "error", + "-i", + str(video_path), + "-ss", + f"{t_clamped:.6f}", + "-frames:v", + "1", + "-an", + "-f", + "rawvideo", + "-pix_fmt", + "bgr24", + "-s", + f"{w}x{h}", + "-", + ] + if not accurate_seek: + cmd_raw = [ + ffmpeg_bin, + "-hide_banner", + "-loglevel", + "error", + "-ss", + f"{t_clamped:.6f}", + "-i", + str(video_path), + "-frames:v", + "1", + "-an", + "-f", + "rawvideo", + "-pix_fmt", + "bgr24", + "-s", + f"{w}x{h}", + "-", + ] + out2, err2 = _run_ffmpeg(cmd_raw) + if out2 is not None and len(out2) == expected_raw: + img = np.frombuffer(out2, dtype=np.uint8).reshape((h, w, 3)).copy() + if not accurate_seek and _is_degenerate_gray_frame(img): + _log( + f"快 seek raw 疑似灰帧,改用精确 seek: {video_path.name} t={t_clamped:.2f}s" + ) + return extract_frame_ffmpeg( + video_path, + t_sec, + ffmpeg_bin=ffmpeg_bin, + ffprobe_bin=ffprobe_bin, + accurate_seek=True, + timeout_sec=timeout_sec, + ) + return img + if err2 and err2 != "timeout": + _log(f"ffmpeg rawvideo 失败: {video_path.name}: {err2[:200]}") + + return None + + +def extract_frame_opencv_sequential( + video_path: Path, + t_sec: float, + ffprobe_bin: str = "ffprobe", +) -> Any | None: + """ + 从第 0 帧顺序读到目标帧;帧索引由 ffprobe 元数据计算(比仅用 OpenCV fps 更稳)。 + """ + meta = get_video_meta(video_path, ffprobe_bin) + target_idx = _time_to_frame_index(t_sec, meta) + cap = 
cv2.VideoCapture(str(video_path), cv2.CAP_FFMPEG) + if not cap.isOpened(): + return None + try: + frame: Any | None = None + for _ in range(target_idx + 1): + ok, frame = cap.read() + if not ok or frame is None: + return None + return frame + finally: + cap.release() + + +def make_extract_frame_fn( + backend: str, + ffmpeg_bin: str, + ffprobe_bin: str, + accurate_seek: bool, +) -> tuple[Callable[[Path, float], Any | None], str]: + """ + 返回 (抽帧函数, 实际后端说明)。 + auto:有 ffmpeg 用 ffmpeg,否则 OpenCV 顺序解码。 + """ + b = backend.strip().lower() + if b == "auto": + b = "ffmpeg" if shutil.which(ffmpeg_bin) else "opencv" + if b == "ffmpeg" and not shutil.which(ffmpeg_bin): + _log(f"未找到 {ffmpeg_bin!r},改用 OpenCV 顺序解码(较慢)") + b = "opencv" + if b == "ffmpeg": + + def fn_ffmpeg(p: Path, t: float) -> Any | None: + img = extract_frame_ffmpeg( + p, + t, + ffmpeg_bin=ffmpeg_bin, + ffprobe_bin=ffprobe_bin, + accurate_seek=accurate_seek, + ) + if img is None: + return extract_frame_opencv_sequential(p, t, ffprobe_bin) + return img + + mode = "ffmpeg_accurate" if accurate_seek else "ffmpeg_fast" + return fn_ffmpeg, mode + def fn_cv_only(p: Path, t: float) -> Any | None: + return extract_frame_opencv_sequential(p, t, ffprobe_bin) + + return fn_cv_only, "opencv_sequential" + + +def _unique_image_name( + session_rel: str, + row_idx: int, + video_tag: str, + time_raw: str, + ext: str = ".jpg", +) -> str: + h = hashlib.sha1( + f"{session_rel}|{row_idx}|{video_tag}|{time_raw}".encode("utf-8") + ).hexdigest()[:16] + safe = re.sub(r"[^\w\u4e00-\u9fff\-]+", "_", session_rel)[-80:] + return f"{safe}__r{row_idx}_{video_tag}_{h}{ext}" + + +def _sanitize_dir_segment(text: Any, fallback: str) -> str: + """目录名:去掉路径非法字符,过长截断;空则用 fallback。""" + if text is None: + return fallback + if isinstance(text, float) and pd.isna(text): + return fallback + t = str(text).strip() + if not t: + return fallback + t = re.sub(r'[/\\:\0<>"|?*]+', "_", t) + t = t.strip(" .") + if not t or all(c == "." 
def process_session(
    session_dir: Path,
    data_root: Path,
    images_out: Path,
    records: list[ImageRecord],
    global_idx: list[int],
    sample_every: int,
    limit: int = 0,
    max_width: int = 0,
    max_height: int = 0,
    bbox_detector: Optional[GroundingDinoDetector] = None,
    vis_out_root: Optional[Path] = None,
    extract_frame_fn: Callable[[Path, float], Any | None] = extract_frame_opencv_sequential,
    time_sample_mode: str = "tear_first_half",
) -> int:
    """Extract one frame per annotated time range found in a leaf session directory.

    Layouts handled, in this order:

    1. two or more Excel files and two or more videos: the first two of each
       are paired by sort order and each sheet drives its own video;
    2. one Excel file with both "video 1" and "video 2" time columns and at
       least two videos: every row may yield one frame per video;
    3. anything else: the first Excel file drives the first video, using the
       time column if found, otherwise the last column.

    Args:
        session_dir: Leaf directory holding the videos and Excel files.
        data_root: Dataset root; ``session_dir`` relative to it seeds file names.
        images_out: Root under which ``<product>/<spec>/`` folders are created.
        records: Output list; one ``ImageRecord`` is appended per saved image.
        global_idx: One-element counter shared across sessions (see ``_record_saved``).
        sample_every: Passed to ``_record_saved`` for the ``sample`` flag.
        limit: When > 0, stop as soon as ``len(records)`` reaches it.
        max_width, max_height: Optional downscale bounds for the saved JPEG.
        bbox_detector: Optional detector run on each saved image.
        vis_out_root: Optional root for visualization images.
        extract_frame_fn: Callable ``(video_path, t_sec) -> frame or None``.
        time_sample_mode: Mode forwarded to ``_sample_time_in_tear_segment``.

    Returns:
        Number of images successfully written for this session.
    """
    videos = list_videos(session_dir)
    excels = list_excels(session_dir)
    if not videos or not excels:
        return 0

    session_rel = str(session_dir.relative_to(data_root))
    vid_list = sorted(videos, key=_video_sort_key)
    n_ok = 0

    def _cell_text(row: pd.Series, col: Any) -> str:
        """Return the stripped text of a cell, or "" for a missing column / None / NaN."""
        if col is None:
            return ""
        value = row.get(col)
        if value is None or (isinstance(value, float) and pd.isna(value)):
            return ""
        return str(value).strip()

    def _time_col_or_last(df: pd.DataFrame, *names: str) -> Any:
        """Find the time column by header; fall back to the last column, or None for a sheet with no columns."""
        col = _find_col(df, *names)
        if col is None and len(df.columns) > 0:
            # The time range is commonly the last column.
            col = df.columns[-1]
        return col

    def _save_row_frame(
        row: pd.Series,
        row_idx: int,
        time_col: Any,
        video: Path,
        video_tag: str,
        name_col: Any,
        spec_col: Any,
        skip_unlabeled: bool,
    ) -> bool:
        """Extract, save and record the frame for one (row, video) pair; True if an image was written."""
        raw_time = row.get(time_col)
        span = parse_time_range(raw_time)
        if span is None:
            return False
        label = normalize_haocai_class_name(_cell_text(row, name_col))
        size = _cell_text(row, spec_col)
        if skip_unlabeled and not label and not size:
            return False
        t_sample = _sample_time_in_tear_segment(*span, mode=time_sample_mode)
        frame = extract_frame_fn(video, t_sample)
        if frame is None:
            return False
        fname = _unique_image_name(session_rel, row_idx, video_tag, str(raw_time))
        out_dir = _product_image_dir(images_out, label, size)
        out_dir.mkdir(parents=True, exist_ok=True)
        out_path = out_dir / fname
        saved, img_out = save_frame_jpeg(
            frame,
            out_path,
            max_width=max_width,
            max_height=max_height,
        )
        if not saved:
            return False
        bbox, score = _bbox_from_detector(bbox_detector, img_out)
        _record_saved(
            records, global_idx, sample_every,
            fname, out_path, label, size,
            bbox_xywhn=bbox, detection_score=score,
        )
        _write_vis_if_enabled(
            vis_out_root, label, size, fname, img_out, bbox, score,
        )
        return True

    def _run_table(
        df: pd.DataFrame,
        targets: list[tuple[Any, Path, str]],
        *,
        skip_unlabeled: bool = False,
    ) -> bool:
        """Process every row of ``df`` for each (time_col, video, tag) target.

        Returns True when the global ``limit`` was reached and processing
        must stop.
        """
        nonlocal n_ok
        # Column lookup does not depend on the row; do it once per sheet.
        name_col = _find_col(df, "商品名称")
        spec_col = _find_col(df, "规格")
        for row_idx, (_, row) in enumerate(df.iterrows()):
            for time_col, video, tag in targets:
                if _limit_reached(records, limit):
                    return True
                if _save_row_frame(
                    row, row_idx, time_col, video, tag,
                    name_col, spec_col, skip_unlabeled,
                ):
                    n_ok += 1
                    if _limit_reached(records, limit):
                        return True
        return False

    # Two Excel files + two videos: each sheet drives the video with the same rank.
    if len(excels) >= 2 and len(videos) >= 2:
        excel_list = sorted(excels, key=_excel_pair_key)
        for pair_idx in range(min(len(excel_list), len(vid_list), 2)):
            df = _read_excel(excel_list[pair_idx])
            time_col = _time_col_or_last(
                df,
                "视频内时间段",
                "视频01内时间段",
                "视频02内时间段",
            )
            if time_col is None:
                # Sheet without any column: nothing to extract.
                continue
            targets = [(time_col, vid_list[pair_idx], f"v{pair_idx + 1}")]
            # Only this layout skips rows that have neither product name nor
            # spec; the other layouts keep them. The difference is preserved
            # as found. NOTE(review): confirm whether it is intentional.
            if _run_table(df, targets, skip_unlabeled=True):
                break
        return n_ok

    df = _read_excel(excels[0])
    if len(excels) == 1:
        c_v1 = _find_col(df, "视频1内时间段", "视频01内时间段")
        c_v2 = _find_col(df, "视频2内时间段", "视频02内时间段")
        if len(videos) >= 2 and c_v1 is not None and c_v2 is not None:
            # One sheet with a time column per video.
            _run_table(
                df,
                [(c_v1, vid_list[0], "v1"), (c_v2, vid_list[1], "v2")],
            )
            return n_ok
        time_names: tuple[str, ...] = ("视频内时间段", "视频1内时间段")
    else:
        # Several Excel files but a single video: use the first sheet only.
        time_names = ("视频内时间段",)

    time_col = _time_col_or_last(df, *time_names)
    if time_col is not None:
        _run_table(df, [(time_col, vid_list[0], "v1")])
    return n_ok
parser.add_argument( + "--save-vis", + action="store_true", + help="在 output-dir 下写入可视化图(默认子目录 vis/),与 images 同目录结构,文件名为 <原名>_vis.jpg", + ) + parser.add_argument( + "--vis-subdir", + type=str, + default="vis", + help="可视化 JPEG 所在子目录名(位于 output-dir 下,默认 vis)", + ) + parser.add_argument( + "--extract-backend", + type=str, + choices=("auto", "ffmpeg", "opencv"), + default="auto", + help="抽帧:auto=有 ffmpeg 则用 ffmpeg(推荐,HEVC 不易花屏);" + "ffmpeg=必须可用 ffmpeg;opencv=顺序解码,无 ffmpeg 时可用但较慢", + ) + parser.add_argument( + "--ffmpeg-bin", + type=str, + default="ffmpeg", + metavar="CMD", + help="ffmpeg 可执行文件名或绝对路径(默认 ffmpeg)", + ) + parser.add_argument( + "--ffprobe-bin", + type=str, + default="ffprobe", + metavar="CMD", + help="ffprobe 可执行文件名(用于时长/帧率/分辨率;默认 ffprobe)", + ) + parser.add_argument( + "--ffmpeg-fast-seek", + action="store_true", + help="快 seek:-ss 在 -i 之前,长视频抽帧快很多;默认精确 seek 从开头解码到目标时刻,故很慢", + ) + parser.add_argument( + "--sample-midpoint", + action="store_true", + help="时间段内抽帧取中点;默认取「撕」区间前半段(半段内 3/4 分位)", + ) + parser.add_argument( + "--tear-second-half", + action="store_true", + help="撕时间段内用整段后半 3/4 分位(旧默认);与默认前半段二选一", + ) + args = parser.parse_args() + + if args.sample_every < 0: + print("--sample-every 须 >= 0", file=sys.stderr) + return 2 + if args.limit < 0: + print("--limit 须 >= 0", file=sys.stderr) + return 2 + if args.max_width < 0 or args.max_height < 0: + print("--max-width / --max-height 须 >= 0", file=sys.stderr) + return 2 + bbox_detector: Optional[GroundingDinoDetector] = None + if args.detect_bbox: + try: + _log("Grounding DINO bbox detection enabled") + _log( + f"model={args.dino_model_id}, prompt={args.dino_prompt!r}, " + f"box_threshold={args.dino_box_threshold}, " + f"text_threshold={args.dino_text_threshold}" + ) + bbox_detector = GroundingDinoDetector( + model_id=args.dino_model_id, + prompt=args.dino_prompt, + box_threshold=args.dino_box_threshold, + text_threshold=args.dino_text_threshold, + ) + except Exception as e: + print( + f"启用 --detect-bbox 
失败: {type(e).__name__}: {e}\n" + "请确认已安装: pip install transformers torch pillow", + file=sys.stderr, + ) + return 2 + + data_root = _expand_root(args.data_root) + out_root = _expand_root(args.output_dir) + images_out = out_root / args.images_subdir + images_out.mkdir(parents=True, exist_ok=True) + + vis_out_root: Optional[Path] = None + if args.save_vis: + vis_out_root = out_root / args.vis_subdir + vis_out_root.mkdir(parents=True, exist_ok=True) + + records: list[ImageRecord] = [] + global_idx = [0] + total = 0 + sessions = list(iter_leaf_session_dirs(data_root)) + if not sessions: + print(f"未找到叶子会话目录(需同时含 mp4 与 xlsx): {data_root}", file=sys.stderr) + + if not shutil.which(args.ffprobe_bin): + _log( + f"未找到 {args.ffprobe_bin!r},时长/帧率将仅用 OpenCV(HEVC 可能偏差);" + "建议: conda install ffmpeg 或 apt install ffmpeg" + ) + extract_frame_fn, extract_mode = make_extract_frame_fn( + args.extract_backend, + args.ffmpeg_bin, + args.ffprobe_bin, + accurate_seek=not args.ffmpeg_fast_seek, + ) + _log(f"抽帧后端: {extract_mode}") + if args.sample_midpoint: + time_sample_mode = "midpoint" + elif args.tear_second_half: + time_sample_mode = "tear_second_half" + else: + time_sample_mode = "tear_first_half" + _log( + "时间段采样: " + + ( + "中点(--sample-midpoint)" + if time_sample_mode == "midpoint" + else ( + "撕区间后半段 3/4(--tear-second-half)" + if time_sample_mode == "tear_second_half" + else "撕区间前半段(默认,半段内 3/4 分位)" + ) + ) + ) + if extract_mode.startswith("ffmpeg") and not args.ffmpeg_fast_seek: + _log( + "精确 seek(默认)在长视频、大时间戳时很慢:每次抽帧都会从文件开头解码到目标时刻。" + "若可接受略快 seek,请加 --ffmpeg-fast-seek 加速。" + ) + + for sd in sorted(sessions): + if _limit_reached(records, args.limit): + break + n = process_session( + sd, + data_root, + images_out, + records, + global_idx, + args.sample_every, + args.limit, + args.max_width, + args.max_height, + bbox_detector, + vis_out_root, + extract_frame_fn=extract_frame_fn, + time_sample_mode=time_sample_mode, + ) + total += n + print(f"{sd.relative_to(data_root)}: {n} 张") + + 
json_path = out_root / args.json_name + payload = [asdict(r) for r in records] + json_path.write_text( + json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8" + ) + lim_note = f"(limit={args.limit})" if args.limit > 0 else "" + vis_note = ( + f",可视化目录: {vis_out_root}" + if vis_out_root is not None + else "" + ) + print( + f"共写入 {total} 张图片{lim_note},JSON 条目 {len(records)},元数据: {json_path}{vis_note}" + ) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/code/repo_root.py b/code/repo_root.py new file mode 100644 index 0000000..baa7e09 --- /dev/null +++ b/code/repo_root.py @@ -0,0 +1,6 @@ +"""仓库根目录常量:本文件必须位于含 dataset.py 的 code/ 根目录。""" +from __future__ import annotations + +from pathlib import Path + +CODE_ROOT = Path(__file__).resolve().parent diff --git a/code/video_clip_cls/infer_single_0506/run_segments_consumable_vote.py b/code/video_clip_cls/infer_single_0506/run_segments_consumable_vote.py new file mode 100644 index 0000000..656a5ab --- /dev/null +++ b/code/video_clip_cls/infer_single_0506/run_segments_consumable_vote.py @@ -0,0 +1,564 @@ +#!/usr/bin/env python3 +""" +仅在「时间段 txt」内跑:人手检测 → **逐帧**好/坏门控(**top1 为 good 且 top1conf>阈值**,默认阈值 0.9) +→ 仅通过的帧跑 41 类耗材分类;(可选)仅保留 **耗材 softmax 最大值 > --haocai-min-conf** 的帧; +对保留帧的标签序列做 **滑动窗口多数票平滑**,再 **`consumable` 取平滑后序列众数**。 + +**avg_softmax_*** :仅对上述「高置信耗材帧」统计;类别为 softmax 均值分布前三;置信度为三档边际 softmax 在时间上的平均。 + +不扫全片;每段从视频中按起止时间解码。 + +用法(建议在 yolo 环境): + python code/video_clip_cls/infer_single_0506/run_segments_consumable_vote.py \\ + --segments .../03视频_segments_mutual_exclusive_score_gt_0.1.txt \\ + --video .../03视频.mp4 \\ + --out .../03视频_segments_consumables.txt +""" + +from __future__ import annotations + +import argparse +import sys +from collections import Counter +from pathlib import Path + +import cv2 +import numpy as np +from ultralytics import YOLO + +for _repo in Path(__file__).resolve().parents: + if (_repo / "repo_root.py").is_file() and (_repo / 
def parse_segments_txt(path: Path) -> list[tuple[int, float, float]]:
    """Parse a tab-separated segments file into ``(rank, start_sec, end_sec)`` rows.

    Blank lines, header lines (text starting with ``rank``, case-insensitive,
    after stripping surrounding whitespace) and lines with fewer than four
    tab-separated columns are skipped. Only the first three columns are read.

    The file is decoded as ``utf-8-sig`` so a leading byte-order mark does not
    hide the header line and make ``int()`` fail on it.

    Raises:
        ValueError: if rank/start/end of a data row cannot be converted.
    """
    rows: list[tuple[int, float, float]] = []
    for raw in path.read_text(encoding="utf-8-sig").splitlines():
        stripped = raw.strip()
        # Header detection uses the stripped text; column splitting keeps the
        # raw line so trailing empty columns still count towards the minimum.
        if not stripped or stripped.lower().startswith("rank"):
            continue
        parts = raw.split("\t")
        if len(parts) < 4:
            continue
        rank = int(parts[0].strip())
        t0 = float(parts[1].strip())
        t1 = float(parts[2].strip())
        rows.append((rank, t0, t1))
    return rows


def collect_hand_boxes(det_model: YOLO, boxes) -> list[list[float]]:
    """Return the xyxy box of every detection whose class name is ``"hand"``."""
    names = det_model.names
    return [
        box.xyxy[0].tolist()
        for box in boxes
        if names.get(int(box.cls[0]), "") == "hand"
    ]


def pad_box(
    xyxy: list[float], img_w: int, img_h: int, pad_ratio: float
) -> tuple[int, int, int, int]:
    """Expand a box on all four sides by ``pad_ratio`` of its width/height.

    The result is truncated to integers and clamped to ``[0, img_w]`` and
    ``[0, img_h]``. Legacy helper; the original note says Phase2 now uses
    ``pad_box_bottom_only``.
    """
    x1, y1, x2, y2 = xyxy
    bw, bh = x2 - x1, y2 - y1
    px, py = bw * pad_ratio, bh * pad_ratio
    return (
        max(0, int(x1 - px)),
        max(0, int(y1 - py)),
        min(img_w, int(x2 + px)),
        min(img_h, int(y2 + py)),
    )


def pad_box_bottom_only(
    xyxy: list[float], img_w: int, img_h: int, bottom_ratio: float
) -> tuple[int, int, int, int]:
    """Extend a box downwards only: ``y2 += box_height * bottom_ratio``.

    Top, left and right edges are kept; all values are truncated to integers
    and clamped to the image bounds.
    """
    x1, y1, x2, y2 = xyxy
    bh = y2 - y1
    dy = bh * float(bottom_ratio)
    return (
        max(0, int(x1)),
        max(0, int(y1)),
        min(img_w, int(x2)),
        min(img_h, int(y2 + dy)),
    )


def largest_hand(hands: list[list[float]]) -> list[float]:
    """Return the box with the largest area (``max`` raises on an empty list)."""

    def area(b: list[float]) -> float:
        return max(0.0, b[2] - b[0]) * max(0.0, b[3] - b[1])

    return max(hands, key=area)


def _float_top1conf(pr) -> float:
    """Read ``pr.top1conf`` as a Python float; ``None`` becomes ``0.0``."""
    tc = pr.top1conf
    if tc is None:
        return 0.0
    if isinstance(tc, (float, int, np.floating)):
        return float(tc)
    # Anything else is treated as a tensor-like object.
    return float(tc.detach().float().cpu().item())


def passes_good_gate_top1_conf(
    gb_model: YOLO,
    crop: np.ndarray,
    gb_names: dict,
    imgsz: int,
    top1_conf_must_exceed: float,
) -> bool:
    """Good/bad gate: top-1 label is ``good`` and top-1 confidence is strictly above the threshold.

    Returns ``False`` for an empty crop or when the result has no ``probs``.
    """
    if crop.size == 0:
        return False
    r = gb_model.predict(crop, imgsz=imgsz, verbose=False)[0]
    pr = r.probs
    if pr is None:
        return False
    tid = int(pr.top1)
    label = str(gb_names.get(tid, "")).strip().lower()
    conf = _float_top1conf(pr)
    return label == "good" and conf > top1_conf_must_exceed


def haocai_softmax_probs(
    cls_model: YOLO, crop: np.ndarray, imgsz: int, n_cls: int
) -> np.ndarray | None:
    """Run the consumable classifier and return a length-``n_cls`` probability vector.

    Returns ``None`` for an empty crop, a result without ``probs``/``probs.data``,
    or when the raw values sum to (almost) zero or less.

    Outputs longer than ``n_cls`` are truncated and renormalised. Outputs shorter
    than ``n_cls`` are zero-padded *after* normalisation, so classes the model
    never scored get probability 0. (``np.resize`` would tile the existing
    values into the missing slots and distort the distribution.)
    """
    if crop.size == 0:
        return None
    r = cls_model.predict(crop, imgsz=imgsz, verbose=False)[0]
    pr = r.probs
    if pr is None or pr.data is None:
        return None
    v = pr.data.detach().float().cpu().numpy().astype(np.float64).ravel()
    v = v[:n_cls].copy()
    s = float(np.sum(v))
    # NOTE(review): raw logits whose sum is <= 0 are rejected here, before the
    # softmax fallback below can run; kept as in the original.
    if s <= 1e-12:
        return None
    if abs(s - 1.0) > 0.08:
        # Values do not look normalised: apply a numerically stable softmax.
        v = v - float(np.max(v))
        e = np.exp(np.clip(v, -40.0, 40.0))
        probs = e / float(np.sum(e))
    else:
        probs = v / s
    if probs.size < n_cls:
        padded = np.zeros(n_cls, dtype=np.float64)
        padded[: probs.size] = probs
        probs = padded
    return probs


def _cls_name(names: dict, idx: int) -> str:
    """Map a class index to its name, falling back to the index as a string."""
    return str(names.get(int(idx), str(idx)))


def mean_softmax_top3(
    probs_list: list[np.ndarray], cls_names: dict
) -> tuple[list[str], list[float]]:
    """Summarise per-frame probability vectors into three (name, confidence) slots.

    Names: the per-class arithmetic mean over all frames is computed and the
    three classes with the highest mean are reported.

    Confidences (decoupled from the names): each frame's vector is sorted in
    descending order, and the 1st/2nd/3rd largest values are averaged over
    frames.

    Missing slots are filled with ``""`` and ``0.0``; an empty input yields
    three empty slots.
    """
    names_out: list[str] = []
    probs_out: list[float] = []
    if not probs_list:
        return ["", "", ""], [0.0, 0.0, 0.0]
    stacked = np.stack(probs_list, axis=0)
    p = np.mean(stacked, axis=0, dtype=np.float64)
    order = np.argsort(-p)
    for k in range(3):
        if k < order.size:
            names_out.append(_cls_name(cls_names, int(order[k])))
        else:
            names_out.append("")
    # Per-frame descending sort, then average each of the top three ranks over time.
    row_sorted = np.sort(stacked, axis=1)[:, ::-1]
    n_cls = row_sorted.shape[1]
    for k in range(3):
        if k < n_cls:
            probs_out.append(float(np.mean(row_sorted[:, k], dtype=np.float64)))
        else:
            probs_out.append(0.0)
    return names_out, probs_out


def smooth_labels_majority(labels: list[str], window: int) -> list[str]:
    """Smooth a time-ordered label sequence with a sliding-window majority vote.

    Each position is replaced by the most common label in the window centred
    on it (the window is truncated at the sequence ends). An even ``window``
    is widened by one to make it odd. Ties are resolved by whatever
    ``Counter.most_common(1)`` returns. ``window <= 1`` or an empty input
    returns a plain copy.
    """
    if window <= 1 or not labels:
        return list(labels)
    w = window if window % 2 == 1 else window + 1
    half = w // 2
    n = len(labels)
    out: list[str] = []
    for i in range(n):
        lo = max(0, i - half)
        hi = min(n, i + half + 1)
        top, _count = Counter(labels[lo:hi]).most_common(1)[0]
        out.append(top)
    return out
top1conf>阈值的帧数(门控通过即计数,与是否成功得到 softmax 无关) + n_gate_pass = 0 + cls_labels: list[str] = [] + cls_prob_rows: list[np.ndarray] = [] + frames_read_in_segment = 0 + + def one_frame(fr: np.ndarray, _t_abs: float) -> None: + nonlocal frames_read_in_segment, n_hand_frames, n_gate_pass, cls_labels, cls_prob_rows + frames_read_in_segment += 1 + if frame_stride > 1 and (frames_read_in_segment - 1) % frame_stride != 0: + return + + r0 = det.predict( + fr, + conf=det_conf, + imgsz=imgsz_det, + verbose=False, + )[0] + hands = collect_hand_boxes(det, r0.boxes) if r0.boxes else [] + if not hands: + return + + n_hand_frames += 1 + xyxy = largest_hand(hands) + x1, y1, x2, y2 = pad_box(xyxy, w, h, pad_ratio) + crop = fr[y1:y2, x1:x2] + ok_gate = passes_good_gate_top1_conf( + gb, crop, gb_names, imgsz_cls, good_top1_conf_threshold + ) + if ok_gate: + n_gate_pass += 1 + vec = haocai_softmax_probs(cls_m, crop, imgsz_cls, n_cls) + if vec is not None: + top_prob = float(np.max(vec)) + if top_prob <= haocai_min_conf: + return + cls_prob_rows.append(vec) + cls_labels.append(_cls_name(cls_names, int(np.argmax(vec)))) + + if synced_frame is not None and synced_t is not None: + if synced_t <= end_sec + 0.08: + one_frame(synced_frame, synced_t) + + while True: + ok, frame = cap.read() + if not ok or frame is None: + break + t = float(cap.get(cv2.CAP_PROP_POS_MSEC)) / 1000.0 + if t > end_sec + 0.08: + break + if t + 1e-6 < start_sec: + continue + one_frame(frame, t) + + if n_hand_frames == 0: + return { + "consumable": "(段内未检测到手部)", + "n_hand_frames": 0, + "n_gate_pass": 0, + "n_predictions": 0, + "top_vote_count": 0, + "avg_top1_cls": "", + "avg_top1_prob": "", + "avg_top2_cls": "", + "avg_top2_prob": "", + "avg_top3_cls": "", + "avg_top3_prob": "", + } + + if not cls_labels: + return { + "consumable": ( + "(无满足条件的耗材帧:好帧置信度或未过门控" + + ( + "" if haocai_min_conf <= 0.0 + else ",或耗材 top1 softmax 不大于阈值" + ) + + ")" + ), + "n_hand_frames": n_hand_frames, + "n_gate_pass": n_gate_pass, + 
"n_predictions": 0, + "top_vote_count": 0, + "avg_top1_cls": "", + "avg_top1_prob": "", + "avg_top2_cls": "", + "avg_top2_prob": "", + "avg_top3_cls": "", + "avg_top3_prob": "", + } + + smoothed = smooth_labels_majority(cls_labels, smooth_label_window) + top_name, vote_n = Counter(smoothed).most_common(1)[0] + a1, ap1 = mean_softmax_top3(cls_prob_rows, cls_names) + return { + "consumable": top_name, + "n_hand_frames": n_hand_frames, + "n_gate_pass": n_gate_pass, + "n_predictions": len(cls_labels), + "top_vote_count": int(vote_n), + "avg_top1_cls": a1[0], + "avg_top1_prob": f"{ap1[0]:.6f}", + "avg_top2_cls": a1[1], + "avg_top2_prob": f"{ap1[1]:.6f}", + "avg_top3_cls": a1[2], + "avg_top3_prob": f"{ap1[2]:.6f}", + } + + +def main() -> int: + ap = argparse.ArgumentParser( + description="手检 + 逐帧 top1=good 且 top1conf>阈值门控 + 耗材分类;段内众数" + ) + ap.add_argument( + "--segments", + type=Path, + default=Path(__file__).resolve().parent + / "results" + / "03视频_segments_mutual_exclusive_score_gt_0.1.txt", + ) + ap.add_argument( + "--video", + type=Path, + default=CODE_ROOT.parent + / "data/haocai/5月6号视频/5月6日第二次视频/03视频.mp4", + ) + ap.add_argument( + "--hand-model", + type=Path, + default=CODE_ROOT + / "hand_detection/runs/hand_det_y11s_multiframe-better/weights/best.pt", + ) + ap.add_argument( + "--goodbad-model", + type=Path, + default=CODE_ROOT + / "goodORbad_frame/runs/goodbad_frame_y11m_e50/weights/best.pt", + ) + ap.add_argument( + "--haocai-model", + type=Path, + default=CODE_ROOT + / "haocai_classify/runs/haocai_cls_41cls_goodframe_lastest-0.95" + / "weights/best.pt", + ) + ap.add_argument( + "--out", + type=Path, + default=Path(__file__).resolve().parent + / "results" + / "03视频_segments_consumables.txt", + ) + ap.add_argument( + "--good-top1-conf-threshold", + type=float, + default=0.90, + dest="good_top1_conf_threshold", + help="逐帧:仅当 top1 为 good 且 top1conf **严格大于**该值时才跑耗材分类(默认对应 top1conf>0.9)", + ) + ap.add_argument( + "--haocai-min-conf", + type=float, + default=0.0, + 
metavar="P", + help="耗材:仅 softmax 最大值 **严格大于** P 的帧计入标签与 softmax 统计(0 表示不按耗材置信度筛)", + ) + ap.add_argument( + "--smooth-label-window", + type=int, + default=1, + metavar="W", + help="耗材标签平滑:长度为 W 的奇数滑动窗口内多数票(W≤1 不平滑);众数取平滑后的序列", + ) + ap.add_argument("--det-conf", type=float, default=0.5) + ap.add_argument("--pad-ratio", type=float, default=0.30) + ap.add_argument("--imgsz-det", type=int, default=640) + ap.add_argument("--imgsz-cls", type=int, default=224) + ap.add_argument( + "--frame-stride", + type=int, + default=1, + help=">1 时代码逐帧解码但每 N 帧推理一次(省算力,结论可能略粗糙)", + ) + ap.add_argument( + "--seek-margin-sec", + type=float, + default=3.0, + help="HEVC 等非关键帧 seek 时往回多跳若干秒再解码到段起点,减轻花屏", + ) + args = ap.parse_args() + + seg_path = args.segments.resolve() + vid_path = args.video.resolve() + if not seg_path.is_file(): + print("找不到时间段文件:", seg_path, file=sys.stderr) + return 1 + if not vid_path.is_file(): + print("找不到视频:", vid_path, file=sys.stderr) + return 1 + for pt, lab in ( + (args.hand_model, "hand"), + (args.goodbad_model, "good/bad"), + (args.haocai_model, "haocai cls"), + ): + if not Path(pt).is_file(): + print(f"缺少{lab} 权重:", pt, file=sys.stderr) + return 1 + + segments = parse_segments_txt(seg_path) + if not segments: + print("时间段为空:", seg_path, file=sys.stderr) + return 1 + + print("加载模型…", flush=True) + det = YOLO(str(args.hand_model)) + gb = YOLO(str(args.goodbad_model)) + cls_m = YOLO(str(args.haocai_model)) + gb_names = gb.names + cls_names = cls_m.names + + cap = cv2.VideoCapture(str(vid_path)) + if not cap.isOpened(): + print("无法打开视频:", vid_path, file=sys.stderr) + return 1 + + sep = "\t" + out_lines = [ + sep.join([ + "rank", + "start_sec", + "end_sec", + "consumable", + "n_hand_frames", + "n_frames_top1_good_conf_gt_thresh", + "n_consumable_predictions", + "top_label_vote_count", + "avg_softmax_top1_cls", + "avg_softmax_top1_prob", + "avg_softmax_top2_cls", + "avg_softmax_top2_prob", + "avg_softmax_top3_cls", + "avg_softmax_top3_prob", + ]) + ] + + try: + 
for rank, t0, t1 in segments: + print(f"段落 rank={rank} [{t0:.3f},{t1:.3f}]s …", flush=True) + info = process_segment( + cap, + det, + gb, + cls_m, + start_sec=t0, + end_sec=t1, + seek_margin_sec=args.seek_margin_sec, + det_conf=args.det_conf, + pad_ratio=args.pad_ratio, + imgsz_det=args.imgsz_det, + imgsz_cls=args.imgsz_cls, + frame_stride=max(1, args.frame_stride), + good_top1_conf_threshold=args.good_top1_conf_threshold, + haocai_min_conf=args.haocai_min_conf, + smooth_label_window=max(1, args.smooth_label_window), + gb_names=gb_names, + cls_names=cls_names, + ) + row = sep.join([ + str(rank), + f"{t0:.6f}", + f"{t1:.6f}", + str(info["consumable"]), + str(info["n_hand_frames"]), + str(info["n_gate_pass"]), + str(info["n_predictions"]), + str(info["top_vote_count"]), + info["avg_top1_cls"], + info["avg_top1_prob"], + info["avg_top2_cls"], + info["avg_top2_prob"], + info["avg_top3_cls"], + info["avg_top3_prob"], + ]) + out_lines.append(row) + print( + f" -> {info['consumable']} " + f"(votes {info['top_vote_count']}/{info['n_predictions']}, " + f"goodgate {info['n_gate_pass']}/{info['n_hand_frames']} hand frames)", + flush=True, + ) + finally: + cap.release() + + out_path = args.out.resolve() + out_path.parent.mkdir(parents=True, exist_ok=True) + out_path.write_text("\n".join(out_lines) + "\n", encoding="utf-8") + print("已写出:", out_path, flush=True) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/code/video_clip_cls/scripts/pipeline/__init__.py b/code/video_clip_cls/scripts/pipeline/__init__.py new file mode 100644 index 0000000..709eed1 --- /dev/null +++ b/code/video_clip_cls/scripts/pipeline/__init__.py @@ -0,0 +1 @@ +"""可复用的多模型段内处理子模块。""" diff --git a/code/video_clip_cls/scripts/pipeline/gap_adjacent_merge.py b/code/video_clip_cls/scripts/pipeline/gap_adjacent_merge.py new file mode 100644 index 0000000..a4fe618 --- /dev/null +++ b/code/video_clip_cls/scripts/pipeline/gap_adjacent_merge.py @@ -0,0 +1,132 @@ +"""相邻成功段 gap 
_GAP_EPS = 1e-9


def span_key(t0: float, t1: float) -> tuple[float, float]:
    """Build a lookup key from a time span, rounding both ends to 6 decimals."""
    start = round(float(t0), 6)
    end = round(float(t1), 6)
    return (start, end)


def group_rows_by_gap(
    rows: list[E2eRow],
    max_gap_sec: float = 2.0,
) -> list[list[E2eRow]]:
    """Greedily group consecutive successful rows, scanning left to right.

    A successful row joins the currently open group when the gap between its
    ``start_sec`` and the previous member's ``end_sec`` is smaller than
    ``max_gap_sec`` (minus a tiny epsilon). A failed row always forms a group
    of its own and closes any open group, so merging never crosses it.
    """
    limit = float(max_gap_sec) - _GAP_EPS
    groups: list[list[E2eRow]] = []
    open_run: list[E2eRow] = []
    for row in rows:
        if not row.is_success():
            if open_run:
                groups.append(open_run)
                open_run = []
            groups.append([row])
            continue
        if open_run and float(row.start_sec) - float(open_run[-1].end_sec) < limit:
            open_run.append(row)
            continue
        if open_run:
            groups.append(open_run)
        open_run = [row]
    if open_run:
        groups.append(open_run)
    return groups


def e2e_row_from_pairs(
    start_sec: float,
    end_sec: float,
    pairs: list[tuple[str, float]],
    product_map: dict[str, str],
    *,
    rank: int = 0,
) -> E2eRow:
    """Aggregate ``pairs`` with ``aggregate_top3_votes`` and build one ``E2eRow``.

    The name/confidence lists are padded to three slots. For an empty name the
    product id and the confidence text are empty strings; otherwise the id is
    looked up in ``product_map`` (default ``""``) and the confidence is
    formatted with six decimals.
    """
    names, confs = aggregate_top3_votes(pairs)
    top_names = (names + ["", "", ""])[:3]
    top_confs = (confs + [0.0, 0.0, 0.0])[:3]

    slots: dict[str, str] = {}
    for pos, (label, conf) in enumerate(zip(top_names, top_confs), start=1):
        slots[f"id{pos}"] = product_map.get(label, "") if label else ""
        slots[f"n{pos}"] = label
        slots[f"c{pos}"] = f"{conf:.6f}" if label else ""

    return E2eRow(
        rank=rank,
        start_sec=float(start_sec),
        end_sec=float(end_sec),
        **slots,
    )
@dataclass
class HandMergeConfig:
    """Criteria for merging two hand boxes into one ROI (any satisfied criterion merges)."""

    # Merge when IoU is strictly greater than this value; the default 0 means
    # any overlap (IoU > 0) merges.
    merge_iou_gt: float = 0.0
    # Merge when the Euclidean distance between box centres (pixels) does not
    # exceed this value; None disables the criterion.
    merge_center_dist_max_px: float | None = None
    # Merge when the centre distance does not exceed frame_diag * this ratio;
    # None disables the criterion.
    merge_center_dist_max_frac_diag: float | None = None


def bbox_area_xyxy(b: list[float]) -> float:
    """Area of an xyxy box; negative width/height count as zero."""
    left, top, right, bottom = b
    width = max(0.0, right - left)
    height = max(0.0, bottom - top)
    return width * height


def bbox_iou_xyxy(a: list[float], b: list[float]) -> float:
    """Intersection-over-union of two axis-aligned xyxy boxes."""
    ax1, ay1, ax2, ay2 = a
    bx1, by1, bx2, by2 = b
    overlap_w = max(0.0, min(ax2, bx2) - max(ax1, bx1))
    overlap_h = max(0.0, min(ay2, by2) - max(ay1, by1))
    inter = overlap_w * overlap_h
    if inter <= 0:
        return 0.0
    union = bbox_area_xyxy(a) + bbox_area_xyxy(b) - inter
    # Guard against a degenerate (near-zero) union before dividing.
    return 0.0 if union <= 1e-12 else inter / union


def bbox_center(xyxy: list[float]) -> tuple[float, float]:
    """Centre point ``(cx, cy)`` of an xyxy box."""
    x1, y1, x2, y2 = xyxy
    cx = 0.5 * (x1 + x2)
    cy = 0.5 * (y1 + y2)
    return (cx, cy)


def bbox_center_distance(a: list[float], b: list[float]) -> float:
    """Euclidean distance between the centres of two xyxy boxes."""
    (acx, acy), (bcx, bcy) = bbox_center(a), bbox_center(b)
    dx = acx - bcx
    dy = acy - bcy
    return float((dx * dx + dy * dy) ** 0.5)


def union_xyxy(a: list[float], b: list[float]) -> list[float]:
    """Smallest axis-aligned box enclosing both inputs (same coordinate space)."""
    ax1, ay1, ax2, ay2 = a
    bx1, by1, bx2, by2 = b
    left, top = min(ax1, bx1), min(ay1, by1)
    right, bottom = max(ax2, bx2), max(ay2, by2)
    return [left, top, right, bottom]


def two_largest_hands(hands: list[list[float]]) -> tuple[list[float], list[float]]:
    """Return the two boxes with the largest areas, largest first.

    The caller must pass at least two boxes.
    """
    by_area = sorted(hands, key=bbox_area_xyxy, reverse=True)
    return by_area[0], by_area[1]


def hands_should_merge(
    h1: list[float],
    h2: list[float],
    cfg: HandMergeConfig,
    frame_diag: float,
) -> bool:
    """Decide whether two hand boxes merge under ``cfg``.

    True when the IoU exceeds ``cfg.merge_iou_gt``, or when the centre distance
    is within the absolute pixel limit, or within the limit expressed as a
    fraction of ``frame_diag``. Disabled (``None``) limits are ignored.
    """
    if bbox_iou_xyxy(h1, h2) > cfg.merge_iou_gt + 1e-12:
        return True
    dist = bbox_center_distance(h1, h2)
    px_cap = cfg.merge_center_dist_max_px
    frac_cap = cfg.merge_center_dist_max_frac_diag
    within_px = px_cap is not None and dist <= px_cap + 1e-12
    within_frac = frac_cap is not None and dist <= frac_cap * frame_diag + 1e-12
    return within_px or within_frac
pad_ratio + + def frame_to_rois( + self, + frame: np.ndarray, + hands: list[list[float]], + ) -> list[np.ndarray]: + """ + 从整帧与手框列表得到本帧用于分类的小图列表。 + 至少两只手:取面积最大的两只,合并外接框后 1 张;否则返回空(跳过该帧)。 + """ + h, w = frame.shape[:2] + if len(hands) < 2: + return [] + + h1, h2 = two_largest_hands(hands) + uni = union_xyxy(h1, h2) + x1, y1, x2, y2 = self.pad_box_fn(uni, w, h, self.pad_ratio) + crop = np.ascontiguousarray(frame[y1:y2, x1:x2].copy()) + return [crop] diff --git a/code/video_clip_cls/scripts/pipeline/segment_processor.py b/code/video_clip_cls/scripts/pipeline/segment_processor.py new file mode 100644 index 0000000..3e3b7da --- /dev/null +++ b/code/video_clip_cls/scripts/pipeline/segment_processor.py @@ -0,0 +1,916 @@ +""" +单段时间范围内的流式解码:多手部 ROI → 好帧门控 → 耗材 + 撕膜分类,汇总投票样本。 + +不将整段视频载入内存;每帧处理后可 del 大图与 ROI(由调用方循环内负责)。 +""" +from __future__ import annotations + +import gc +import sys +from collections import Counter +from pathlib import Path +from typing import Any, Callable + +import cv2 +import numpy as np + +for _repo in Path(__file__).resolve().parents: + if (_repo / "repo_root.py").is_file() and (_repo / "dataset.py").is_file(): + CODE_ROOT = _repo + if str(_repo) not in sys.path: + sys.path.insert(0, str(_repo)) + break +else: + raise RuntimeError("未定位到仓库 code/ 根目录") + +_SCRIPTS = CODE_ROOT / "video_clip_cls" / "scripts" +if str(_SCRIPTS) not in sys.path: + sys.path.insert(0, str(_SCRIPTS)) + +_INF = CODE_ROOT / "video_clip_cls" / "infer_single_0506" +if str(_INF) not in sys.path: + sys.path.insert(0, str(_INF)) + +import run_segments_consumable_vote as _rsv # noqa: E402 +from run_haocai_actionformer_consumables_e2e import ( # noqa: E402 + aggregate_top3_votes, + mask_probs_whitelist, +) +from ultralytics import YOLO # noqa: E402 + +from pipeline.hand_roi_merge import HandRoiGrouper, two_largest_hands, union_xyxy # noqa: E402 + +# 与 run_haocai_actionformer_consumables_e2e 段内失败 return 文案一致,供 Phase2 重试判断 +REASON_NO_VALID_HAOCAI_FRAMES = "(无有效耗材帧:好帧/白名单/耗材置信度未全部满足)" +# 
def _float_top1conf(pr: Any) -> float:
    """Read ``pr.top1conf`` as a Python float; ``None`` becomes ``0.0``."""
    tc = pr.top1conf
    if tc is None:
        return 0.0
    if isinstance(tc, (float, int, np.floating)):
        return float(tc)
    # Anything else is treated as a tensor-like object.
    return float(tc.detach().float().cpu().item())


def passes_good_gate_top1_conf_kw(
    gb_model: YOLO,
    crop: np.ndarray,
    gb_names: dict,
    imgsz: int,
    top1_conf_must_exceed: float,
    predict_kw: dict[str, Any],
) -> bool:
    """Good/bad gate that forwards extra keyword arguments to ``predict``.

    True only when the top-1 label is ``good`` (case/whitespace-insensitive)
    and the top-1 confidence is strictly above the threshold. An empty crop or
    a result without ``probs`` yields ``False``.
    """
    if crop.size == 0:
        return False
    r = gb_model.predict(crop, imgsz=imgsz, verbose=False, **predict_kw)[0]
    pr = r.probs
    if pr is None:
        return False
    tid = int(pr.top1)
    label = str(gb_names.get(tid, "")).strip().lower()
    conf = _float_top1conf(pr)
    return label == "good" and conf > top1_conf_must_exceed


def aggregate_top2_votes(
    pairs: list[tuple[str, float]],
) -> tuple[list[str], list[float]]:
    """Vote over the labels in ``pairs`` and return the top two with normalised counts.

    Labels are ranked by vote count (ties broken by label name). The reported
    confidence of each is its count divided by the combined count of the
    labels returned, so the two values sum to 1. The float in each pair is not
    used. Unfilled slots are ``""`` / ``0.0``.
    """
    empty = (["", ""], [0.0, 0.0])
    if not pairs:
        return empty
    cnt = Counter(p[0] for p in pairs)
    ranked = sorted(cnt.items(), key=lambda x: (-x[1], x[0]))
    top = ranked[:2]
    if not top:
        return empty
    total = float(sum(c for _, c in top))
    if total <= 0:
        return empty
    out_names: list[str] = ["", ""]
    out_conf: list[float] = [0.0, 0.0]
    for i, (nm, c) in enumerate(top):
        out_names[i] = nm
        out_conf[i] = float(c) / total
    return out_names, out_conf


def _clip_xyxy(box: np.ndarray, img_w: int, img_h: int) -> np.ndarray:
    """Clamp an xyxy box to ``[0, img_w - 1] x [0, img_h - 1]`` as float32.

    When a clamped box has ``x2 <= x1`` (or ``y2 <= y1``) the far edge is
    pushed one pixel past the near edge, capped at the image border.
    """
    x1, y1, x2, y2 = [float(v) for v in box]
    x1 = max(0.0, min(x1, img_w - 1.0))
    y1 = max(0.0, min(y1, img_h - 1.0))
    x2 = max(0.0, min(x2, img_w - 1.0))
    y2 = max(0.0, min(y2, img_h - 1.0))
    if x2 <= x1:
        x2 = min(img_w - 1.0, x1 + 1.0)
    if y2 <= y1:
        y2 = min(img_h - 1.0, y1 + 1.0)
    return np.array([x1, y1, x2, y2], dtype=np.float32)


def _fuse_hands_to_one_box(hands: list[list[float]], img_w: int, img_h: int) -> np.ndarray | None:
    """Fuse all hand boxes into one enclosing xyxy box, clipped to the image.

    Returns ``None`` when ``hands`` is empty or is not a 2-D array with at
    least four columns.
    """
    if not hands:
        return None
    arr = np.asarray(hands, dtype=np.float32)
    if arr.ndim != 2 or arr.shape[1] < 4:
        return None
    x1 = float(np.min(arr[:, 0]))
    y1 = float(np.min(arr[:, 1]))
    x2 = float(np.max(arr[:, 2]))
    y2 = float(np.max(arr[:, 3]))
    fused = np.array([x1, y1, x2, y2], dtype=np.float32)
    return _clip_xyxy(fused, img_w, img_h)


def _crop_two_hands_union(
    fr: np.ndarray,
    hands: list[list[float]],
    pad_ratio: float,
) -> np.ndarray | None:
    """Crop the padded union of the two largest hand boxes.

    Returns ``None`` (frame skipped) when fewer than two hands are given. The
    result is a slice of ``fr``, not a copy.
    """
    if len(hands) < 2:
        return None
    img_h, img_w = fr.shape[:2]
    h1, h2 = two_largest_hands(hands)
    uni = union_xyxy(h1, h2)
    x1, y1, x2, y2 = pad_box(uni, img_w, img_h, pad_ratio)
    return fr[y1:y2, x1:x2]


class FineGrainedClassifier:
    """Thin wrapper around the good/bad, consumable and tear classifiers.

    Holds the three models with their class-name maps so every ``predict``
    call shares the same image size and extra keyword arguments.
    """

    def __init__(
        self,
        gb: YOLO,
        cls_m: YOLO,
        tear_m: YOLO,
        *,
        gb_names: dict,
        cls_names: dict,
        tear_names: dict,
        imgsz_cls: int,
        predict_kw: dict[str, Any],
    ) -> None:
        self.gb = gb
        self.cls_m = cls_m
        self.tear_m = tear_m
        self.gb_names = gb_names
        self.cls_names = cls_names
        self.tear_names = tear_names
        self.imgsz_cls = imgsz_cls
        self.predict_kw = predict_kw

    def passes_good(
        self,
        crop: np.ndarray,
        good_top1_conf_threshold: float,
    ) -> bool:
        """Apply the good-frame gate to ``crop`` with this instance's settings."""
        return passes_good_gate_top1_conf_kw(
            self.gb,
            crop,
            self.gb_names,
            self.imgsz_cls,
            good_top1_conf_threshold,
            self.predict_kw,
        )

    def haocai_label_top_prob(
        self,
        crop: np.ndarray,
        n_cls: int,
        allowed_class_idx: frozenset[int] | None,
        haocai_min_conf: float,
    ) -> tuple[str, float] | None:
        """Classify the consumable in ``crop`` and return ``(class_name, top_probability)``.

        Returns ``None`` when the crop is empty, the model gives no usable
        probabilities, whitelist masking yields ``None``, or the top
        probability is not strictly greater than ``haocai_min_conf``.

        A model output shorter than ``n_cls`` is zero-padded after
        normalisation. (``np.resize`` would tile the existing values into the
        missing slots and distort the distribution.)
        """
        if crop.size == 0:
            return None
        r = self.cls_m.predict(crop, imgsz=self.imgsz_cls, verbose=False, **self.predict_kw)[0]
        pr = r.probs
        if pr is None or pr.data is None:
            return None
        v = pr.data.detach().float().cpu().numpy().astype(np.float64).ravel()
        v = v[:n_cls].copy()
        s = float(np.sum(v))
        if s <= 1e-12:
            return None
        if abs(s - 1.0) > 0.08:
            # Values do not look normalised: apply a numerically stable softmax.
            v = v - float(np.max(v))
            e = np.exp(np.clip(v, -40.0, 40.0))
            vec_raw = e / float(np.sum(e))
        else:
            vec_raw = v / s
        if vec_raw.size < n_cls:
            padded = np.zeros(n_cls, dtype=np.float64)
            padded[: vec_raw.size] = vec_raw
            vec_raw = padded
        if allowed_class_idx is not None:
            vec = mask_probs_whitelist(vec_raw, allowed_class_idx, n_cls)
        else:
            vec = vec_raw
        if vec is None:
            return None
        top_prob = float(np.max(vec))
        if top_prob <= haocai_min_conf:
            return None
        label = int(np.argmax(vec))
        return _cls_name(self.cls_names, label), top_prob

    def tear_label_top_conf(self, crop: np.ndarray) -> tuple[str, float] | None:
        """Return the tear classifier's top-1 ``(label, confidence)``, or ``None``.

        ``None`` is returned for an empty crop or a result without ``probs``.
        Unknown class ids fall back to the id as a string.
        """
        if crop.size == 0:
            return None
        r = self.tear_m.predict(crop, imgsz=self.imgsz_cls, verbose=False, **self.predict_kw)[0]
        pr = r.probs
        if pr is None:
            return None
        tid = int(pr.top1)
        conf = _float_top1conf(pr)
        return str(self.tear_names.get(tid, str(tid))).strip(), conf


def _maybe_cuda_empty_cache(every: int, frame_idx: int) -> None:
    """Every ``every`` frames, run ``gc.collect()`` and empty the CUDA cache.

    Does nothing when ``every <= 0`` or ``frame_idx`` is not a multiple of
    ``every``. A missing ``torch`` is ignored on purpose (best effort).
    """
    if every <= 0:
        return
    if frame_idx % every != 0:
        return
    gc.collect()
    try:
        import torch

        if torch.cuda.is_available():
            torch.cuda.empty_cache()
    except ImportError:
        pass
cap.set(cv2.CAP_PROP_POS_MSEC, probe_from * 1000.0) + synced_frame: np.ndarray | None = None + synced_t: float | None = None + tol = 0.04 + while True: + ok0, grab = cap.read() + if not ok0 or grab is None: + synced_frame, synced_t = None, None + break + t0 = float(cap.get(cv2.CAP_PROP_POS_MSEC)) / 1000.0 + if t0 + tol >= start_sec: + synced_frame, synced_t = grab, t0 + break + + n_cls_key_max = max(int(k) for k in cls_names.keys()) + n_cls = n_cls_key_max + 1 + + n_hand_frames = 0 + n_gate_pass = 0 + # pairs_h 存放段内耗材候选 (类名, 置信度),后续会做“按置信度加权”的段内投票聚合。 + # 仅记录通过门控的样本;失败分支仍按是否为空来判定,不改变既有逻辑。 + pairs_h: list[tuple[str, float]] = [] + pairs_t: list[tuple[str, float]] = [] + frames_read_in_segment = 0 + + def one_frame(fr: np.ndarray) -> None: + nonlocal frames_read_in_segment, n_hand_frames, n_gate_pass, pairs_h, pairs_t + frames_read_in_segment += 1 + idx_local = frames_read_in_segment + _maybe_cuda_empty_cache(empty_cache_every, idx_local) + + if frame_stride > 1 and (frames_read_in_segment - 1) % frame_stride != 0: + return + + r0 = det.predict(fr, conf=det_conf, imgsz=imgsz_det, verbose=False, **fg.predict_kw)[0] + hands = collect_hand_boxes(det, r0.boxes) if r0.boxes else [] + if len(hands) < 2: + return + + n_hand_frames += 1 + rois = grouper.frame_to_rois(fr, hands) + if not rois: + return + for crop in rois: + if not fg.passes_good(crop, good_top1_conf_threshold): + del crop + continue + n_gate_pass += 1 + hc = fg.haocai_label_top_prob( + crop, n_cls, allowed_class_idx, haocai_min_conf + ) + tr = fg.tear_label_top_conf(crop) + del crop + if hc is not None: + pairs_h.append(hc) + if tr is not None: + pairs_t.append(tr) + + if synced_frame is not None and synced_t is not None and synced_t <= end_sec + 0.08: + one_frame(synced_frame) + del synced_frame + synced_frame = None + + while True: + ok, frame = cap.read() + if not ok or frame is None: + break + t = float(cap.get(cv2.CAP_PROP_POS_MSEC)) / 1000.0 + if t > end_sec + 0.08: + del frame + break + if t + 1e-6 < 
start_sec: + del frame + continue + one_frame(frame) + del frame + + gc.collect() + if empty_cache_every > 0: + try: + import torch + + if torch.cuda.is_available(): + torch.cuda.empty_cache() + except ImportError: + pass + + if n_hand_frames == 0: + return {"ok": False, "reason": "(段内未检测到手部)", "pairs_h": [], "pairs_t": [], "n_gate_pass": 0} + if not pairs_h: + return { + "ok": False, + "reason": REASON_NO_VALID_HAOCAI_FRAMES, + "pairs_h": [], + "pairs_t": pairs_t, + "n_hand_frames": n_hand_frames, + "n_gate_pass": n_gate_pass, + } + + n1, c1 = aggregate_top3_votes(pairs_h) + t1, t2 = aggregate_top2_votes(pairs_t) + return { + "ok": True, + "top_names": n1, + "top_confs": c1, + "tear_top_names": t1, + "tear_top_confs": t2, + "pairs_h": pairs_h, + "pairs_t": pairs_t, + "n_hand_frames": n_hand_frames, + "n_gate_pass": n_gate_pass, + "n_valid_haocai": len(pairs_h), + } + + +def process_segment_multi_hand_tear_with_gate_retries( + cap: cv2.VideoCapture, + det: YOLO, + fg: FineGrainedClassifier, + grouper: HandRoiGrouper, + *, + start_sec: float, + end_sec: float, + seek_margin_sec: float, + det_conf: float, + imgsz_det: int, + frame_stride: int, + good_top1_conf_threshold: float, + good_top1_retry_threshold: float, + haocai_min_conf: float, + haocai_min_conf_retry: float | None, + cls_names: dict, + allowed_class_idx: frozenset[int] | None, + empty_cache_every: int = 0, + log_fn: Callable[[str], None] | None = None, + log_prefix: str | None = None, + tracking_alpha: float = 0.6, + tracking_max_lost_frames: int = 0, +) -> dict[str, Any]: + """ + 先跑段内推理;若仍为「无有效耗材帧」则: + 1) 可放宽好帧 top1 阈值(good_top1_retry_threshold)再试; + 2) 再放宽耗材置信阈值(haocai_min_conf_retry)再试。 + log_fn / log_prefix:重试时各打一行(如 log_prefix='段落 rank=3: ')。 + """ + + def run(good_thr: float, haocai_thr: float) -> dict[str, Any]: + return process_segment_multi_hand_tear( + cap, + det, + fg, + grouper, + start_sec=start_sec, + end_sec=end_sec, + seek_margin_sec=seek_margin_sec, + det_conf=det_conf, + 
imgsz_det=imgsz_det, + frame_stride=frame_stride, + tracking_alpha=tracking_alpha, + tracking_max_lost_frames=tracking_max_lost_frames, + good_top1_conf_threshold=good_thr, + haocai_min_conf=haocai_thr, + cls_names=cls_names, + allowed_class_idx=allowed_class_idx, + empty_cache_every=empty_cache_every, + ) + + good_thr = float(good_top1_conf_threshold) + haocai_thr = float(haocai_min_conf) + info = run(good_thr, haocai_thr) + + rgb = float(good_top1_retry_threshold) + if ( + not info.get("ok") + and str(info.get("reason", "")) == REASON_NO_VALID_HAOCAI_FRAMES + and rgb > 0 + and rgb < good_thr - 1e-12 + ): + if log_fn and log_prefix: + log_fn( + f"{log_prefix}以 good_top1_conf_threshold={rgb} 重试本段(无有效耗材帧)…" + ) + good_thr = rgb + info = run(good_thr, haocai_thr) + + if ( + haocai_min_conf_retry is not None + and haocai_min_conf_retry > 1e-12 + and haocai_min_conf_retry < haocai_thr - 1e-12 + ): + if ( + not info.get("ok") + and str(info.get("reason", "")) == REASON_NO_VALID_HAOCAI_FRAMES + ): + h2 = float(haocai_min_conf_retry) + if log_fn and log_prefix: + log_fn( + f"{log_prefix}以 haocai_min_conf={h2} 重试本段(无有效耗材帧)…" + ) + info = run(good_thr, h2) + + return info + + +class HaocaiOnlyClassifier: + """耗材分类(推流/TSV 离线);可选好坏帧门控,无撕膜。""" + + def __init__( + self, + cls_m: YOLO, + *, + cls_names: dict, + imgsz_cls: int, + predict_kw: dict[str, Any], + gb: YOLO | None = None, + gb_names: dict | None = None, + ) -> None: + self.cls_m = cls_m + self.cls_names = cls_names + self.imgsz_cls = imgsz_cls + self.predict_kw = predict_kw + self.gb = gb + self.gb_names = gb_names or {} + + @property + def use_good_gate(self) -> bool: + return self.gb is not None + + def passes_good(self, crop: np.ndarray, good_top1_conf_threshold: float) -> bool: + if self.gb is None: + return True + return passes_good_gate_top1_conf_kw( + self.gb, + crop, + self.gb_names, + self.imgsz_cls, + good_top1_conf_threshold, + self.predict_kw, + ) + + def haocai_label_top_prob( + self, + crop: np.ndarray, + 
n_cls: int, + allowed_class_idx: frozenset[int] | None, + haocai_min_conf: float, + ) -> tuple[str, float] | None: + if crop.size == 0: + return None + r = self.cls_m.predict(crop, imgsz=self.imgsz_cls, verbose=False, **self.predict_kw)[0] + pr = r.probs + if pr is None or pr.data is None: + return None + v = pr.data.detach().float().cpu().numpy().astype(np.float64).ravel() + if v.size < n_cls: + v = np.resize(v, n_cls) + v = v[:n_cls].copy() + s = float(np.sum(v)) + if s <= 1e-12: + return None + if abs(s - 1.0) > 0.08: + v = v - float(np.max(v)) + e = np.exp(np.clip(v, -40.0, 40.0)) + vec_raw = e / float(np.sum(e)) + else: + vec_raw = v / s + if allowed_class_idx is not None: + vec = mask_probs_whitelist(vec_raw, allowed_class_idx, n_cls) + else: + vec = vec_raw + if vec is None: + return None + top_prob = float(np.max(vec)) + if top_prob <= haocai_min_conf: + return None + label = int(np.argmax(vec)) + return _cls_name(self.cls_names, label), top_prob + + +def _haocai_fail_reason(hc: HaocaiOnlyClassifier) -> str: + if hc.use_good_gate: + return REASON_NO_VALID_HAOCAI_FRAMES + return REASON_NO_VALID_HAOCAI_FRAMES_STREAM + + +def process_segment_haocai_from_frames( + frames: list[tuple[float, np.ndarray]], + det: YOLO, + hc: HaocaiOnlyClassifier, + *, + start_sec: float, + end_sec: float, + det_conf: float, + pad_ratio: float, + imgsz_det: int, + frame_stride: int, + haocai_min_conf: float, + good_top1_conf_threshold: float = 0.9, + cls_names: dict, + allowed_class_idx: frozenset[int] | None, + predict_kw: dict[str, Any] | None = None, +) -> dict[str, Any]: + """ + 对内存中的帧列表做耗材识别(手 → 可选好帧 → haocai),不含撕膜。 + frames: [(t_sec, bgr), ...] 
已按时间过滤到 [start_sec, end_sec]。 + """ + if not frames: + return {"ok": False, "reason": "(段内无帧)", "pairs": [], "n_gate_pass": 0} + + pred_kw = dict(predict_kw or {}) + n_cls_key_max = max(int(k) for k in cls_names.keys()) + n_cls = n_cls_key_max + 1 + + n_hand_frames = 0 + n_gate_pass = 0 + pairs: list[tuple[str, float]] = [] + frames_in_segment = 0 + + def one_frame(fr: np.ndarray) -> None: + nonlocal frames_in_segment, n_hand_frames, n_gate_pass, pairs + frames_in_segment += 1 + if frame_stride > 1 and (frames_in_segment - 1) % frame_stride != 0: + return + + r0 = det.predict(fr, conf=det_conf, imgsz=imgsz_det, verbose=False, **pred_kw)[0] + hands = collect_hand_boxes(det, r0.boxes) if r0.boxes else [] + crop = _crop_two_hands_union(fr, hands, pad_ratio) + if crop is None: + return + + n_hand_frames += 1 + if hc.use_good_gate and not hc.passes_good(crop, good_top1_conf_threshold): + del crop + return + n_gate_pass += 1 + label_prob = hc.haocai_label_top_prob( + crop, n_cls, allowed_class_idx, haocai_min_conf + ) + del crop + if label_prob is not None: + pairs.append(label_prob) + + lo = float(start_sec) + hi = float(end_sec) + for t, fr in frames: + if t + 1e-6 < lo: + continue + if t > hi + 0.08: + break + one_frame(fr) + + if n_hand_frames == 0: + return {"ok": False, "reason": "(段内未检测到手部)", "pairs": [], "n_gate_pass": 0} + if not pairs: + return { + "ok": False, + "reason": _haocai_fail_reason(hc), + "pairs": [], + "n_hand_frames": n_hand_frames, + "n_gate_pass": n_gate_pass, + } + + n1, c1 = aggregate_top3_votes(pairs) + return { + "ok": True, + "top_names": n1, + "top_confs": c1, + "pairs": pairs, + "n_hand_frames": n_hand_frames, + "n_gate_pass": n_gate_pass, + "n_valid_haocai": len(pairs), + } + + +def process_segment_haocai_from_cap( + cap: cv2.VideoCapture, + det: YOLO, + hc: HaocaiOnlyClassifier, + *, + start_sec: float, + end_sec: float, + seek_margin_sec: float, + det_conf: float, + pad_ratio: float, + imgsz_det: int, + frame_stride: int, + 
haocai_min_conf: float, + good_top1_conf_threshold: float = 0.9, + cls_names: dict, + allowed_class_idx: frozenset[int] | None, + predict_kw: dict[str, Any] | None = None, +) -> dict[str, Any]: + """从视频逐帧解码做耗材识别(手 → 可选好帧 → haocai),不含撕膜。""" + probe_from = float(max(0.0, start_sec - seek_margin_sec)) + cap.set(cv2.CAP_PROP_POS_MSEC, probe_from * 1000.0) + synced_frame: np.ndarray | None = None + synced_t: float | None = None + tol = 0.04 + while True: + ok0, grab = cap.read() + if not ok0 or grab is None: + synced_frame, synced_t = None, None + break + t0 = float(cap.get(cv2.CAP_PROP_POS_MSEC)) / 1000.0 + if t0 + tol >= start_sec: + synced_frame, synced_t = grab, t0 + break + + pred_kw = dict(predict_kw or {}) + n_cls_key_max = max(int(k) for k in cls_names.keys()) + n_cls = n_cls_key_max + 1 + + n_hand_frames = 0 + n_gate_pass = 0 + pairs: list[tuple[str, float]] = [] + frames_in_segment = 0 + + def one_frame(fr: np.ndarray) -> None: + nonlocal frames_in_segment, n_hand_frames, n_gate_pass, pairs + frames_in_segment += 1 + if frame_stride > 1 and (frames_in_segment - 1) % frame_stride != 0: + return + + img_h, img_w = fr.shape[:2] + r0 = det.predict(fr, conf=det_conf, imgsz=imgsz_det, verbose=False, **pred_kw)[0] + hands = collect_hand_boxes(det, r0.boxes) if r0.boxes else [] + crop = _crop_two_hands_union(fr, hands, pad_ratio) + if crop is None: + return + + n_hand_frames += 1 + if hc.use_good_gate and not hc.passes_good(crop, good_top1_conf_threshold): + del crop + return + n_gate_pass += 1 + label_prob = hc.haocai_label_top_prob( + crop, n_cls, allowed_class_idx, haocai_min_conf + ) + del crop + if label_prob is not None: + pairs.append(label_prob) + + lo = float(start_sec) + hi = float(end_sec) + + if synced_frame is not None and synced_t is not None and synced_t <= hi + 0.08: + if synced_t + 1e-6 >= lo: + one_frame(synced_frame) + + while True: + ok, fr = cap.read() + if not ok or fr is None: + break + t = float(cap.get(cv2.CAP_PROP_POS_MSEC)) / 1000.0 + if t > 
hi + 0.08: + break + if t + 1e-6 < lo: + continue + one_frame(fr) + + if n_hand_frames == 0: + return {"ok": False, "reason": "(段内未检测到手部)", "pairs": [], "n_gate_pass": 0} + if not pairs: + return { + "ok": False, + "reason": _haocai_fail_reason(hc), + "pairs": [], + "n_hand_frames": n_hand_frames, + "n_gate_pass": n_gate_pass, + } + + n1, c1 = aggregate_top3_votes(pairs) + return { + "ok": True, + "top_names": n1, + "top_confs": c1, + "pairs": pairs, + "n_hand_frames": n_hand_frames, + "n_gate_pass": n_gate_pass, + "n_valid_haocai": len(pairs), + } + + +def _apply_haocai_gate_retries( + run: Callable[[float, float], dict[str, Any]], + *, + hc: HaocaiOnlyClassifier, + good_top1_conf_threshold: float, + good_top1_retry_threshold: float, + haocai_min_conf: float, + haocai_min_conf_retry: float | None, + log_fn: Callable[[str], None] | None = None, + log_prefix: str | None = None, +) -> dict[str, Any]: + fail_reason = _haocai_fail_reason(hc) + good_thr = float(good_top1_conf_threshold) + haocai_thr = float(haocai_min_conf) + info = run(good_thr, haocai_thr) + + if hc.use_good_gate: + rgb = float(good_top1_retry_threshold) + if ( + not info.get("ok") + and str(info.get("reason", "")) == fail_reason + and rgb > 0 + and rgb < good_thr - 1e-12 + ): + if log_fn and log_prefix: + log_fn( + f"{log_prefix}以 good_top1_conf_threshold={rgb} 重试本段(无有效耗材帧)…" + ) + good_thr = rgb + info = run(good_thr, haocai_thr) + + if ( + haocai_min_conf_retry is not None + and haocai_min_conf_retry > 1e-12 + and haocai_min_conf_retry < haocai_thr - 1e-12 + ): + if not info.get("ok") and str(info.get("reason", "")) == fail_reason: + h2 = float(haocai_min_conf_retry) + if log_fn and log_prefix: + log_fn( + f"{log_prefix}以 haocai_min_conf={h2} 重试本段(无有效耗材帧)…" + ) + info = run(good_thr, h2) + + return info + + +def process_segment_haocai_from_frames_with_gate_retries( + frames: list[tuple[float, np.ndarray]], + det: YOLO, + hc: HaocaiOnlyClassifier, + *, + start_sec: float, + end_sec: float, + 
det_conf: float, + pad_ratio: float, + imgsz_det: int, + frame_stride: int, + haocai_min_conf: float, + haocai_min_conf_retry: float | None, + good_top1_conf_threshold: float = 0.9, + good_top1_retry_threshold: float = 0.5, + cls_names: dict, + allowed_class_idx: frozenset[int] | None, + predict_kw: dict[str, Any] | None = None, + log_fn: Callable[[str], None] | None = None, + log_prefix: str | None = None, +) -> dict[str, Any]: + """推流帧列表:好帧门控 + 耗材阈值;失败时先放宽好帧再放宽耗材。""" + + def run(good_thr: float, haocai_thr: float) -> dict[str, Any]: + return process_segment_haocai_from_frames( + frames, + det, + hc, + start_sec=start_sec, + end_sec=end_sec, + det_conf=det_conf, + pad_ratio=pad_ratio, + imgsz_det=imgsz_det, + frame_stride=frame_stride, + haocai_min_conf=haocai_thr, + good_top1_conf_threshold=good_thr, + cls_names=cls_names, + allowed_class_idx=allowed_class_idx, + predict_kw=predict_kw, + ) + + return _apply_haocai_gate_retries( + run, + hc=hc, + good_top1_conf_threshold=good_top1_conf_threshold, + good_top1_retry_threshold=good_top1_retry_threshold, + haocai_min_conf=haocai_min_conf, + haocai_min_conf_retry=haocai_min_conf_retry, + log_fn=log_fn, + log_prefix=log_prefix, + ) + + +def process_segment_haocai_from_cap_with_gate_retries( + cap: cv2.VideoCapture, + det: YOLO, + hc: HaocaiOnlyClassifier, + *, + start_sec: float, + end_sec: float, + seek_margin_sec: float, + det_conf: float, + pad_ratio: float, + imgsz_det: int, + frame_stride: int, + haocai_min_conf: float, + haocai_min_conf_retry: float | None, + good_top1_conf_threshold: float = 0.9, + good_top1_retry_threshold: float = 0.5, + cls_names: dict, + allowed_class_idx: frozenset[int] | None, + predict_kw: dict[str, Any] | None = None, + log_fn: Callable[[str], None] | None = None, + log_prefix: str | None = None, +) -> dict[str, Any]: + """离线视频逐帧解码:手 → 可选好帧 → haocai,含门控重试。""" + + def run(good_thr: float, haocai_thr: float) -> dict[str, Any]: + return process_segment_haocai_from_cap( + cap, + det, + hc, + 
start_sec=start_sec, + end_sec=end_sec, + seek_margin_sec=seek_margin_sec, + det_conf=det_conf, + pad_ratio=pad_ratio, + imgsz_det=imgsz_det, + frame_stride=frame_stride, + haocai_min_conf=haocai_thr, + good_top1_conf_threshold=good_thr, + cls_names=cls_names, + allowed_class_idx=allowed_class_idx, + predict_kw=predict_kw, + ) + + return _apply_haocai_gate_retries( + run, + hc=hc, + good_top1_conf_threshold=good_top1_conf_threshold, + good_top1_retry_threshold=good_top1_retry_threshold, + haocai_min_conf=haocai_min_conf, + haocai_min_conf_retry=haocai_min_conf_retry, + log_fn=log_fn, + log_prefix=log_prefix, + ) diff --git a/code/video_clip_cls/scripts/pipeline/tear_gate_merge.py b/code/video_clip_cls/scripts/pipeline/tear_gate_merge.py new file mode 100644 index 0000000..ab47b5c --- /dev/null +++ b/code/video_clip_cls/scripts/pipeline/tear_gate_merge.py @@ -0,0 +1,350 @@ +""" +相邻成功行若 top1 相同:在下一段开头 head_sec 内统计「撕膜」高置信帧数; +>= tear_min_frames 视为两次耗材(不合并),否则合并为一段。 + +main_pipeline 内:默认在门控窗口内 **手检 → 双手 ROI(与 Phase2 相同合并策略)→ 撕膜分类**; +若未传入 det/grouper 则退化为 **整帧** 撕膜(与旧 pack merge 脚本一致)。 +""" + +from __future__ import annotations + +from dataclasses import dataclass, replace +from typing import Any + +import cv2 +from ultralytics import YOLO + +from .hand_roi_merge import HandRoiGrouper + +try: + from run_segments_consumable_vote import collect_hand_boxes +except ImportError: # 脚本独立运行时无 path + collect_hand_boxes = None # type: ignore[misc, assignment] + + +@dataclass +class E2eRow: + rank: int + start_sec: float + end_sec: float + id1: str + n1: str + c1: str + id2: str + n2: str + c2: str + id3: str + n3: str + c3: str + + def is_success(self) -> bool: + if not self.n1.strip(): + return False + try: + float(self.c1.strip()) + return True + except ValueError: + return False + + def to_line12(self, rank: int) -> str: + r = replace(self, rank=rank) + return "\t".join( + [ + str(r.rank), + f"{r.start_sec:.6f}", + f"{r.end_sec:.6f}", + r.id1, + r.n1, + r.c1, + r.id2, + r.n2, 
+ r.c2, + r.id3, + r.n3, + r.c3, + ] + ) + + +def parse_e2e_rows_from_body_lines(lines: list[str]) -> list[E2eRow]: + rows: list[E2eRow] = [] + for i, line in enumerate(lines, start=2): + if not line.strip(): + continue + parts_line = line.split("\t") + while len(parts_line) < 12: + parts_line.append("") + parts_line = parts_line[:12] + try: + rank = int(parts_line[0]) + s = float(parts_line[1]) + e = float(parts_line[2]) + except ValueError as ex: + raise ValueError(f"第{i}行解析失败: {line[:80]}...") from ex + rows.append( + E2eRow( + rank=rank, + start_sec=s, + end_sec=e, + id1=parts_line[3], + n1=parts_line[4], + c1=parts_line[5], + id2=parts_line[6], + n2=parts_line[7], + c2=parts_line[8], + id3=parts_line[9], + n3=parts_line[10], + c3=parts_line[11], + ) + ) + return rows + + +def tear_class_index(model: YOLO, class_name: str) -> int: + names: dict[int, str] = model.names # type: ignore[assignment] + for k, v in names.items(): + if str(v).strip() == class_name: + return int(k) + lower = {str(v).strip().lower(): int(k) for k, v in names.items()} + if lower.get(class_name.lower()) is not None: + return lower[class_name.lower()] + raise ValueError(f"模型中无类别「{class_name}」,names={names}") + + +def count_tearing_frames( + cap: cv2.VideoCapture, + window_start: float, + window_end: float, + yolo: YOLO, + tear_cls: int, + tear_prob: float, + imgsz: int, + *, + predict_kw: dict[str, Any] | None = None, + det: YOLO | None = None, + grouper: HandRoiGrouper | None = None, + imgsz_det: int = 640, + det_conf: float = 0.5, +) -> int: + """[window_start, window_end) 内逐帧统计:P(tear_cls) >= tear_prob 的帧数。 + + 若提供 det+grouper:每帧先检测手,再对每个 ROI 跑撕膜;**任一 ROI** 达到阈值则该帧计 1。 + 否则对 **整帧** 跑一次撕膜(与旧 merge_e2e 一致)。 + """ + pred_tear: dict[str, Any] = {"imgsz": imgsz, "verbose": False} + pred_det: dict[str, Any] = {"imgsz": imgsz_det, "verbose": False} + if predict_kw: + pred_tear.update(predict_kw) + pred_det.update(predict_kw) + use_hand = ( + det is not None + and grouper is not None + and 
collect_hand_boxes is not None + ) + cap.set(cv2.CAP_PROP_POS_MSEC, max(0.0, window_start) * 1000.0) + cnt = 0 + while True: + ok, frame = cap.read() + if not ok or frame is None: + break + t = float(cap.get(cv2.CAP_PROP_POS_MSEC)) / 1000.0 + if t >= window_end - 1e-9: + break + if t + 1e-6 < window_start: + continue + if use_hand: + r0 = det.predict( # type: ignore[union-attr] + frame, conf=det_conf, **pred_det + )[0] + hands = collect_hand_boxes(det, r0.boxes) if r0.boxes else [] # type: ignore[arg-type] + if not hands: + continue + rois = grouper.frame_to_rois(frame, hands) # type: ignore[union-attr] + frame_hit = False + for crop in rois: + if crop is None or crop.size == 0: + continue + res = yolo.predict(crop, **pred_tear)[0] + if res.probs is None: + continue + prob_tear = float(res.probs.data[tear_cls].item()) + if prob_tear >= tear_prob - 1e-12: + frame_hit = True + break + if frame_hit: + cnt += 1 + else: + res = yolo.predict(frame, **pred_tear)[0] + if res.probs is None: + continue + prob_tear = float(res.probs.data[tear_cls].item()) + if prob_tear >= tear_prob - 1e-12: + cnt += 1 + return cnt + + +def merge_two_segments(a: E2eRow, b: E2eRow) -> E2eRow: + n1 = a.n1.strip() + fc1 = max(float(a.c1.strip()), float(b.c1.strip())) + c1s = f"{fc1:.6f}" + + id1 = a.id1.strip() or b.id1.strip() + + top1_name = n1 + cands: list[tuple[str, float, str]] = [] + for row in (a, b): + for nm, cf, pid in ( + (row.n2.strip(), row.c2.strip(), row.id2.strip()), + (row.n3.strip(), row.c3.strip(), row.id3.strip()), + ): + if not nm or not cf: + continue + try: + cff = float(cf) + except ValueError: + continue + if nm == top1_name: + continue + cands.append((nm, cff, pid)) + + cands.sort(key=lambda x: -x[1]) + seen: set[str] = set() + picked: list[tuple[str, float, str]] = [] + for nm, cff, pid in cands: + if nm in seen: + continue + seen.add(nm) + picked.append((nm, cff, pid)) + if len(picked) >= 2: + break + + id2 = n2 = c2 = id3 = n3 = c3 = "" + if len(picked) >= 1: + n2, 
c2f, id2 = picked[0] + c2 = f"{c2f:.6f}" + if len(picked) >= 2: + n3, c3f, id3 = picked[1] + c3 = f"{c3f:.6f}" + + return E2eRow( + rank=0, + start_sec=a.start_sec, + end_sec=b.end_sec, + id1=id1, + n1=n1, + c1=c1s, + id2=id2, + n2=n2, + c2=c2, + id3=id3, + n3=n3, + c3=c3, + ) + + +def one_pass_merge( + rows: list[E2eRow], + cap: cv2.VideoCapture, + yolo: YOLO, + tear_cls: int, + *, + head_sec: float, + tear_prob: float, + tear_min_frames: int, + imgsz: int, + predict_kw: dict[str, Any] | None, + verbose: bool, + det: YOLO | None = None, + grouper: HandRoiGrouper | None = None, + imgsz_det: int = 640, + det_conf: float = 0.5, +) -> tuple[list[E2eRow], bool]: + out: list[E2eRow] = [] + i = 0 + changed = False + while i < len(rows): + a = rows[i] + if i + 1 >= len(rows): + out.append(a) + break + b = rows[i + 1] + same_top1 = ( + a.is_success() + and b.is_success() + and a.n1.strip() == b.n1.strip() + ) + if same_top1: + w0 = b.start_sec + w1 = min(b.start_sec + head_sec, b.end_sec) + n_high = count_tearing_frames( + cap, + w0, + w1, + yolo, + tear_cls, + tear_prob, + imgsz, + predict_kw=predict_kw, + det=det, + grouper=grouper, + imgsz_det=imgsz_det, + det_conf=det_conf, + ) + if verbose: + mode = "hand_roi" if det is not None and grouper is not None else "full_frame" + print( + f"[tear_gate:{mode}] 窗口 [{w0:.3f},{w1:.3f})s(下一段起点起 head_sec={head_sec:g}s,截断至本段 end) " + f"P(tearing)>={tear_prob} 计数={n_high} (保留两段需>={tear_min_frames})", + flush=True, + ) + if n_high >= tear_min_frames: + out.append(a) + out.append(b) + else: + out.append(merge_two_segments(a, b)) + changed = True + i += 2 + else: + out.append(a) + i += 1 + return out, changed + + +def merge_all( + rows: list[E2eRow], + cap: cv2.VideoCapture, + yolo: YOLO, + tear_cls: int, + *, + head_sec: float, + tear_prob: float, + tear_min_frames: int, + imgsz: int, + predict_kw: dict[str, Any] | None = None, + verbose: bool = False, + det: YOLO | None = None, + grouper: HandRoiGrouper | None = None, + imgsz_det: int 
= 640, + det_conf: float = 0.5, +) -> list[E2eRow]: + cur = rows + while True: + cur, changed = one_pass_merge( + cur, + cap, + yolo, + tear_cls, + head_sec=head_sec, + tear_prob=tear_prob, + tear_min_frames=tear_min_frames, + imgsz=imgsz, + predict_kw=predict_kw, + verbose=verbose, + det=det, + grouper=grouper, + imgsz_det=imgsz_det, + det_conf=det_conf, + ) + if not changed: + break + return cur diff --git a/code/video_clip_cls/scripts/run_haocai_actionformer_consumables_e2e.py b/code/video_clip_cls/scripts/run_haocai_actionformer_consumables_e2e.py new file mode 100644 index 0000000..95a80d5 --- /dev/null +++ b/code/video_clip_cls/scripts/run_haocai_actionformer_consumables_e2e.py @@ -0,0 +1,839 @@ +#!/usr/bin/env python3 +""" +单视频端到端:VideoSwin 特征 → ActionFormer 划段 → 分数引导边界切割+score 过滤 → +手检 + 好帧(>阈值) + 白名单裁剪 + 耗材(softmax max>阈值) → 段内在有效帧上对类名计数,取 **票数前三**, +再以这三类出现次数 **归一化** 为 top1~3 置信度(三项和为 1;不足三类则空位补 0)。 +商品 id 来自 Excel「产品编码」。 +""" + +from __future__ import annotations + +import argparse +import json +import os +import pickle +import shutil +import subprocess +import sys +import tempfile +import time +from collections import defaultdict +from pathlib import Path +from typing import Any + +import cv2 +import numpy as np +from ultralytics import YOLO + +for _repo in Path(__file__).resolve().parents: + if (_repo / "repo_root.py").is_file() and (_repo / "dataset.py").is_file(): + if str(_repo) not in sys.path: + sys.path.insert(0, str(_repo)) + break +else: + raise RuntimeError("未定位到仓库 code/ 根目录") + +from repo_root import CODE_ROOT # noqa: E402 + +# 单文件夹打包:由 run.py 设置 HAOCAI_E2E_BUNDLE=解压根目录,权重/Excel 走包内路径,ActionFormer 在 /actionformer_release +_BUNDLE_ENV = os.environ.get("HAOCAI_E2E_BUNDLE", "").strip() +_BUNDLE_ROOT: Path | None = Path(_BUNDLE_ENV).resolve() if _BUNDLE_ENV else None + +if _BUNDLE_ROOT is not None: + _DEFAULT_EXCEL = _BUNDLE_ROOT / "data" / "视频中的商品信息表.xlsx" + _DEFAULT_AF_CKPT = _BUNDLE_ROOT / "models" / "actionformer_epoch_045.pth.tar" + 
_DEFAULT_HAND = _BUNDLE_ROOT / "models" / "hand_detect.pt" + _DEFAULT_GOODBAD = _BUNDLE_ROOT / "models" / "goodbad_frame.pt" + _DEFAULT_HAOCAI = _BUNDLE_ROOT / "models" / "haocai_classify.pt" +else: + _DEFAULT_EXCEL = CODE_ROOT.parent / "data/haocai/视频中的商品信息表.xlsx" + _DEFAULT_AF_CKPT = ( + CODE_ROOT + / "video_clip_cls/runs/actionformer_ckpt/haocai_main_perspective_videoswin_haocai_main_perspective_videoswin/epoch_045.pth.tar" + ) + _DEFAULT_HAND = CODE_ROOT / "hand_detection/runs/hand_det_y11s_multiframe-better/weights/best.pt" + _DEFAULT_GOODBAD = CODE_ROOT / "goodORbad_frame/runs/goodbad_frame_y11m_e50/weights/best.pt" + _DEFAULT_HAOCAI = ( + CODE_ROOT / "haocai_classify/runs/haocai_cls_41cls_goodframe_lastest-0.95/weights/best.pt" + ) + + +def _actionformer_release_dir() -> Path: + if _BUNDLE_ROOT is not None: + return _BUNDLE_ROOT / "actionformer_release" + return CODE_ROOT / "actionformer_release" + + +# 耗材投票:复用片段推理工具(infer_single_0506 为平铺目录,非 package) +_SYS_INSERT = str(CODE_ROOT / "video_clip_cls" / "infer_single_0506") +if _SYS_INSERT not in sys.path: + sys.path.insert(0, _SYS_INSERT) +import run_segments_consumable_vote as _rsv # noqa: E402 + +collect_hand_boxes = _rsv.collect_hand_boxes +haocai_softmax_probs = _rsv.haocai_softmax_probs +largest_hand = _rsv.largest_hand +pad_box = _rsv.pad_box_bottom_only +passes_good_gate_top1_conf = _rsv.passes_good_gate_top1_conf +_cls_name = _rsv._cls_name + +try: + import pandas as pd +except ImportError as e: + raise SystemExit("需要 pandas / openpyxl 读取 Excel:pip install pandas openpyxl") from e + +# ---------- 与训练/曾用 infer 对齐的 VideoSwin 参数 ---------- +FEAT_STRIDE_FRAMES = 8 +CLIP_LEN = 16 +FRAME_STRIDE = 1 +INPUT_DIM = 768 + + +def log(msg: str) -> None: + print(f"[{time.strftime('%H:%M:%S')}] {msg}", flush=True) + + +def load_product_code_map(excel_path: Path) -> dict[str, str]: + """商品名称 -> 产品编码。""" + df = pd.read_excel(excel_path, sheet_name=0, header=0) + col_code = "产品编码" + col_name = "商品名称" + if col_code not 
in df.columns or col_name not in df.columns: + df = pd.read_excel(excel_path, sheet_name=0, header=None) + col_code, col_name = df.columns[1], df.columns[2] + m: dict[str, str] = {} + for _, row in df.iterrows(): + name = row[col_name] + code = row[col_code] + if pd.isna(name) or str(name).strip() == "": + continue + name_s = str(name).strip() + if name_s not in m: + m[name_s] = str(code) if not pd.isna(code) else "" + return m + + +def mask_probs_whitelist( + probs: np.ndarray, + allowed: frozenset[int], + n_cls: int, +) -> np.ndarray | None: + v = np.asarray(probs, dtype=np.float64).ravel() + if v.size < n_cls: + v = np.resize(v, n_cls) + v = v[:n_cls].copy() + out = np.zeros_like(v) + for i in allowed: + if 0 <= i < n_cls: + out[i] = v[i] + s = float(np.sum(out)) + if s < 1e-12: + return None + return out / s + + +def allowed_indices_from_json_names( + allowed_names: list[str], cls_names: dict +) -> frozenset[int] | None: + """None 表示不按名称裁剪(全类)。""" + if not allowed_names: + return None + idx_by_name: dict[str, int] = {} + for k, v in cls_names.items(): + nm = str(v).strip() + if nm and nm not in idx_by_name: + idx_by_name[nm] = int(k) + out: set[int] = set() + for n in allowed_names: + ns = str(n).strip() + if ns in idx_by_name: + out.add(idx_by_name[ns]) + if not out: + log("警告: allowed_names 与模型类名无交集,白名单裁剪将不生效(等同全类)。") + return None + return frozenset(out) + + +def load_whitelist_json(path: Path) -> list[str]: + data = json.loads(path.read_text(encoding="utf-8")) + if isinstance(data, dict) and "allowed_names" in data: + raw = data["allowed_names"] + elif isinstance(data, list): + raw = data + else: + raise ValueError("白名单 JSON 应为 {\"allowed_names\": [...]} 或名称数组") + return [str(x).strip() for x in raw if str(x).strip()] + + +def run_feature_extraction( + *, + python_exe: str, + data_root: Path, + output_dir: Path, + meta_file: Path, + device: str, + batch_size: int, +) -> None: + ext_script = CODE_ROOT / "video_clip_cls" / "extract_videoswin_features.py" + 
cmd = [ + python_exe, + str(ext_script), + "--data-root", + str(data_root), + "--output-dir", + str(output_dir), + "--meta-file", + str(meta_file), + "--device", + device, + "--clip-len", + str(CLIP_LEN), + "--frame-stride", + str(FRAME_STRIDE), + "--feat-stride-frames", + str(FEAT_STRIDE_FRAMES), + "--batch-size", + str(batch_size), + "--max-videos", + "1", + ] + log("运行 VideoSwin 特征提取…") + env = os.environ.copy() + env.setdefault("OPENCV_FFMPEG_LOGLEVEL", "8") + r = subprocess.run(cmd, cwd=str(CODE_ROOT), env=env, check=False) + if r.returncode != 0: + raise RuntimeError(f"特征提取失败,exit={r.returncode}") + + +def write_infer_json( + out_path: Path, + video_id: str, + duration: float, + fps: float, +) -> None: + payload = { + "version": "haocai_infer_single_v1", + "taxonomy": [{"nodeName": "Action", "nodeId": 0}], + "database": { + video_id: { + "subset": "val", + "duration": float(duration), + "fps": float(fps), + "annotations": [ + {"segment": [0.0, min(1.0, duration)], "label": "Action", "label_id": 0} + ], + } + }, + } + out_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8") + + +def write_infer_yaml(out_path: Path, json_file: Path, feat_folder: Path) -> None: + jf = str(json_file.resolve()) + ff = str(feat_folder.resolve()) + nf = CLIP_LEN * FRAME_STRIDE + text = f"""dataset_name: thumos +devices: [0] +train_split: ['train'] +val_split: ['val'] + +dataset: + json_file: "{jf}" + feat_folder: "{ff}" + file_prefix: null + file_ext: ".npy" + num_classes: 1 + input_dim: {INPUT_DIM} + feat_stride: {FEAT_STRIDE_FRAMES} + num_frames: {nf} + default_fps: null + downsample_rate: 1 + trunc_thresh: 0.5 + crop_ratio: [0.9, 1.0] + max_seq_len: 2304 + force_upsampling: false + +model: + fpn_type: identity + max_buffer_len_factor: 6.0 + n_mha_win_size: 19 + n_head: 4 + embd_dim: 256 + fpn_dim: 256 + head_dim: 256 + use_abs_pe: false + +loader: + batch_size: 1 + num_workers: 2 + +test_cfg: + voting_thresh: 0.75 + pre_nms_topk: 4000 + 
def greedy_mutual_exclusive(
    items: list[tuple[float, float, float]],
) -> list[tuple[float, float, float]]:
    """Greedily keep the highest-scoring segments that do not overlap.

    Candidates are visited in descending score order. A candidate is dropped
    entirely when it overlaps any already kept segment by more than 1e-6
    seconds, so segments that merely touch at a boundary are both kept.

    Args:
        items: ``(t_start, t_end, score)`` triples.

    Returns:
        The kept triples, ordered by start time.
    """
    kept: list[tuple[float, float, float]] = []
    for start, end, score in sorted(items, key=lambda seg: -seg[2]):
        clashes = False
        for kept_start, kept_end, _ in kept:
            # Length of the intersection; positive means a real overlap.
            if min(end, kept_end) - max(start, kept_start) > 1e-6:
                clashes = True
                break
        if not clashes:
            kept.append((start, end, score))
    return sorted(kept, key=lambda seg: seg[0])
def parse_actionformer_pkl(
    pkl_path: Path, video_id: str
) -> list[tuple[float, float, float]]:
    """Read the predictions for one video from an ActionFormer results pickle.

    The pickle must hold a mapping with the parallel entries ``"video-id"``,
    ``"t-start"``, ``"t-end"`` and ``"score"``. Video ids may be ``str`` or
    ``bytes``; bytes are decoded as UTF-8 before comparison.

    NOTE(review): ``pickle.load`` executes arbitrary code from the file; only
    pass pickles produced by this pipeline.

    Args:
        pkl_path: Path of the results pickle.
        video_id: Id of the video whose rows are wanted.

    Returns:
        ``(t_start, t_end, score)`` triples, in file order, for every row
        whose id equals ``video_id``.
    """
    with pkl_path.open("rb") as handle:
        results = pickle.load(handle)

    raw_ids = results["video-id"]
    starts = np.asarray(results["t-start"]).reshape(-1)
    ends = np.asarray(results["t-end"]).reshape(-1)
    confidences = np.asarray(results["score"]).reshape(-1)

    segments: list[tuple[float, float, float]] = []
    for idx, raw_id in enumerate(np.asarray(raw_ids).reshape(-1)):
        # Ids can arrive as bytes or str; normalise both to str.
        if isinstance(raw_id, bytes):
            text_id = raw_id.decode("utf-8", errors="replace")
        else:
            text_id = str(raw_id)
        if text_id == video_id:
            segments.append(
                (float(starts[idx]), float(ends[idx]), float(confidences[idx]))
            )
    return segments
def process_segment_e2e(
    cap: cv2.VideoCapture,
    det: YOLO,
    gb: YOLO,
    cls_m: YOLO,
    *,
    start_sec: float,
    end_sec: float,
    seek_margin_sec: float,
    det_conf: float,
    pad_ratio: float,
    imgsz_det: int,
    imgsz_cls: int,
    frame_stride: int,
    good_top1_conf_threshold: float,
    haocai_min_conf: float,
    gb_names: dict,
    cls_names: dict,
    allowed_class_idx: frozenset[int] | None,
) -> dict[str, Any]:
    """Classify the consumable shown inside one time segment of a video.

    The capture is positioned ``seek_margin_sec`` before ``start_sec`` and
    read forward until a frame's timestamp (plus a 0.04 s tolerance) reaches
    ``start_sec``. Frames are then processed until a timestamp exceeds
    ``end_sec + 0.08``. Each processed frame goes through: hand detection →
    crop around the largest hand → good-frame gate → consumable
    classification (optionally whitelist-masked) → confidence threshold.
    Frames that survive contribute a ``(class name, top probability)`` vote.

    Args:
        cap: Opened video capture; its read position is modified.
        det: Hand detection model.
        gb: Good/bad frame gate model.
        cls_m: Consumable classification model.
        start_sec: Segment start in seconds.
        end_sec: Segment end in seconds.
        seek_margin_sec: How far before ``start_sec`` to seek.
        det_conf: Confidence threshold passed to the hand detector.
        pad_ratio: Padding ratio passed to ``pad_box``.
        imgsz_det: Inference size for the hand detector.
        imgsz_cls: Inference size for the gate and the classifier.
        frame_stride: Process every ``frame_stride``-th in-segment frame.
        good_top1_conf_threshold: Threshold passed to the good-frame gate.
        haocai_min_conf: A vote is kept only if its top probability is
            strictly greater than this value.
        gb_names: Class-name mapping of the gate model.
        cls_names: Class-name mapping of the classifier (keys are indices).
        allowed_class_idx: Whitelisted class indices, or ``None`` for all.

    Returns:
        On success ``{"ok": True, "top_names", "top_confs", "pairs",
        "n_hand_frames", "n_gate_pass", "n_valid"}``. On failure
        ``{"ok": False, "reason", "pairs": [], "n_hand_frames",
        "n_gate_pass"}``.

    Raises:
        ValueError: If ``cls_names`` is empty.
    """
    if not cls_names:
        # Previously surfaced as an opaque "max() arg is an empty sequence".
        raise ValueError("cls_names 为空,无法确定耗材类别数")

    probe_from = float(max(0.0, start_sec - seek_margin_sec))
    cap.set(cv2.CAP_PROP_POS_MSEC, probe_from * 1000.0)

    # Read forward from the probe position until the first frame that lies
    # (within tolerance) at or after the segment start.
    synced_frame: np.ndarray | None = None
    synced_t: float | None = None
    tol = 0.04
    while True:
        ok0, grab = cap.read()
        if not ok0 or grab is None:
            synced_frame, synced_t = None, None
            break
        t0 = float(cap.get(cv2.CAP_PROP_POS_MSEC)) / 1000.0
        if t0 + tol >= start_sec:
            synced_frame, synced_t = grab, t0
            break

    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Probability vectors are sized by the largest class index, not by count.
    n_cls = max(int(k) for k in cls_names.keys()) + 1

    n_hand_frames = 0
    n_gate_pass = 0
    pairs: list[tuple[str, float]] = []
    frames_read_in_segment = 0

    def one_frame(fr: np.ndarray) -> None:
        """Run the per-frame pipeline and record a vote if the frame passes."""
        nonlocal frames_read_in_segment, n_hand_frames, n_gate_pass, pairs
        frames_read_in_segment += 1
        if frame_stride > 1 and (frames_read_in_segment - 1) % frame_stride != 0:
            return

        r0 = det.predict(fr, conf=det_conf, imgsz=imgsz_det, verbose=False)[0]
        hands = collect_hand_boxes(det, r0.boxes) if r0.boxes else []
        if not hands:
            return

        n_hand_frames += 1
        xyxy = largest_hand(hands)
        x1, y1, x2, y2 = pad_box(xyxy, w, h, pad_ratio)
        crop = fr[y1:y2, x1:x2]
        if crop.size == 0:
            # A degenerate box yields an empty image the models cannot take.
            return
        if not passes_good_gate_top1_conf(
            gb, crop, gb_names, imgsz_cls, good_top1_conf_threshold
        ):
            return
        n_gate_pass += 1

        vec_raw = haocai_softmax_probs(cls_m, crop, imgsz_cls, n_cls)
        if vec_raw is None:
            return
        if allowed_class_idx is not None:
            vec = mask_probs_whitelist(vec_raw, allowed_class_idx, n_cls)
        else:
            vec = vec_raw
        if vec is None:
            return

        top_prob = float(np.max(vec))
        if top_prob <= haocai_min_conf:
            return
        label = int(np.argmax(vec))
        pairs.append((_cls_name(cls_names, label), top_prob))

    if synced_frame is not None and synced_t is not None and synced_t <= end_sec + 0.08:
        one_frame(synced_frame)

    while True:
        ok, frame = cap.read()
        if not ok or frame is None:
            break
        t = float(cap.get(cv2.CAP_PROP_POS_MSEC)) / 1000.0
        if t > end_sec + 0.08:
            break
        if t + 1e-6 < start_sec:
            continue
        one_frame(frame)

    if n_hand_frames == 0:
        # Same key set as the other failure result below.
        return {
            "ok": False,
            "reason": "(段内未检测到手部)",
            "pairs": [],
            "n_hand_frames": 0,
            "n_gate_pass": 0,
        }
    if not pairs:
        return {
            "ok": False,
            "reason": "(无有效耗材帧:好帧/白名单/耗材置信度未全部满足)",
            "pairs": [],
            "n_hand_frames": n_hand_frames,
            "n_gate_pass": n_gate_pass,
        }

    n1, c1 = aggregate_top3_votes(pairs)
    return {
        "ok": True,
        "top_names": n1,
        "top_confs": c1,
        "pairs": pairs,
        "n_hand_frames": n_hand_frames,
        "n_gate_pass": n_gate_pass,
        "n_valid": len(pairs),
    }
def main() -> int:
    """Run the single-video pipeline: feature extraction → segments → TSV.

    Steps: parse CLI arguments and validate input files; extract VideoSwin
    features in a subprocess; run ActionFormer eval to get candidate
    segments; keep mutually exclusive segments above ``--af-min-score``;
    classify the consumable in each segment; write a 12-column
    tab-separated file to ``--out``.

    Returns:
        0 on success, 1 when an input is missing, the feature file was not
        produced, or the video cannot be opened.
    """
    ap = argparse.ArgumentParser(description="ActionFormer 划段 + 耗材端到端(单视频)")
    ap.add_argument("--video", type=Path, required=True, help="输入 MP4")
    ap.add_argument("--whitelist-json", type=Path, required=True, help='{"allowed_names":["..."]}')
    ap.add_argument(
        "--excel",
        type=Path,
        default=_DEFAULT_EXCEL,
        help="商品名称→产品编码",
    )
    ap.add_argument("--out", type=Path, required=True, help="输出制表符 TXT")
    ap.add_argument(
        "--work-dir",
        type=Path,
        default=None,
        help="工作目录(默认临时目录;加 --keep-work-dir 可保留)",
    )
    ap.add_argument("--keep-work-dir", action="store_true")
    ap.add_argument(
        "--actionformer-ckpt",
        type=Path,
        default=_DEFAULT_AF_CKPT,
    )
    ap.add_argument(
        "--hand-model",
        type=Path,
        default=_DEFAULT_HAND,
    )
    ap.add_argument(
        "--goodbad-model",
        type=Path,
        default=_DEFAULT_GOODBAD,
    )
    ap.add_argument(
        "--haocai-model",
        type=Path,
        default=_DEFAULT_HAOCAI,
    )
    ap.add_argument("--good-top1-conf-threshold", type=float, default=0.9)
    ap.add_argument("--haocai-min-conf", type=float, default=0.8)
    ap.add_argument("--af-min-score", type=float, default=0.1, help="划段保留 score 下限(不含等于)")
    ap.add_argument("--det-conf", type=float, default=0.5)
    ap.add_argument("--pad-ratio", type=float, default=0.30)
    ap.add_argument("--imgsz-det", type=int, default=640)
    ap.add_argument("--imgsz-cls", type=int, default=224)
    ap.add_argument("--frame-stride", type=int, default=1)
    ap.add_argument("--seek-margin-sec", type=float, default=3.0)
    ap.add_argument("--feat-batch-size", type=int, default=1)
    ap.add_argument("--device", type=str, default="cuda")
    ap.add_argument(
        "--python",
        type=str,
        default=sys.executable,
        help="子进程 Python(建议 conda yolo 环境的 python)",
    )
    args = ap.parse_args()

    # --- Validate inputs before doing any expensive work. ---
    video_path = args.video.resolve()
    if not video_path.is_file():
        log(f"找不到视频: {video_path}")
        return 1
    if not args.excel.is_file():
        log(f"找不到 Excel: {args.excel}")
        return 1
    if not args.whitelist_json.is_file():
        log(f"找不到白名单 JSON: {args.whitelist_json}")
        return 1
    for p, name in (
        (args.actionformer_ckpt, "ActionFormer ckpt"),
        (args.hand_model, "hand"),
        (args.goodbad_model, "goodbad"),
        (args.haocai_model, "haocai"),
    ):
        if not Path(p).is_file():
            log(f"缺少{name}: {p}")
            return 1

    # --- Choose the work directory: explicit, kept temp, or auto-removed temp. ---
    stem = video_path.stem
    tmp_ctx: tempfile.TemporaryDirectory | None = None
    if args.work_dir is not None:
        work = Path(args.work_dir).resolve()
        work.mkdir(parents=True, exist_ok=True)
    elif args.keep_work_dir:
        work = Path(tempfile.mkdtemp(prefix="haocai_e2e_"))
        log(f"工作目录(保留): {work}")
    else:
        tmp_ctx = tempfile.TemporaryDirectory(prefix="haocai_e2e_")
        work = Path(tmp_ctx.name)

    try:
        product_map = load_product_code_map(args.excel.resolve())
        allowed_names = load_whitelist_json(args.whitelist_json.resolve())

        inp = work / "input"
        feat_dir = work / "features"
        inp.mkdir(parents=True, exist_ok=True)
        feat_dir.mkdir(parents=True, exist_ok=True)

        # The extractor scans a directory, so give it one holding only this video.
        single_video = inp / video_path.name
        if single_video.resolve() != video_path.resolve():
            shutil.copy2(video_path, single_video)

        meta_path = feat_dir / "meta.json"
        run_feature_extraction(
            python_exe=args.python,
            data_root=inp,
            output_dir=feat_dir,
            meta_file=meta_path,
            device=args.device,
            batch_size=max(1, args.feat_batch_size),
        )

        meta = json.loads(meta_path.read_text(encoding="utf-8"))
        duration, fps = duration_fps_from_meta(meta, stem)
        if stem not in meta.get("videos", {}):
            # Fallback: meta has no entry for this stem, so estimate duration
            # and fps with OpenCV (the old message wrongly named ffprobe).
            log("meta 中未找到 video_id=stem,使用 OpenCV 估 duration…")
            cap0 = cv2.VideoCapture(str(video_path))
            if cap0.isOpened():
                fps = float(cap0.get(cv2.CAP_PROP_FPS)) or fps
                nfr = int(cap0.get(cv2.CAP_PROP_FRAME_COUNT))
                cap0.release()
                if fps > 0 and nfr > 0:
                    duration = nfr / fps

        npy_path = feat_dir / f"{stem}.npy"
        if not npy_path.is_file():
            log(f"特征文件不存在: {npy_path}")
            return 1

        json_path = work / "infer_single.json"
        write_infer_json(json_path, stem, duration, fps)

        yaml_path = work / "infer_single.yaml"
        write_infer_yaml(yaml_path, json_path.resolve(), feat_dir.resolve())

        pkl_dest = work / "eval_results.pkl"
        run_actionformer_eval(
            python_exe=args.python,
            yaml_path=yaml_path.resolve(),
            ckpt_path=args.actionformer_ckpt.resolve(),
            copy_pkl_to=pkl_dest,
        )

        raw_segs = parse_actionformer_pkl(pkl_dest, stem)
        raw_segs = [(s, e, sc) for s, e, sc in raw_segs if sc > args.af_min_score]
        segs = greedy_mutual_exclusive(raw_segs)
        log(f"ActionFormer 候选 {len(raw_segs)} -> 互斥后 {len(segs)} 段(score>{args.af_min_score})")

        log("加载 YOLO 模型…")
        det = YOLO(str(args.hand_model))
        gb = YOLO(str(args.goodbad_model))
        cls_m = YOLO(str(args.haocai_model))
        gb_names = gb.names
        cls_names = cls_m.names
        allowed_idx = allowed_indices_from_json_names(allowed_names, cls_names)

        cap = cv2.VideoCapture(str(video_path))
        if not cap.isOpened():
            log("无法打开视频")
            return 1

        sep = "\t"
        columns = [
            "rank",
            "start_sec",
            "end_sec",
            "product_id_top1",
            "top1_name",
            "top1_conf",
            "product_id_top2",
            "top2_name",
            "top2_conf",
            "product_id_top3",
            "top3_name",
            "top3_conf",
        ]
        lines_out = [sep.join(columns)]

        try:
            for rank, (t0, t1, af_sc) in enumerate(segs, start=1):
                log(f"段落 rank={rank} [{t0:.3f},{t1:.3f}] score={af_sc:.4f} …")
                info = process_segment_e2e(
                    cap,
                    det,
                    gb,
                    cls_m,
                    start_sec=t0,
                    end_sec=t1,
                    seek_margin_sec=args.seek_margin_sec,
                    det_conf=args.det_conf,
                    pad_ratio=args.pad_ratio,
                    imgsz_det=args.imgsz_det,
                    imgsz_cls=args.imgsz_cls,
                    frame_stride=max(1, args.frame_stride),
                    good_top1_conf_threshold=args.good_top1_conf_threshold,
                    haocai_min_conf=args.haocai_min_conf,
                    gb_names=gb_names,
                    cls_names=cls_names,
                    allowed_class_idx=allowed_idx,
                )
                if not info.get("ok"):
                    # The failure reason goes in the top1_name column. Pad to
                    # the header width: the hand-written row used to hold 13
                    # fields, one more than the 12-column header.
                    reason = str(info.get("reason", ""))
                    failed = [str(rank), f"{t0:.6f}", f"{t1:.6f}", "", reason]
                    failed += [""] * (len(columns) - len(failed))
                    lines_out.append(sep.join(failed))
                    continue

                n1, n2, n3 = info["top_names"]
                c1, c2, c3 = info["top_confs"]
                id1 = product_map.get(n1, "") if n1 else ""
                id2 = product_map.get(n2, "") if n2 else ""
                id3 = product_map.get(n3, "") if n3 else ""
                for nm, pid in ((n1, id1), (n2, id2), (n3, id3)):
                    if nm and not pid:
                        log(f"警告: 商品表无名称「{nm}」,产品编码置空。")

                lines_out.append(
                    sep.join(
                        [
                            str(rank),
                            f"{t0:.6f}",
                            f"{t1:.6f}",
                            id1,
                            n1,
                            f"{c1:.6f}" if n1 else "",
                            id2,
                            n2,
                            f"{c2:.6f}" if n2 else "",
                            id3,
                            n3,
                            f"{c3:.6f}" if n3 else "",
                        ]
                    )
                )
        finally:
            cap.release()

        args.out.parent.mkdir(parents=True, exist_ok=True)
        args.out.write_text("\n".join(lines_out) + "\n", encoding="utf-8")
        log(f"已写出: {args.out.resolve()}")
        # Report the work directory whenever it outlives this run.
        if args.work_dir is not None or args.keep_work_dir:
            log(f"工作目录: {work}")
    finally:
        if tmp_ctx is not None:
            tmp_ctx.cleanup()

    return 0
seek_margin_sec: 3.0 + frame_stride: 1 + det_conf: 0.6 + # 双手 union 紧框后仅向下延伸(相对框高) + pad_bottom_ratio: 0.5 + imgsz_det: 1920 + # 段内需检测到至少两只手才裁 ROI(取最大两只 union);merge_* 已不使用 + merge_iou_gt: 0.0 + merge_center_dist_max_px: null + merge_center_dist_max_frac_diag: null + tracking_alpha: 0.6 + tracking_max_lost_frames: 0 + +classification: + imgsz_cls: 224 + good_top1_conf_threshold: 0.8 + good_top1_retry_threshold: 0.6 + haocai_min_conf: 0.8 + haocai_min_conf_retry: 0.5 + empty_cache_every: 0 + +gap_merge: + enabled: true + max_gap_sec: 2.0 + +output: + legacy_12_col_only: true + +doctor_identity: + enabled: true + checkpoint: doctor_identity_package/doctor_info.pth + labels_csv: doctor_identity_package/labels.csv + pose_min_detection_confidence: 0.30 + min_identity_confidence: 0.00 + middle_seconds: 10.0 + sample_fps: 3.0 + pad_frac: 0.15 + +# 篮子接触分段(main_basket.py / main_basket_stream.py) +basket: + det_conf: 0.6 + contact_iou_threshold: 0.05 + contact_iou_on: 0.03 + contact_iou_off: 0.01 + confirm_seconds: 0.1 + cooldown_seconds: 3.0 + segment_start_offset_sec: 1.0 + segment_end_offset_sec: 6.0 + min_segment_sec: 4.0 + scan_frame_stride: 1 + roi_frame: first + save_roi_json: null + load_roi_json: null + skip_roi_select: false + roi_backend: tkinter + +# 推流实时识别(main_basket_stream.py) +# 接触判定 / 手检 imgsz / 好坏帧 / 耗材阈值:与离线共用 basket + phase2 + classification +# 段内推理:本地 MP4 回源 4K + phase2.imgsz_det=1920(与离线一致);RTSP/缓存 fallback 时 JPEG 宽≤1920 +stream: + rtsp: null + ring_buffer_sec: 10.0 + cache_max_width: 1920 + jpeg_quality: 85 + fps: 25.0 + # 段窗口与 basket 一致:[contact+1, contact+6],时长 5s + segment_start_offset_sec: 1.0 + segment_end_offset_sec: 6.0 + min_segment_sec: 4.0 + infer_source: file + infer_fallback: cache diff --git a/doctor_identity_package/.mediapipe_models/pose_landmarker_lite.task b/doctor_identity_package/.mediapipe_models/pose_landmarker_lite.task new file mode 100644 index 0000000..09576a9 Binary files /dev/null and 
b/doctor_identity_package/.mediapipe_models/pose_landmarker_lite.task differ diff --git a/doctor_identity_package/doctor_info.pth b/doctor_identity_package/doctor_info.pth new file mode 100644 index 0000000..d29b396 Binary files /dev/null and b/doctor_identity_package/doctor_info.pth differ diff --git a/doctor_identity_package/infer_doctor_from_video.py b/doctor_identity_package/infer_doctor_from_video.py new file mode 100644 index 0000000..22f9b3b --- /dev/null +++ b/doctor_identity_package/infer_doctor_from_video.py @@ -0,0 +1,321 @@ +#!/usr/bin/env python3 +"""Infer doctor identity from one MP4 video. + +Pipeline: +1) Take middle N seconds from input video. +2) Run MediaPipe Pose to detect human bbox. +3) Keep the best crop (largest bbox area). +4) Run doctor ReID checkpoint classification head. +5) Output one final doctor identity. +""" + +from __future__ import annotations + +import argparse +import csv +import sys +import urllib.request +from pathlib import Path + +import cv2 +import mediapipe as mp +import numpy as np +import torch +from PIL import Image +from torchvision import transforms + +# Allow importing local training model definition when running directly. 
def _ensure_pose_lite_model(model_dir: Path) -> Path:
    """Return the local MediaPipe Pose Lite model path, downloading if needed.

    An existing file larger than 10,000 bytes is treated as a valid cached
    model. Otherwise the model is fetched from ``POSE_LITE_URL`` into a
    temporary ``.part`` file and renamed into place only after the download
    has finished and passed the same size check. An interrupted download can
    therefore never be mistaken for a valid cached model on a later run.

    Args:
        model_dir: Directory that holds (or will hold) the model; created if
            missing.

    Returns:
        Path of the model file inside ``model_dir``.

    Raises:
        RuntimeError: If the downloaded file is not larger than 10,000 bytes.
        OSError: Propagated from the download or the filesystem
            (``urllib.error.URLError`` is an ``OSError`` subclass).
    """
    min_valid_bytes = 10_000
    model_dir.mkdir(parents=True, exist_ok=True)
    model_path = model_dir / POSE_LITE_NAME
    if model_path.is_file() and model_path.stat().st_size > min_valid_bytes:
        return model_path

    print(f"[info] Downloading MediaPipe Pose model -> {model_path}", flush=True)
    partial_path = model_path.with_name(model_path.name + ".part")
    try:
        urllib.request.urlretrieve(POSE_LITE_URL, partial_path)
        size = partial_path.stat().st_size
        if size <= min_valid_bytes:
            raise RuntimeError(
                f"Downloaded pose model is too small ({size} bytes): {POSE_LITE_URL}"
            )
        # Rename only a complete, validated file into the final location.
        partial_path.replace(model_path)
    finally:
        # No-op after a successful rename; removes leftovers after a failure.
        partial_path.unlink(missing_ok=True)
    return model_path
def load_name_mapping(labels_csv: Path) -> dict[str, str]:
    """Build a ``person_id -> doctor name`` lookup from the labels CSV.

    The file is read as UTF-8 (a leading BOM is tolerated) using the columns
    ``person_id`` and ``医生姓名``. Values are stripped; rows where either
    value is empty are skipped, and the first name seen for an id wins.

    Args:
        labels_csv: Path of the labels CSV.

    Returns:
        The lookup table, or an empty dict when the file does not exist.
    """
    names_by_pid: dict[str, str] = {}
    if not labels_csv.is_file():
        return names_by_pid

    with labels_csv.open("r", encoding="utf-8-sig", newline="") as handle:
        for record in csv.DictReader(handle):
            person = str(record.get("person_id", "")).strip()
            doctor = str(record.get("医生姓名", "")).strip()
            if not (person and doctor):
                continue
            # Keep the first occurrence; later duplicates are ignored.
            names_by_pid.setdefault(person, doctor)
    return names_by_pid
def main() -> int:
    """CLI entry point: print the doctor identity inferred from one video.

    Steps: ensure the pose model is available, pick the best person crop via
    ``pick_best_person_crop``, optionally save that crop, classify it with
    the checkpoint, and print either the doctor's name (when the labels CSV
    maps the predicted id) or the raw id.

    Returns:
        0 on success, 2 when the input video does not exist, 1 on any other
        error (the message is printed to stderr).
    """
    args = parse_args()
    if not args.video.is_file():
        print(f"[error] video not found: {args.video}", file=sys.stderr)
        return 2

    try:
        model_path = _ensure_pose_lite_model(THIS_DIR / ".mediapipe_models")
        opts = PoseLandmarkerOptions(
            base_options=BaseOptions(model_asset_path=str(model_path)),
            running_mode=VisionRunningMode.IMAGE,
            min_pose_detection_confidence=0.3,
        )
        landmarker = PoseLandmarker.create_from_options(opts)
        try:
            best_crop = pick_best_person_crop(
                video_path=args.video,
                landmarker=landmarker,
                middle_seconds=args.middle_seconds,
                sample_fps=args.sample_fps,
                pad_frac=args.pad_frac,
            )
        finally:
            # Always release the landmarker, even when no person is found.
            landmarker.close()

        if args.save_crop is not None:
            args.save_crop.parent.mkdir(parents=True, exist_ok=True)
            # cv2.imwrite reports failure through its boolean result. Saving
            # the crop is optional, so warn instead of aborting.
            if not cv2.imwrite(str(args.save_crop), best_crop):
                print(
                    f"[warn] failed to save crop: {args.save_crop}",
                    file=sys.stderr,
                )

        raw_pid, conf = run_inference(best_crop, args.checkpoint)
        name_map = load_name_mapping(args.labels_csv)
        doctor_name = name_map.get(str(raw_pid), "")

        if doctor_name:
            print(f"doctor={doctor_name} (id={raw_pid}, conf={conf:.4f})")
        else:
            print(f"doctor_id={raw_pid} (conf={conf:.4f})")
        return 0
    except Exception as exc:  # noqa: BLE001
        # Top-level boundary: turn any failure into a message and exit code.
        print(f"[error] {exc}", file=sys.stderr)
        return 1
+24502_c1_s1_00030.jpg,24502,24502,钟光喜,1,30 +24502_c1_s1_00031.jpg,24502,24502,钟光喜,1,31 +24502_c1_s1_00032.jpg,24502,24502,钟光喜,1,32 +24502_c1_s1_00033.jpg,24502,24502,钟光喜,1,33 +24502_c1_s1_00034.jpg,24502,24502,钟光喜,1,34 +24502_c1_s1_00035.jpg,24502,24502,钟光喜,1,35 +24502_c1_s1_00036.jpg,24502,24502,钟光喜,1,36 +24502_c1_s1_00037.jpg,24502,24502,钟光喜,1,37 +24502_c1_s1_00038.jpg,24502,24502,钟光喜,1,38 +24502_c1_s1_00039.jpg,24502,24502,钟光喜,1,39 +24502_c1_s1_00040.jpg,24502,24502,钟光喜,1,40 +24502_c1_s1_00041.jpg,24502,24502,钟光喜,1,41 +24502_c1_s1_00042.jpg,24502,24502,钟光喜,1,42 +24502_c1_s1_00043.jpg,24502,24502,钟光喜,1,43 +24502_c1_s1_00044.jpg,24502,24502,钟光喜,1,44 +24502_c1_s1_00045.jpg,24502,24502,钟光喜,1,45 +24502_c2_s1_00046.jpg,24502,24502,钟光喜,2,46 +24502_c2_s1_00047.jpg,24502,24502,钟光喜,2,47 +24502_c2_s1_00048.jpg,24502,24502,钟光喜,2,48 +24502_c2_s1_00049.jpg,24502,24502,钟光喜,2,49 +24502_c2_s1_00050.jpg,24502,24502,钟光喜,2,50 +24502_c2_s1_00051.jpg,24502,24502,钟光喜,2,51 +24502_c2_s1_00052.jpg,24502,24502,钟光喜,2,52 +24502_c2_s1_00053.jpg,24502,24502,钟光喜,2,53 +24502_c2_s1_00054.jpg,24502,24502,钟光喜,2,54 +24502_c2_s1_00055.jpg,24502,24502,钟光喜,2,55 +24502_c2_s1_00056.jpg,24502,24502,钟光喜,2,56 +24502_c2_s1_00057.jpg,24502,24502,钟光喜,2,57 +24502_c2_s1_00058.jpg,24502,24502,钟光喜,2,58 +24502_c2_s1_00059.jpg,24502,24502,钟光喜,2,59 +24502_c2_s1_00060.jpg,24502,24502,钟光喜,2,60 +24502_c2_s1_00061.jpg,24502,24502,钟光喜,2,61 +24502_c2_s1_00062.jpg,24502,24502,钟光喜,2,62 +24502_c2_s1_00063.jpg,24502,24502,钟光喜,2,63 +24502_c2_s1_00064.jpg,24502,24502,钟光喜,2,64 +24502_c2_s1_00065.jpg,24502,24502,钟光喜,2,65 +24502_c2_s1_00066.jpg,24502,24502,钟光喜,2,66 +24502_c2_s1_00067.jpg,24502,24502,钟光喜,2,67 +24502_c2_s1_00068.jpg,24502,24502,钟光喜,2,68 +24502_c2_s1_00069.jpg,24502,24502,钟光喜,2,69 +24502_c2_s1_00070.jpg,24502,24502,钟光喜,2,70 +24502_c2_s1_00071.jpg,24502,24502,钟光喜,2,71 +24502_c2_s1_00072.jpg,24502,24502,钟光喜,2,72 +24502_c2_s1_00073.jpg,24502,24502,钟光喜,2,73 +24502_c2_s1_00074.jpg,24502,24502,钟光喜,2,74 
+24502_c2_s1_00075.jpg,24502,24502,钟光喜,2,75 +24502_c2_s1_00076.jpg,24502,24502,钟光喜,2,76 +24502_c2_s1_00077.jpg,24502,24502,钟光喜,2,77 +24502_c2_s1_00078.jpg,24502,24502,钟光喜,2,78 +24502_c2_s1_00079.jpg,24502,24502,钟光喜,2,79 +24502_c2_s1_00080.jpg,24502,24502,钟光喜,2,80 +24502_c2_s1_00081.jpg,24502,24502,钟光喜,2,81 +24502_c2_s1_00082.jpg,24502,24502,钟光喜,2,82 +24502_c2_s1_00083.jpg,24502,24502,钟光喜,2,83 +24502_c2_s1_00084.jpg,24502,24502,钟光喜,2,84 +24502_c2_s1_00085.jpg,24502,24502,钟光喜,2,85 +24502_c2_s1_00086.jpg,24502,24502,钟光喜,2,86 +24502_c2_s1_00087.jpg,24502,24502,钟光喜,2,87 +24502_c2_s1_00088.jpg,24502,24502,钟光喜,2,88 +24502_c2_s1_00089.jpg,24502,24502,钟光喜,2,89 +24502_c2_s1_00090.jpg,24502,24502,钟光喜,2,90 +24502_c2_s1_00091.jpg,24502,24502,钟光喜,2,91 +24502_c2_s1_00092.jpg,24502,24502,钟光喜,2,92 +24502_c2_s1_00093.jpg,24502,24502,钟光喜,2,93 +24502_c2_s1_00094.jpg,24502,24502,钟光喜,2,94 +24502_c2_s1_00095.jpg,24502,24502,钟光喜,2,95 +24502_c2_s1_00096.jpg,24502,24502,钟光喜,2,96 +24502_c2_s1_00097.jpg,24502,24502,钟光喜,2,97 +24502_c2_s1_00098.jpg,24502,24502,钟光喜,2,98 +24502_c2_s1_00099.jpg,24502,24502,钟光喜,2,99 +24502_c2_s1_00100.jpg,24502,24502,钟光喜,2,100 +24502_c2_s1_00101.jpg,24502,24502,钟光喜,2,101 +24502_c2_s1_00102.jpg,24502,24502,钟光喜,2,102 +24502_c3_s1_00103.jpg,24502,24502,钟光喜,3,103 +24502_c3_s1_00104.jpg,24502,24502,钟光喜,3,104 +24502_c3_s1_00105.jpg,24502,24502,钟光喜,3,105 +24502_c3_s1_00106.jpg,24502,24502,钟光喜,3,106 +24502_c3_s1_00107.jpg,24502,24502,钟光喜,3,107 +24502_c3_s1_00108.jpg,24502,24502,钟光喜,3,108 +24502_c3_s1_00109.jpg,24502,24502,钟光喜,3,109 +24502_c3_s1_00110.jpg,24502,24502,钟光喜,3,110 +24502_c3_s1_00111.jpg,24502,24502,钟光喜,3,111 +24502_c3_s1_00112.jpg,24502,24502,钟光喜,3,112 +24502_c3_s1_00113.jpg,24502,24502,钟光喜,3,113 +24502_c3_s1_00114.jpg,24502,24502,钟光喜,3,114 +24502_c3_s1_00115.jpg,24502,24502,钟光喜,3,115 +24502_c3_s1_00116.jpg,24502,24502,钟光喜,3,116 +24502_c3_s1_00117.jpg,24502,24502,钟光喜,3,117 +24502_c3_s1_00118.jpg,24502,24502,钟光喜,3,118 +24502_c3_s1_00119.jpg,24502,24502,钟光喜,3,119 
+24502_c3_s1_00120.jpg,24502,24502,钟光喜,3,120 +24502_c3_s1_00121.jpg,24502,24502,钟光喜,3,121 +24502_c3_s1_00122.jpg,24502,24502,钟光喜,3,122 +24502_c3_s1_00123.jpg,24502,24502,钟光喜,3,123 +24502_c3_s1_00124.jpg,24502,24502,钟光喜,3,124 +24502_c3_s1_00125.jpg,24502,24502,钟光喜,3,125 +24502_c3_s1_00126.jpg,24502,24502,钟光喜,3,126 +24502_c3_s1_00127.jpg,24502,24502,钟光喜,3,127 +24502_c3_s1_00128.jpg,24502,24502,钟光喜,3,128 +24502_c3_s1_00129.jpg,24502,24502,钟光喜,3,129 +24502_c3_s1_00130.jpg,24502,24502,钟光喜,3,130 +24502_c3_s1_00131.jpg,24502,24502,钟光喜,3,131 +24502_c3_s1_00132.jpg,24502,24502,钟光喜,3,132 +24502_c3_s1_00133.jpg,24502,24502,钟光喜,3,133 +24502_c3_s1_00134.jpg,24502,24502,钟光喜,3,134 +24502_c3_s1_00135.jpg,24502,24502,钟光喜,3,135 +24502_c3_s1_00136.jpg,24502,24502,钟光喜,3,136 +24502_c3_s1_00137.jpg,24502,24502,钟光喜,3,137 +24502_c3_s1_00138.jpg,24502,24502,钟光喜,3,138 +24502_c3_s1_00139.jpg,24502,24502,钟光喜,3,139 +24502_c3_s1_00140.jpg,24502,24502,钟光喜,3,140 +24502_c3_s1_00141.jpg,24502,24502,钟光喜,3,141 +24502_c3_s1_00142.jpg,24502,24502,钟光喜,3,142 +24502_c3_s1_00143.jpg,24502,24502,钟光喜,3,143 +24502_c3_s1_00144.jpg,24502,24502,钟光喜,3,144 +24502_c3_s1_00145.jpg,24502,24502,钟光喜,3,145 +24502_c3_s1_00146.jpg,24502,24502,钟光喜,3,146 +24503_c1_s1_00147.jpg,24503,24503,付玉峰,1,147 +24503_c1_s1_00148.jpg,24503,24503,付玉峰,1,148 +24503_c1_s1_00149.jpg,24503,24503,付玉峰,1,149 +24503_c1_s1_00150.jpg,24503,24503,付玉峰,1,150 +24503_c1_s1_00151.jpg,24503,24503,付玉峰,1,151 +24503_c1_s1_00152.jpg,24503,24503,付玉峰,1,152 +24503_c1_s1_00153.jpg,24503,24503,付玉峰,1,153 +24503_c1_s1_00154.jpg,24503,24503,付玉峰,1,154 +24503_c1_s1_00155.jpg,24503,24503,付玉峰,1,155 +24503_c1_s1_00156.jpg,24503,24503,付玉峰,1,156 +24503_c1_s1_00157.jpg,24503,24503,付玉峰,1,157 +24503_c1_s1_00158.jpg,24503,24503,付玉峰,1,158 +24503_c1_s1_00159.jpg,24503,24503,付玉峰,1,159 +24503_c1_s1_00160.jpg,24503,24503,付玉峰,1,160 +24503_c1_s1_00161.jpg,24503,24503,付玉峰,1,161 +24503_c1_s1_00162.jpg,24503,24503,付玉峰,1,162 +24503_c1_s1_00163.jpg,24503,24503,付玉峰,1,163 
+24503_c1_s1_00164.jpg,24503,24503,付玉峰,1,164 +24503_c1_s1_00165.jpg,24503,24503,付玉峰,1,165 +24503_c1_s1_00166.jpg,24503,24503,付玉峰,1,166 +24503_c1_s1_00167.jpg,24503,24503,付玉峰,1,167 +24503_c1_s1_00168.jpg,24503,24503,付玉峰,1,168 +24503_c1_s1_00169.jpg,24503,24503,付玉峰,1,169 +24503_c1_s1_00170.jpg,24503,24503,付玉峰,1,170 +24503_c1_s1_00171.jpg,24503,24503,付玉峰,1,171 +24503_c1_s1_00172.jpg,24503,24503,付玉峰,1,172 +24503_c1_s1_00173.jpg,24503,24503,付玉峰,1,173 +24503_c1_s1_00174.jpg,24503,24503,付玉峰,1,174 +24503_c1_s1_00175.jpg,24503,24503,付玉峰,1,175 +24503_c1_s1_00176.jpg,24503,24503,付玉峰,1,176 +24503_c1_s1_00177.jpg,24503,24503,付玉峰,1,177 +24503_c1_s1_00178.jpg,24503,24503,付玉峰,1,178 +24503_c1_s1_00179.jpg,24503,24503,付玉峰,1,179 +24503_c1_s1_00180.jpg,24503,24503,付玉峰,1,180 +24503_c1_s1_00181.jpg,24503,24503,付玉峰,1,181 +24503_c1_s1_00182.jpg,24503,24503,付玉峰,1,182 +24503_c1_s1_00183.jpg,24503,24503,付玉峰,1,183 +24503_c1_s1_00184.jpg,24503,24503,付玉峰,1,184 +24503_c1_s1_00185.jpg,24503,24503,付玉峰,1,185 +24503_c1_s1_00186.jpg,24503,24503,付玉峰,1,186 +24503_c1_s1_00187.jpg,24503,24503,付玉峰,1,187 +24503_c2_s1_00188.jpg,24503,24503,付玉峰,2,188 +24503_c2_s1_00189.jpg,24503,24503,付玉峰,2,189 +24503_c2_s1_00190.jpg,24503,24503,付玉峰,2,190 +24503_c2_s1_00191.jpg,24503,24503,付玉峰,2,191 +24503_c2_s1_00192.jpg,24503,24503,付玉峰,2,192 +24503_c2_s1_00193.jpg,24503,24503,付玉峰,2,193 +24503_c2_s1_00194.jpg,24503,24503,付玉峰,2,194 +24503_c2_s1_00195.jpg,24503,24503,付玉峰,2,195 +24503_c2_s1_00196.jpg,24503,24503,付玉峰,2,196 +24503_c2_s1_00197.jpg,24503,24503,付玉峰,2,197 +24503_c2_s1_00198.jpg,24503,24503,付玉峰,2,198 +24503_c2_s1_00199.jpg,24503,24503,付玉峰,2,199 +24503_c2_s1_00200.jpg,24503,24503,付玉峰,2,200 +24503_c2_s1_00201.jpg,24503,24503,付玉峰,2,201 +24503_c2_s1_00202.jpg,24503,24503,付玉峰,2,202 +24503_c2_s1_00203.jpg,24503,24503,付玉峰,2,203 +24503_c2_s1_00204.jpg,24503,24503,付玉峰,2,204 +24503_c2_s1_00205.jpg,24503,24503,付玉峰,2,205 +24503_c2_s1_00206.jpg,24503,24503,付玉峰,2,206 +24503_c2_s1_00207.jpg,24503,24503,付玉峰,2,207 
+24503_c2_s1_00208.jpg,24503,24503,付玉峰,2,208 +24503_c2_s1_00209.jpg,24503,24503,付玉峰,2,209 +24503_c2_s1_00210.jpg,24503,24503,付玉峰,2,210 +24503_c2_s1_00211.jpg,24503,24503,付玉峰,2,211 +24503_c2_s1_00212.jpg,24503,24503,付玉峰,2,212 +24503_c2_s1_00213.jpg,24503,24503,付玉峰,2,213 +24503_c2_s1_00214.jpg,24503,24503,付玉峰,2,214 +24503_c2_s1_00215.jpg,24503,24503,付玉峰,2,215 +24503_c2_s1_00216.jpg,24503,24503,付玉峰,2,216 +24503_c2_s1_00217.jpg,24503,24503,付玉峰,2,217 +24503_c2_s1_00218.jpg,24503,24503,付玉峰,2,218 +24503_c2_s1_00219.jpg,24503,24503,付玉峰,2,219 +24503_c2_s1_00220.jpg,24503,24503,付玉峰,2,220 +24503_c2_s1_00221.jpg,24503,24503,付玉峰,2,221 +24503_c2_s1_00222.jpg,24503,24503,付玉峰,2,222 +24503_c2_s1_00223.jpg,24503,24503,付玉峰,2,223 +24503_c2_s1_00224.jpg,24503,24503,付玉峰,2,224 +24503_c2_s1_00225.jpg,24503,24503,付玉峰,2,225 +24503_c2_s1_00226.jpg,24503,24503,付玉峰,2,226 +24503_c2_s1_00227.jpg,24503,24503,付玉峰,2,227 +24503_c2_s1_00228.jpg,24503,24503,付玉峰,2,228 +24503_c2_s1_00229.jpg,24503,24503,付玉峰,2,229 +24503_c2_s1_00230.jpg,24503,24503,付玉峰,2,230 +24503_c2_s1_00231.jpg,24503,24503,付玉峰,2,231 +24503_c2_s1_00232.jpg,24503,24503,付玉峰,2,232 +24503_c2_s1_00233.jpg,24503,24503,付玉峰,2,233 +24503_c2_s1_00234.jpg,24503,24503,付玉峰,2,234 +24503_c2_s1_00235.jpg,24503,24503,付玉峰,2,235 +24503_c2_s1_00236.jpg,24503,24503,付玉峰,2,236 +24503_c2_s1_00237.jpg,24503,24503,付玉峰,2,237 +24503_c2_s1_00238.jpg,24503,24503,付玉峰,2,238 +24503_c2_s1_00239.jpg,24503,24503,付玉峰,2,239 +24503_c2_s1_00240.jpg,24503,24503,付玉峰,2,240 +24503_c2_s1_00241.jpg,24503,24503,付玉峰,2,241 +24503_c2_s1_00242.jpg,24503,24503,付玉峰,2,242 +24503_c2_s1_00243.jpg,24503,24503,付玉峰,2,243 +24503_c2_s1_00244.jpg,24503,24503,付玉峰,2,244 +24503_c3_s1_00245.jpg,24503,24503,付玉峰,3,245 +24503_c3_s1_00246.jpg,24503,24503,付玉峰,3,246 +24503_c3_s1_00247.jpg,24503,24503,付玉峰,3,247 +24503_c3_s1_00248.jpg,24503,24503,付玉峰,3,248 +24503_c3_s1_00249.jpg,24503,24503,付玉峰,3,249 +24503_c3_s1_00250.jpg,24503,24503,付玉峰,3,250 +24503_c3_s1_00251.jpg,24503,24503,付玉峰,3,251 
+24503_c3_s1_00252.jpg,24503,24503,付玉峰,3,252 +24503_c3_s1_00253.jpg,24503,24503,付玉峰,3,253 +24503_c3_s1_00254.jpg,24503,24503,付玉峰,3,254 +24503_c3_s1_00255.jpg,24503,24503,付玉峰,3,255 +24503_c3_s1_00256.jpg,24503,24503,付玉峰,3,256 +24503_c3_s1_00257.jpg,24503,24503,付玉峰,3,257 +24503_c3_s1_00258.jpg,24503,24503,付玉峰,3,258 +24503_c3_s1_00259.jpg,24503,24503,付玉峰,3,259 +24503_c3_s1_00260.jpg,24503,24503,付玉峰,3,260 +24503_c3_s1_00261.jpg,24503,24503,付玉峰,3,261 +24503_c3_s1_00262.jpg,24503,24503,付玉峰,3,262 +24503_c3_s1_00263.jpg,24503,24503,付玉峰,3,263 +24503_c3_s1_00264.jpg,24503,24503,付玉峰,3,264 +24503_c3_s1_00265.jpg,24503,24503,付玉峰,3,265 +24503_c3_s1_00266.jpg,24503,24503,付玉峰,3,266 +24503_c3_s1_00267.jpg,24503,24503,付玉峰,3,267 +24503_c3_s1_00268.jpg,24503,24503,付玉峰,3,268 +24503_c3_s1_00269.jpg,24503,24503,付玉峰,3,269 +24503_c3_s1_00270.jpg,24503,24503,付玉峰,3,270 +24503_c3_s1_00271.jpg,24503,24503,付玉峰,3,271 +24503_c3_s1_00272.jpg,24503,24503,付玉峰,3,272 +24503_c3_s1_00273.jpg,24503,24503,付玉峰,3,273 +24503_c3_s1_00274.jpg,24503,24503,付玉峰,3,274 +24503_c3_s1_00275.jpg,24503,24503,付玉峰,3,275 +24503_c3_s1_00276.jpg,24503,24503,付玉峰,3,276 +24503_c3_s1_00277.jpg,24503,24503,付玉峰,3,277 +24503_c3_s1_00278.jpg,24503,24503,付玉峰,3,278 +24503_c3_s1_00279.jpg,24503,24503,付玉峰,3,279 +24503_c3_s1_00280.jpg,24503,24503,付玉峰,3,280 +24503_c3_s1_00281.jpg,24503,24503,付玉峰,3,281 +24503_c3_s1_00282.jpg,24503,24503,付玉峰,3,282 +24503_c3_s1_00283.jpg,24503,24503,付玉峰,3,283 +24503_c3_s1_00284.jpg,24503,24503,付玉峰,3,284 +24503_c3_s1_00285.jpg,24503,24503,付玉峰,3,285 +24503_c3_s1_00286.jpg,24503,24503,付玉峰,3,286 +24503_c3_s1_00287.jpg,24503,24503,付玉峰,3,287 +24503_c3_s1_00288.jpg,24503,24503,付玉峰,3,288 +24503_c3_s1_00289.jpg,24503,24503,付玉峰,3,289 +24503_c3_s1_00290.jpg,24503,24503,付玉峰,3,290 +24503_c3_s1_00291.jpg,24503,24503,付玉峰,3,291 +24503_c3_s1_00292.jpg,24503,24503,付玉峰,3,292 +24503_c3_s1_00293.jpg,24503,24503,付玉峰,3,293 +24503_c3_s1_00294.jpg,24503,24503,付玉峰,3,294 +24503_c3_s1_00295.jpg,24503,24503,付玉峰,3,295 
+24503_c3_s1_00296.jpg,24503,24503,付玉峰,3,296 +24503_c3_s1_00297.jpg,24503,24503,付玉峰,3,297 +24503_c3_s1_00298.jpg,24503,24503,付玉峰,3,298 +24504_c1_s1_00299.jpg,24504,24504,李树华,1,299 +24504_c1_s1_00300.jpg,24504,24504,李树华,1,300 +24504_c1_s1_00301.jpg,24504,24504,李树华,1,301 +24504_c1_s1_00302.jpg,24504,24504,李树华,1,302 +24504_c1_s1_00303.jpg,24504,24504,李树华,1,303 +24504_c1_s1_00304.jpg,24504,24504,李树华,1,304 +24504_c1_s1_00305.jpg,24504,24504,李树华,1,305 +24504_c1_s1_00306.jpg,24504,24504,李树华,1,306 +24504_c1_s1_00307.jpg,24504,24504,李树华,1,307 +24504_c1_s1_00308.jpg,24504,24504,李树华,1,308 +24504_c1_s1_00309.jpg,24504,24504,李树华,1,309 +24504_c1_s1_00310.jpg,24504,24504,李树华,1,310 +24504_c1_s1_00311.jpg,24504,24504,李树华,1,311 +24504_c1_s1_00312.jpg,24504,24504,李树华,1,312 +24504_c1_s1_00313.jpg,24504,24504,李树华,1,313 +24504_c1_s1_00314.jpg,24504,24504,李树华,1,314 +24504_c1_s1_00315.jpg,24504,24504,李树华,1,315 +24504_c1_s1_00316.jpg,24504,24504,李树华,1,316 +24504_c1_s1_00317.jpg,24504,24504,李树华,1,317 +24504_c1_s1_00318.jpg,24504,24504,李树华,1,318 +24504_c1_s1_00319.jpg,24504,24504,李树华,1,319 +24504_c1_s1_00320.jpg,24504,24504,李树华,1,320 +24504_c1_s1_00321.jpg,24504,24504,李树华,1,321 +24504_c1_s1_00322.jpg,24504,24504,李树华,1,322 +24504_c1_s1_00323.jpg,24504,24504,李树华,1,323 +24504_c1_s1_00324.jpg,24504,24504,李树华,1,324 +24504_c1_s1_00325.jpg,24504,24504,李树华,1,325 +24504_c1_s1_00326.jpg,24504,24504,李树华,1,326 +24504_c1_s1_00327.jpg,24504,24504,李树华,1,327 +24504_c1_s1_00328.jpg,24504,24504,李树华,1,328 +24504_c1_s1_00329.jpg,24504,24504,李树华,1,329 +24504_c1_s1_00330.jpg,24504,24504,李树华,1,330 +24504_c1_s1_00331.jpg,24504,24504,李树华,1,331 +24504_c1_s1_00332.jpg,24504,24504,李树华,1,332 +24504_c1_s1_00333.jpg,24504,24504,李树华,1,333 +24504_c1_s1_00334.jpg,24504,24504,李树华,1,334 +24504_c1_s1_00335.jpg,24504,24504,李树华,1,335 +24504_c1_s1_00336.jpg,24504,24504,李树华,1,336 +24504_c1_s1_00337.jpg,24504,24504,李树华,1,337 +24504_c1_s1_00338.jpg,24504,24504,李树华,1,338 +24504_c1_s1_00339.jpg,24504,24504,李树华,1,339 
+24504_c1_s1_00340.jpg,24504,24504,李树华,1,340 +24504_c2_s1_00341.jpg,24504,24504,李树华,2,341 +24504_c2_s1_00342.jpg,24504,24504,李树华,2,342 +24504_c2_s1_00343.jpg,24504,24504,李树华,2,343 +24504_c2_s1_00344.jpg,24504,24504,李树华,2,344 +24504_c2_s1_00345.jpg,24504,24504,李树华,2,345 +24504_c2_s1_00346.jpg,24504,24504,李树华,2,346 +24504_c2_s1_00347.jpg,24504,24504,李树华,2,347 +24504_c2_s1_00348.jpg,24504,24504,李树华,2,348 +24504_c2_s1_00349.jpg,24504,24504,李树华,2,349 +24504_c2_s1_00350.jpg,24504,24504,李树华,2,350 +24504_c2_s1_00351.jpg,24504,24504,李树华,2,351 +24504_c2_s1_00352.jpg,24504,24504,李树华,2,352 +24504_c2_s1_00353.jpg,24504,24504,李树华,2,353 +24504_c2_s1_00354.jpg,24504,24504,李树华,2,354 +24504_c2_s1_00355.jpg,24504,24504,李树华,2,355 +24504_c2_s1_00356.jpg,24504,24504,李树华,2,356 +24504_c2_s1_00357.jpg,24504,24504,李树华,2,357 +24504_c2_s1_00358.jpg,24504,24504,李树华,2,358 +24504_c2_s1_00359.jpg,24504,24504,李树华,2,359 +24504_c2_s1_00360.jpg,24504,24504,李树华,2,360 +24504_c2_s1_00361.jpg,24504,24504,李树华,2,361 +24504_c2_s1_00362.jpg,24504,24504,李树华,2,362 +24504_c2_s1_00363.jpg,24504,24504,李树华,2,363 +24504_c2_s1_00364.jpg,24504,24504,李树华,2,364 +24504_c2_s1_00365.jpg,24504,24504,李树华,2,365 +24504_c2_s1_00366.jpg,24504,24504,李树华,2,366 +24504_c2_s1_00367.jpg,24504,24504,李树华,2,367 +24504_c2_s1_00368.jpg,24504,24504,李树华,2,368 +24504_c2_s1_00369.jpg,24504,24504,李树华,2,369 +24504_c2_s1_00370.jpg,24504,24504,李树华,2,370 +24504_c2_s1_00371.jpg,24504,24504,李树华,2,371 +24504_c2_s1_00372.jpg,24504,24504,李树华,2,372 +24504_c2_s1_00373.jpg,24504,24504,李树华,2,373 +24504_c2_s1_00374.jpg,24504,24504,李树华,2,374 +24504_c2_s1_00375.jpg,24504,24504,李树华,2,375 +24504_c2_s1_00376.jpg,24504,24504,李树华,2,376 +24504_c2_s1_00377.jpg,24504,24504,李树华,2,377 +24504_c2_s1_00378.jpg,24504,24504,李树华,2,378 +24504_c2_s1_00379.jpg,24504,24504,李树华,2,379 +24504_c2_s1_00380.jpg,24504,24504,李树华,2,380 +24504_c2_s1_00381.jpg,24504,24504,李树华,2,381 +24504_c2_s1_00382.jpg,24504,24504,李树华,2,382 +24504_c3_s1_00383.jpg,24504,24504,李树华,3,383 
+24504_c3_s1_00384.jpg,24504,24504,李树华,3,384 +24504_c3_s1_00385.jpg,24504,24504,李树华,3,385 +24504_c3_s1_00386.jpg,24504,24504,李树华,3,386 +24504_c3_s1_00387.jpg,24504,24504,李树华,3,387 +24504_c3_s1_00388.jpg,24504,24504,李树华,3,388 +24504_c3_s1_00389.jpg,24504,24504,李树华,3,389 +24504_c3_s1_00390.jpg,24504,24504,李树华,3,390 +24504_c3_s1_00391.jpg,24504,24504,李树华,3,391 +24504_c3_s1_00392.jpg,24504,24504,李树华,3,392 +24504_c3_s1_00393.jpg,24504,24504,李树华,3,393 +24504_c3_s1_00394.jpg,24504,24504,李树华,3,394 +24504_c3_s1_00395.jpg,24504,24504,李树华,3,395 +24504_c3_s1_00396.jpg,24504,24504,李树华,3,396 +24504_c3_s1_00397.jpg,24504,24504,李树华,3,397 +24504_c3_s1_00398.jpg,24504,24504,李树华,3,398 +24504_c3_s1_00399.jpg,24504,24504,李树华,3,399 +24504_c3_s1_00400.jpg,24504,24504,李树华,3,400 +24504_c3_s1_00401.jpg,24504,24504,李树华,3,401 +24504_c3_s1_00402.jpg,24504,24504,李树华,3,402 +24504_c3_s1_00403.jpg,24504,24504,李树华,3,403 +24504_c3_s1_00404.jpg,24504,24504,李树华,3,404 +24504_c3_s1_00405.jpg,24504,24504,李树华,3,405 +24504_c3_s1_00406.jpg,24504,24504,李树华,3,406 +24504_c3_s1_00407.jpg,24504,24504,李树华,3,407 +24504_c3_s1_00408.jpg,24504,24504,李树华,3,408 +24504_c3_s1_00409.jpg,24504,24504,李树华,3,409 +24504_c3_s1_00410.jpg,24504,24504,李树华,3,410 +24504_c3_s1_00411.jpg,24504,24504,李树华,3,411 +24504_c3_s1_00412.jpg,24504,24504,李树华,3,412 +24504_c3_s1_00413.jpg,24504,24504,李树华,3,413 +24504_c3_s1_00414.jpg,24504,24504,李树华,3,414 +24504_c3_s1_00415.jpg,24504,24504,李树华,3,415 +24504_c3_s1_00416.jpg,24504,24504,李树华,3,416 +24504_c3_s1_00417.jpg,24504,24504,李树华,3,417 +24504_c3_s1_00418.jpg,24504,24504,李树华,3,418 +24504_c3_s1_00419.jpg,24504,24504,李树华,3,419 +24504_c3_s1_00420.jpg,24504,24504,李树华,3,420 +24505_c1_s1_00421.jpg,24505,24505,刘杰,1,421 +24505_c1_s1_00422.jpg,24505,24505,刘杰,1,422 +24505_c1_s1_00423.jpg,24505,24505,刘杰,1,423 +24505_c1_s1_00424.jpg,24505,24505,刘杰,1,424 +24505_c1_s1_00425.jpg,24505,24505,刘杰,1,425 +24505_c1_s1_00426.jpg,24505,24505,刘杰,1,426 +24505_c1_s1_00427.jpg,24505,24505,刘杰,1,427 
+24505_c1_s1_00428.jpg,24505,24505,刘杰,1,428 +24505_c1_s1_00429.jpg,24505,24505,刘杰,1,429 +24505_c1_s1_00430.jpg,24505,24505,刘杰,1,430 +24505_c1_s1_00431.jpg,24505,24505,刘杰,1,431 +24505_c1_s1_00432.jpg,24505,24505,刘杰,1,432 +24505_c1_s1_00433.jpg,24505,24505,刘杰,1,433 +24505_c1_s1_00434.jpg,24505,24505,刘杰,1,434 +24505_c1_s1_00435.jpg,24505,24505,刘杰,1,435 +24505_c1_s1_00436.jpg,24505,24505,刘杰,1,436 +24505_c1_s1_00437.jpg,24505,24505,刘杰,1,437 +24505_c1_s1_00438.jpg,24505,24505,刘杰,1,438 +24505_c1_s1_00439.jpg,24505,24505,刘杰,1,439 +24505_c1_s1_00440.jpg,24505,24505,刘杰,1,440 +24505_c1_s1_00441.jpg,24505,24505,刘杰,1,441 +24505_c1_s1_00442.jpg,24505,24505,刘杰,1,442 +24505_c1_s1_00443.jpg,24505,24505,刘杰,1,443 +24505_c1_s1_00444.jpg,24505,24505,刘杰,1,444 +24505_c1_s1_00445.jpg,24505,24505,刘杰,1,445 +24505_c1_s1_00446.jpg,24505,24505,刘杰,1,446 +24505_c1_s1_00447.jpg,24505,24505,刘杰,1,447 +24505_c1_s1_00448.jpg,24505,24505,刘杰,1,448 +24505_c1_s1_00449.jpg,24505,24505,刘杰,1,449 +24505_c1_s1_00450.jpg,24505,24505,刘杰,1,450 +24505_c1_s1_00451.jpg,24505,24505,刘杰,1,451 +24505_c1_s1_00452.jpg,24505,24505,刘杰,1,452 +24505_c1_s1_00453.jpg,24505,24505,刘杰,1,453 +24505_c1_s1_00454.jpg,24505,24505,刘杰,1,454 +24505_c1_s1_00455.jpg,24505,24505,刘杰,1,455 +24505_c1_s1_00456.jpg,24505,24505,刘杰,1,456 +24505_c1_s1_00457.jpg,24505,24505,刘杰,1,457 +24505_c1_s1_00458.jpg,24505,24505,刘杰,1,458 +24505_c1_s1_00459.jpg,24505,24505,刘杰,1,459 +24505_c1_s1_00460.jpg,24505,24505,刘杰,1,460 +24505_c1_s1_00461.jpg,24505,24505,刘杰,1,461 +24505_c1_s1_00462.jpg,24505,24505,刘杰,1,462 +24505_c1_s1_00463.jpg,24505,24505,刘杰,1,463 +24505_c1_s1_00464.jpg,24505,24505,刘杰,1,464 +24505_c1_s1_00465.jpg,24505,24505,刘杰,1,465 +24505_c1_s1_00466.jpg,24505,24505,刘杰,1,466 +24505_c1_s1_00467.jpg,24505,24505,刘杰,1,467 +24505_c1_s1_00468.jpg,24505,24505,刘杰,1,468 +24505_c1_s1_00469.jpg,24505,24505,刘杰,1,469 +24505_c1_s1_00470.jpg,24505,24505,刘杰,1,470 +24505_c1_s1_00471.jpg,24505,24505,刘杰,1,471 +24505_c1_s1_00472.jpg,24505,24505,刘杰,1,472 
+24505_c1_s1_00473.jpg,24505,24505,刘杰,1,473 +24505_c1_s1_00474.jpg,24505,24505,刘杰,1,474 +24505_c1_s1_00475.jpg,24505,24505,刘杰,1,475 +24505_c1_s1_00476.jpg,24505,24505,刘杰,1,476 +24505_c1_s1_00477.jpg,24505,24505,刘杰,1,477 +24505_c1_s1_00478.jpg,24505,24505,刘杰,1,478 +24505_c1_s1_00479.jpg,24505,24505,刘杰,1,479 +24505_c1_s1_00480.jpg,24505,24505,刘杰,1,480 +24505_c1_s1_00481.jpg,24505,24505,刘杰,1,481 +24505_c1_s1_00482.jpg,24505,24505,刘杰,1,482 +24505_c1_s1_00483.jpg,24505,24505,刘杰,1,483 +24505_c2_s1_00484.jpg,24505,24505,刘杰,2,484 +24505_c2_s1_00485.jpg,24505,24505,刘杰,2,485 +24505_c2_s1_00486.jpg,24505,24505,刘杰,2,486 +24505_c2_s1_00487.jpg,24505,24505,刘杰,2,487 +24505_c2_s1_00488.jpg,24505,24505,刘杰,2,488 +24505_c2_s1_00489.jpg,24505,24505,刘杰,2,489 +24505_c2_s1_00490.jpg,24505,24505,刘杰,2,490 +24505_c2_s1_00491.jpg,24505,24505,刘杰,2,491 +24505_c2_s1_00492.jpg,24505,24505,刘杰,2,492 +24505_c2_s1_00493.jpg,24505,24505,刘杰,2,493 +24505_c2_s1_00494.jpg,24505,24505,刘杰,2,494 +24505_c2_s1_00495.jpg,24505,24505,刘杰,2,495 +24505_c2_s1_00496.jpg,24505,24505,刘杰,2,496 +24505_c2_s1_00497.jpg,24505,24505,刘杰,2,497 +24505_c2_s1_00498.jpg,24505,24505,刘杰,2,498 +24505_c2_s1_00499.jpg,24505,24505,刘杰,2,499 +24505_c2_s1_00500.jpg,24505,24505,刘杰,2,500 +24505_c2_s1_00501.jpg,24505,24505,刘杰,2,501 +24505_c2_s1_00502.jpg,24505,24505,刘杰,2,502 +24505_c2_s1_00503.jpg,24505,24505,刘杰,2,503 +24505_c2_s1_00504.jpg,24505,24505,刘杰,2,504 +24505_c2_s1_00505.jpg,24505,24505,刘杰,2,505 +24505_c2_s1_00506.jpg,24505,24505,刘杰,2,506 +24505_c2_s1_00507.jpg,24505,24505,刘杰,2,507 +24505_c2_s1_00508.jpg,24505,24505,刘杰,2,508 +24505_c2_s1_00509.jpg,24505,24505,刘杰,2,509 +24505_c2_s1_00510.jpg,24505,24505,刘杰,2,510 +24505_c2_s1_00511.jpg,24505,24505,刘杰,2,511 +24505_c2_s1_00512.jpg,24505,24505,刘杰,2,512 +24505_c2_s1_00513.jpg,24505,24505,刘杰,2,513 +24505_c2_s1_00514.jpg,24505,24505,刘杰,2,514 +24505_c2_s1_00515.jpg,24505,24505,刘杰,2,515 +24505_c2_s1_00516.jpg,24505,24505,刘杰,2,516 +24505_c2_s1_00517.jpg,24505,24505,刘杰,2,517 
+24505_c2_s1_00518.jpg,24505,24505,刘杰,2,518 +24505_c2_s1_00519.jpg,24505,24505,刘杰,2,519 +24505_c2_s1_00520.jpg,24505,24505,刘杰,2,520 +24505_c2_s1_00521.jpg,24505,24505,刘杰,2,521 +24505_c2_s1_00522.jpg,24505,24505,刘杰,2,522 +24505_c2_s1_00523.jpg,24505,24505,刘杰,2,523 +24505_c2_s1_00524.jpg,24505,24505,刘杰,2,524 +24505_c2_s1_00525.jpg,24505,24505,刘杰,2,525 +24505_c2_s1_00526.jpg,24505,24505,刘杰,2,526 +24505_c2_s1_00527.jpg,24505,24505,刘杰,2,527 +24505_c2_s1_00528.jpg,24505,24505,刘杰,2,528 +24505_c2_s1_00529.jpg,24505,24505,刘杰,2,529 +24505_c2_s1_00530.jpg,24505,24505,刘杰,2,530 +24505_c2_s1_00531.jpg,24505,24505,刘杰,2,531 +24505_c2_s1_00532.jpg,24505,24505,刘杰,2,532 +24505_c2_s1_00533.jpg,24505,24505,刘杰,2,533 +24505_c2_s1_00534.jpg,24505,24505,刘杰,2,534 +24505_c2_s1_00535.jpg,24505,24505,刘杰,2,535 +24505_c2_s1_00536.jpg,24505,24505,刘杰,2,536 +24505_c2_s1_00537.jpg,24505,24505,刘杰,2,537 +24505_c2_s1_00538.jpg,24505,24505,刘杰,2,538 +24505_c2_s1_00539.jpg,24505,24505,刘杰,2,539 +24505_c2_s1_00540.jpg,24505,24505,刘杰,2,540 +24505_c2_s1_00541.jpg,24505,24505,刘杰,2,541 +24505_c2_s1_00542.jpg,24505,24505,刘杰,2,542 +24505_c2_s1_00543.jpg,24505,24505,刘杰,2,543 +24505_c2_s1_00544.jpg,24505,24505,刘杰,2,544 +24505_c2_s1_00545.jpg,24505,24505,刘杰,2,545 +24505_c2_s1_00546.jpg,24505,24505,刘杰,2,546 +24505_c2_s1_00547.jpg,24505,24505,刘杰,2,547 +24505_c2_s1_00548.jpg,24505,24505,刘杰,2,548 +24505_c2_s1_00549.jpg,24505,24505,刘杰,2,549 +24505_c2_s1_00550.jpg,24505,24505,刘杰,2,550 +24505_c2_s1_00551.jpg,24505,24505,刘杰,2,551 +24505_c2_s1_00552.jpg,24505,24505,刘杰,2,552 +24505_c2_s1_00553.jpg,24505,24505,刘杰,2,553 +24505_c2_s1_00554.jpg,24505,24505,刘杰,2,554 +24505_c2_s1_00555.jpg,24505,24505,刘杰,2,555 +24505_c3_s1_00556.jpg,24505,24505,刘杰,3,556 +24505_c3_s1_00557.jpg,24505,24505,刘杰,3,557 +24505_c3_s1_00558.jpg,24505,24505,刘杰,3,558 +24505_c3_s1_00559.jpg,24505,24505,刘杰,3,559 +24505_c3_s1_00560.jpg,24505,24505,刘杰,3,560 +24505_c3_s1_00561.jpg,24505,24505,刘杰,3,561 +24505_c3_s1_00562.jpg,24505,24505,刘杰,3,562 
+24505_c3_s1_00563.jpg,24505,24505,刘杰,3,563 +24505_c3_s1_00564.jpg,24505,24505,刘杰,3,564 +24505_c3_s1_00565.jpg,24505,24505,刘杰,3,565 +24505_c3_s1_00566.jpg,24505,24505,刘杰,3,566 +24505_c3_s1_00567.jpg,24505,24505,刘杰,3,567 +24505_c3_s1_00568.jpg,24505,24505,刘杰,3,568 +24505_c3_s1_00569.jpg,24505,24505,刘杰,3,569 +24505_c3_s1_00570.jpg,24505,24505,刘杰,3,570 +24505_c3_s1_00571.jpg,24505,24505,刘杰,3,571 +24505_c3_s1_00572.jpg,24505,24505,刘杰,3,572 +24505_c3_s1_00573.jpg,24505,24505,刘杰,3,573 +24505_c3_s1_00574.jpg,24505,24505,刘杰,3,574 +24505_c3_s1_00575.jpg,24505,24505,刘杰,3,575 +24505_c3_s1_00576.jpg,24505,24505,刘杰,3,576 +24505_c3_s1_00577.jpg,24505,24505,刘杰,3,577 +24505_c3_s1_00578.jpg,24505,24505,刘杰,3,578 +24505_c3_s1_00579.jpg,24505,24505,刘杰,3,579 +24505_c3_s1_00580.jpg,24505,24505,刘杰,3,580 +24505_c3_s1_00581.jpg,24505,24505,刘杰,3,581 +24505_c3_s1_00582.jpg,24505,24505,刘杰,3,582 +24505_c3_s1_00583.jpg,24505,24505,刘杰,3,583 +24505_c3_s1_00584.jpg,24505,24505,刘杰,3,584 +24505_c3_s1_00585.jpg,24505,24505,刘杰,3,585 +24505_c3_s1_00586.jpg,24505,24505,刘杰,3,586 +24505_c3_s1_00587.jpg,24505,24505,刘杰,3,587 +24505_c3_s1_00588.jpg,24505,24505,刘杰,3,588 +24505_c3_s1_00589.jpg,24505,24505,刘杰,3,589 +24505_c3_s1_00590.jpg,24505,24505,刘杰,3,590 +24505_c3_s1_00591.jpg,24505,24505,刘杰,3,591 +24505_c3_s1_00592.jpg,24505,24505,刘杰,3,592 +24505_c3_s1_00593.jpg,24505,24505,刘杰,3,593 +24505_c3_s1_00594.jpg,24505,24505,刘杰,3,594 +24505_c3_s1_00595.jpg,24505,24505,刘杰,3,595 +24505_c3_s1_00596.jpg,24505,24505,刘杰,3,596 +24505_c3_s1_00597.jpg,24505,24505,刘杰,3,597 +24505_c3_s1_00598.jpg,24505,24505,刘杰,3,598 +24505_c3_s1_00599.jpg,24505,24505,刘杰,3,599 +24505_c3_s1_00600.jpg,24505,24505,刘杰,3,600 +24505_c3_s1_00601.jpg,24505,24505,刘杰,3,601 +24505_c3_s1_00602.jpg,24505,24505,刘杰,3,602 +24505_c3_s1_00603.jpg,24505,24505,刘杰,3,603 +24505_c3_s1_00604.jpg,24505,24505,刘杰,3,604 +24505_c3_s1_00605.jpg,24505,24505,刘杰,3,605 +24505_c3_s1_00606.jpg,24505,24505,刘杰,3,606 +24505_c3_s1_00607.jpg,24505,24505,刘杰,3,607 
+24505_c3_s1_00608.jpg,24505,24505,刘杰,3,608 +24505_c3_s1_00609.jpg,24505,24505,刘杰,3,609 +24505_c3_s1_00610.jpg,24505,24505,刘杰,3,610 +24505_c3_s1_00611.jpg,24505,24505,刘杰,3,611 +24505_c3_s1_00612.jpg,24505,24505,刘杰,3,612 +24505_c3_s1_00613.jpg,24505,24505,刘杰,3,613 +24505_c3_s1_00614.jpg,24505,24505,刘杰,3,614 +24505_c3_s1_00615.jpg,24505,24505,刘杰,3,615 +24505_c3_s1_00616.jpg,24505,24505,刘杰,3,616 +24505_c3_s1_00617.jpg,24505,24505,刘杰,3,617 +24505_c3_s1_00618.jpg,24505,24505,刘杰,3,618 +24505_c3_s1_00619.jpg,24505,24505,刘杰,3,619 +24505_c3_s1_00620.jpg,24505,24505,刘杰,3,620 +24505_c3_s1_00621.jpg,24505,24505,刘杰,3,621 +24505_c3_s1_00622.jpg,24505,24505,刘杰,3,622 +24506_c1_s1_00623.jpg,24506,24506,黄伟斌,1,623 +24506_c1_s1_00624.jpg,24506,24506,黄伟斌,1,624 +24506_c1_s1_00625.jpg,24506,24506,黄伟斌,1,625 +24506_c1_s1_00626.jpg,24506,24506,黄伟斌,1,626 +24506_c1_s1_00627.jpg,24506,24506,黄伟斌,1,627 +24506_c1_s1_00628.jpg,24506,24506,黄伟斌,1,628 +24506_c1_s1_00629.jpg,24506,24506,黄伟斌,1,629 +24506_c1_s1_00630.jpg,24506,24506,黄伟斌,1,630 +24506_c1_s1_00631.jpg,24506,24506,黄伟斌,1,631 +24506_c1_s1_00632.jpg,24506,24506,黄伟斌,1,632 +24506_c1_s1_00633.jpg,24506,24506,黄伟斌,1,633 +24506_c1_s1_00634.jpg,24506,24506,黄伟斌,1,634 +24506_c1_s1_00635.jpg,24506,24506,黄伟斌,1,635 +24506_c1_s1_00636.jpg,24506,24506,黄伟斌,1,636 +24506_c1_s1_00637.jpg,24506,24506,黄伟斌,1,637 +24506_c1_s1_00638.jpg,24506,24506,黄伟斌,1,638 +24506_c1_s1_00639.jpg,24506,24506,黄伟斌,1,639 +24506_c1_s1_00640.jpg,24506,24506,黄伟斌,1,640 +24506_c1_s1_00641.jpg,24506,24506,黄伟斌,1,641 +24506_c1_s1_00642.jpg,24506,24506,黄伟斌,1,642 +24506_c1_s1_00643.jpg,24506,24506,黄伟斌,1,643 +24506_c1_s1_00644.jpg,24506,24506,黄伟斌,1,644 +24506_c1_s1_00645.jpg,24506,24506,黄伟斌,1,645 +24506_c1_s1_00646.jpg,24506,24506,黄伟斌,1,646 +24506_c1_s1_00647.jpg,24506,24506,黄伟斌,1,647 +24506_c1_s1_00648.jpg,24506,24506,黄伟斌,1,648 +24506_c1_s1_00649.jpg,24506,24506,黄伟斌,1,649 +24506_c1_s1_00650.jpg,24506,24506,黄伟斌,1,650 +24506_c1_s1_00651.jpg,24506,24506,黄伟斌,1,651 
+24506_c1_s1_00652.jpg,24506,24506,黄伟斌,1,652 +24506_c1_s1_00653.jpg,24506,24506,黄伟斌,1,653 +24506_c1_s1_00654.jpg,24506,24506,黄伟斌,1,654 +24506_c1_s1_00655.jpg,24506,24506,黄伟斌,1,655 +24506_c1_s1_00656.jpg,24506,24506,黄伟斌,1,656 +24506_c1_s1_00657.jpg,24506,24506,黄伟斌,1,657 +24506_c1_s1_00658.jpg,24506,24506,黄伟斌,1,658 +24506_c2_s1_00659.jpg,24506,24506,黄伟斌,2,659 +24506_c2_s1_00660.jpg,24506,24506,黄伟斌,2,660 +24506_c2_s1_00661.jpg,24506,24506,黄伟斌,2,661 +24506_c2_s1_00662.jpg,24506,24506,黄伟斌,2,662 +24506_c2_s1_00663.jpg,24506,24506,黄伟斌,2,663 +24506_c2_s1_00664.jpg,24506,24506,黄伟斌,2,664 +24506_c2_s1_00665.jpg,24506,24506,黄伟斌,2,665 +24506_c2_s1_00666.jpg,24506,24506,黄伟斌,2,666 +24506_c2_s1_00667.jpg,24506,24506,黄伟斌,2,667 +24506_c2_s1_00668.jpg,24506,24506,黄伟斌,2,668 +24506_c2_s1_00669.jpg,24506,24506,黄伟斌,2,669 +24506_c2_s1_00670.jpg,24506,24506,黄伟斌,2,670 +24506_c2_s1_00671.jpg,24506,24506,黄伟斌,2,671 +24506_c2_s1_00672.jpg,24506,24506,黄伟斌,2,672 +24506_c2_s1_00673.jpg,24506,24506,黄伟斌,2,673 +24506_c2_s1_00674.jpg,24506,24506,黄伟斌,2,674 +24506_c2_s1_00675.jpg,24506,24506,黄伟斌,2,675 +24506_c2_s1_00676.jpg,24506,24506,黄伟斌,2,676 +24506_c2_s1_00677.jpg,24506,24506,黄伟斌,2,677 +24506_c2_s1_00678.jpg,24506,24506,黄伟斌,2,678 +24506_c2_s1_00679.jpg,24506,24506,黄伟斌,2,679 +24506_c2_s1_00680.jpg,24506,24506,黄伟斌,2,680 +24506_c2_s1_00681.jpg,24506,24506,黄伟斌,2,681 +24506_c2_s1_00682.jpg,24506,24506,黄伟斌,2,682 +24506_c2_s1_00683.jpg,24506,24506,黄伟斌,2,683 +24506_c2_s1_00684.jpg,24506,24506,黄伟斌,2,684 +24506_c2_s1_00685.jpg,24506,24506,黄伟斌,2,685 +24506_c2_s1_00686.jpg,24506,24506,黄伟斌,2,686 +24506_c2_s1_00687.jpg,24506,24506,黄伟斌,2,687 +24506_c2_s1_00688.jpg,24506,24506,黄伟斌,2,688 +24506_c2_s1_00689.jpg,24506,24506,黄伟斌,2,689 +24506_c2_s1_00690.jpg,24506,24506,黄伟斌,2,690 +24506_c2_s1_00691.jpg,24506,24506,黄伟斌,2,691 +24506_c2_s1_00692.jpg,24506,24506,黄伟斌,2,692 +24506_c2_s1_00693.jpg,24506,24506,黄伟斌,2,693 +24506_c2_s1_00694.jpg,24506,24506,黄伟斌,2,694 +24506_c2_s1_00695.jpg,24506,24506,黄伟斌,2,695 
+24506_c2_s1_00696.jpg,24506,24506,黄伟斌,2,696 +24506_c2_s1_00697.jpg,24506,24506,黄伟斌,2,697 +24506_c2_s1_00698.jpg,24506,24506,黄伟斌,2,698 +24506_c2_s1_00699.jpg,24506,24506,黄伟斌,2,699 +24506_c2_s1_00700.jpg,24506,24506,黄伟斌,2,700 +24506_c2_s1_00701.jpg,24506,24506,黄伟斌,2,701 +24506_c2_s1_00702.jpg,24506,24506,黄伟斌,2,702 +24506_c2_s1_00703.jpg,24506,24506,黄伟斌,2,703 +24506_c2_s1_00704.jpg,24506,24506,黄伟斌,2,704 +24506_c2_s1_00705.jpg,24506,24506,黄伟斌,2,705 +24506_c2_s1_00706.jpg,24506,24506,黄伟斌,2,706 +24506_c2_s1_00707.jpg,24506,24506,黄伟斌,2,707 +24506_c2_s1_00708.jpg,24506,24506,黄伟斌,2,708 +24506_c2_s1_00709.jpg,24506,24506,黄伟斌,2,709 +24506_c2_s1_00710.jpg,24506,24506,黄伟斌,2,710 +24506_c2_s1_00711.jpg,24506,24506,黄伟斌,2,711 +24506_c2_s1_00712.jpg,24506,24506,黄伟斌,2,712 +24506_c2_s1_00713.jpg,24506,24506,黄伟斌,2,713 +24506_c2_s1_00714.jpg,24506,24506,黄伟斌,2,714 +24506_c2_s1_00715.jpg,24506,24506,黄伟斌,2,715 +24506_c2_s1_00716.jpg,24506,24506,黄伟斌,2,716 +24506_c2_s1_00717.jpg,24506,24506,黄伟斌,2,717 +24506_c2_s1_00718.jpg,24506,24506,黄伟斌,2,718 +24506_c3_s1_00719.jpg,24506,24506,黄伟斌,3,719 +24506_c3_s1_00720.jpg,24506,24506,黄伟斌,3,720 +24506_c3_s1_00721.jpg,24506,24506,黄伟斌,3,721 +24506_c3_s1_00722.jpg,24506,24506,黄伟斌,3,722 +24506_c3_s1_00723.jpg,24506,24506,黄伟斌,3,723 +24506_c3_s1_00724.jpg,24506,24506,黄伟斌,3,724 +24506_c3_s1_00725.jpg,24506,24506,黄伟斌,3,725 +24506_c3_s1_00726.jpg,24506,24506,黄伟斌,3,726 +24506_c3_s1_00727.jpg,24506,24506,黄伟斌,3,727 +24506_c3_s1_00728.jpg,24506,24506,黄伟斌,3,728 +24506_c3_s1_00729.jpg,24506,24506,黄伟斌,3,729 +24506_c3_s1_00730.jpg,24506,24506,黄伟斌,3,730 +24506_c3_s1_00731.jpg,24506,24506,黄伟斌,3,731 +24506_c3_s1_00732.jpg,24506,24506,黄伟斌,3,732 +24506_c3_s1_00733.jpg,24506,24506,黄伟斌,3,733 +24506_c3_s1_00734.jpg,24506,24506,黄伟斌,3,734 +24506_c3_s1_00735.jpg,24506,24506,黄伟斌,3,735 +24506_c3_s1_00736.jpg,24506,24506,黄伟斌,3,736 +24506_c3_s1_00737.jpg,24506,24506,黄伟斌,3,737 +24506_c3_s1_00738.jpg,24506,24506,黄伟斌,3,738 +24506_c3_s1_00739.jpg,24506,24506,黄伟斌,3,739 
#!/usr/bin/env python3
"""Offline basket-contact segmentation entry point (main_basket.py).

Flow: select the basket ROI in an OpenCV window -> rising edge of hand/basket
contact -> Phase2 (ActionFormer is skipped).
"""
from __future__ import annotations

import argparse
import os
import sys
from pathlib import Path

PACK_ROOT = Path(__file__).resolve().parent
sys.path.insert(0, str(PACK_ROOT / "src"))

from paths import ensure_code_on_path

ensure_code_on_path(PACK_ROOT)

from config import load_run_config
from orchestrator import run_basket_pipeline


def main() -> int:
    """Parse the command line, build the run config and run the pipeline.

    The YAML given by ``--config`` is loaded first; every optional flag that
    was actually passed then overrides the matching ``run_cfg.basket_*``
    attribute. Flags left at ``None`` keep the value from the YAML.

    Returns:
        1 when ``--config`` does not point to an existing file, otherwise
        the value returned by ``run_basket_pipeline`` converted to ``int``.
    """
    # Only sets the variable when the caller has not set it already.
    os.environ.setdefault("OPENCV_FFMPEG_LOGLEVEL", "8")

    ap = argparse.ArgumentParser(
        description="手术室耗材篮子接触分段主流程(跳过 ActionFormer)"
    )
    ap.add_argument("--video", type=Path, required=True, help="输入 MP4")
    ap.add_argument(
        "--excel",
        type=Path,
        required=True,
        help="商品表 Excel(C 列白名单 + 产品编码)",
    )
    ap.add_argument("--out", type=Path, required=True, help="输出 TSV")
    ap.add_argument(
        "--config",
        type=Path,
        default=PACK_ROOT / "configs" / "default_config.yaml",
        help="继承 weights / phase2 / classification / doctor / basket 的 YAML",
    )
    ap.add_argument(
        "--save-basket-roi",
        type=Path,
        default=None,
        help="框选后将 ROI 保存为 JSON(可选;每次运行仍会先弹窗标框)",
    )
    ap.add_argument(
        "--det-conf",
        type=float,
        default=None,
        help="篮子扫描手部检测置信度(默认读 yaml basket.det_conf)",
    )
    ap.add_argument(
        "--contact-iou-on",
        type=float,
        default=None,
        help="篮子接触 IoU 进入阈值(默认读 yaml basket.contact_iou_on)",
    )
    ap.add_argument(
        "--contact-iou-off",
        type=float,
        default=None,
        help="篮子接触 IoU 退出阈值(默认读 yaml basket.contact_iou_off)",
    )
    # The next two help texts used to quote fixed numbers (0.4 / 5.0) that
    # disagreed with the shipped configuration; the real default is whatever
    # the YAML holds, so the help now says that instead of a number.
    ap.add_argument(
        "--confirm-seconds",
        type=float,
        default=None,
        help="连续接触确认时长(秒,默认读 yaml basket 配置)",
    )
    ap.add_argument(
        "--cooldown-seconds",
        type=float,
        default=None,
        help="触发后绝对冷却时长(秒,默认读 yaml basket 配置)",
    )
    ap.add_argument(
        "--contact-iou-threshold",
        type=float,
        default=None,
        help="手框与篮子 IoU 阈值(默认读 yaml basket.contact_iou_threshold)",
    )
    ap.add_argument(
        "--segment-start-offset-sec",
        type=float,
        default=None,
        help="段起点相对接触时刻偏移(秒,默认 1 → contact+1)",
    )
    # Was "默认 5 → contact+5"; the documented window is contact+1..contact+6,
    # which is also what the streaming entry point advertises.
    ap.add_argument(
        "--segment-end-offset-sec",
        type=float,
        default=None,
        help="段终点相对接触时刻偏移(秒,默认读 yaml,随包配置 → contact+6)",
    )
    ap.add_argument(
        "--min-segment-sec",
        type=float,
        default=None,
        help="截断后段长不足此值则丢弃(秒,默认 4.0;0 表示不过滤)",
    )
    ap.add_argument(
        "--scan-frame-stride",
        type=int,
        default=None,
        help="全片接触扫描帧步长(默认 1)",
    )
    args = ap.parse_args()

    cfg_path = args.config.resolve()
    if not cfg_path.is_file():
        print("找不到配置:", cfg_path, file=sys.stderr)
        return 1

    run_cfg = load_run_config(PACK_ROOT, cfg_path)
    run_cfg.video = args.video.resolve()
    run_cfg.excel = args.excel.resolve()
    run_cfg.out = args.out.resolve()

    # Always ask for the ROI on the first frame; never reuse one from a
    # JSON file or from the YAML.
    run_cfg.basket_load_roi_json = None
    run_cfg.basket_skip_roi_select = False
    if args.save_basket_roi is not None:
        run_cfg.basket_save_roi_json = args.save_basket_roi.resolve()

    # (argparse attribute, cast). Each one maps onto run_cfg.basket_<name>
    # and is applied only when the flag was given on the command line.
    overrides = (
        ("det_conf", float),
        ("contact_iou_on", float),
        ("contact_iou_off", float),
        ("confirm_seconds", float),
        ("cooldown_seconds", float),
        ("contact_iou_threshold", float),
        ("segment_start_offset_sec", float),
        ("segment_end_offset_sec", float),
        ("min_segment_sec", float),
        ("scan_frame_stride", int),
    )
    for name, cast in overrides:
        value = getattr(args, name)
        if value is not None:
            setattr(run_cfg, f"basket_{name}", cast(value))

    return int(run_basket_pipeline(run_cfg))


if __name__ == "__main__":
    raise SystemExit(main())
os.environ.setdefault("OPENCV_FFMPEG_LOGLEVEL", "8") + ap = argparse.ArgumentParser(description="推流篮子耗材识别(无撕膜)") + ap.add_argument( + "--rtsp", + type=str, + default=None, + help="RTSP/摄像头 URL;本地 mp4 也可用于测试", + ) + ap.add_argument( + "--excel", + type=Path, + required=True, + help="商品表 Excel(C 列白名单 + 产品编码)", + ) + ap.add_argument("--out", type=Path, required=True, help="输出 TSV(实时追加)") + ap.add_argument( + "--config", + type=Path, + default=PACK_ROOT / "configs" / "default_config.yaml", + help="配置文件", + ) + ap.add_argument( + "--save-basket-roi", + type=Path, + default=None, + help="框选后将 ROI 保存为 JSON(可选;每次运行仍会先弹窗标框)", + ) + ap.add_argument( + "--segment-start-offset-sec", + type=float, + default=None, + help="段起点相对 contact 偏移(默认读 yaml,与 basket 一致 → contact+1)", + ) + ap.add_argument( + "--segment-end-offset-sec", + type=float, + default=None, + help="段终点相对 contact 偏移(默认读 yaml,与 basket 一致 → contact+6,窗口 5s)", + ) + ap.add_argument( + "--min-segment-sec", + type=float, + default=None, + help="段长不足此值则丢弃(默认 4.0)", + ) + ap.add_argument( + "--ring-buffer-sec", + type=float, + default=None, + help="帧环形缓存时长(秒,默认 15)", + ) + ap.add_argument( + "--stream-fps", + type=float, + default=None, + help="RTSP 无 FPS 元数据时的假定帧率(默认 25)", + ) + args = ap.parse_args() + + cfg_path = args.config.resolve() + if not cfg_path.is_file(): + print("找不到配置:", cfg_path, file=sys.stderr) + return 1 + + run_cfg = load_run_config(PACK_ROOT, cfg_path) + run_cfg.excel = args.excel.resolve() + run_cfg.out = args.out.resolve() + + rtsp = args.rtsp or getattr(run_cfg, "stream_rtsp", None) + if not rtsp: + print("请指定 --rtsp 或在 yaml stream.rtsp 中配置", file=sys.stderr) + return 1 + run_cfg.stream_rtsp = str(rtsp) + + # 每次运行均在首帧弹窗标框,不从 JSON / yaml 复用 ROI + run_cfg.basket_load_roi_json = None + run_cfg.basket_skip_roi_select = False + if args.save_basket_roi is not None: + run_cfg.basket_save_roi_json = args.save_basket_roi.resolve() + if args.segment_start_offset_sec is not None: + 
run_cfg.stream_segment_start_offset_sec = float(args.segment_start_offset_sec) + if args.segment_end_offset_sec is not None: + run_cfg.stream_segment_end_offset_sec = float(args.segment_end_offset_sec) + if args.min_segment_sec is not None: + run_cfg.stream_min_segment_sec = float(args.min_segment_sec) + if args.ring_buffer_sec is not None: + run_cfg.stream_ring_buffer_sec = float(args.ring_buffer_sec) + if args.stream_fps is not None: + run_cfg.stream_fps = float(args.stream_fps) + + return int(run_stream_pipeline(run_cfg)) + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/main_segments_offline.py b/main_segments_offline.py new file mode 100644 index 0000000..c6c44f8 --- /dev/null +++ b/main_segments_offline.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python3 +"""按结果 TSV 时间段对离线视频做手检 → 耗材分类(跳过分段与撕膜,无好坏帧门控)。""" +from __future__ import annotations + +import argparse +import os +import sys +from pathlib import Path + +PACK_ROOT = Path(__file__).resolve().parent +sys.path.insert(0, str(PACK_ROOT / "src")) + +from paths import ensure_code_on_path + +ensure_code_on_path(PACK_ROOT) + +from config import load_run_config +from segments_offline_orchestrator import run_segments_offline_pipeline + + +def main() -> int: + os.environ.setdefault("OPENCV_FFMPEG_LOGLEVEL", "8") + ap = argparse.ArgumentParser( + description="TSV 时间段 → 离线视频段内耗材识别(无 ActionFormer / 无篮子分段 / 无撕膜)" + ) + ap.add_argument("--video", type=Path, required=True, help="输入 MP4") + ap.add_argument( + "--segments-tsv", + type=Path, + required=True, + help="含 start_sec/end_sec 的结果 TSV(如推流输出)", + ) + ap.add_argument( + "--excel", + type=Path, + required=True, + help="商品表 Excel(C 列白名单 + 产品编码)", + ) + ap.add_argument("--out", type=Path, required=True, help="输出 TSV") + ap.add_argument( + "--config", + type=Path, + default=PACK_ROOT / "configs" / "default_config.yaml", + help="配置文件", + ) + ap.add_argument( + "--skip-empty-segments", + action="store_true", + help="跳过 TSV 中 top1_name 为空或为失败文案的行", + ) + args = 
ap.parse_args() + + cfg_path = args.config.resolve() + if not cfg_path.is_file(): + print("找不到配置:", cfg_path, file=sys.stderr) + return 1 + + run_cfg = load_run_config(PACK_ROOT, cfg_path) + run_cfg.video = args.video.resolve() + run_cfg.excel = args.excel.resolve() + run_cfg.out = args.out.resolve() + run_cfg.segments_tsv = args.segments_tsv.resolve() + run_cfg.segments_skip_empty = bool(args.skip_empty_segments) + + return int(run_segments_offline_pipeline(run_cfg)) + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/output/.gitkeep b/output/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..37c6276 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,14 @@ +# 篮子离线 + 推流 — 运行依赖 +# 1. 先按 https://pytorch.org 安装与 CUDA 匹配的 torch / torchvision +# 2. pip install -r requirements.txt + +torch>=2.0.0 +torchvision>=0.15.0 +ultralytics>=8.0.0 +opencv-python>=4.8.0 +numpy>=1.23.0 +pandas>=2.0.0 +openpyxl>=3.1.0 +PyYAML>=6.0 +Pillow>=10.0.0 +mediapipe>=0.10.0 diff --git a/scripts/remux_hevc.sh b/scripts/remux_hevc.sh new file mode 100755 index 0000000..c1ba2c7 --- /dev/null +++ b/scripts/remux_hevc.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash +# HEVC 主视角 MP4 转 H.264,供 VideoSwin 特征提取与 OpenCV 解码。 +# 用法: +# ./scripts/remux_hevc.sh /path/to/source.mp4 [output.mp4] +# 未指定输出时写入 input/remuxed/_h264.mp4 + +set -euo pipefail + +ROOT="$(cd "$(dirname "$0")/.." && pwd)" +SRC="${1:?用法: remux_hevc.sh [output.mp4]}" +STEM="$(basename "${SRC%.*}")" +OUT="${2:-${ROOT}/input/remuxed/${STEM}_h264.mp4}" + +mkdir -p "$(dirname "$OUT")" +echo "[remux] ${SRC} -> ${OUT}" +ffmpeg -y -i "$SRC" -c:v libx264 -preset ultrafast -crf 23 -an "$OUT" +echo "[done] ${OUT}" diff --git a/setup.sh b/setup.sh new file mode 100755 index 0000000..3ea6a85 --- /dev/null +++ b/setup.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash +set -euo pipefail +cd "$(dirname "$0")" + +echo "=== 环境检查 ===" +if ! 
python3 -c "import tkinter" 2>/dev/null; then + echo "警告: 未检测到 python3-tk,框选篮子 ROI 会失败。" + echo " Ubuntu/Debian: sudo apt install python3-tk" +fi + +if command -v conda >/dev/null 2>&1; then + echo "检测到 conda。推荐: conda activate yolo && pip install -r requirements.txt" +else + echo "使用 venv 安装..." + python3 -m venv .venv + # shellcheck disable=SC1091 + source .venv/bin/activate + pip install -U pip + pip install -r requirements.txt +fi + +echo "" +echo "=== 权重检查 ===" +for w in hand_detect.pt goodbad_frame.pt haocai_classify.pt; do + test -f "weights/$w" && echo " OK weights/$w" || echo " 缺失 weights/$w" +done +test -f doctor_identity_package/doctor_info.pth && echo " OK doctor_info.pth" || echo " 缺失 doctor_info.pth" +test -f input/视频中的商品信息表.xlsx && echo " OK Excel" || echo " 缺失 Excel" + +echo "" +echo "安装说明见 README.md" diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..ebf7288 --- /dev/null +++ b/src/__init__.py @@ -0,0 +1 @@ +# pack 5.11 src package diff --git a/src/action_trigger_logic.py b/src/action_trigger_logic.py new file mode 100644 index 0000000..7cd35d6 --- /dev/null +++ b/src/action_trigger_logic.py @@ -0,0 +1,120 @@ +"""手篮接触 ActionTriggerLogic:帧防抖 + 上升沿 + 绝对冷却三道锁。""" +from __future__ import annotations + +from pipeline.hand_roi_merge import bbox_iou_xyxy + + +def max_hand_basket_iou( + hand_boxes: list[list[float]], basket_xyxy: list[float] +) -> float: + """任意一只手与篮子的最大 IoU;无手则 0.0。""" + if not hand_boxes: + return 0.0 + basket = [float(v) for v in basket_xyxy] + return max(bbox_iou_xyxy(hb, basket) for hb in hand_boxes) + + +def resolve_contact_iou_thresholds( + *, + contact_iou_threshold: float | None = None, + contact_iou_on: float | None = None, + contact_iou_off: float | None = None, +) -> tuple[float, float]: + """由 legacy 单阈值或显式 on/off 解析 IoU 滞回参数。""" + legacy = float(contact_iou_threshold if contact_iou_threshold is not None else 0.05) + iou_on = float(contact_iou_on if contact_iou_on is not None else legacy) + 
iou_off = float( + contact_iou_off if contact_iou_off is not None else max(legacy * 0.6, 0.01) + ) + if iou_off >= iou_on: + iou_off = max(iou_on - 0.02, 0.01) + return iou_on, iou_off + + +class ActionTriggerLogic: + """ + 基于 2D 防区的动作触发状态机。 + + 三道锁: + 1. 帧级防抖 — 连续 confirm_frames 帧滞回判定为接触才确认 + 2. 上升沿 — 单次接触会话仅触发一次 Start + 3. 绝对冷却 — 触发后 cooldown_seconds 内忽略一切信号 + """ + + def __init__( + self, + fps: float = 25, + confirm_seconds: float = 0.4, + cooldown_seconds: float = 5.0, + threshold_on: float = 0.08, + threshold_off: float = 0.03, + ) -> None: + self.fps = float(fps) + self.confirm_seconds = float(confirm_seconds) + self.cooldown_seconds = float(cooldown_seconds) + self.threshold_on = float(threshold_on) + self.threshold_off = float(threshold_off) + if self.threshold_off >= self.threshold_on: + self.threshold_off = max(self.threshold_on - 0.02, 0.01) + + self._confirm_frames = max(1, int(round(self.confirm_seconds * self.fps))) + self._overlap_counter = 0 + self._debounce_start_t: float | None = None + self._hysteresis_inside = False + self._armed = True + self._last_trigger_t = float("-inf") + + def reset(self) -> None: + """换视频或换篮子时清空内部状态。""" + self._overlap_counter = 0 + self._debounce_start_t = None + self._hysteresis_inside = False + self._armed = True + self._last_trigger_t = float("-inf") + + def _is_contacting(self, current_iou: float) -> bool: + if not self._hysteresis_inside: + return current_iou > self.threshold_on + 1e-12 + return current_iou > self.threshold_off + 1e-12 + + def step_iou(self, current_timestamp: float, current_iou: float) -> float | None: + """以预计算 IoU 驱动状态机(供单元测试);返回 Start 时间戳或 None。""" + t = float(current_timestamp) + iou = float(current_iou) + + if t - self._last_trigger_t < self.cooldown_seconds - 1e-12: + self._overlap_counter = 0 + self._debounce_start_t = None + return None + + is_contacting = self._is_contacting(iou) + + if is_contacting: + if self._overlap_counter == 0: + self._debounce_start_t = t + self._overlap_counter += 
1 + self._hysteresis_inside = True + else: + self._overlap_counter = 0 + self._debounce_start_t = None + self._hysteresis_inside = False + self._armed = True + + if self._overlap_counter >= self._confirm_frames and self._armed: + self._armed = False + self._last_trigger_t = t + start_t = self._debounce_start_t if self._debounce_start_t is not None else t + return start_t + + return None + + def process_frame( + self, + current_timestamp: float, + hand_boxes: list[list[float]], + basket_box: tuple[float, float, float, float] | list[float], + ) -> float | None: + """逐帧处理;任意一只手满足条件即可。触发成功返回 Start 时间戳。""" + basket = [float(v) for v in basket_box] + current_iou = max_hand_basket_iou(hand_boxes, basket) + return self.step_iou(current_timestamp, current_iou) diff --git a/src/actionformer_utils.py b/src/actionformer_utils.py new file mode 100644 index 0000000..0b02459 --- /dev/null +++ b/src/actionformer_utils.py @@ -0,0 +1,96 @@ +"""Phase1:VideoSwin 特征 + ActionFormer 时段(与仓库 main_pipeline.ActionSegmenter 一致)。""" +from __future__ import annotations + +import json +import shutil +from pathlib import Path +from typing import Any + +import cv2 + +import run_haocai_actionformer_consumables_e2e as e2e +from pack_utils import log + + +class ActionSegmenter: + @staticmethod + def build_segments( + *, + video_path: Path, + stem: str, + work: Path, + actionformer_ckpt: Path, + af_min_score: float, + af_min_seg_seconds: float, + python_exe: str, + feat_batch_size: int, + device: str, + ) -> list[tuple[float, float, float]]: + inp = work / "input" + feat_dir = work / "features" + inp.mkdir(parents=True, exist_ok=True) + feat_dir.mkdir(parents=True, exist_ok=True) + for stale in inp.glob("*.mp4"): + stale.unlink(missing_ok=True) + + single_video = inp / video_path.name + if single_video.resolve() != video_path.resolve(): + shutil.copy2(video_path, single_video) + + meta_path = feat_dir / "meta.json" + e2e.run_feature_extraction( + python_exe=python_exe, + data_root=inp, + 
output_dir=feat_dir, + meta_file=meta_path, + device=device, + batch_size=max(1, feat_batch_size), + ) + + meta = json.loads(meta_path.read_text(encoding="utf-8")) + duration, fps = e2e.duration_fps_from_meta(meta, stem) + if stem not in meta.get("videos", {}): + log("meta 中未找到 video_id=stem,使用 OpenCV 估 duration…") + cap0 = cv2.VideoCapture(str(video_path)) + if cap0.isOpened(): + fps = float(cap0.get(cv2.CAP_PROP_FPS)) or fps + nfr = int(cap0.get(cv2.CAP_PROP_FRAME_COUNT)) + cap0.release() + if fps > 0 and nfr > 0: + duration = nfr / fps + + npy_path = feat_dir / f"{stem}.npy" + if not npy_path.is_file(): + raise FileNotFoundError(f"特征文件不存在: {npy_path}") + + json_path = work / "infer_single.json" + e2e.write_infer_json(json_path, stem, duration, fps) + + yaml_path = work / "infer_single.yaml" + e2e.write_infer_yaml(yaml_path, json_path.resolve(), feat_dir.resolve()) + + pkl_dest = work / "eval_results.pkl" + e2e.run_actionformer_eval( + python_exe=python_exe, + yaml_path=yaml_path.resolve(), + ckpt_path=actionformer_ckpt.resolve(), + copy_pkl_to=pkl_dest, + ) + + raw_segs = e2e.parse_actionformer_pkl(pkl_dest, stem) + raw_segs = [(s, e, sc) for s, e, sc in raw_segs if sc > af_min_score] + segs = e2e.greedy_mutual_exclusive(raw_segs) + n_exclusive = len(segs) + min_seg = float(af_min_seg_seconds) + if min_seg > 0: + segs = [(s, e, sc) for s, e, sc in segs if (e - s) >= min_seg - 1e-9] + if min_seg > 0: + log( + f"ActionFormer 候选 {len(raw_segs)} -> 互斥后 {n_exclusive} 段 -> " + f"剔除短于 {min_seg:g}s 后 {len(segs)} 段(score>{af_min_score})" + ) + else: + log( + f"ActionFormer 候选 {len(raw_segs)} -> 互斥后 {n_exclusive} 段(score>{af_min_score})" + ) + return segs diff --git a/src/basket_segmenter.py b/src/basket_segmenter.py new file mode 100644 index 0000000..e757f7c --- /dev/null +++ b/src/basket_segmenter.py @@ -0,0 +1,616 @@ +"""篮子 ROI 交互选取 + 手篮接触上升沿扫描 → 固定窗口段列表。""" +from __future__ import annotations + +import json +import subprocess +from pathlib import Path +from typing 
import Any, Callable + +import cv2 +from ultralytics import YOLO + +from action_trigger_logic import ActionTriggerLogic, resolve_contact_iou_thresholds +from pipeline.hand_roi_merge import bbox_iou_xyxy +from run_segments_consumable_vote import collect_hand_boxes + + +def _roi_xyxy_from_select(x: int, y: int, w: int, h: int) -> list[float]: + if w <= 0 or h <= 0: + raise ValueError("未框选有效区域(宽高须 > 0)") + return [float(x), float(y), float(x + w), float(y + h)] + + +def _read_frame_at(cap: cv2.VideoCapture, *, mode: str | float) -> tuple[Any, float]: + fps = float(cap.get(cv2.CAP_PROP_FPS) or 25.0) + n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0) + duration = n_frames / fps if n_frames > 0 and fps > 0 else 0.0 + + if isinstance(mode, (int, float)): + t_sec = float(mode) + elif mode == "first": + t_sec = 0.0 + elif mode == "middle": + t_sec = max(0.0, duration * 0.5) + else: + raise ValueError(f"未知 roi_frame 模式: {mode!r}") + + cap.set(cv2.CAP_PROP_POS_MSEC, t_sec * 1000.0) + ok, frame = cap.read() + if not ok or frame is None: + cap.set(cv2.CAP_PROP_POS_FRAMES, 0) + ok, frame = cap.read() + if not ok or frame is None: + raise RuntimeError("无法从视频读取用于框选 ROI 的帧") + t_sec = 0.0 + else: + t_sec = float(cap.get(cv2.CAP_PROP_POS_MSEC)) / 1000.0 + return frame, t_sec + + +def save_basket_roi_json(path: Path, roi: list[float], *, video_path: Path | None = None) -> None: + payload: dict[str, Any] = {"basket_xyxy": [float(v) for v in roi]} + if video_path is not None: + payload["video"] = str(video_path.resolve()) + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(json.dumps(payload, ensure_ascii=False, indent=2) + "\n", encoding="utf-8") + + +def load_basket_roi_json(path: Path) -> list[float]: + data = json.loads(path.read_text(encoding="utf-8")) + roi = data.get("basket_xyxy") + if not isinstance(roi, list) or len(roi) != 4: + raise ValueError(f"无效的篮子 ROI JSON: {path}") + return [float(v) for v in roi] + + +def _scale_frame_for_display(frame, 
max_display_px: int) -> tuple[Any, float]: + orig_h, orig_w = frame.shape[:2] + scale = 1.0 + disp = frame + if max(orig_w, orig_h) > max_display_px: + scale = max_display_px / float(max(orig_w, orig_h)) + disp = cv2.resize( + frame, + (int(round(orig_w * scale)), int(round(orig_h * scale))), + interpolation=cv2.INTER_AREA, + ) + print( + f"[basket] 4K 预览缩放 scale={scale:.4f} " + f"({orig_w}x{orig_h} -> {disp.shape[1]}x{disp.shape[0]})" + ) + return disp, scale + + +def _select_basket_roi_tkinter( + disp_bgr, + *, + t_sec: float, + title: str, +) -> tuple[float, float, float, float]: + """Tkinter 弹窗:按住左键拖动画框,点顶部【确认】提交。""" + import tkinter as tk + from tkinter import messagebox + + from PIL import Image, ImageTk + + rgb = cv2.cvtColor(disp_bgr, cv2.COLOR_BGR2RGB) + pil = Image.fromarray(rgb) + img_w, img_h = pil.size + + root = tk.Tk() + root.title(title) + root.attributes("-topmost", True) + root.after(300, lambda: root.attributes("-topmost", False)) + + sw = int(root.winfo_screenwidth() or 1920) + sh = int(root.winfo_screenheight() or 1080) + # 预留顶部说明+按钮、窗口边框;画布不超过屏幕可用高度 + max_canvas_w = max(640, sw - 48) + max_canvas_h = max(360, sh - 220) + ui_scale = min(max_canvas_w / img_w, max_canvas_h / img_h, 1.0) + show_w = int(round(img_w * ui_scale)) + show_h = int(round(img_h * ui_scale)) + if (show_w, show_h) != (img_w, img_h): + pil = pil.resize((show_w, show_h), Image.Resampling.LANCZOS) + + state: dict[str, float | None] = {"x1": None, "y1": None, "x2": None, "y2": None} + start: dict[str, int | None] = {"x": None, "y": None} + rect_holder: dict[str, int | None] = {"id": None} + cancelled = {"v": False} + + def to_disp_coords(x1: float, y1: float, x2: float, y2: float) -> tuple[float, float, float, float]: + inv = 1.0 / ui_scale + return x1 * inv, y1 * inv, x2 * inv, y2 * inv + + def on_confirm() -> None: + if state["x1"] is None: + messagebox.showwarning(title, "请先在图片上按住左键拖动,框选篮子区域") + return + root.quit() + root.destroy() + + def on_cancel() -> None: + 
cancelled["v"] = True + root.quit() + root.destroy() + + top = tk.Frame(root, padx=12, pady=8) + top.pack(side=tk.TOP, fill=tk.X) + + tk.Label( + top, + text=( + f"参考帧 t={t_sec:.2f}s | 按住左键在图片上拖动画框 | 完成后点【确认】或按 Enter" + ), + font=("", 12), + justify=tk.LEFT, + anchor=tk.W, + ).pack(fill=tk.X) + + status = tk.Label(top, text="尚未框选", font=("", 11), fg="gray", anchor=tk.W) + status.pack(fill=tk.X, pady=(4, 8)) + + btn_frame = tk.Frame(top) + btn_frame.pack(fill=tk.X) + confirm_btn = tk.Button( + btn_frame, + text="确认", + command=on_confirm, + font=("", 15, "bold"), + width=14, + height=1, + bg="#4CAF50", + fg="white", + activebackground="#43A047", + ) + confirm_btn.pack(side=tk.LEFT, padx=(0, 10)) + tk.Button( + btn_frame, + text="取消", + command=on_cancel, + font=("", 14), + width=12, + ).pack(side=tk.LEFT) + + photo = ImageTk.PhotoImage(pil) + canvas = tk.Canvas( + root, + width=show_w, + height=show_h, + cursor="crosshair", + highlightthickness=1, + highlightbackground="#cccccc", + ) + canvas.pack(side=tk.TOP, padx=10, pady=(0, 10)) + canvas.create_image(0, 0, anchor=tk.NW, image=photo) + + def on_press(event: tk.Event) -> None: + start["x"], start["y"] = int(event.x), int(event.y) + if rect_holder["id"] is not None: + canvas.delete(rect_holder["id"]) + rect_holder["id"] = canvas.create_rectangle( + event.x, event.y, event.x, event.y, outline="red", width=3 + ) + status.config(text="正在框选…(松开左键完成矩形)", fg="orange") + + def on_drag(event: tk.Event) -> None: + if rect_holder["id"] is not None and start["x"] is not None and start["y"] is not None: + canvas.coords(rect_holder["id"], start["x"], start["y"], event.x, event.y) + + def on_release(event: tk.Event) -> None: + if start["x"] is None or start["y"] is None: + return + x1, y1 = min(start["x"], event.x), min(start["y"], event.y) + x2, y2 = max(start["x"], event.x), max(start["y"], event.y) + if x2 - x1 < 8 or y2 - y1 < 8: + status.config(text="框太小,请重新按住左键拖动", fg="red") + state["x1"] = state["y1"] = state["x2"] = 
state["y2"] = None + return + dx1, dy1, dx2, dy2 = to_disp_coords(float(x1), float(y1), float(x2), float(y2)) + state["x1"], state["y1"], state["x2"], state["y2"] = dx1, dy1, dx2, dy2 + status.config( + text=f"已框选 {int(dx2 - dx1)}×{int(dy2 - dy1)} 像素 — 请点击上方绿色【确认】或按 Enter", + fg="green", + ) + + canvas.bind("", on_press) + canvas.bind("", on_drag) + canvas.bind("", on_release) + root.bind("", lambda _e: on_confirm()) + root.bind("", lambda _e: on_cancel()) + confirm_btn.focus_set() + + # 居中并限制窗口不超过屏幕 + win_w = min(sw - 20, show_w + 24) + win_h = min(sh - 20, show_h + 180) + x0 = max(0, (sw - win_w) // 2) + y0 = max(0, (sh - win_h) // 2) + root.geometry(f"{win_w}x{win_h}+{x0}+{y0}") + root.minsize(min(win_w, 720), min(win_h, 480)) + + print("[basket] 已打开框选窗口:顶部有绿色【确认】按钮;拖框后点确认或按 Enter") + root.mainloop() + + if cancelled["v"]: + raise ValueError("用户取消框选") + if state["x1"] is None or state["x2"] is None or state["y1"] is None or state["y2"] is None: + raise ValueError("未框选有效区域:请按住左键拖动画出矩形后点【确认】") + x1, y1, x2, y2 = state["x1"], state["y1"], state["x2"], state["y2"] + return float(x1), float(y1), float(x2 - x1), float(y2 - y1) + + +def _select_basket_roi_matplotlib( + disp_bgr, + *, + t_sec: float, + title: str, +) -> tuple[float, float, float, float]: + """matplotlib 弹窗框选;关闭窗口即确认。""" + import matplotlib + + matplotlib.use("TkAgg") + import matplotlib.pyplot as plt + from matplotlib.widgets import RectangleSelector + + rgb = cv2.cvtColor(disp_bgr, cv2.COLOR_BGR2RGB) + h, w = rgb.shape[:2] + fig_w = min(16.0, max(8.0, w / 120.0)) + fig_h = min(9.0, max(4.5, h / 120.0)) + fig, ax = plt.subplots(figsize=(fig_w, fig_h)) + ax.imshow(rgb) + ax.set_title( + f"{title}\n参考帧 t={t_sec:.2f}s | 鼠标左键拖框 | 可拖拽调整 | 关闭窗口确认", + fontsize=11, + ) + ax.axis("off") + + box: dict[str, float | None] = {"x1": None, "y1": None, "x2": None, "y2": None} + + def onselect(eclick, erelease) -> None: + if eclick.xdata is None or erelease.xdata is None: + return + if eclick.ydata is None or 
erelease.ydata is None: + return + box["x1"] = float(min(eclick.xdata, erelease.xdata)) + box["y1"] = float(min(eclick.ydata, erelease.ydata)) + box["x2"] = float(max(eclick.xdata, erelease.xdata)) + box["y2"] = float(max(eclick.ydata, erelease.ydata)) + + RectangleSelector( + ax, + onselect, + useblit=False, + button=[1], + minspanx=10, + minspany=10, + spancoords="data", + interactive=True, + ) + fig.canvas.manager.set_window_title(title) + plt.tight_layout() + print("[basket] 已打开 matplotlib 框选窗口:按住左键拖动画框,关闭窗口确认") + plt.show() + + if box["x1"] is None or box["x2"] is None or box["y1"] is None or box["y2"] is None: + raise ValueError("未框选有效区域:请用鼠标拖出一个矩形后关闭窗口") + x1, y1, x2, y2 = box["x1"], box["y1"], box["x2"], box["y2"] + if x2 - x1 < 1 or y2 - y1 < 1: + raise ValueError("框选区域过小,请重新运行并框选篮子") + return x1, y1, x2 - x1, y2 - y1 + + +def _select_basket_roi_opencv( + disp_bgr, + *, + title: str, +) -> tuple[float, float, float, float]: + cv2.namedWindow(title, cv2.WINDOW_NORMAL) + cv2.resizeWindow(title, disp_bgr.shape[1], disp_bgr.shape[0]) + rx, ry, rw, rh = cv2.selectROI(title, disp_bgr, showCrosshair=True, fromCenter=False) + cv2.destroyWindow(title) + cv2.destroyAllWindows() + return float(rx), float(ry), float(rw), float(rh) + + +def select_basket_roi( + video_path: Path, + *, + roi_frame: str | float = "middle", + window_title: str = "框选耗材篮子", + max_display_px: int = 1920, + roi_backend: str = "tkinter", +) -> list[float]: + """弹窗框选篮子 ROI。默认 tkinter(按住拖动 + 确认按钮)。""" + cap = cv2.VideoCapture(str(video_path)) + if not cap.isOpened(): + raise RuntimeError(f"无法打开视频: {video_path}") + try: + frame, t_sec = _read_frame_at(cap, mode=roi_frame) + disp, scale = _scale_frame_for_display(frame, max_display_px) + + backend = str(roi_backend).strip().lower() + if backend == "tkinter": + rx, ry, rw, rh = _select_basket_roi_tkinter(disp, t_sec=t_sec, title=window_title) + elif backend == "matplotlib": + rx, ry, rw, rh = _select_basket_roi_matplotlib( + disp, t_sec=t_sec, 
title=window_title + ) + elif backend == "opencv": + print(f"[basket] 框选参考帧 t={t_sec:.2f}s,Enter/Space 确认,Esc 取消") + rx, ry, rw, rh = _select_basket_roi_opencv(disp, title=window_title) + else: + raise ValueError(f"未知 roi_backend: {roi_backend!r},可选 tkinter / matplotlib / opencv") + + if scale != 1.0: + rx, ry, rw, rh = rx / scale, ry / scale, rw / scale, rh / scale + roi = _roi_xyxy_from_select(int(round(rx)), int(round(ry)), int(round(rw)), int(round(rh))) + print(f"[basket] 篮子 ROI xyxy={roi}") + return roi + finally: + cap.release() + cv2.destroyAllWindows() + + +def hands_contact_basket( + hand_boxes: list[list[float]], + basket_xyxy: list[float], + iou_threshold: float, +) -> bool: + """任意一只手框与篮子 IoU 严格大于阈值即视为接触。""" + thr = float(iou_threshold) + for hb in hand_boxes: + if bbox_iou_xyxy(hb, basket_xyxy) > thr + 1e-12: + return True + return False + + + +def filter_near_contact_starts( + starts: list[float], + min_interval_sec: float, + *, + log_fn: Callable[[str], None] | None = None, +) -> list[float]: + """ + 合并时间上过于接近的接触上升沿,保留每簇中的第一个。 + 用于抑制手框抖动导致的重复触发(如 71.0s 与 71.9s)。 + """ + gap = float(min_interval_sec) + if gap <= 0 or not starts: + return list(starts) + kept: list[float] = [] + for t in sorted(starts): + if kept and t - kept[-1] < gap - 1e-9: + if log_fn: + log_fn( + f"[basket] 忽略近距离上升沿 t={t:.3f}s " + f"(距上次 {t - kept[-1]:.3f}s < {gap:g}s)" + ) + continue + kept.append(t) + return kept + + +def video_duration_sec(cap: cv2.VideoCapture) -> float: + fps = float(cap.get(cv2.CAP_PROP_FPS) or 25.0) + n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0) + if n_frames > 0 and fps > 0: + return n_frames / fps + ms = float(cap.get(cv2.CAP_PROP_POS_MSEC) or 0.0) + cap.set(cv2.CAP_PROP_POS_AVI_RATIO, 1.0) + end_ms = float(cap.get(cv2.CAP_PROP_POS_MSEC) or 0.0) + cap.set(cv2.CAP_PROP_POS_MSEC, ms) + return max(0.0, end_ms / 1000.0) + + +def warn_if_hevc(video_path: Path) -> None: + try: + out = subprocess.run( + [ + "ffprobe", + "-v", + "error", + 
"-select_streams", + "v:0", + "-show_entries", + "stream=codec_name", + "-of", + "default=nw=1", + str(video_path), + ], + capture_output=True, + text=True, + check=False, + ) + codec = (out.stdout or "").strip().split("=", 1)[-1].lower() + if codec in ("hevc", "h265"): + print( + "[basket] 警告: 检测到 HEVC 编码,VideoSwin 不受影响但 OpenCV 解码可能不稳定;" + "建议先运行 scripts/remux_hevc.sh 转 H.264" + ) + except FileNotFoundError: + pass + + +def scan_contact_segments( + video_path: Path, + det_model: YOLO | str | Path, + basket_xyxy: list[float], + *, + contact_iou_threshold: float = 0.05, + contact_iou_on: float | None = None, + contact_iou_off: float | None = None, + confirm_seconds: float = 0.4, + cooldown_seconds: float = 5.0, + segment_start_offset_sec: float = 1.0, + segment_end_offset_sec: float = 5.0, + min_segment_sec: float = 4.0, + scan_frame_stride: int = 1, + det_conf: float = 0.6, + imgsz_det: int = 640, + device: str = "cuda", + half: bool = False, + log_fn: Callable[[str], None] | None = print, +) -> list[tuple[float, float, float]]: + """ + 全片扫描手篮接触上升沿,每段 [contact+start_offset, contact+end_offset](末尾截断至视频时长)。 + 截断后段长短于 min_segment_sec 的段会被丢弃。 + 接触判定经 ActionTriggerLogic(滞回 + 帧防抖 + 上升沿 + 绝对冷却)。 + 返回 (start_sec, end_sec, score) 列表,score 固定 1.0。 + """ + iou_on, iou_off = resolve_contact_iou_thresholds( + contact_iou_threshold=contact_iou_threshold, + contact_iou_on=contact_iou_on, + contact_iou_off=contact_iou_off, + ) + model = det_model if isinstance(det_model, YOLO) else YOLO(str(det_model)) + predict_kw: dict[str, Any] = {"device": device} + if half: + predict_kw["half"] = True + + stride = max(1, int(scan_frame_stride)) + t_start_off = float(segment_start_offset_sec) + t_end_off = float(segment_end_offset_sec) + if t_end_off <= t_start_off + 1e-9: + raise ValueError( + f"segment_end_offset_sec ({t_end_off}) 须大于 segment_start_offset_sec ({t_start_off})" + ) + basket = [float(v) for v in basket_xyxy] + + cap = cv2.VideoCapture(str(video_path)) + if not cap.isOpened(): + 
raise RuntimeError(f"无法打开视频: {video_path}") + + fps = float(cap.get(cv2.CAP_PROP_FPS) or 25.0) + trigger = ActionTriggerLogic( + fps=fps, + confirm_seconds=float(confirm_seconds), + cooldown_seconds=float(cooldown_seconds), + threshold_on=iou_on, + threshold_off=iou_off, + ) + + starts: list[float] = [] + frame_idx = 0 + + try: + duration = video_duration_sec(cap) + cap.set(cv2.CAP_PROP_POS_FRAMES, 0) + + while True: + ok, frame = cap.read() + if not ok or frame is None: + break + frame_idx += 1 + if stride > 1 and (frame_idx - 1) % stride != 0: + continue + + t_sec = float(cap.get(cv2.CAP_PROP_POS_MSEC)) / 1000.0 + r0 = model.predict(frame, conf=det_conf, imgsz=imgsz_det, verbose=False, **predict_kw)[0] + hands = collect_hand_boxes(model, r0.boxes) if r0.boxes else [] + event_t = trigger.process_frame(t_sec, hands, basket) + if event_t is not None: + starts.append(event_t) + if log_fn: + log_fn(f"[basket] 接触上升沿 t={event_t:.3f}s") + finally: + cap.release() + + starts = filter_near_contact_starts( + starts, float(cooldown_seconds), log_fn=log_fn + ) + + segs: list[tuple[float, float, float]] = [] + min_seg = float(min_segment_sec) + for t_contact in starts: + t0 = t_contact + t_start_off + t1 = t_contact + t_end_off + if duration > 0: + t1 = min(t1, duration) + if t1 <= t0 + 1e-9: + continue + seg_len = t1 - t0 + if min_seg > 0 and seg_len < min_seg - 1e-9: + if log_fn: + log_fn( + f"[basket] 丢弃截断短段 [{t0:.3f}, {t1:.3f}] " + f"时长 {seg_len:.3f}s < {min_seg:g}s" + ) + continue + segs.append((t0, t1, 1.0)) + + confirm_frames = max(1, int(round(float(confirm_seconds) * fps))) + if log_fn: + log_fn( + f"[basket] 扫描完成: {len(segs)} 段 " + f"([contact+{t_start_off:g}, contact+{t_end_off:g}]s, " + f"IoU on>{iou_on:g} off<={iou_off:g}, " + f"confirm={float(confirm_seconds):g}s (~{confirm_frames} frames), " + f"cooldown={float(cooldown_seconds):g}s" + + (f", min_segment>={min_seg:g}s" if min_seg > 0 else "") + + ")" + ) + return segs + + +def build_segments_from_basket( + 
video_path: Path, + hand_model: Path, + *, + basket_roi_json: Path | None = None, + save_roi_json: Path | None = None, + skip_roi_select: bool = False, + roi_frame: str | float = "middle", + roi_backend: str = "tkinter", + contact_iou_threshold: float = 0.05, + contact_iou_on: float | None = None, + contact_iou_off: float | None = None, + confirm_seconds: float = 0.4, + cooldown_seconds: float = 5.0, + segment_start_offset_sec: float = 1.0, + segment_end_offset_sec: float = 5.0, + min_segment_sec: float = 4.0, + scan_frame_stride: int = 1, + det_conf: float = 0.6, + imgsz_det: int = 640, + device: str = "cuda", + half: bool = False, + log_fn: Callable[[str], None] | None = print, +) -> tuple[list[tuple[float, float, float]], list[float]]: + """解析/框选 ROI 并扫描接触段。返回 (segments, basket_xyxy)。""" + warn_if_hevc(video_path) + + if basket_roi_json is not None and basket_roi_json.is_file(): + roi = load_basket_roi_json(basket_roi_json) + if log_fn: + log_fn(f"[basket] 从 JSON 加载 ROI: {basket_roi_json}") + elif skip_roi_select: + raise ValueError("skip_roi_select 需要有效的 --basket-roi-json") + else: + roi = select_basket_roi(video_path, roi_frame=roi_frame, roi_backend=roi_backend) + + if save_roi_json is not None: + save_basket_roi_json(save_roi_json, roi, video_path=video_path) + if log_fn: + log_fn(f"[basket] ROI 已保存: {save_roi_json}") + + segs = scan_contact_segments( + video_path, + hand_model, + roi, + contact_iou_threshold=contact_iou_threshold, + contact_iou_on=contact_iou_on, + contact_iou_off=contact_iou_off, + confirm_seconds=confirm_seconds, + cooldown_seconds=cooldown_seconds, + segment_start_offset_sec=segment_start_offset_sec, + segment_end_offset_sec=segment_end_offset_sec, + min_segment_sec=min_segment_sec, + scan_frame_stride=scan_frame_stride, + det_conf=det_conf, + imgsz_det=imgsz_det, + device=device, + half=half, + log_fn=log_fn, + ) + return segs, roi diff --git a/src/config.py b/src/config.py new file mode 100644 index 0000000..9b80ced --- /dev/null +++ 
b/src/config.py @@ -0,0 +1,172 @@ +"""加载 configs/*.yaml,解析为运行参数 Namespace。""" +from __future__ import annotations + +import sys +from argparse import Namespace +from pathlib import Path +from typing import Any + +import yaml + + +def _rel(pack_root: Path, raw: str | None) -> Path | None: + if raw is None: + return None + path = Path(raw) + if path.is_absolute(): + return path.resolve() + return (pack_root / path).resolve() + + +def load_run_config(pack_root: Path, config_path: Path) -> Namespace: + pack_root = pack_root.resolve() + data: dict[str, Any] = yaml.safe_load(config_path.read_text(encoding="utf-8")) + io = data["io"] + w = data.get("weights", {}) + rt = data.get("runtime", {}) + dev = data.get("device", {}) + p2 = data["phase2"] + cl = data["classification"] + gm = data.get("gap_merge", {}) + outopt = data.get("output", {}) + did = data.get("doctor_identity", {}) + bk = data.get("basket", {}) + st = data.get("stream", {}) + + py = rt.get("python") + python_exe = sys.executable if py is None or str(py).strip() == "" else str(py) + + whitelist_raw = io.get("whitelist_json") + whitelist_path = _rel(pack_root, whitelist_raw) if whitelist_raw else None + + work_raw = rt.get("work_dir") + work_dir = _rel(pack_root, work_raw) if work_raw else None + + doctor_ckpt_raw = did.get("checkpoint", "doctor_identity_package/doctor_info.pth") + doctor_labels_raw = did.get("labels_csv", "doctor_identity_package/labels.csv") + + basket_save_raw = bk.get("save_roi_json") + basket_load_raw = bk.get("load_roi_json") + basket_roi_frame = bk.get("roi_frame", "middle") + if isinstance(basket_roi_frame, (int, float)): + basket_roi_frame = float(basket_roi_frame) + else: + basket_roi_frame = str(basket_roi_frame) + + legacy_contact_iou = float(bk.get("contact_iou_threshold", 0.05)) + on_raw = bk.get("contact_iou_on") + off_raw = bk.get("contact_iou_off") + basket_contact_iou_on = float(on_raw) if on_raw is not None else legacy_contact_iou + basket_contact_iou_off = ( + 
float(off_raw) if off_raw is not None else max(legacy_contact_iou * 0.6, 0.01) + ) + if basket_contact_iou_off >= basket_contact_iou_on: + basket_contact_iou_off = max(basket_contact_iou_on - 0.02, 0.01) + + pad_bottom = float(p2.get("pad_bottom_ratio", p2.get("pad_ratio", 0.5))) + + # 篮子/推流默认不用;main.py(ActionFormer)或撕膜合并可在 yaml 中另行配置 + actionformer_raw = w.get("actionformer") + tear_raw = w.get("tear") + p1 = data.get("phase1", {}) + tm = data.get("tear_merge", {}) + + return Namespace( + video=_rel(pack_root, io["video"]), + excel=_rel(pack_root, io["excel"]), + out=_rel(pack_root, io["out"]), + whitelist_json=whitelist_path, + use_whitelist=bool(io.get("use_whitelist", True)), + work_dir=work_dir, + keep_work_dir=bool(rt.get("keep_work_dir", False)), + python=python_exe, + actionformer_ckpt=_rel(pack_root, actionformer_raw) if actionformer_raw else None, + hand_model=_rel(pack_root, w["hand"]), + goodbad_model=_rel(pack_root, w["goodbad"]), + haocai_model=_rel(pack_root, w["haocai"]), + tear_model=_rel(pack_root, tear_raw) if tear_raw else None, + device=str(dev.get("type", "cuda")), + half=bool(dev.get("half", False)), + af_min_score=float(p1.get("af_min_score", 0.1)), + af_min_seg_seconds=float(p1.get("af_min_seg_seconds", 2.0)), + feat_batch_size=int(p1.get("feat_batch_size", 1)), + seek_margin_sec=float(p2["seek_margin_sec"]), + frame_stride=int(p2["frame_stride"]), + det_conf=float(p2["det_conf"]), + pad_bottom_ratio=pad_bottom, + pad_ratio=pad_bottom, + imgsz_det=int(p2["imgsz_det"]), + merge_iou_gt=float(p2["merge_iou_gt"]), + merge_center_dist_max_px=( + float(p2["merge_center_dist_max_px"]) + if p2.get("merge_center_dist_max_px") is not None + else None + ), + merge_center_dist_max_frac_diag=( + float(p2["merge_center_dist_max_frac_diag"]) + if p2.get("merge_center_dist_max_frac_diag") is not None + else None + ), + tracking_alpha=float(p2.get("tracking_alpha", 0.6)), + tracking_max_lost_frames=int(p2.get("tracking_max_lost_frames", 0)), + 
imgsz_cls=int(cl["imgsz_cls"]), + good_top1_conf_threshold=float(cl["good_top1_conf_threshold"]), + good_top1_retry_threshold=float(cl["good_top1_retry_threshold"]), + haocai_min_conf=float(cl["haocai_min_conf"]), + haocai_min_conf_retry=float(cl["haocai_min_conf_retry"]), + empty_cache_every=int(cl.get("empty_cache_every", 0)), + legacy_12_col_only=bool(outopt.get("legacy_12_col_only", True)), + merge_adjacent_tear=bool(tm.get("merge_adjacent_tear", False)), + tear_merge_weights=_rel(pack_root, tm["tear_merge_weights"]) + if tm.get("tear_merge_weights") + else None, + tear_merge_class=str(tm.get("tear_merge_class", "tearing")), + tear_merge_head_sec=float(tm.get("tear_merge_head_sec", 3.0)), + tear_merge_prob=float(tm.get("tear_merge_prob", 0.9)), + tear_merge_min_frames=int(tm.get("tear_merge_min_frames", 6)), + tear_merge_verbose=bool(tm.get("tear_merge_verbose", False)), + tear_merge_full_frame=bool(tm.get("tear_merge_full_frame", False)), + gap_merge_enabled=bool(gm.get("enabled", False)), + gap_merge_max_gap_sec=float(gm.get("max_gap_sec", 2.0)), + doctor_identity_enabled=bool(did.get("enabled", True)), + doctor_identity_checkpoint=_rel(pack_root, doctor_ckpt_raw), + doctor_identity_labels_csv=_rel(pack_root, doctor_labels_raw), + doctor_identity_pose_min_detection_confidence=float( + did.get("pose_min_detection_confidence", 0.3) + ), + doctor_identity_min_identity_confidence=float(did.get("min_identity_confidence", 0.0)), + doctor_identity_middle_seconds=float(did.get("middle_seconds", 10.0)), + doctor_identity_sample_fps=float(did.get("sample_fps", 3.0)), + doctor_identity_pad_frac=float(did.get("pad_frac", 0.15)), + basket_det_conf=float(bk.get("det_conf", p2["det_conf"])), + basket_contact_iou_threshold=legacy_contact_iou, + basket_contact_iou_on=basket_contact_iou_on, + basket_contact_iou_off=basket_contact_iou_off, + basket_confirm_seconds=float(bk.get("confirm_seconds", 0.4)), + basket_cooldown_seconds=float(bk.get("cooldown_seconds", 5.0)), + 
basket_segment_start_offset_sec=float(bk.get("segment_start_offset_sec", 1.0)), + basket_segment_end_offset_sec=float(bk.get("segment_end_offset_sec", 5.0)), + basket_min_segment_sec=float(bk.get("min_segment_sec", 4.0)), + basket_scan_frame_stride=int(bk.get("scan_frame_stride", 1)), + basket_roi_frame=basket_roi_frame, + basket_save_roi_json=_rel(pack_root, basket_save_raw) if basket_save_raw else None, + basket_load_roi_json=_rel(pack_root, basket_load_raw) if basket_load_raw else None, + basket_skip_roi_select=bool(bk.get("skip_roi_select", False)), + basket_roi_backend=str(bk.get("roi_backend", "tkinter")), + stream_rtsp=st.get("rtsp"), + stream_ring_buffer_sec=float(st.get("ring_buffer_sec", 10.0)), + stream_fps=float(st.get("fps", 25.0)), + stream_cache_max_width=int(st.get("cache_max_width", 1920)), + stream_jpeg_quality=int(st.get("jpeg_quality", 85)), + stream_segment_start_offset_sec=float( + st.get("segment_start_offset_sec", bk.get("segment_start_offset_sec", 1.0)) + ), + stream_segment_end_offset_sec=float( + st.get("segment_end_offset_sec", bk.get("segment_end_offset_sec", 6.0)) + ), + stream_min_segment_sec=float( + st.get("min_segment_sec", bk.get("min_segment_sec", 4.0)) + ), + stream_infer_source=str(st.get("infer_source", "file")).strip().lower(), + stream_infer_fallback=str(st.get("infer_fallback", "cache")).strip().lower(), + ) diff --git a/src/excel_segments.py b/src/excel_segments.py new file mode 100644 index 0000000..a5f8bdd --- /dev/null +++ b/src/excel_segments.py @@ -0,0 +1,154 @@ +"""从 Excel 时间段列加载段列表,供 debug 主流程替代 ActionFormer。""" +from __future__ import annotations + +import re +from pathlib import Path +from typing import List, Tuple + +import cv2 +import pandas as pd + +from pack_utils import log + + +def parse_mm_ss_to_seconds(value: str) -> float: + text = str(value).strip() + if not text: + raise ValueError("empty time value") + if "." 
in text: + left, right = text.split(".", 1) + minutes = int(left) if left else 0 + seconds = int(right) if right else 0 + if seconds >= 60: + raise ValueError(f"invalid mm.ss seconds >= 60: {text}") + return float(minutes * 60 + seconds) + return float(int(text)) + + +def _is_legacy_mm_dot_ss(token: str) -> bool: + if "." not in token: + return False + a, b = token.split(".", 1) + if not a.isdigit() or not b.isdigit(): + return False + return 1 <= len(b) <= 2 + + +def parse_time_token(t: str) -> float: + t = str(t).strip().replace(":", ":") + if not t: + raise ValueError("empty token") + if ":" in t: + parts = [float(x) for x in t.split(":")] + if len(parts) == 3: + return parts[0] * 3600.0 + parts[1] * 60.0 + parts[2] + if len(parts) == 2: + return parts[0] * 60.0 + parts[1] + raise ValueError(f"bad colon time: {t}") + if _is_legacy_mm_dot_ss(t): + return parse_mm_ss_to_seconds(t) + return float(t) + + +def parse_cell_to_segments_v2(cell: object) -> List[Tuple[float, float]]: + """解析单元格内多段「开始-结束」(冒号 / 分.秒 / 纯秒)。""" + if cell is None or (isinstance(cell, float) and pd.isna(cell)): + return [] + text = str(cell).strip() + if not text: + return [] + text = ( + text.replace(";", ";") + .replace(",", ",") + .replace("、", ",") + .replace("\n", ";") + .replace(":", ":") + .replace(" ", "") + ) + chunks = re.split(r"[;,]+", text) + segments: List[Tuple[float, float]] = [] + for ch in chunks: + if not ch: + continue + m = re.match(r"^(.+?)\-(.+)$", ch) + if not m: + continue + left, right = m.group(1), m.group(2) + try: + s = parse_time_token(left) + e = parse_time_token(right) + except (ValueError, TypeError): + continue + if e > s: + segments.append((s, e)) + return segments + + +def _video_duration_sec(video_path: Path | None) -> float | None: + if video_path is None: + return None + cap = cv2.VideoCapture(str(video_path)) + if not cap.isOpened(): + return None + fps = float(cap.get(cv2.CAP_PROP_FPS)) or 0.0 + nfr = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) + cap.release() 
+ if fps > 0 and nfr > 0: + return nfr / fps + return None + + +def load_segments_from_excel_column_i( + excel_path: Path, + *, + col_index: int = 8, + sheet_name: int | str = 0, + video_path: Path | None = None, + default_score: float = 1.0, +) -> list[tuple[float, float, float]]: + """ + 从 Excel 指定列(默认 I 列 index=8)汇总所有行的时间段,返回 (start, end, score)。 + """ + excel_path = excel_path.resolve() + df = pd.read_excel(excel_path, sheet_name=sheet_name, header=0) + + if df.shape[1] > col_index: + time_series = df.iloc[:, col_index] + time_col_name = str(df.columns[col_index]) + else: + cand_cols = [c for c in df.columns if "时间段" in str(c)] + if not cand_cols: + raise ValueError( + f"Excel 列数不足且未找到含「时间段」的列: {excel_path} (cols={df.shape[1]})" + ) + time_col_name = str(cand_cols[0]) + time_series = df[time_col_name] + + duration = _video_duration_sec(video_path) + raw_pairs: list[tuple[float, float]] = [] + invalid_cnt = 0 + + for cell in time_series.tolist(): + segs = parse_cell_to_segments_v2(cell) + for s, e in segs: + cs, ce = s, e + if duration is not None: + cs = max(0.0, min(s, duration)) + ce = max(0.0, min(e, duration)) + if ce <= cs: + invalid_cnt += 1 + continue + raw_pairs.append((cs, ce)) + + raw_pairs.sort(key=lambda x: (x[0], x[1])) + segs_out = [(s, e, float(default_score)) for s, e in raw_pairs] + + log( + f"[debug] Excel 时间段列「{time_col_name}」(index={col_index}) " + f"→ {len(segs_out)} 段" + + (f",丢弃无效 {invalid_cnt} 段" if invalid_cnt else "") + ) + if duration is not None: + log(f"[debug] 视频时长 {duration:.3f}s,段已裁剪到 [0, duration]") + + return segs_out diff --git a/src/orchestrator.py b/src/orchestrator.py new file mode 100644 index 0000000..dc25b8b --- /dev/null +++ b/src/orchestrator.py @@ -0,0 +1,532 @@ +"""主流程编排:与仓库 main_pipeline.PipelineManager 逻辑一致,参数来自 YAML(SimpleNamespace)。""" +from __future__ import annotations + +import importlib.util +import tempfile +from argparse import Namespace +from pathlib import Path +from typing import Any + +import cv2 
+import run_haocai_actionformer_consumables_e2e as e2e +from actionformer_utils import ActionSegmenter +from excel_segments import load_segments_from_excel_column_i +from pipeline.hand_roi_merge import HandMergeConfig, HandRoiGrouper +from pipeline.segment_processor import ( + HaocaiOnlyClassifier, + process_segment_haocai_from_cap_with_gate_retries, +) +from pipeline.gap_adjacent_merge import merge_all_by_gap +from pipeline.tear_gate_merge import ( + merge_all, + parse_e2e_rows_from_body_lines, + tear_class_index, +) +from run_segments_consumable_vote import pad_box_bottom_only as _pad_box +from ultralytics import YOLO + +from basket_segmenter import build_segments_from_basket +from pack_utils import load_allowed_names_from_excel, log, resolve_allowed_class_idx +from stream_orchestrator import _haocai_infer_kwargs + + +def _load_doctor_module(script_path: Path) -> Any: + spec = importlib.util.spec_from_file_location("doctor_identity_runtime", script_path) + if spec is None or spec.loader is None: + raise RuntimeError(f"无法加载医生识别脚本: {script_path}") + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +def _infer_doctor_text(args: Namespace, video_path: Path) -> str: + if not bool(getattr(args, "doctor_identity_enabled", True)): + return "未启用" + + checkpoint = Path(args.doctor_identity_checkpoint).resolve() + labels_csv = Path(args.doctor_identity_labels_csv).resolve() + if not checkpoint.is_file(): + return f"识别失败(缺少权重: {checkpoint})" + if not labels_csv.is_file(): + return f"识别失败(缺少标签映射: {labels_csv})" + + pack_root = Path(__file__).resolve().parent.parent + script_path = pack_root / "doctor_identity_package" / "infer_doctor_from_video.py" + if not script_path.is_file(): + return f"识别失败(缺少脚本: {script_path})" + + try: + doctor_mod = _load_doctor_module(script_path) + model_path = doctor_mod._ensure_pose_lite_model(script_path.parent / ".mediapipe_models") + opts = doctor_mod.PoseLandmarkerOptions( + 
base_options=doctor_mod.BaseOptions(model_asset_path=str(model_path)), + running_mode=doctor_mod.VisionRunningMode.IMAGE, + min_pose_detection_confidence=float( + args.doctor_identity_pose_min_detection_confidence + ), + ) + landmarker = doctor_mod.PoseLandmarker.create_from_options(opts) + try: + best_crop = doctor_mod.pick_best_person_crop( + video_path=video_path, + landmarker=landmarker, + middle_seconds=float(args.doctor_identity_middle_seconds), + sample_fps=float(args.doctor_identity_sample_fps), + pad_frac=float(args.doctor_identity_pad_frac), + ) + finally: + landmarker.close() + + raw_pid, conf = doctor_mod.run_inference(best_crop, checkpoint) + min_conf = float(args.doctor_identity_min_identity_confidence) + name_map = doctor_mod.load_name_mapping(labels_csv) + doctor_name = name_map.get(str(raw_pid), "") + suffix = " [低置信度]" if conf < min_conf else "" + if doctor_name: + return f"{doctor_name} (id={raw_pid}, conf={conf:.4f}){suffix}" + return f"doctor_id={raw_pid} (conf={conf:.4f}){suffix}" + except Exception as exc: # noqa: BLE001 + return f"识别失败({exc})" + + +def _resolve_allowed_names(args: Namespace, excel_path: Path) -> list[str] | None: + if not getattr(args, "use_whitelist", True): + return [] + if args.whitelist_json is not None: + if not args.whitelist_json.is_file(): + log(f"找不到白名单 JSON: {args.whitelist_json}") + return None + return e2e.load_whitelist_json(args.whitelist_json.resolve()) + return load_allowed_names_from_excel(excel_path) + + +def _validate_phase2_weights(args: Namespace, *, require_actionformer: bool) -> bool: + checks: list[tuple[Any, str]] = [ + (args.hand_model, "手部检测"), + (args.goodbad_model, "好坏帧"), + (args.haocai_model, "耗材分类"), + ] + if require_actionformer: + checks.insert(0, (args.actionformer_ckpt, "ActionFormer ckpt")) + if getattr(args, "merge_adjacent_tear", False): + checks.append((args.tear_model, "撕膜分类")) + for p, lab in checks: + if p is None or not Path(p).is_file(): + log(f"缺少{lab}: {p}") + return False + if 
args.merge_adjacent_tear: + tmw = (args.tear_merge_weights or args.tear_model).resolve() + if not tmw.is_file(): + log(f"撕膜门控需要权重文件: {tmw}") + return False + return True + + +def _filter_segments_by_min_length( + segs: list[tuple[float, float, float]], min_seg_seconds: float +) -> list[tuple[float, float, float]]: + if min_seg_seconds <= 0: + return segs + return [(s, e, sc) for s, e, sc in segs if (e - s) >= min_seg_seconds - 1e-9] + + +class PipelineManager: + def __init__(self, args: Namespace) -> None: + self.args = args + + def run(self) -> int: + args = self.args + video_path = args.video.resolve() + if not video_path.is_file(): + log(f"找不到视频: {video_path}") + return 1 + excel_path = args.excel.resolve() + if not excel_path.is_file(): + log(f"找不到 Excel: {excel_path}") + return 1 + + allowed_names = _resolve_allowed_names(args, excel_path) + if allowed_names is None: + return 1 + if not _validate_phase2_weights(args, require_actionformer=True): + return 1 + + stem = video_path.stem + tmp_ctx: tempfile.TemporaryDirectory | None = None + if args.work_dir is not None: + work = Path(args.work_dir).resolve() + work.mkdir(parents=True, exist_ok=True) + elif args.keep_work_dir: + work = Path(tempfile.mkdtemp(prefix="main_pipeline_")) + log(f"工作目录(保留): {work}") + else: + tmp_ctx = tempfile.TemporaryDirectory(prefix="main_pipeline_") + work = Path(tmp_ctx.name) + + try: + product_map = e2e.load_product_code_map(excel_path) + segs = ActionSegmenter.build_segments( + video_path=video_path, + stem=stem, + work=work, + actionformer_ckpt=args.actionformer_ckpt, + af_min_score=args.af_min_score, + af_min_seg_seconds=args.af_min_seg_seconds, + python_exe=args.python, + feat_batch_size=args.feat_batch_size, + device=args.device, + ) + return self._run_phase2_and_write( + segs, + video_path=video_path, + excel_path=excel_path, + allowed_names=allowed_names, + product_map=product_map, + work_dir_log=work if args.work_dir is not None or args.keep_work_dir else None, + ) + 
finally: + if tmp_ctx is not None: + tmp_ctx.cleanup() + + def _run_phase2_and_write( + self, + segs: list[tuple[float, float, float]], + *, + video_path: Path, + excel_path: Path, + allowed_names: list[str], + product_map: dict[str, str], + work_dir_log: Path | None = None, + ) -> int: + args = self.args + + predict_kw: dict[str, Any] = {"device": args.device} + if args.half: + predict_kw["half"] = True + + log("Phase2:加载 YOLO(手 / 好坏帧 / 耗材)…") + det = YOLO(str(args.hand_model)) + gb = YOLO(str(args.goodbad_model)) + cls_m = YOLO(str(args.haocai_model)) + + cls_names = cls_m.names + hc = HaocaiOnlyClassifier( + cls_m, + cls_names=cls_names, + imgsz_cls=int(args.imgsz_cls), + predict_kw=predict_kw, + gb=gb, + gb_names=gb.names, + ) + infer_kw = _haocai_infer_kwargs(args, cls_names, None, predict_kw) + + try: + allowed_idx = resolve_allowed_class_idx(args, excel_path, cls_names) + except FileNotFoundError as exc: + log(str(exc)) + return 1 + infer_kw["allowed_class_idx"] = allowed_idx + if getattr(args, "use_whitelist", True): + log(f"白名单启用,{len(allowed_idx or ())} 个类参与投票") + else: + log("白名单已关闭,使用全 41 类") + + cap = cv2.VideoCapture(str(video_path)) + if not cap.isOpened(): + log("无法打开视频") + return 1 + + sep = "\t" + base_cols = [ + "rank", + "start_sec", + "end_sec", + "product_id_top1", + "top1_name", + "top1_conf", + "product_id_top2", + "top2_name", + "top2_conf", + "product_id_top3", + "top3_name", + "top3_conf", + ] + ext_cols = ["tear_top1_name", "tear_top2_name"] + header = sep.join(base_cols if args.legacy_12_col_only else base_cols + ext_cols) + lines_out = [header] + span_to_cells: dict[tuple[float, float], list[str]] = {} + span_to_pairs: dict[tuple[float, float], list[tuple[str, float]]] = {} + + def span_key(t0: float, t1: float) -> tuple[float, float]: + return (round(float(t0), 6), round(float(t1), 6)) + + def infer_one(rank: int, t0: float, t1: float) -> str: + info = process_segment_haocai_from_cap_with_gate_retries( + cap, + det, + hc, + 
start_sec=t0, + end_sec=t1, + seek_margin_sec=float(args.seek_margin_sec), + log_fn=log, + log_prefix=f"段落 rank={rank}: ", + **infer_kw, + ) + if not info.get("ok"): + reason = str(info.get("reason", "")) + span_to_pairs[span_key(t0, t1)] = [] + row = [ + str(rank), + f"{t0:.6f}", + f"{t1:.6f}", + "", + reason, + "", + "", + "", + "", + "", + "", + "", + ] + if not args.legacy_12_col_only: + row.extend(["", ""]) + span_to_cells[span_key(t0, t1)] = row[1:] + return sep.join(row) + + n1, n2, n3 = info["top_names"] + c1, c2, c3 = info["top_confs"] + id1 = product_map.get(n1, "") if n1 else "" + id2 = product_map.get(n2, "") if n2 else "" + id3 = product_map.get(n3, "") if n3 else "" + for nm, pid in ((n1, id1), (n2, id2), (n3, id3)): + if nm and not pid: + log(f"警告: 商品表无名称「{nm}」,产品编码置空。") + + row = [ + str(rank), + f"{t0:.6f}", + f"{t1:.6f}", + id1, + n1, + f"{c1:.6f}" if n1 else "", + id2, + n2, + f"{c2:.6f}" if n2 else "", + id3, + n3, + f"{c3:.6f}" if n3 else "", + ] + if not args.legacy_12_col_only: + row.extend(["", ""]) + span_to_cells[span_key(t0, t1)] = row[1:] + span_to_pairs[span_key(t0, t1)] = list(info.get("pairs") or []) + return sep.join(row) + + try: + for rank, (t0, t1, af_sc) in enumerate(segs, start=1): + log(f"段落 rank={rank} [{t0:.3f},{t1:.3f}] score={af_sc:.4f} …") + lines_out.append(infer_one(rank, t0, t1)) + + if args.merge_adjacent_tear: + log("撕膜门控:合并相邻同 top1 成功段…") + if args.tear_model is None or not Path(args.tear_model).is_file(): + log(f"缺少撕膜分类权重,跳过 tear_merge: {args.tear_model}") + else: + tw_path = (args.tear_merge_weights or args.tear_model).resolve() + tear_gate_m = YOLO(str(tw_path)) + tidx = tear_class_index(tear_gate_m, args.tear_merge_class) + merge_cfg = HandMergeConfig( + merge_iou_gt=args.merge_iou_gt, + merge_center_dist_max_px=args.merge_center_dist_max_px, + merge_center_dist_max_frac_diag=args.merge_center_dist_max_frac_diag, + ) + grouper = HandRoiGrouper( + merge_cfg, pad_box_fn=_pad_box, pad_ratio=args.pad_ratio + ) + 
body_lines = lines_out[1:] + e2e_rows = parse_e2e_rows_from_body_lines(body_lines) + mg_det = det if not args.tear_merge_full_frame else None + mg_grouper = grouper if not args.tear_merge_full_frame else None + merged_rows = merge_all( + e2e_rows, + cap, + tear_gate_m, + tidx, + head_sec=float(args.tear_merge_head_sec), + tear_prob=float(args.tear_merge_prob), + tear_min_frames=int(args.tear_merge_min_frames), + imgsz=int(args.imgsz_cls), + predict_kw=predict_kw, + verbose=bool(args.tear_merge_verbose), + det=mg_det, + grouper=mg_grouper, + imgsz_det=int(args.imgsz_det), + det_conf=float(args.det_conf), + ) + lines_out = [header] + for j, er in enumerate(merged_rows, start=1): + sk = span_key(er.start_sec, er.end_sec) + if sk in span_to_cells: + lines_out.append(sep.join([str(j)] + span_to_cells[sk])) + else: + log( + f"[tear_merge] 合并窗段全量重推理 rank={j} " + f"[{er.start_sec:.3f},{er.end_sec:.3f}]" + ) + lines_out.append(infer_one(j, er.start_sec, er.end_sec)) + + if getattr(args, "gap_merge_enabled", False): + log("相邻 gap 合并…") + body_lines = lines_out[1:] + e2e_rows = parse_e2e_rows_from_body_lines(body_lines) + gap_merged = merge_all_by_gap( + e2e_rows, + span_to_pairs, + product_map, + max_gap_sec=float(args.gap_merge_max_gap_sec), + log_fn=log, + ) + lines_out = [header] + for er in gap_merged: + lines_out.append(er.to_line12(er.rank)) + finally: + cap.release() + + log("医生识别:开始执行…") + doctor_text = _infer_doctor_text(args, video_path) + log(f"医生识别:{doctor_text}") + lines_out.append(f"医生信息:{doctor_text}") + + args.out.parent.mkdir(parents=True, exist_ok=True) + args.out.write_text("\n".join(lines_out) + "\n", encoding="utf-8") + log(f"已写出: {args.out.resolve()}") + if work_dir_log is not None: + log(f"工作目录: {work_dir_log}") + + return 0 + + +class DebugPipelineManager(PipelineManager): + """跳过 ActionFormer,用 Excel 时间段列作为段列表。""" + + def run(self) -> int: + args = self.args + video_path = args.video.resolve() + if not video_path.is_file(): + log(f"找不到视频: 
{video_path}") + return 1 + excel_path = args.excel.resolve() + if not excel_path.is_file(): + log(f"找不到 Excel: {excel_path}") + return 1 + + log("[debug] 使用 Excel 时间段,跳过 ActionFormer") + args.merge_adjacent_tear = False + log("[debug] 跳过撕膜相邻段合并(merge_adjacent_tear=false)") + + allowed_names = _resolve_allowed_names(args, excel_path) + if allowed_names is None: + return 1 + if not _validate_phase2_weights(args, require_actionformer=False): + return 1 + + col_index = int(getattr(args, "excel_time_col_index", 8)) + segs = load_segments_from_excel_column_i( + excel_path, + col_index=col_index, + video_path=video_path, + ) + if not segs: + log("Excel 未解析到任何有效时间段") + return 1 + + min_seg = float(getattr(args, "af_min_seg_seconds", 0.0)) + segs = _filter_segments_by_min_length(segs, min_seg) + if not segs: + log(f"最短段过滤(>={min_seg:g}s)后无剩余段") + return 1 + + product_map = e2e.load_product_code_map(excel_path) + return self._run_phase2_and_write( + segs, + video_path=video_path, + excel_path=excel_path, + allowed_names=allowed_names, + product_map=product_map, + ) + + +class BasketPipelineManager(PipelineManager): + """跳过 ActionFormer:OpenCV 框选篮子 + 手篮接触上升沿 → 固定窗口段。""" + + def run(self) -> int: + args = self.args + video_path = args.video.resolve() + if not video_path.is_file(): + log(f"找不到视频: {video_path}") + return 1 + excel_path = args.excel.resolve() + if not excel_path.is_file(): + log(f"找不到 Excel: {excel_path}") + return 1 + + log("[basket] 使用篮子接触分段,跳过 ActionFormer") + args.merge_adjacent_tear = False + log("[basket] 跳过撕膜相邻段合并(merge_adjacent_tear=false)") + + allowed_names = _resolve_allowed_names(args, excel_path) + if allowed_names is None: + return 1 + if not _validate_phase2_weights(args, require_actionformer=False): + return 1 + + save_json = getattr(args, "basket_save_roi_json", None) + + segs, _roi = build_segments_from_basket( + video_path, + Path(args.hand_model), + basket_roi_json=None, + save_roi_json=Path(save_json) if save_json else None, + 
skip_roi_select=False, + roi_frame=getattr(args, "basket_roi_frame", "middle"), + roi_backend=str(getattr(args, "basket_roi_backend", "tkinter")), + contact_iou_threshold=float(getattr(args, "basket_contact_iou_threshold", 0.05)), + contact_iou_on=float(getattr(args, "basket_contact_iou_on", 0.08)), + contact_iou_off=float(getattr(args, "basket_contact_iou_off", 0.03)), + confirm_seconds=float(getattr(args, "basket_confirm_seconds", 0.4)), + cooldown_seconds=float(getattr(args, "basket_cooldown_seconds", 5.0)), + segment_start_offset_sec=float(getattr(args, "basket_segment_start_offset_sec", 1.0)), + segment_end_offset_sec=float(getattr(args, "basket_segment_end_offset_sec", 5.0)), + min_segment_sec=float(getattr(args, "basket_min_segment_sec", 4.0)), + scan_frame_stride=int(getattr(args, "basket_scan_frame_stride", 1)), + det_conf=float(getattr(args, "basket_det_conf", args.det_conf)), + imgsz_det=int(args.imgsz_det), + device=str(args.device), + half=bool(args.half), + log_fn=log, + ) + if not segs: + log("未检测到任何手篮接触上升沿,退出") + return 1 + + product_map = e2e.load_product_code_map(excel_path) + return self._run_phase2_and_write( + segs, + video_path=video_path, + excel_path=excel_path, + allowed_names=allowed_names, + product_map=product_map, + ) + + +def run_pipeline(args: Namespace) -> int: + return PipelineManager(args).run() + + +def run_debug_pipeline(args: Namespace) -> int: + return DebugPipelineManager(args).run() + + +def run_basket_pipeline(args: Namespace) -> int: + return BasketPipelineManager(args).run() diff --git a/src/pack_utils.py b/src/pack_utils.py new file mode 100644 index 0000000..91feec5 --- /dev/null +++ b/src/pack_utils.py @@ -0,0 +1,50 @@ +from __future__ import annotations + +import time +from argparse import Namespace +from pathlib import Path + + +def log(msg: str) -> None: + print(f"[{time.strftime('%H:%M:%S')}] {msg}", flush=True) + + +def resolve_allowed_class_idx( + args: Namespace, + excel_path: Path, + cls_names: dict, +) -> 
frozenset[int] | None: + """None 表示不裁剪类别(全类参与投票)。""" + if not getattr(args, "use_whitelist", True): + return None + import run_haocai_actionformer_consumables_e2e as e2e + + if args.whitelist_json is not None: + wpath = Path(args.whitelist_json) + if not wpath.is_file(): + raise FileNotFoundError(f"找不到白名单 JSON: {wpath}") + allowed_names = e2e.load_whitelist_json(wpath.resolve()) + else: + allowed_names = load_allowed_names_from_excel(excel_path) + return e2e.allowed_indices_from_json_names(allowed_names, cls_names) + + +def load_allowed_names_from_excel(excel_path: Path) -> list[str]: + import pandas as pd + + df = pd.read_excel(excel_path, sheet_name=0, header=0) + if df.shape[1] < 3: + raise ValueError(f"Excel 至少需要 C 列(第 3 列): {excel_path}") + col = df.iloc[:, 2] + names: list[str] = [] + seen: set[str] = set() + for raw in col: + if pd.isna(raw): + continue + s = str(raw).strip() + if not s or s == "商品名称": + continue + if s not in seen: + seen.add(s) + names.append(s) + return names diff --git a/src/paths.py b/src/paths.py new file mode 100644 index 0000000..122866d --- /dev/null +++ b/src/paths.py @@ -0,0 +1,23 @@ +"""pack/5.11:将 vendor code 根目录加入 sys.path(顺序与 main_pipeline 一致)。""" +from __future__ import annotations + +import sys +from pathlib import Path + + +def ensure_code_on_path(pack_root: Path) -> Path: + """ + pack_root: pack/5.11 根目录。 + 返回 CODE_ROOT(即 pack_root / 'code')。 + """ + code = (pack_root / "code").resolve() + if not (code / "repo_root.py").is_file(): + raise FileNotFoundError(f"缺少 vendor code 根: {code}") + + scripts = code / "video_clip_cls" / "scripts" + infer = code / "video_clip_cls" / "infer_single_0506" + for p in (infer, scripts, code): + s = str(p) + if s not in sys.path: + sys.path.insert(0, s) + return code diff --git a/src/segments_offline_orchestrator.py b/src/segments_offline_orchestrator.py new file mode 100644 index 0000000..a6a58e7 --- /dev/null +++ b/src/segments_offline_orchestrator.py @@ -0,0 +1,159 @@ +"""按 TSV 时间段对离线视频做手检 
→ 好帧门控 → 耗材识别(无分段、无撕膜)。""" +from __future__ import annotations + +import gc +from argparse import Namespace +from pathlib import Path +from typing import Any + +import cv2 +import run_haocai_actionformer_consumables_e2e as e2e +from pipeline.segment_processor import ( + HaocaiOnlyClassifier, + process_segment_haocai_from_cap_with_gate_retries, +) +from ultralytics import YOLO + +from pack_utils import log, resolve_allowed_class_idx +from stream_orchestrator import ( + _format_result_row, + _maybe_free_gpu, + _resolve_haocai_min_conf_retry, +) +from tsv_segments import load_segments_from_result_tsv + + +def _validate_haocai_weights(args: Namespace) -> bool: + for p, lab in ( + (args.hand_model, "手部检测"), + (args.goodbad_model, "好坏帧"), + (args.haocai_model, "耗材分类"), + ): + if not Path(p).is_file(): + log(f"缺少{lab}: {p}") + return False + return True + + +def run_segments_offline_pipeline(args: Namespace) -> int: + video_path = Path(args.video).resolve() + if not video_path.is_file(): + log(f"找不到视频: {video_path}") + return 1 + + excel_path = Path(args.excel).resolve() + if not excel_path.is_file(): + log(f"找不到 Excel: {excel_path}") + return 1 + + tsv_path = Path(args.segments_tsv).resolve() + if not tsv_path.is_file(): + log(f"找不到时间段 TSV: {tsv_path}") + return 1 + + if not _validate_haocai_weights(args): + return 1 + + segs = load_segments_from_result_tsv( + tsv_path, + skip_empty_top1=bool(getattr(args, "segments_skip_empty", False)), + ) + if not segs: + log("TSV 未解析到任何有效时间段") + return 1 + + product_map = e2e.load_product_code_map(excel_path) + out_path = Path(args.out).resolve() + out_path.parent.mkdir(parents=True, exist_ok=True) + + predict_kw: dict[str, Any] = {"device": args.device} + if args.half: + predict_kw["half"] = True + + log("[segments-offline] 加载 YOLO(手 / 好坏帧 / 耗材)…") + det = YOLO(str(args.hand_model)) + gb = YOLO(str(args.goodbad_model)) + cls_m = YOLO(str(args.haocai_model)) + hc = HaocaiOnlyClassifier( + cls_m, + cls_names=cls_m.names, + 
imgsz_cls=int(args.imgsz_cls), + predict_kw=predict_kw, + gb=gb, + gb_names=gb.names, + ) + try: + allowed_idx = resolve_allowed_class_idx(args, excel_path, cls_m.names) + except FileNotFoundError as exc: + log(str(exc)) + return 1 + if getattr(args, "use_whitelist", True): + log(f"[segments-offline] 白名单启用,{len(allowed_idx or ())} 个类参与投票") + else: + log("[segments-offline] 白名单已关闭,使用全 41 类") + + cap = cv2.VideoCapture(str(video_path)) + if not cap.isOpened(): + log("无法打开视频") + return 1 + + header = "\t".join( + [ + "rank", + "start_sec", + "end_sec", + "product_id_top1", + "top1_name", + "top1_conf", + "product_id_top2", + "top2_name", + "top2_conf", + "product_id_top3", + "top3_name", + "top3_conf", + ] + ) + lines_out = [header] + + try: + for rank, (t0, t1, _sc) in enumerate(segs, start=1): + log(f"[segments-offline] rank={rank} [{t0:.3f},{t1:.3f}] …") + info = process_segment_haocai_from_cap_with_gate_retries( + cap, + det, + hc, + start_sec=t0, + end_sec=t1, + seek_margin_sec=float(args.seek_margin_sec), + det_conf=float(args.det_conf), + pad_ratio=float(args.pad_ratio), + imgsz_det=int(args.imgsz_det), + frame_stride=max(1, int(args.frame_stride)), + haocai_min_conf=float(args.haocai_min_conf), + haocai_min_conf_retry=_resolve_haocai_min_conf_retry(args), + good_top1_conf_threshold=float(args.good_top1_conf_threshold), + good_top1_retry_threshold=float(args.good_top1_retry_threshold), + cls_names=cls_m.names, + allowed_class_idx=allowed_idx, + predict_kw=predict_kw, + log_fn=log, + log_prefix=f"[segments-offline] rank={rank}: ", + ) + lines_out.append( + _format_result_row( + rank, + t0, + t1, + info, + product_map, + legacy_12_col=bool(args.legacy_12_col_only), + ) + ) + _maybe_free_gpu() + finally: + cap.release() + gc.collect() + + out_path.write_text("\n".join(lines_out) + "\n", encoding="utf-8") + log(f"[segments-offline] 完成,共 {len(segs)} 段,结果: {out_path}") + return 0 diff --git a/src/stream_basket_session.py b/src/stream_basket_session.py new file mode 
100644 index 0000000..e7cac85 --- /dev/null +++ b/src/stream_basket_session.py @@ -0,0 +1,149 @@ +"""推流篮子会话:逐帧手部检测 + ActionTriggerLogic + 待识别片段队列。""" +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any, Callable + +import numpy as np +from ultralytics import YOLO + +from action_trigger_logic import ActionTriggerLogic +from run_segments_consumable_vote import collect_hand_boxes +from stream_frame_buffer import FrameRingBuffer + + +@dataclass +class CachedClip: + """收满窗口后可送耗材识别的片段(已解码帧,识别后应显式释放)。""" + + contact_t: float + start_sec: float + end_sec: float + frames: list[tuple[float, np.ndarray]] + + @property + def duration_sec(self) -> float: + return max(0.0, self.end_sec - self.start_sec) + + +@dataclass +class _PendingClip: + contact_t: float + start_sec: float + end_sec: float + + +class StreamBasketSession: + """ + 每帧 push_frame: + 1. 手部检测 + ActionTriggerLogic → 可选 start + 2. 写入环形缓存 + 3. poll_ready_clips 返回已收满 [start+off0, start+off1] 的片段 + """ + + def __init__( + self, + basket_xyxy: list[float], + hand_model: YOLO, + trigger: ActionTriggerLogic, + *, + segment_start_offset_sec: float = 1.0, + segment_end_offset_sec: float = 6.0, + min_segment_sec: float = 4.0, + ring_buffer_sec: float = 10.0, + fps: float = 25.0, + cache_max_width: int = 1920, + jpeg_quality: int = 85, + det_conf: float = 0.6, + imgsz_det: int = 640, + predict_kw: dict[str, Any] | None = None, + log_fn: Callable[[str], None] | None = None, + ) -> None: + self.basket_xyxy = [float(v) for v in basket_xyxy] + self.hand_model = hand_model + self.trigger = trigger + self.segment_start_offset = float(segment_start_offset_sec) + self.segment_end_offset = float(segment_end_offset_sec) + self.min_segment_sec = float(min_segment_sec) + self.det_conf = float(det_conf) + self.imgsz_det = int(imgsz_det) + self.predict_kw = dict(predict_kw or {}) + self.log_fn = log_fn + + self.buffer = FrameRingBuffer( + max_seconds=ring_buffer_sec, + fps=fps, + 
cache_max_width=cache_max_width, + jpeg_quality=jpeg_quality, + ) + self._pending: list[_PendingClip] = [] + self._current_t = 0.0 + + def push_frame(self, t_sec: float, frame: np.ndarray) -> float | None: + """处理一帧;若触发 start 返回 contact 时间戳。""" + t = float(t_sec) + self._current_t = t + + r0 = self.hand_model.predict( + frame, + conf=self.det_conf, + imgsz=self.imgsz_det, + verbose=False, + **self.predict_kw, + )[0] + hands = collect_hand_boxes(self.hand_model, r0.boxes) if r0.boxes else [] + start_t = self.trigger.process_frame(t, hands, self.basket_xyxy) + + self.buffer.append(t, frame) + self.buffer.prune_before(t - self.buffer.max_seconds) + + if start_t is not None: + contact = float(start_t) + seg0 = contact + self.segment_start_offset + seg1 = contact + self.segment_end_offset + self._pending.append( + _PendingClip(contact_t=contact, start_sec=seg0, end_sec=seg1) + ) + if self.log_fn: + self.log_fn(f"[stream] 接触上升沿 t={contact:.3f}s → 窗口 [{seg0:.3f}, {seg1:.3f}]s") + return contact + + return None + + def poll_ready_clips(self) -> list[CachedClip]: + """返回当前时刻已收满窗口、且满足最小时长的片段。""" + ready: list[CachedClip] = [] + still_pending: list[_PendingClip] = [] + + for pc in self._pending: + if self._current_t + 1e-6 < pc.end_sec: + still_pending.append(pc) + continue + + frames = self.buffer.slice_decoded(pc.start_sec, pc.end_sec) + duration = pc.end_sec - pc.start_sec + if duration + 1e-9 < self.min_segment_sec: + if self.log_fn: + self.log_fn( + f"[stream] 丢弃短段 [{pc.start_sec:.3f},{pc.end_sec:.3f}] " + f"时长 {duration:.3f}s < {self.min_segment_sec:g}s" + ) + continue + if not frames: + if self.log_fn: + self.log_fn( + f"[stream] 丢弃空段 [{pc.start_sec:.3f},{pc.end_sec:.3f}](缓存无帧)" + ) + continue + + ready.append( + CachedClip( + contact_t=pc.contact_t, + start_sec=pc.start_sec, + end_sec=pc.end_sec, + frames=frames, + ) + ) + + self._pending = still_pending + return ready diff --git a/src/stream_frame_buffer.py b/src/stream_frame_buffer.py new file mode 100644 index 
0000000..ac30823 --- /dev/null +++ b/src/stream_frame_buffer.py @@ -0,0 +1,99 @@ +"""推流帧环形缓存:JPEG 压缩存储,按时间戳截取片段。""" +from __future__ import annotations + +from collections import deque +from dataclasses import dataclass + +import cv2 +import numpy as np + + +def encode_frame_for_cache( + frame: np.ndarray, + *, + max_width: int = 1920, + jpeg_quality: int = 85, +) -> bytes: + """缩放到 max_width 以内后 JPEG 编码,显著降低 4K 推流内存占用。""" + img = frame + h, w = img.shape[:2] + mw = int(max_width) + if mw > 0 and w > mw: + scale = mw / float(w) + img = cv2.resize( + img, + (int(round(w * scale)), int(round(h * scale))), + interpolation=cv2.INTER_AREA, + ) + ok, buf = cv2.imencode(".jpg", img, [int(cv2.IMWRITE_JPEG_QUALITY), int(jpeg_quality)]) + if not ok: + raise RuntimeError("JPEG 编码失败") + return buf.tobytes() + + +def decode_cached_frame(data: bytes) -> np.ndarray: + arr = np.frombuffer(data, dtype=np.uint8) + img = cv2.imdecode(arr, cv2.IMREAD_COLOR) + if img is None: + raise RuntimeError("JPEG 解码失败") + return img + + +@dataclass(frozen=True) +class TimestampedFrame: + t_sec: float + frame_jpeg: bytes + + +class FrameRingBuffer: + """ + 保留最近 max_seconds 内的 (t, jpeg)。 + 5s 窗口 @25fps 1280 宽 JPEG 约 25~50MB,远小于 4K 原始 BGR 数 GB。 + """ + + def __init__( + self, + *, + max_seconds: float = 10.0, + fps: float = 25.0, + cache_max_width: int = 1280, + jpeg_quality: int = 85, + ) -> None: + self.max_seconds = max(1.0, float(max_seconds)) + self.fps = max(1.0, float(fps)) + self.cache_max_width = int(cache_max_width) + self.jpeg_quality = int(jpeg_quality) + cap = max(32, int(round(self.max_seconds * self.fps)) + 8) + self._items: deque[TimestampedFrame] = deque(maxlen=cap) + self._latest_t = 0.0 + + @property + def latest_t(self) -> float: + return self._latest_t + + def append(self, t_sec: float, frame: np.ndarray) -> None: + t = float(t_sec) + self._latest_t = t + jpeg = encode_frame_for_cache( + frame, + max_width=self.cache_max_width, + jpeg_quality=self.jpeg_quality, + ) + 
self._items.append(TimestampedFrame(t_sec=t, frame_jpeg=jpeg)) + + def prune_before(self, t_min: float) -> None: + cutoff = float(t_min) + while self._items and self._items[0].t_sec < cutoff - 1e-9: + self._items.popleft() + + def slice_decoded(self, t0: float, t1: float) -> list[tuple[float, np.ndarray]]: + """返回 t0 <= t <= t1 的解码帧(按需解码,用完即弃)。""" + lo = float(t0) + hi = float(t1) + if hi < lo: + lo, hi = hi, lo + out: list[tuple[float, np.ndarray]] = [] + for it in self._items: + if lo - 1e-6 <= it.t_sec <= hi + 1e-6: + out.append((it.t_sec, decode_cached_frame(it.frame_jpeg))) + return out diff --git a/src/stream_orchestrator.py b/src/stream_orchestrator.py new file mode 100644 index 0000000..1424b26 --- /dev/null +++ b/src/stream_orchestrator.py @@ -0,0 +1,462 @@ +"""RTSP 推流篮子耗材识别编排(无撕膜模型 / 无 tear_merge)。""" +from __future__ import annotations + +import gc +import time +from argparse import Namespace +from pathlib import Path +from typing import Any + +import cv2 +import run_haocai_actionformer_consumables_e2e as e2e +from action_trigger_logic import ActionTriggerLogic +from pipeline.segment_processor import ( + HaocaiOnlyClassifier, + process_segment_haocai_from_cap_with_gate_retries, + process_segment_haocai_from_frames_with_gate_retries, +) +from ultralytics import YOLO + +from basket_segmenter import ( + _roi_xyxy_from_select, + _scale_frame_for_display, + _select_basket_roi_tkinter, + save_basket_roi_json, +) +from pack_utils import log, resolve_allowed_class_idx +from stream_basket_session import CachedClip, StreamBasketSession + + +def _validate_stream_weights(args: Namespace) -> bool: + for p, lab in ( + (args.hand_model, "手部检测"), + (args.goodbad_model, "好坏帧"), + (args.haocai_model, "耗材分类"), + ): + if not Path(p).is_file(): + log(f"缺少{lab}: {p}") + return False + return True + + +def _resolve_basket_roi( + args: Namespace, + first_frame, + *, + t_sec: float = 0.0, +) -> list[float]: + backend = str(getattr(args, "basket_roi_backend", 
"tkinter")).strip().lower() + if backend != "tkinter": + log(f"[stream] 推流框选暂仅支持 tkinter,当前 {backend!r} 将回退 tkinter") + disp, scale = _scale_frame_for_display(first_frame, 1920) + log("[stream] 请在弹窗中框选篮子 ROI…") + rx, ry, rw, rh = _select_basket_roi_tkinter( + disp, t_sec=t_sec, title="框选耗材篮子(推流)" + ) + if scale != 1.0: + rx, ry, rw, rh = rx / scale, ry / scale, rw / scale, rh / scale + roi = _roi_xyxy_from_select(int(round(rx)), int(round(ry)), int(round(rw)), int(round(rh))) + log(f"[stream] 篮子 ROI xyxy={roi}") + + save_json = getattr(args, "basket_save_roi_json", None) + if save_json is not None: + save_basket_roi_json(Path(save_json), roi) + log(f"[stream] ROI 已保存: {save_json}") + return roi + + +def _format_result_row( + rank: int, + t0: float, + t1: float, + info: dict[str, Any], + product_map: dict[str, str], + *, + legacy_12_col: bool, +) -> str: + sep = "\t" + if not info.get("ok"): + reason = str(info.get("reason", "")) + row = [ + str(rank), + f"{t0:.6f}", + f"{t1:.6f}", + "", + reason, + "", + "", + "", + "", + "", + "", + "", + ] + if not legacy_12_col: + row.extend(["", ""]) + return sep.join(row) + + n1, n2, n3 = info["top_names"] + c1, c2, c3 = info["top_confs"] + id1 = product_map.get(n1, "") if n1 else "" + id2 = product_map.get(n2, "") if n2 else "" + id3 = product_map.get(n3, "") if n3 else "" + for nm, pid in ((n1, id1), (n2, id2), (n3, id3)): + if nm and not pid: + log(f"警告: 商品表无名称「{nm}」,产品编码置空。") + + row = [ + str(rank), + f"{t0:.6f}", + f"{t1:.6f}", + id1, + n1, + f"{c1:.6f}" if n1 else "", + id2, + n2, + f"{c2:.6f}" if n2 else "", + id3, + n3, + f"{c3:.6f}" if n3 else "", + ] + if not legacy_12_col: + row.extend(["", ""]) + return sep.join(row) + + +def _maybe_free_gpu() -> None: + gc.collect() + try: + import torch + + if torch.cuda.is_available(): + torch.cuda.empty_cache() + except ImportError: + pass + + +def _resolve_haocai_min_conf_retry(args: Namespace) -> float | None: + h_retry = getattr(args, "haocai_min_conf_retry", None) + if 
h_retry is None: + return None + h_retry = float(h_retry) + if h_retry <= 0: + return None + if h_retry >= float(args.haocai_min_conf) - 1e-12: + return None + return h_retry + + +def _haocai_infer_kwargs( + args: Namespace, + cls_names: dict, + allowed_idx: frozenset[int] | None, + predict_kw: dict[str, Any], +) -> dict[str, Any]: + return { + "det_conf": float(args.det_conf), + "pad_ratio": float(args.pad_ratio), + "imgsz_det": int(args.imgsz_det), + "frame_stride": max(1, int(args.frame_stride)), + "haocai_min_conf": float(args.haocai_min_conf), + "haocai_min_conf_retry": _resolve_haocai_min_conf_retry(args), + "good_top1_conf_threshold": float(args.good_top1_conf_threshold), + "good_top1_retry_threshold": float(args.good_top1_retry_threshold), + "cls_names": cls_names, + "allowed_class_idx": allowed_idx, + "predict_kw": predict_kw, + } + + +def _use_file_infer_for_stream(args: Namespace, *, is_file: bool) -> bool: + """本地可 seek 文件且 infer_source=file 时,段内识别回源 4K。""" + if not is_file: + return False + mode = str(getattr(args, "stream_infer_source", "file")).strip().lower() + return mode in ("file", "auto", "source") + + +def _infer_clip( + clip: CachedClip, + *, + det: YOLO, + hc: HaocaiOnlyClassifier, + cap: cv2.VideoCapture | None, + use_file_infer: bool, + args: Namespace, + cls_names: dict, + allowed_idx: frozenset[int] | None, + predict_kw: dict[str, Any], + rank: int | None = None, +) -> dict[str, Any]: + log_prefix = f"[stream] rank={rank}: " if rank is not None else "[stream] " + infer_kw = _haocai_infer_kwargs(args, cls_names, allowed_idx, predict_kw) + try: + if use_file_infer and cap is not None: + return process_segment_haocai_from_cap_with_gate_retries( + cap, + det, + hc, + start_sec=clip.start_sec, + end_sec=clip.end_sec, + seek_margin_sec=float(args.seek_margin_sec), + log_fn=log, + log_prefix=log_prefix, + **infer_kw, + ) + return process_segment_haocai_from_frames_with_gate_retries( + clip.frames, + det, + hc, + start_sec=clip.start_sec, + 
end_sec=clip.end_sec, + log_fn=log, + log_prefix=log_prefix, + **infer_kw, + ) + finally: + clip.frames.clear() + _maybe_free_gpu() + + +class StreamBasketOrchestrator: + def __init__(self, args: Namespace) -> None: + self.args = args + + def run(self) -> int: + args = self.args + source = str(getattr(args, "stream_rtsp", "") or getattr(args, "rtsp", "")).strip() + if not source: + log("缺少推流地址:--rtsp 或 yaml stream.rtsp") + return 1 + + excel_path = Path(args.excel).resolve() + if not excel_path.is_file(): + log(f"找不到 Excel: {excel_path}") + return 1 + if not _validate_stream_weights(args): + return 1 + + product_map = e2e.load_product_code_map(excel_path) + out_path = Path(args.out).resolve() + out_path.parent.mkdir(parents=True, exist_ok=True) + + predict_kw: dict[str, Any] = {"device": args.device} + if args.half: + predict_kw["half"] = True + + log("[stream] 加载 YOLO(手 / 好坏帧 / 耗材)…") + det = YOLO(str(args.hand_model)) + gb = YOLO(str(args.goodbad_model)) + cls_m = YOLO(str(args.haocai_model)) + cls_names = cls_m.names + hc = HaocaiOnlyClassifier( + cls_m, + cls_names=cls_names, + imgsz_cls=int(args.imgsz_cls), + predict_kw=predict_kw, + gb=gb, + gb_names=gb.names, + ) + try: + allowed_idx = resolve_allowed_class_idx(args, excel_path, cls_names) + except FileNotFoundError as exc: + log(str(exc)) + return 1 + if getattr(args, "use_whitelist", True): + log(f"[stream] 白名单启用,{len(allowed_idx or ())} 个类参与投票") + else: + log("[stream] 白名单已关闭,使用全 41 类") + + cap = cv2.VideoCapture(source) + if not cap.isOpened(): + log(f"[stream] 无法打开流: {source}") + return 1 + + is_file = Path(source).is_file() + use_file_infer = _use_file_infer_for_stream(args, is_file=is_file) + infer_cap: cv2.VideoCapture | None = None + if use_file_infer: + infer_cap = cv2.VideoCapture(source) + if not infer_cap.isOpened(): + log("[stream] 无法打开回源推理用 VideoCapture,回退 JPEG 缓存识别") + use_file_infer = False + infer_cap = None + fps = float(cap.get(cv2.CAP_PROP_FPS) or 0.0) + if fps <= 1e-3: + fps = 
float(getattr(args, "stream_fps", 25.0)) + + ok0, first = cap.read() + if not ok0 or first is None: + log("[stream] 无法读取首帧") + cap.release() + return 1 + + t0 = float(cap.get(cv2.CAP_PROP_POS_MSEC)) / 1000.0 if is_file else 0.0 + basket_roi = _resolve_basket_roi(args, first, t_sec=t0) + + seg_start_off = float( + getattr( + args, + "stream_segment_start_offset_sec", + getattr(args, "basket_segment_start_offset_sec", 1.0), + ) + ) + seg_end_off = float( + getattr( + args, + "stream_segment_end_offset_sec", + getattr(args, "basket_segment_end_offset_sec", 6.0), + ) + ) + ring_sec = float(getattr(args, "stream_ring_buffer_sec", 10.0)) + cache_max_w = int(getattr(args, "stream_cache_max_width", 1920)) + jpeg_q = int(getattr(args, "stream_jpeg_quality", 85)) + + trigger = ActionTriggerLogic( + fps=fps, + confirm_seconds=float(getattr(args, "basket_confirm_seconds", 0.12)), + cooldown_seconds=float(getattr(args, "basket_cooldown_seconds", 2.5)), + threshold_on=float(getattr(args, "basket_contact_iou_on", 0.04)), + threshold_off=float(getattr(args, "basket_contact_iou_off", 0.02)), + ) + + session = StreamBasketSession( + basket_roi, + det, + trigger, + segment_start_offset_sec=seg_start_off, + segment_end_offset_sec=seg_end_off, + min_segment_sec=float(getattr(args, "stream_min_segment_sec", 4.0)), + ring_buffer_sec=ring_sec, + fps=fps, + cache_max_width=cache_max_w, + jpeg_quality=jpeg_q, + det_conf=float(getattr(args, "basket_det_conf", args.det_conf)), + imgsz_det=int(args.imgsz_det), + predict_kw=predict_kw, + log_fn=log, + ) + + log( + f"[stream] 帧缓存: ring={ring_sec:g}s, jpeg≤{cache_max_w}px q={jpeg_q} " + f"(4K 原始帧不入缓存,防 OOM)" + ) + if use_file_infer: + log("[stream] 段内识别: 回源本地文件 4K(infer_source=file,与 TSV 离线一致)") + else: + fallback = str(getattr(args, "stream_infer_fallback", "cache")) + log(f"[stream] 段内识别: JPEG 缓存帧(infer_fallback={fallback})") + + header = "\t".join( + [ + "rank", + "start_sec", + "end_sec", + "product_id_top1", + "top1_name", + "top1_conf", + 
"product_id_top2", + "top2_name", + "top2_conf", + "product_id_top3", + "top3_name", + "top3_conf", + ] + ) + out_path.write_text(header + "\n", encoding="utf-8") + rank = 0 + frame_idx = 0 + + def process_ready() -> None: + nonlocal rank + for clip in session.poll_ready_clips(): + rank += 1 + log( + f"[stream] 识别 rank={rank} [{clip.start_sec:.3f},{clip.end_sec:.3f}] " + f"({len(clip.frames)} 帧)…" + ) + info = _infer_clip( + clip, + det=det, + hc=hc, + cap=infer_cap, + use_file_infer=use_file_infer, + args=args, + cls_names=cls_names, + allowed_idx=allowed_idx, + predict_kw=predict_kw, + rank=rank, + ) + line = _format_result_row( + rank, + clip.start_sec, + clip.end_sec, + info, + product_map, + legacy_12_col=bool(args.legacy_12_col_only), + ) + with out_path.open("a", encoding="utf-8") as f: + f.write(line + "\n") + log(f"[stream] rank={rank} 已写入") + + session.push_frame(t0, first) + process_ready() + frame_idx = 1 + + log(f"[stream] 开始读流: {source} (fps≈{fps:g})") + try: + while True: + ok, frame = cap.read() + if not ok or frame is None: + if is_file: + break + time.sleep(0.02) + continue + + if is_file: + t_sec = float(cap.get(cv2.CAP_PROP_POS_MSEC)) / 1000.0 + else: + t_sec = frame_idx / fps + + session.push_frame(t_sec, frame) + del frame + process_ready() + frame_idx += 1 + except KeyboardInterrupt: + log("[stream] 用户中断") + finally: + cap.release() + if infer_cap is not None: + infer_cap.release() + + for clip in session.poll_ready_clips(): + rank += 1 + info = _infer_clip( + clip, + det=det, + hc=hc, + cap=infer_cap, + use_file_infer=use_file_infer, + args=args, + cls_names=cls_names, + allowed_idx=allowed_idx, + predict_kw=predict_kw, + rank=rank, + ) + line = _format_result_row( + rank, + clip.start_sec, + clip.end_sec, + info, + product_map, + legacy_12_col=bool(args.legacy_12_col_only), + ) + with out_path.open("a", encoding="utf-8") as f: + f.write(line + "\n") + + log(f"[stream] 结束,共 {rank} 段,结果: {out_path}") + return 0 if rank > 0 or is_file else 0 + 
+ +def run_stream_pipeline(args: Namespace) -> int: + return StreamBasketOrchestrator(args).run() diff --git a/src/tsv_segments.py b/src/tsv_segments.py new file mode 100644 index 0000000..5480708 --- /dev/null +++ b/src/tsv_segments.py @@ -0,0 +1,58 @@ +"""从推流/离线结果 TSV 加载时间段列表。""" +from __future__ import annotations + +from pathlib import Path + +from pack_utils import log + + +def load_segments_from_result_tsv( + tsv_path: Path, + *, + skip_empty_top1: bool = False, +) -> list[tuple[float, float, float]]: + """ + 解析 rank/start_sec/end_sec 列,返回 (start, end, score=1.0) 列表。 + skip_empty_top1: 跳过 top1_name 为空或为失败原因文案的行。 + """ + tsv_path = tsv_path.resolve() + text = tsv_path.read_text(encoding="utf-8") + lines = [ln for ln in text.splitlines() if ln.strip()] + if len(lines) < 2: + log(f"[segments] TSV 无数据行: {tsv_path}") + return [] + + header = lines[0].split("\t") + col = {name.strip(): i for i, name in enumerate(header)} + for req in ("start_sec", "end_sec"): + if req not in col: + raise ValueError(f"TSV 缺少列 {req!r}: {tsv_path}") + + top1_idx = col.get("top1_name") + segs: list[tuple[float, float, float]] = [] + skipped = 0 + + for ln in lines[1:]: + parts = ln.split("\t") + if len(parts) <= col["end_sec"]: + continue + try: + t0 = float(parts[col["start_sec"]].strip()) + t1 = float(parts[col["end_sec"]].strip()) + except ValueError: + skipped += 1 + continue + if t1 <= t0: + skipped += 1 + continue + if skip_empty_top1 and top1_idx is not None and len(parts) > top1_idx: + name = parts[top1_idx].strip() + if not name or name.startswith("("): + skipped += 1 + continue + segs.append((t0, t1, 1.0)) + + log(f"[segments] 从 TSV 加载 {len(segs)} 段: {tsv_path}") + if skipped: + log(f"[segments] 跳过无效/空行 {skipped} 条") + return segs diff --git a/tests/test_action_trigger_logic.py b/tests/test_action_trigger_logic.py new file mode 100644 index 0000000..f89a7b8 --- /dev/null +++ b/tests/test_action_trigger_logic.py @@ -0,0 +1,174 @@ +"""ActionTriggerLogic 合成 IoU 序列单元测试。""" +from 
__future__ import annotations + +import sys +import unittest +from pathlib import Path + +PACK_ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(PACK_ROOT / "src")) + +from paths import ensure_code_on_path # noqa: E402 + +ensure_code_on_path(PACK_ROOT) + +from action_trigger_logic import ( # noqa: E402 + ActionTriggerLogic, + max_hand_basket_iou, + resolve_contact_iou_thresholds, +) + + +class TestResolveContactIouThresholds(unittest.TestCase): + def test_legacy_threshold_derives_off(self) -> None: + on, off = resolve_contact_iou_thresholds(contact_iou_threshold=0.05) + self.assertAlmostEqual(on, 0.05) + self.assertAlmostEqual(off, 0.03) + + def test_explicit_on_off(self) -> None: + on, off = resolve_contact_iou_thresholds( + contact_iou_on=0.08, contact_iou_off=0.03 + ) + self.assertAlmostEqual(on, 0.08) + self.assertAlmostEqual(off, 0.03) + + +class TestActionTriggerLogic(unittest.TestCase): + BASKET = [0.0, 0.0, 100.0, 100.0] + + def _run_iou_sequence( + self, + ious: list[float], + *, + dt: float = 0.04, + **trigger_kw, + ) -> list[float]: + trigger = ActionTriggerLogic(fps=25, **trigger_kw) + events: list[float] = [] + for i, iou in enumerate(ious): + event_t = trigger.step_iou(i * dt, iou) + if event_t is not None: + events.append(event_t) + return events + + def test_oscillation_before_trigger_emits_once(self) -> None: + ious = [0.09, 0.02, 0.09, 0.02, 0.09, 0.09, 0.09, 0.09, 0.09, 0.09] + events = self._run_iou_sequence( + ious, + confirm_seconds=0.12, + cooldown_seconds=5.0, + threshold_on=0.08, + threshold_off=0.03, + ) + self.assertEqual(len(events), 1) + + def test_long_dwell_emits_once(self) -> None: + ious = [0.0] * 2 + [0.10] * 30 + events = self._run_iou_sequence( + ious, + confirm_seconds=0.12, + cooldown_seconds=5.0, + threshold_on=0.08, + threshold_off=0.03, + ) + self.assertEqual(len(events), 1) + + def test_cooldown_suppresses_second_event(self) -> None: + ious = ( + [0.0] * 2 + + [0.10] * 3 + + [0.0] * 6 + + [0.10] * 3 + + 
[0.10] * 5 + ) + events = self._run_iou_sequence( + ious, + dt=0.2, + confirm_seconds=0.12, + cooldown_seconds=5.0, + threshold_on=0.08, + threshold_off=0.03, + ) + self.assertEqual(len(events), 1) + + def test_real_second_contact_after_gap(self) -> None: + ious = ( + [0.0] * 2 + + [0.10] * 3 + + [0.0] * 30 + + [0.10] * 3 + ) + events = self._run_iou_sequence( + ious, + dt=0.2, + confirm_seconds=0.12, + cooldown_seconds=5.0, + threshold_on=0.08, + threshold_off=0.03, + ) + self.assertEqual(len(events), 2) + + def test_start_timestamp_is_streak_first_frame(self) -> None: + dt = 0.04 + trigger = ActionTriggerLogic( + fps=25, + confirm_seconds=0.12, + cooldown_seconds=5.0, + threshold_on=0.08, + threshold_off=0.03, + ) + events: list[float] = [] + for i in range(8): + event_t = trigger.step_iou(i * dt, 0.10) + if event_t is not None: + events.append(event_t) + self.assertEqual(len(events), 1) + self.assertAlmostEqual(events[0], 0.0, places=3) + + def test_multi_hand_any_high_iou_triggers(self) -> None: + trigger = ActionTriggerLogic( + fps=25, + confirm_seconds=0.12, + cooldown_seconds=5.0, + threshold_on=0.08, + threshold_off=0.03, + ) + low_hand = [80.0, 80.0, 90.0, 90.0] + high_hand = [0.0, 0.0, 50.0, 50.0] + dt = 0.04 + events: list[float] = [] + for i in range(5): + hands = [low_hand, high_hand] if i >= 1 else [low_hand] + event_t = trigger.process_frame(i * dt, hands, self.BASKET) + if event_t is not None: + events.append(event_t) + self.assertEqual(len(events), 1) + + def test_max_hand_basket_iou_picks_best(self) -> None: + low = [80.0, 80.0, 90.0, 90.0] + high = [0.0, 0.0, 50.0, 50.0] + iou = max_hand_basket_iou([low, high], self.BASKET) + self.assertGreater(iou, 0.08) + + def test_reset_clears_state(self) -> None: + trigger = ActionTriggerLogic( + fps=25, + confirm_seconds=0.12, + cooldown_seconds=5.0, + threshold_on=0.08, + threshold_off=0.03, + ) + for i in range(2): + trigger.step_iou(i * 0.04, 0.10) + trigger.reset() + events: list[float] = [] + for i in 
range(5): + event_t = trigger.step_iou(i * 0.04, 0.10) + if event_t is not None: + events.append(event_t) + self.assertEqual(len(events), 1) + self.assertAlmostEqual(events[0], 0.0, places=3) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_stream_basket.py b/tests/test_stream_basket.py new file mode 100644 index 0000000..fc037b5 --- /dev/null +++ b/tests/test_stream_basket.py @@ -0,0 +1,84 @@ +"""推流帧缓存单元测试。""" +from __future__ import annotations + +import sys +import unittest +from pathlib import Path +from unittest.mock import MagicMock + +import numpy as np + +PACK_ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(PACK_ROOT / "src")) + +from paths import ensure_code_on_path # noqa: E402 + +ensure_code_on_path(PACK_ROOT) + +from stream_frame_buffer import ( # noqa: E402 + FrameRingBuffer, + decode_cached_frame, + encode_frame_for_cache, +) +from stream_basket_session import StreamBasketSession # noqa: E402 + + +class TestFrameRingBuffer(unittest.TestCase): + def test_jpeg_roundtrip(self) -> None: + raw = np.random.randint(0, 255, (1080, 1920, 3), dtype=np.uint8) + data = encode_frame_for_cache(raw, max_width=1280) + self.assertLess(len(data), raw.nbytes // 4) + back = decode_cached_frame(data) + self.assertEqual(back.ndim, 3) + self.assertLessEqual(back.shape[1], 1280) + + def test_slice_inclusive(self) -> None: + buf = FrameRingBuffer(max_seconds=10.0, fps=10.0, cache_max_width=640) + for i in range(50): + f = np.zeros((64, 64, 3), dtype=np.uint8) + buf.append(i * 0.1, f) + part = buf.slice_decoded(2.0, 2.5) + ts = [p[0] for p in part] + self.assertTrue(all(2.0 - 1e-6 <= t <= 2.5 + 1e-6 for t in ts)) + self.assertGreaterEqual(len(ts), 5) + + +class TestStreamBasketSession(unittest.TestCase): + def test_pending_clip_ready_after_window(self) -> None: + trigger = MagicMock() + trigger.process_frame = MagicMock( + side_effect=lambda t, _h, _b: 2.0 if abs(t - 2.0) < 1e-9 else None + ) + hand_model = MagicMock() + + session = 
StreamBasketSession( + [0, 0, 100, 100], + hand_model, + trigger, + segment_start_offset_sec=2.0, + segment_end_offset_sec=8.0, + min_segment_sec=4.0, + ring_buffer_sec=10.0, + fps=25.0, + cache_max_width=640, + ) + + frame = np.zeros((64, 64, 3), dtype=np.uint8) + contact_t = None + for i in range(260): + t = i * 0.04 + start = session.push_frame(t, frame) + if start is not None and contact_t is None: + contact_t = start + + self.assertAlmostEqual(contact_t, 2.0, places=3) + clips = session.poll_ready_clips() + self.assertGreaterEqual(len(clips), 1) + clip = clips[0] + self.assertAlmostEqual(clip.start_sec, 4.0, places=2) + self.assertAlmostEqual(clip.end_sec, 10.0, places=2) + self.assertGreater(len(clip.frames), 0) + + +if __name__ == "__main__": + unittest.main() diff --git a/weights/goodbad_frame.pt b/weights/goodbad_frame.pt new file mode 100644 index 0000000..fa8b56b Binary files /dev/null and b/weights/goodbad_frame.pt differ diff --git a/weights/hand_detect.pt b/weights/hand_detect.pt new file mode 100644 index 0000000..082a181 Binary files /dev/null and b/weights/hand_detect.pt differ diff --git a/weights/haocai_classify.pt b/weights/haocai_classify.pt new file mode 100644 index 0000000..3671ac2 Binary files /dev/null and b/weights/haocai_classify.pt differ