commit 07816bd18a67b19f4c05b3838e2ef77c7883f640
Author: hsz <2091085305@qq.com>
Date:   Tue Jun 2 16:59:42 2026 +0800

    6.2

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..62502ef
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,38 @@
+# 推理结果（保留空目录占位）
+/output/*
+!/output/.gitkeep
+
+# 用户放入的测试视频 / 转码产物（保留商品表 Excel）
+/input/*.mp4
+/input/*.avi
+/input/*.mkv
+/input/*.mov
+/input/remuxed/
+
+# 运行期 ROI / 日志
+/output/*.json
+/output/*.txt
+/output/*.log
+
+# Python 环境与缓存
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+.venv/
+venv/
+.env
+*.egg-info/
+dist/
+build/
+
+# Jupyter / 临时文件
+.ipynb_checkpoints/
+*.swp
+*~
+
+# IDE / 系统
+.DS_Store
+.idea/
+.vscode/
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..ce8510e
--- /dev/null
+++ b/README.md
@@ -0,0 +1,113 @@
+# 手术室耗材篮子识别包（离线 + 推流）
+
+段内流程：**手检（≥2 手 union）→ 好坏帧门控 → 耗材分类**；离线另含**医生识别**。
+
+本文档所述参数与 `configs/default_config.yaml` 的当前配置一致（`imgsz_det: 1920`、`contact+1~+6` 等）。
+
+## 环境要求
+
+- Python 3.10+（推荐 conda 环境 `yolo`）
+- NVIDIA GPU + CUDA
+- `python3-tk`（每次运行首帧弹窗框选篮子 ROI）
+- `ffmpeg` / `ffprobe`（HEVC 视频建议先转 H.264，见下文）
+
+## 快速安装
+
+```bash
+cd /path/to/6.1
+bash setup.sh
+
+# 若用 conda（推荐）
+conda activate yolo
+pip install -r requirements.txt
+```
+
+## 三个入口
+
+| 脚本 | 用途 |
+|------|------|
+| `main_basket.py` | **离线**：全片篮子接触分段 → Phase2 → gap 合并 → 医生识别 |
+| `main_basket_stream.py` | **推流/本地 MP4 模拟推流**：逐帧触发 → 段内识别 → 实时写 TSV |
+| `main_segments_offline.py` | 按 TSV 时间段对离线 MP4 重跑段内识别（校验用） |
+
+## 1. 离线跑视频
+
+```bash
+conda activate yolo
+cd /path/to/6.1
+
+python main_basket.py \
+  --video /path/to/your.mp4 \
+  --excel input/视频中的商品信息表.xlsx \
+  --out output/result_offline.txt \
+  --save-basket-roi output/basket_roi.json \
+  --config configs/default_config.yaml
+```
+
+运行后**弹窗框选篮子 ROI**，然后自动全片扫描 + 段内识别。
+
+## 2. 推流（或本地 MP4 测试）
+
+```bash
+python main_basket_stream.py \
+  --rtsp /path/to/your.mp4 \
+  --excel input/视频中的商品信息表.xlsx \
+  --out output/result_stream.txt \
+  --save-basket-roi output/basket_roi_stream.json \
+  --config configs/default_config.yaml
+```
+
+- 本地 MP4：`stream.infer_source: file` → 段内**回源 4K**（与离线一致）
+- 真 RTSP：无法 seek 时回退 JPEG 缓存（`cache_max_width: 1920`）
+
+## 3. HEVC 视频
+
+4K HEVC 可能导致 OpenCV 解码不稳定，建议先转码：
+
+```bash
+bash scripts/remux_hevc.sh /path/to/source.mp4
+# 输出: input/remuxed/<name>_h264.mp4
+```
+
+## 配置说明（`configs/default_config.yaml`）
+
+| 段 | 关键参数 |
+|----|----------|
+| `phase2` | `imgsz_det: 1920`，`pad_bottom_ratio: 0.5`，`det_conf: 0.6` |
+| `classification` | 好帧 0.8，耗材 0.8，重试 0.6 / 0.5 |
+| `basket` | `iou_on: 0.03`，`confirm: 0.1`，`cooldown: 3`，窗口 contact+1~+6 |
+| `stream` | 段窗口与 basket 一致；`infer_source: file` |
+| `io` | `use_whitelist: false`（全 41 类） |
+
+## 模型文件（`weights/`）
+
+- `hand_detect.pt` — 手部检测
+- `goodbad_frame.pt` — 好坏帧门控
+- `haocai_classify.pt` — 耗材分类
+
+## 输出格式
+
+12 列 TSV + 离线末尾一行 `医生信息：...`（推流无医生行）。
+
+## 目录结构
+
+```
+6.1/
+├── main_basket.py              # 离线入口
+├── main_basket_stream.py       # 推流入口
+├── main_segments_offline.py    # TSV 段内重跑
+├── configs/default_config.yaml
+├── weights/                    # 3 个 YOLO 权重
+├── input/视频中的商品信息表.xlsx
+├── doctor_identity_package/    # 医生识别（仅离线）
+├── src/  code/                 # 编排与算法
+├── output/                     # 结果输出目录
+├── setup.sh  requirements.txt
+└── README.md
+```
+
+## 常见问题
+
+1. **CUDA OOM**：勿将 `imgsz_det` 设为 3840；当前 1920 在 8GB 显卡可用。
+2. **无 GUI**：无法弹窗框选 ROI，需另备 ROI JSON（当前入口每次仍弹窗）。
+3. **分段过多**：可调大 `basket.confirm_seconds` 或 `cooldown_seconds`。
diff --git a/code/dataset.py b/code/dataset.py
new file mode 100644
index 0000000..046f843
--- /dev/null
+++ b/code/dataset.py
@@ -0,0 +1,1421 @@
+#!/usr/bin/env python3
+"""
+从 ~/data/haocai/ 递归扫描「叶子会话目录」（含 mp4 + xlsx，且子目录中不再含 mp4），
+按 Excel 中的时间段从对应视频抽帧，输出到「输出根/images/<商品名称>/<规格>/」并生成 JSON 元数据。
+输出分辨率默认与源视频帧一致；可用 --max-width / --max-height 限制最大尺寸（仅缩小、不放大）。
+可选 --sample-every N：按全局成功保存顺序，每第 N 张在 JSON 中标记 sample=true（便于抽检）。
+可选 --limit N：最多生成 N 条（图片或片段），用于快速检查 JSON 格式；0 表示不限制。
+可选 --extract-backend：抽帧方式。默认 auto（有 ffmpeg 则用 ffmpeg）。默认精确 seek（-ss 在 -i 之后）；
+  可加 --ffmpeg-fast-seek 换快 seek（部分 HEVC/H.265 文件会得到全灰无效帧，脚本会自动改回精确 seek 重试）。
+  建议安装 ffprobe 与 ffmpeg，时长/帧率以 ffprobe 为准。
+可选 --detect-bbox：用 Grounding DINO（transformers + torch）检测人体并输出 bbox 到 JSON。
+可选 --save-vis：在输出根下单独目录（默认 vis/）生成与 images 同结构的 *_vis.jpg，框与英文类别叠加在图上。
+
+列约定（与样本数据一致）：
+- 单个 xlsx、两个视频：约 A–J，表头含「视频1」「视频2」时间段列（常见为第 9、10 列）。
+- 单个 xlsx、一个视频：约 A–I，最后一列为「视频内时间段」。
+- 两个 xlsx、两个视频：每个文件 A–I，最后一列为该视频「视频内时间段」；按文件名中的 01/02 与视频配对。
+"""
+
+from __future__ import annotations
+
+import argparse
+import hashlib
+import json
+import re
+import shutil
+import subprocess
+import sys
+import time
+from dataclasses import dataclass, asdict
+from pathlib import Path
+from typing import Any, Callable, Iterator, Optional
+
+import cv2
+import numpy as np
+import pandas as pd
+
+# Temporary / lock files: names that start with "~$", "._" or ".~"
+_IGNORE_XLSX = re.compile(r"^~\$|^\._|^\.\~", re.I)
+
+
+def _log(msg: str) -> None:
+    """Write a ``[HH:MM:SS]``-stamped log line to stderr and flush it immediately."""
+    stamp = time.strftime("%H:%M:%S")
+    sys.stderr.write(f"[{stamp}] {msg}\n")
+    sys.stderr.flush()
+
+
+@dataclass
+class ImageRecord:
+    """Metadata describing one saved image."""
+
+    name: str
+    path: str
+    label_category: str  # product name
+    size: str  # specification
+    sample: bool = False  # True for every N-th saved image (see --sample-every)
+    # YOLO format [x_center, y_center, w, h], normalised to 0-1; None when detection is disabled or nothing was detected
+    bbox_xywhn: Optional[list[float]] = None
+    detection_score: Optional[float] = None
+
+
+@dataclass
+class VideoMeta:
+    """Video stream metadata; preferably taken from ffprobe (more reliable than OpenCV for HEVC/VFR)."""
+
+    width: int
+    height: int
+    fps: float
+    duration_sec: float
+    frame_count: int = 0
+
+
+def _parse_fraction(s: str) -> float:
+    """Parse a rate string such as ``"30000/1001"`` or ``"25"`` into a float.
+
+    Returns 0.0 for empty input, ``"0/0"``, a zero denominator, or text that
+    is not numeric.
+    """
+    text = (s or "").strip()
+    if text in ("", "0/0"):
+        return 0.0
+    numerator, slash, denominator = text.partition("/")
+    try:
+        if not slash:
+            return float(text)
+        divisor = float(denominator)
+        if not divisor:
+            return 0.0
+        return float(numerator) / divisor
+    except ValueError:
+        return 0.0
+
+
+def _ffprobe_video_meta(path: Path, ffprobe_bin: str) -> Optional[VideoMeta]:
+    """Probe the first video stream of ``path`` with ffprobe.
+
+    Returns ``None`` when ffprobe is not on PATH, times out, cannot be started,
+    exits non-zero, prints nothing or unparsable JSON, reports no video stream,
+    or reports a frame size below 2x2.
+
+    Missing values are derived where possible: the frame count from
+    ``duration * fps`` and the fps from ``frame_count / duration``. If the fps
+    is still unknown it falls back to 25.0.
+    """
+
+    def _as_float(value: Any) -> float:
+        # A duration field may be absent or hold a placeholder such as "N/A";
+        # treat anything non-numeric as "unknown" (0.0) instead of raising.
+        try:
+            return float(value or 0.0)
+        except (TypeError, ValueError):
+            return 0.0
+
+    if not shutil.which(ffprobe_bin):
+        return None
+    cmd = [
+        ffprobe_bin,
+        "-v",
+        "error",
+        "-select_streams",
+        "v:0",
+        "-show_entries",
+        "stream=width,height,avg_frame_rate,r_frame_rate,nb_frames,duration",
+        "-show_entries",
+        "format=duration",
+        "-of",
+        "json",
+        str(path),
+    ]
+    try:
+        p = subprocess.run(
+            cmd, capture_output=True, text=True, timeout=60, check=False
+        )
+    except (subprocess.TimeoutExpired, OSError):
+        return None
+    if p.returncode != 0 or not p.stdout:
+        return None
+    try:
+        data = json.loads(p.stdout)
+    except json.JSONDecodeError:
+        return None
+    streams = data.get("streams") or []
+    if not streams:
+        return None
+    st = streams[0]
+    w = int(st.get("width") or 0)
+    h = int(st.get("height") or 0)
+    if w < 2 or h < 2:
+        return None
+
+    # Prefer the average frame rate; fall back to the nominal one.
+    fps = _parse_fraction(str(st.get("avg_frame_rate") or ""))
+    if fps <= 0:
+        fps = _parse_fraction(str(st.get("r_frame_rate") or ""))
+
+    # Prefer the stream duration; fall back to the container duration.
+    dur_s = _as_float(st.get("duration"))
+    if dur_s <= 0:
+        dur_s = _as_float((data.get("format") or {}).get("duration"))
+
+    nbf = st.get("nb_frames")
+    frame_count = 0
+    if nbf is not None and str(nbf).strip() and str(nbf).upper() != "N/A":
+        try:
+            frame_count = int(nbf)
+        except (TypeError, ValueError):
+            frame_count = 0
+
+    # Fill in whichever of frame_count / fps can be derived from the other two values.
+    if frame_count <= 0 and dur_s > 0 and fps > 0:
+        frame_count = int(round(dur_s * fps))
+    if fps <= 0 and dur_s > 0 and frame_count > 0:
+        fps = frame_count / dur_s
+    if fps <= 0:
+        fps = 25.0
+    return VideoMeta(
+        width=w,
+        height=h,
+        fps=float(fps),
+        duration_sec=float(dur_s),
+        frame_count=frame_count,
+    )
+
+
+def _opencv_video_meta(path: Path) -> VideoMeta:
+    """Read width, height, fps and frame count of ``path`` through OpenCV.
+
+    Returns ``VideoMeta(0, 0, 25.0, 0.0, 0)`` when the file cannot be opened.
+    The duration is ``frame_count / fps`` when both are positive, else 0.0.
+    """
+    capture = cv2.VideoCapture(str(path), cv2.CAP_FFMPEG)
+    if not capture.isOpened():
+        return VideoMeta(0, 0, 25.0, 0.0, 0)
+    try:
+        width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH) or 0)
+        height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT) or 0)
+        rate = float(capture.get(cv2.CAP_PROP_FPS) or 25.0)
+        n_frames = int(capture.get(cv2.CAP_PROP_FRAME_COUNT) or 0)
+        if rate > 0 and n_frames > 0:
+            seconds = n_frames / rate
+        else:
+            seconds = 0.0
+        return VideoMeta(
+            width=width,
+            height=height,
+            fps=rate,
+            duration_sec=seconds,
+            frame_count=n_frames,
+        )
+    finally:
+        capture.release()
+
+
+# One video is sampled many times per sheet; cache the probed metadata so ffprobe is not run for every timestamp.
+_VIDEO_META_CACHE: dict[tuple[str, str], VideoMeta] = {}
+
+
+def get_video_meta(path: Path, ffprobe_bin: str = "ffprobe") -> VideoMeta:
+    """Return metadata for ``path``, cached per (resolved path, ffprobe binary).
+
+    ffprobe is tried first; OpenCV is used when ffprobe yields nothing.
+    """
+    cache_key = (str(Path(path).resolve()), ffprobe_bin)
+    cached = _VIDEO_META_CACHE.get(cache_key)
+    if cached is not None:
+        return cached
+    meta = _ffprobe_video_meta(path, ffprobe_bin)
+    if meta is None:
+        meta = _opencv_video_meta(path)
+    _VIDEO_META_CACHE[cache_key] = meta
+    return meta
+
+
+def _clamp_time_sec(t_sec: float, meta: VideoMeta) -> float:
+    """Clamp ``t_sec`` to ``[0, duration - margin]`` where margin is ``1 / max(fps, 1)``.
+
+    When the duration is not positive only the lower bound of 0 is applied.
+    """
+    non_negative = max(0.0, t_sec)
+    if not meta.duration_sec > 0:
+        return non_negative
+    margin = 1.0 / max(meta.fps, 1.0)
+    latest = max(0.0, meta.duration_sec - margin)
+    return float(min(non_negative, latest))
+
+
+def _time_to_frame_index(t_sec: float, meta: VideoMeta) -> int:
+    """Convert a timestamp to a frame index, bounded to ``[0, frame_count - 1]`` when the count is known.
+
+    A non-positive ``meta.fps`` is replaced by 25.0.
+    """
+    rate = meta.fps if meta.fps > 0 else 25.0
+    frame_idx = int(round(_clamp_time_sec(t_sec, meta) * rate))
+    last_idx = meta.frame_count - 1
+    if meta.frame_count > 0 and frame_idx > last_idx:
+        frame_idx = last_idx
+    return frame_idx if frame_idx > 0 else 0
+
+
+def _expand_root(p: str | Path) -> Path:
+    """Expand a leading ``~`` in ``p`` and return the resolved path."""
+    expanded = Path(p).expanduser()
+    return expanded.resolve()
+
+
+def _is_real_xlsx(path: Path) -> bool:
+    """Return True for ``.xlsx`` / ``.xls`` files that are not temporary or lock files."""
+    filename = path.name
+    has_excel_suffix = path.suffix.lower() in (".xlsx", ".xls")
+    if not has_excel_suffix or filename.startswith(("~$", ".~")):
+        return False
+    return _IGNORE_XLSX.search(filename) is None
+
+
+def _is_real_mp4(path: Path) -> bool:
+    """Return True for ``.mp4`` files whose name does not contain ``.crdownload``."""
+    is_mp4 = path.suffix.lower() == ".mp4"
+    return is_mp4 and ".crdownload" not in path.name.lower()
+
+
+def _dir_has_mp4_recursive(d: Path) -> bool:
+    """Return True if ``d`` is a directory containing a valid mp4 at any depth.
+
+    An ``OSError`` raised while walking the tree is treated as "none found".
+    """
+    if not d.is_dir():
+        return False
+    try:
+        return any(_is_real_mp4(candidate) for candidate in d.rglob("*.mp4"))
+    except OSError:
+        return False
+
+
+def iter_leaf_session_dirs(root: Path) -> Iterator[Path]:
+    """Yield leaf session directories under ``root``.
+
+    A leaf directly contains at least one valid mp4 and one valid xlsx, and
+    none of its sub-directories contains an mp4 at any depth.
+    """
+    import os
+
+    root = root.resolve()
+    if not root.is_dir():
+        return
+
+    for current, subdir_names, file_names in os.walk(root, topdown=True):
+        here = Path(current)
+        has_video = any(_is_real_mp4(here / fname) for fname in file_names)
+        has_sheet = any(_is_real_xlsx(here / fname) for fname in file_names)
+        if not (has_video and has_sheet):
+            continue
+        if any(_dir_has_mp4_recursive(here / sub) for sub in subdir_names):
+            continue
+        yield here
+
+
+def _video_sort_key(path: Path) -> tuple:
+    """Sort key: first number in the file stem (10**9 when absent), then the lower-cased stem."""
+    stem = path.stem
+    digits = re.search(r"(\d+)", stem)
+    number = 10**9 if digits is None else int(digits.group(1))
+    return (number, stem.lower())
+
+
+def list_videos(session_dir: Path) -> list[Path]:
+    """Return the valid mp4 files directly inside ``session_dir``, ordered by ``_video_sort_key``."""
+    files = (entry for entry in session_dir.iterdir() if entry.is_file())
+    return sorted(filter(_is_real_mp4, files), key=_video_sort_key)
+
+
+def list_excels(session_dir: Path) -> list[Path]:
+    """Return the valid Excel files directly inside ``session_dir``, ordered by lower-cased name."""
+    files = (entry for entry in session_dir.iterdir() if entry.is_file())
+    return sorted(
+        filter(_is_real_xlsx, files),
+        key=lambda entry: entry.name.lower(),
+    )
+
+
+def _excel_pair_key(path: Path) -> tuple:
+    """Sort key: first number in the file stem (10**9 when absent), then the lower-cased file name."""
+    digits = re.search(r"(\d+)", path.stem)
+    number = 10**9 if digits is None else int(digits.group(1))
+    return (number, path.name.lower())
+
+
+def _normalize_header(s: Any) -> str:
+    """Return the header as a stripped string; ``None`` and float NaN become ``""``."""
+    if s is None:
+        return ""
+    if isinstance(s, float) and pd.isna(s):
+        return ""
+    return str(s).strip()
+
+
+def _find_col(df: pd.DataFrame, *candidates: str) -> str | None:
+    """Return the label of the first column of ``df`` matching one of ``candidates``.
+
+    Candidates are tried in the order given. For each candidate a header that
+    equals it (after normalisation) wins over a header that merely contains
+    it, so a column named "规格" is chosen over "规格型号" regardless of their
+    order in the sheet. Returns the original column label, or ``None`` when
+    nothing matches.
+    """
+    # Normalise every header once instead of once per candidate.
+    headers = [(col, _normalize_header(col)) for col in df.columns]
+    for want in candidates:
+        for col, header in headers:
+            if header == want:
+                return col
+        for col, header in headers:
+            if want in header:
+                return col
+    return None
+
+
+def normalize_haocai_class_name(name: str) -> str:
+    """
+    Class-name normalisation kept in line with build_haocai_dataset_hand_crops.row_product.
+    A few consumables are named differently in the Excel sheets and in the
+    training classes; the aliases below are merged into a single entry.
+    Names without an alias are returned stripped.
+    """
+    cleaned = (name or "").strip()
+    aliases = {
+        "一次性使用灭菌棉签": "一次性医用灭菌棉签",
+        "一次性使用手术衣": "一次性使用手术单",
+        "一次性使用手术单（一次性医用垫单）": "一次性使用手术单",
+        "一次性医用垫单": "一次性使用手术单",
+    }
+    return aliases.get(cleaned, cleaned)
+
+
+def parse_time_range(text: Any) -> tuple[float, float] | None:
+    """
+    Parse a time-range cell into ``(start_sec, end_sec)`` with start <= end.
+
+    Supported forms:
+    - 1.23-2.23 -> 1 min 23 s to 2 min 23 s
+    - 0.05-0.11 -> 0 min 5 s to 0 min 11 s (digits after the dot are seconds)
+    - 00：10-00：16 / 00:10-00:16 -> mm:ss
+
+    Returns ``None`` for empty cells, NaN, or text in any other form.
+    """
+    if text is None:
+        return None
+    if isinstance(text, float) and pd.isna(text):
+        return None
+    cell = str(text).strip()
+    if cell == "" or cell.lower() == "nan":
+        return None
+
+    # Full-width colon -> ASCII colon
+    cell = cell.replace("：", ":")
+
+    def _ordered(a: int, b: int) -> tuple[float, float]:
+        # The two ends may be written in either order.
+        return (float(min(a, b)), float(max(a, b)))
+
+    # mm:ss - mm:ss
+    colon_form = re.match(
+        r"^\s*(\d{1,2}):(\d{2})\s*[-–—~～]\s*(\d{1,2}):(\d{2})\s*$",
+        cell,
+    )
+    if colon_form:
+        min_a, sec_a, min_b, sec_b = (int(g) for g in colon_form.groups())
+        return _ordered(min_a * 60 + sec_a, min_b * 60 + sec_b)
+
+    # M.SS - M.SS (minutes.seconds; a 1-2 digit seconds part is zero-padded to two digits)
+    dot_form = re.match(
+        r"^\s*(\d+)\s*\.\s*(\d{1,2})\s*[-–—~～]\s*(\d+)\s*\.\s*(\d{1,2})\s*$",
+        cell,
+    )
+    if dot_form:
+        min_a, sec_a, min_b, sec_b = dot_form.groups()
+        first = int(min_a) * 60 + int(sec_a.zfill(2)[:2])
+        second = int(min_b) * 60 + int(sec_b.zfill(2)[:2])
+        return _ordered(first, second)
+
+    return None
+
+
+def _midpoint_seconds(start: float, end: float) -> float:
+    """Return the midpoint of ``[start, end]``, floored at 0.0."""
+    midpoint = (start + end) / 2.0
+    return max(0.0, midpoint)
+
+
+def _sample_time_in_tear_segment(
+    start: float,
+    end: float,
+    *,
+    mode: str = "tear_first_half",
+) -> float:
+    """
+    Pick the sampling timestamp inside the annotated "tear" segment [start, end].
+
+    - tear_first_half (default, also used for any unrecognised mode): the
+      3/4 point of the first half, t = start + 0.375 * (end - start).
+    - tear_second_half: the 3/4 point of the whole segment,
+      t = start + 0.75 * (end - start).
+    - midpoint: (start + end) / 2.
+
+    The result is never negative. When ``end <= start`` the (floored) start
+    is returned regardless of mode.
+    """
+    if end <= start:
+        return max(0.0, start)
+    if mode == "midpoint":
+        return max(0.0, (start + end) / 2.0)
+    fraction = 0.75 if mode == "tear_second_half" else 0.375
+    return max(0.0, start + fraction * (end - start))
+
+
+def resize_frame_to_max(
+    frame: Any,
+    max_width: int,
+    max_height: int,
+) -> Any:
+    """
+    Fit ``frame`` inside max_width x max_height while keeping the aspect ratio.
+
+    A limit of 0 (or less) leaves that direction unconstrained; with both
+    limits unset the frame is returned as is. Frames are only ever shrunk,
+    never enlarged. ``None`` is passed through.
+    """
+    if frame is None:
+        return None
+    if max_width <= 0 and max_height <= 0:
+        return frame
+    height, width = frame.shape[:2]
+    # Start from "no scaling" and tighten with each active limit.
+    ratio = 1.0
+    if max_width > 0:
+        ratio = min(ratio, max_width / width)
+    if max_height > 0:
+        ratio = min(ratio, max_height / height)
+    if ratio >= 1.0:
+        return frame
+    new_w = max(1, int(round(width * ratio)))
+    new_h = max(1, int(round(height * ratio)))
+    return cv2.resize(frame, (new_w, new_h), interpolation=cv2.INTER_AREA)
+
+
+def save_frame_jpeg(
+    frame: Any,
+    out_path: Path,
+    jpeg_quality: int = 85,
+    max_width: int = 0,
+    max_height: int = 0,
+) -> tuple[bool, Optional[np.ndarray]]:
+    """Optionally shrink ``frame`` to max_width/max_height and write it as JPEG.
+
+    Returns ``(True, image_as_written)`` on success and ``(False, None)``
+    when there is no frame or the write fails.
+    """
+    resized = resize_frame_to_max(frame, max_width, max_height)
+    if resized is None:
+        return False, None
+    encode_params = [int(cv2.IMWRITE_JPEG_QUALITY), jpeg_quality]
+    if not bool(cv2.imwrite(str(out_path), resized, encode_params)):
+        return False, None
+    return True, resized
+
+
+def save_bbox_vis_jpeg(
+    img_bgr: np.ndarray,
+    out_path: Path,
+    bbox_xywhn: Optional[list[float]],
+    detection_score: Optional[float],
+    jpeg_quality: int = 85,
+) -> bool:
+    """Draw the box on a copy of the image and save it as JPEG.
+
+    ``bbox_xywhn`` is a normalised YOLO box [cx, cy, w, h]. The label is the
+    score with two decimals, or "det" when no score is given. Without a
+    4-element box the text "no detection" is drawn instead. The parent
+    directory is created if needed. Returns whether the write succeeded.
+    """
+    canvas = img_bgr.copy()
+    height, width = canvas.shape[:2]
+    green = (0, 220, 0)
+    font = cv2.FONT_HERSHEY_SIMPLEX
+    if bbox_xywhn and len(bbox_xywhn) == 4:
+        cx, cy, bw, bh = bbox_xywhn
+
+        def _to_pixel(norm: float, extent: int) -> int:
+            # Normalised coordinate -> pixel, clamped to [0, extent - 1].
+            return max(0, min(int(round(norm * extent)), extent - 1))
+
+        left = _to_pixel(cx - bw / 2, width)
+        top = _to_pixel(cy - bh / 2, height)
+        right = _to_pixel(cx + bw / 2, width)
+        bottom = _to_pixel(cy + bh / 2, height)
+        thickness = max(1, min(width, height) // 400)
+        cv2.rectangle(canvas, (left, top), (right, bottom), green, thickness)
+        label = f"{detection_score:.2f}" if detection_score is not None else "det"
+        (text_w, text_h), _ = cv2.getTextSize(label, font, 0.5, 1)
+        # Keep the label inside the image when the box touches the top edge.
+        text_y = max(top - 4, text_h + 4)
+        cv2.rectangle(
+            canvas,
+            (left, text_y - text_h - 4),
+            (left + text_w + 4, text_y + 2),
+            green,
+            -1,
+        )
+        cv2.putText(canvas, label, (left + 2, text_y), font, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
+    else:
+        cv2.putText(canvas, "no detection", (8, 24), font, 0.7, (128, 128, 128), 2, cv2.LINE_AA)
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    encode_params = [int(cv2.IMWRITE_JPEG_QUALITY), jpeg_quality]
+    return bool(cv2.imwrite(str(out_path), canvas, encode_params))
+
+
+def _write_vis_if_enabled(
+    vis_out_root: Optional[Path],
+    label_category: str,
+    size: str,
+    fname: str,
+    img_bgr: np.ndarray,
+    bbox_xywhn: Optional[list[float]],
+    detection_score: Optional[float],
+) -> None:
+    """Write ``<stem>_vis.jpg`` under ``vis_out_root/<product>/<spec>/``; no-op when the root is None."""
+    if vis_out_root is None:
+        return
+    target_dir = _product_image_dir(vis_out_root, label_category, size)
+    target_dir.mkdir(parents=True, exist_ok=True)
+    vis_name = Path(fname).stem + "_vis.jpg"
+    save_bbox_vis_jpeg(img_bgr, target_dir / vis_name, bbox_xywhn, detection_score)
+
+
+def _clip_xyxy_xyxy(
+    xyxy: list[float], w: int, h: int
+) -> list[float]:
+    """Clip a pixel ``[x1, y1, x2, y2]`` box to a ``w`` x ``h`` image.
+
+    x1/y1 are limited to ``[0, size - 1]`` and x2/y2 to ``[0, size]``. A box
+    that collapses on an axis is widened to 1 pixel on that axis (capped at
+    the image size).
+    """
+
+    def _clamp(value: float, upper: int) -> float:
+        return float(max(0, min(value, upper)))
+
+    x1, y1, x2, y2 = xyxy
+    left, right = _clamp(x1, w - 1), _clamp(x2, w)
+    top, bottom = _clamp(y1, h - 1), _clamp(y2, h)
+    if right <= left:
+        right = min(left + 1.0, float(w))
+    if bottom <= top:
+        bottom = min(top + 1.0, float(h))
+    return [left, top, right, bottom]
+
+
+def _xyxy_to_xywhn(xyxy: list[float], w: int, h: int) -> list[float]:
+    """Convert a pixel xyxy box to YOLO ``[x_center, y_center, width, height]`` divided by the image size."""
+    x1, y1, x2, y2 = xyxy
+    center_x = (x1 + x2) / 2.0
+    center_y = (y1 + y2) / 2.0
+    return [center_x / w, center_y / h, (x2 - x1) / w, (y2 - y1) / h]
+
+
+class GroundingDinoDetector:
+    """
+    Open-vocabulary detection with Grounding DINO (HuggingFace transformers).
+    Returns only the highest-scoring box: YOLO format [cx, cy, w, h] (normalised) plus its score.
+    """
+
+    def __init__(
+        self,
+        model_id: str = "IDEA-Research/grounding-dino-base",
+        prompt: str = "person .",
+        box_threshold: float = 0.30,
+        text_threshold: float = 0.25,
+    ) -> None:
+        # torch / PIL / transformers are imported here rather than at module
+        # level, so they are only needed once a detector is constructed.
+        import torch
+        from PIL import Image as _PILImage  # noqa: F401
+        from transformers import AutoModelForZeroShotObjectDetection, AutoProcessor
+
+        self._torch = torch
+        self._PILImage = _PILImage
+        # Use the GPU when torch reports one, otherwise the CPU.
+        self._device = "cuda" if torch.cuda.is_available() else "cpu"
+        self._processor = AutoProcessor.from_pretrained(model_id)
+        self._model = AutoModelForZeroShotObjectDetection.from_pretrained(model_id).to(self._device)
+        self._model.eval()
+        self.prompt = prompt
+        self.box_threshold = box_threshold
+        self.text_threshold = text_threshold
+        _log(f"GroundingDinoDetector loaded: {model_id} on {self._device}")
+
+    def detect(self, img_bgr: np.ndarray) -> tuple[
+        Optional[list[float]],
+        Optional[float],
+    ]:
+        """Detect ``self.prompt`` in a BGR image.
+
+        Returns ``(bbox_xywhn, score)`` for the highest-scoring box, or
+        ``(None, None)`` when the image is smaller than 2x2 or nothing is found.
+        """
+        h, w = img_bgr.shape[:2]
+        if w < 2 or h < 2:
+            return None, None
+
+        # The processor is fed a PIL image in RGB order.
+        rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
+        pil = self._PILImage.fromarray(rgb)
+
+        with self._torch.no_grad():
+            inputs = self._processor(images=pil, text=self.prompt, return_tensors="pt").to(self._device)
+            outputs = self._model(**inputs)
+            # (height, width) of the original image, passed to post-processing.
+            target_sizes = self._torch.tensor([[h, w]], device=self._device)
+            try:
+                results = self._processor.post_process_grounded_object_detection(
+                    outputs,
+                    inputs.input_ids,
+                    threshold=self.box_threshold,
+                    text_threshold=self.text_threshold,
+                    target_sizes=target_sizes,
+                )[0]
+            except TypeError:
+                # Retry with the `box_threshold=` keyword when `threshold=` is
+                # rejected — presumably a different transformers version;
+                # NOTE(review): confirm against the installed release.
+                results = self._processor.post_process_grounded_object_detection(
+                    outputs,
+                    inputs.input_ids,
+                    box_threshold=self.box_threshold,
+                    text_threshold=self.text_threshold,
+                    target_sizes=target_sizes,
+                )[0]
+
+        if results is None or len(results["boxes"]) == 0:
+            return None, None
+
+        # Keep only the highest-scoring detection.
+        best_idx = int(results["scores"].argmax().item())
+        b = results["boxes"][best_idx].tolist()
+        score = float(results["scores"][best_idx].item())
+        xyxy = _clip_xyxy_xyxy([float(b[0]), float(b[1]), float(b[2]), float(b[3])], w, h)
+        xywhn = _xyxy_to_xywhn(xyxy, w, h)
+        return xywhn, score
+
+
+def _is_degenerate_gray_frame(img: np.ndarray) -> bool:
+    """Return True for empty frames and for near-neutral-gray, almost texture-free frames.
+
+    ffmpeg fast seek can output such invalid frames on some HEVC streams.
+    A frame counts as degenerate when its mean is within [118, 138] and its
+    standard deviation is below 8.
+    """
+    if img is None or img.size == 0:
+        return True
+    mean_val = float(np.mean(img))
+    std_val = float(np.std(img))
+    is_mid_gray = 118.0 <= mean_val <= 138.0
+    is_flat = std_val < 8.0
+    return is_mid_gray and is_flat
+
+
+def extract_frame_ffmpeg(
+    video_path: Path,
+    t_sec: float,
+    *,
+    ffmpeg_bin: str = "ffmpeg",
+    ffprobe_bin: str = "ffprobe",
+    accurate_seek: bool = True,
+    timeout_sec: float = 600.0,
+) -> np.ndarray | None:
+    """
+    Decode a single frame at ``t_sec`` with ffmpeg and return it as a BGR array.
+
+    The timestamp is clamped using ``get_video_meta`` (ffprobe first), which
+    avoids OpenCV's fps/duration errors on HEVC.
+
+    accurate_seek=True (default): -ss after -i; decodes correctly but is slow on long videos.
+    accurate_seek=False: -ss before -i; fast, but a few files may still give bad frames.
+    A fast-seek result that looks like a degenerate gray frame is retried
+    once with accurate seek.
+
+    Two outputs are tried in order: a PNG over a pipe, then raw bgr24 sized
+    from the probed metadata. Returns ``None`` when ffmpeg is not on PATH,
+    the probed frame size is below 2x2, or both attempts fail.
+    """
+    if not shutil.which(ffmpeg_bin):
+        return None
+    meta = get_video_meta(video_path, ffprobe_bin)
+    if meta.width < 2 or meta.height < 2:
+        return None
+    t_clamped = _clamp_time_sec(t_sec, meta)
+    w, h = meta.width, meta.height
+    expected_raw = w * h * 3
+
+    def _build_cmd(output_args: list[str]) -> list[str]:
+        """Assemble the ffmpeg command for one frame with the given output options."""
+        seek = ["-ss", f"{t_clamped:.6f}"]
+        source = ["-i", str(video_path)]
+        # Accurate seek puts -ss after -i; fast seek puts it before.
+        positioned = source + seek if accurate_seek else seek + source
+        return (
+            [ffmpeg_bin, "-hide_banner", "-loglevel", "error"]
+            + positioned
+            + ["-frames:v", "1", "-an"]
+            + output_args
+        )
+
+    def _run_ffmpeg(cmd: list[str]) -> tuple[Optional[bytes], Optional[str]]:
+        """Run ffmpeg; return ``(stdout, None)`` on success or ``(None, reason)`` on failure."""
+        try:
+            p = subprocess.run(
+                cmd,
+                capture_output=True,
+                timeout=timeout_sec,
+                check=False,
+            )
+        except subprocess.TimeoutExpired:
+            return None, "timeout"
+        except OSError as exc:
+            # e.g. the binary vanished or is not executable; report it like
+            # any other failure instead of aborting the whole run.
+            return None, str(exc) or "OSError"
+        err = (p.stderr or b"").decode("utf-8", errors="replace")[:800]
+        if p.returncode != 0:
+            return None, err or f"exit {p.returncode}"
+        if not p.stdout:
+            return None, err or "empty stdout"
+        return p.stdout, None
+
+    def _accept(img: np.ndarray, gray_msg: str) -> np.ndarray | None:
+        """Return ``img``, or retry with accurate seek when a fast-seek frame looks degenerate."""
+        if not accurate_seek and _is_degenerate_gray_frame(img):
+            _log(f"{gray_msg}: {video_path.name} t={t_clamped:.2f}s")
+            return extract_frame_ffmpeg(
+                video_path,
+                t_sec,
+                ffmpeg_bin=ffmpeg_bin,
+                ffprobe_bin=ffprobe_bin,
+                accurate_seek=True,
+                timeout_sec=timeout_sec,
+            )
+        return img
+
+    # 1) PNG over a pipe (general case)
+    out, err = _run_ffmpeg(
+        _build_cmd(["-f", "image2pipe", "-vcodec", "png", "-"])
+    )
+    if out is not None:
+        img = cv2.imdecode(np.frombuffer(out, dtype=np.uint8), cv2.IMREAD_COLOR)
+        if img is not None and img.size > 0:
+            return _accept(img, "快 seek 输出疑似灰帧，改用精确 seek")
+        err = "cv2.imdecode returned no image"
+    # Logged for every non-timeout failure of this step; previously this
+    # check sat where the error text was always None and so never fired.
+    if err and err != "timeout":
+        _log(f"ffmpeg PNG 解码失败: {video_path.name}: {err[:200]}")
+
+    # 2) raw BGR (skips PNG encode/decode; the frame size comes from the probed metadata)
+    out2, err2 = _run_ffmpeg(
+        _build_cmd(
+            ["-f", "rawvideo", "-pix_fmt", "bgr24", "-s", f"{w}x{h}", "-"]
+        )
+    )
+    if out2 is not None and len(out2) == expected_raw:
+        img = np.frombuffer(out2, dtype=np.uint8).reshape((h, w, 3)).copy()
+        return _accept(img, "快 seek raw 疑似灰帧，改用精确 seek")
+    if err2 and err2 != "timeout":
+        _log(f"ffmpeg rawvideo 失败: {video_path.name}: {err2[:200]}")
+
+    return None
+
+
+def extract_frame_opencv_sequential(
+    video_path: Path,
+    t_sec: float,
+    ffprobe_bin: str = "ffprobe",
+) -> Any | None:
+    """
+    Read frames sequentially from frame 0 up to the target frame and return it.
+
+    The target index is computed from ``get_video_meta`` (ffprobe first),
+    which is steadier than relying on OpenCV's fps alone. Returns ``None``
+    when the video cannot be opened or a read fails before the target.
+    """
+    meta = get_video_meta(video_path, ffprobe_bin)
+    wanted_idx = _time_to_frame_index(t_sec, meta)
+    reader = cv2.VideoCapture(str(video_path), cv2.CAP_FFMPEG)
+    if not reader.isOpened():
+        return None
+    try:
+        current: Any | None = None
+        remaining = wanted_idx + 1
+        while remaining > 0:
+            grabbed, current = reader.read()
+            if not grabbed or current is None:
+                return None
+            remaining -= 1
+        return current
+    finally:
+        reader.release()
+
+
+def make_extract_frame_fn(
+    backend: str,
+    ffmpeg_bin: str,
+    ffprobe_bin: str,
+    accurate_seek: bool,
+) -> tuple[Callable[[Path, float], Any | None], str]:
+    """
+    Return ``(frame extraction function, name of the backend actually used)``.
+
+    auto: ffmpeg when it is on PATH, otherwise OpenCV sequential decoding.
+    An explicit "ffmpeg" request also falls back to OpenCV when the binary
+    is missing. The ffmpeg function itself falls back to OpenCV per frame
+    when ffmpeg yields nothing.
+    """
+    chosen = backend.strip().lower()
+    if chosen == "auto":
+        chosen = "ffmpeg" if shutil.which(ffmpeg_bin) else "opencv"
+    if chosen == "ffmpeg" and not shutil.which(ffmpeg_bin):
+        _log(f"未找到 {ffmpeg_bin!r}，改用 OpenCV 顺序解码（较慢）")
+        chosen = "opencv"
+
+    if chosen != "ffmpeg":
+
+        def fn_cv_only(p: Path, t: float) -> Any | None:
+            return extract_frame_opencv_sequential(p, t, ffprobe_bin)
+
+        return fn_cv_only, "opencv_sequential"
+
+    def fn_ffmpeg(p: Path, t: float) -> Any | None:
+        img = extract_frame_ffmpeg(
+            p,
+            t,
+            ffmpeg_bin=ffmpeg_bin,
+            ffprobe_bin=ffprobe_bin,
+            accurate_seek=accurate_seek,
+        )
+        if img is not None:
+            return img
+        return extract_frame_opencv_sequential(p, t, ffprobe_bin)
+
+    return fn_ffmpeg, ("ffmpeg_accurate" if accurate_seek else "ffmpeg_fast")
+
+
+def _unique_image_name(
+    session_rel: str,
+    row_idx: int,
+    video_tag: str,
+    time_raw: str,
+    ext: str = ".jpg",
+) -> str:
+    """Build a file name from the session path, row index, video tag and a 16-hex-digit SHA-1 prefix.
+
+    The session path is reduced to word characters, CJK characters and
+    hyphens (other runs become "_") and cut to its last 80 characters.
+    """
+    digest_source = f"{session_rel}|{row_idx}|{video_tag}|{time_raw}"
+    digest = hashlib.sha1(digest_source.encode("utf-8")).hexdigest()[:16]
+    slug = re.sub(r"[^\w\u4e00-\u9fff\-]+", "_", session_rel)[-80:]
+    return f"{slug}__r{row_idx}_{video_tag}_{digest}{ext}"
+
+
+def _sanitize_dir_segment(text: Any, fallback: str) -> str:
+    """Turn ``text`` into a directory name.
+
+    Runs of characters that are illegal in paths become "_", surrounding
+    spaces and dots are removed, and the result is cut to 180 characters.
+    ``fallback`` is returned for None, NaN, or when nothing is left.
+    """
+    if text is None or (isinstance(text, float) and pd.isna(text)):
+        return fallback
+    cleaned = re.sub(r'[/\\:\0<>"|?*]+', "_", str(text).strip()).strip(" .")
+    if not cleaned:
+        return fallback
+    max_len = 180
+    if len(cleaned) > max_len:
+        cleaned = cleaned[:max_len].rstrip()
+    return cleaned or fallback
+
+
+def _product_image_dir(
+    images_out: Path, label_category: str, size: str
+) -> Path:
+    """Return ``images_out/<product name>/<specification>/`` with both parts sanitised."""
+    name_part = _sanitize_dir_segment(label_category, "未命名商品")
+    spec_part = _sanitize_dir_segment(size, "未填规格")
+    return images_out.joinpath(name_part, spec_part)
+
+
+def _read_excel(path: Path) -> pd.DataFrame:
+    """Read the workbook at ``path`` with pandas, using row 0 as the header."""
+    sheet = pd.read_excel(path, header=0)
+    return sheet
+
+
+def _limit_reached(records: list[ImageRecord], limit: int) -> bool:
+    """Return True when ``limit > 0`` and the number of saved records has reached it."""
+    if not limit > 0:
+        return False
+    return len(records) >= limit
+
+
+def _record_saved(
+    records: list[ImageRecord],
+    global_idx: list[int],
+    sample_every: int,
+    fname: str,
+    out_path: Path,
+    label_category: str,
+    size: str,
+    bbox_xywhn: Optional[list[float]] = None,
+    detection_score: Optional[float] = None,
+) -> None:
+    """Count one saved image and append its ``ImageRecord`` to ``records``.
+
+    ``global_idx[0]`` is the number of images saved so far and is incremented
+    here. Every ``sample_every``-th image is flagged as a sample (N=10 ->
+    images 10, 20, ...); 0 disables sampling.
+    """
+    global_idx[0] += 1
+    saved_so_far = global_idx[0]
+    is_sample = bool(sample_every > 0 and saved_so_far % sample_every == 0)
+    record = ImageRecord(
+        name=fname,
+        path=str(out_path.resolve()),
+        label_category=label_category,
+        size=size,
+        sample=is_sample,
+        bbox_xywhn=bbox_xywhn,
+        detection_score=detection_score,
+    )
+    records.append(record)
+
+
+def _bbox_from_detector(
+    detector: Optional[GroundingDinoDetector],
+    img_bgr: Optional[np.ndarray],
+) -> tuple[Optional[list[float]], Optional[float]]:
+    """Run ``detector.detect`` on the image; ``(None, None)`` when either argument is None."""
+    if detector is not None and img_bgr is not None:
+        return detector.detect(img_bgr)
+    return None, None
+
+
+def process_session(
+    session_dir: Path,
+    data_root: Path,
+    images_out: Path,
+    records: list[ImageRecord],
+    global_idx: list[int],
+    sample_every: int,
+    limit: int = 0,
+    max_width: int = 0,
+    max_height: int = 0,
+    bbox_detector: Optional[GroundingDinoDetector] = None,
+    vis_out_root: Optional[Path] = None,
+    extract_frame_fn: Callable[[Path, float], Any | None] = extract_frame_opencv_sequential,
+    time_sample_mode: str = "tear_first_half",
+) -> int:
+    """Extract one frame per labelled time range found in a leaf session directory.
+
+    The directory's Excel sheets and videos are paired as follows:
+
+    * >=2 Excel files and >=2 videos: the first two sheets (sorted with
+      ``_excel_pair_key``) are matched one-to-one with the first two videos
+      (sorted with ``_video_sort_key``).
+    * exactly one Excel file: if it has both a "video 1" and a "video 2" time
+      column and there are >=2 videos, every row is sampled in both videos;
+      otherwise every row is sampled in the first video.
+    * anything else: first Excel file with the first video.
+
+    Args:
+        session_dir: Leaf directory holding the videos and Excel sheets.
+        data_root: Dataset root; ``session_dir`` relative to it is used in image names.
+        images_out: Root directory under which JPEGs are written.
+        records: Output list; one ``ImageRecord`` is appended per saved image.
+        global_idx: One-element mutable counter shared across sessions.
+        sample_every: Forwarded to ``_record_saved`` to flag every N-th image.
+        limit: When > 0, stop as soon as ``len(records)`` reaches it.
+        max_width: Forwarded to ``save_frame_jpeg``.
+        max_height: Forwarded to ``save_frame_jpeg``.
+        bbox_detector: Optional detector run on every saved image.
+        vis_out_root: Forwarded to ``_write_vis_if_enabled``.
+        extract_frame_fn: Callable ``(video_path, time_sec) -> frame or None``.
+        time_sample_mode: Forwarded to ``_sample_time_in_tear_segment`` as ``mode``.
+
+    Returns:
+        Number of images successfully written for this session.
+    """
+    videos = list_videos(session_dir)
+    excels = list_excels(session_dir)
+    if not videos or not excels:
+        return 0
+
+    session_rel = str(session_dir.relative_to(data_root))
+    vid_list = sorted(videos, key=_video_sort_key)
+    n_ok = 0
+
+    def cell_text(row: pd.Series, col: Any) -> str:
+        """Return the stripped text of ``row[col]``; "" for a missing column, None or NaN."""
+        if col is None:
+            return ""
+        v = row.get(col)
+        if v is None or (isinstance(v, float) and pd.isna(v)):
+            return ""
+        return str(v).strip()
+
+    def run_table(
+        df: pd.DataFrame,
+        targets: list[tuple[str, Any, Path]],
+        *,
+        skip_unlabeled: bool = False,
+    ) -> bool:
+        """Sample every row of ``df`` for each ``(tag, time_column, video)`` target.
+
+        Returns True as soon as the record limit is reached (the caller must
+        stop), False when the whole table was processed.
+        """
+        nonlocal n_ok
+        # Column lookups do not depend on the row, so resolve them once per table.
+        c_name = _find_col(df, "商品名称")
+        c_spec = _find_col(df, "规格")
+        for ri, (_, row) in enumerate(df.iterrows()):
+            for tag, time_col, vid in targets:
+                if _limit_reached(records, limit):
+                    return True
+                tr = row.get(time_col)
+                pr = parse_time_range(tr)
+                if pr is None:
+                    continue
+                label = normalize_haocai_class_name(cell_text(row, c_name))
+                size = cell_text(row, c_spec)
+                if skip_unlabeled and not label and not size:
+                    continue
+                t_sample = _sample_time_in_tear_segment(
+                    *pr, mode=time_sample_mode
+                )
+                frame = extract_frame_fn(vid, t_sample)
+                if frame is None:
+                    continue
+                fname = _unique_image_name(session_rel, ri, tag, str(tr))
+                out_dir = _product_image_dir(images_out, label, size)
+                out_dir.mkdir(parents=True, exist_ok=True)
+                out_path = out_dir / fname
+                saved, img_out = save_frame_jpeg(
+                    frame,
+                    out_path,
+                    max_width=max_width,
+                    max_height=max_height,
+                )
+                if not saved:
+                    continue
+                bx, ds = _bbox_from_detector(bbox_detector, img_out)
+                _record_saved(
+                    records, global_idx, sample_every,
+                    fname, out_path, label, size,
+                    bbox_xywhn=bx, detection_score=ds,
+                )
+                _write_vis_if_enabled(
+                    vis_out_root, label, size, fname, img_out, bx, ds,
+                )
+                n_ok += 1
+                # Re-check right after saving so no further Excel file is read
+                # once the limit has been hit.
+                if _limit_reached(records, limit):
+                    return True
+        return False
+
+    # Two Excel files + two videos: each sheet is read on its own and paired
+    # with the video at the same sorted position.
+    if len(excels) >= 2 and len(videos) >= 2:
+        excel_list = sorted(excels, key=_excel_pair_key)
+        for pi in range(min(len(excel_list), len(vid_list), 2)):
+            df = _read_excel(excel_list[pi])
+            time_col = _find_col(
+                df,
+                "视频内时间段",
+                "视频01内时间段",
+                "视频02内时间段",
+            )
+            if time_col is None:
+                # The time range is commonly the last column.
+                time_col = df.columns[-1]
+            # NOTE(review): only this branch skips rows with neither a product
+            # name nor a spec; the other branches keep them. Preserved as-is.
+            if run_table(
+                df,
+                [(f"v{pi + 1}", time_col, vid_list[pi])],
+                skip_unlabeled=True,
+            ):
+                break
+        return n_ok
+
+    df = _read_excel(excels[0])
+    if len(excels) == 1:
+        c_v1 = _find_col(df, "视频1内时间段", "视频01内时间段")
+        c_v2 = _find_col(df, "视频2内时间段", "视频02内时间段")
+        if len(videos) >= 2 and c_v1 is not None and c_v2 is not None:
+            # One sheet describing two videos: sample each row in both.
+            run_table(
+                df,
+                [("v1", c_v1, vid_list[0]), ("v2", c_v2, vid_list[1])],
+            )
+            return n_ok
+        # Single video: dedicated time column, else the last column.
+        time_col = _find_col(df, "视频内时间段", "视频1内时间段")
+        if time_col is None:
+            time_col = df.columns[-1]
+    else:
+        # Remaining case (several Excel files, one video): first sheet + first video.
+        time_col = _find_col(df, "视频内时间段") or df.columns[-1]
+
+    run_table(df, [("v1", time_col, vid_list[0])])
+    return n_ok
+
+
+def main() -> int:
+    """Command-line entry point: extract frames for every session and write the JSON index.
+
+    Returns:
+        0 after the JSON file has been written; 2 when an argument is negative
+        or the Grounding DINO detector cannot be created.
+    """
+    parser = argparse.ArgumentParser(description="浩材视频抽帧数据集生成")
+    parser.add_argument(
+        "--data-root",
+        type=str,
+        default="~/data/haocai",
+        help="数据根目录（默认 ~/data/haocai）",
+    )
+    parser.add_argument(
+        "--output-dir",
+        type=str,
+        default="./haocai_dataset",
+        help="输出根目录（图片与 JSON 放在其下）",
+    )
+    parser.add_argument(
+        "--json-name",
+        type=str,
+        default="dataset.json",
+        help="JSON 文件名（位于 output-dir 下）",
+    )
+    parser.add_argument(
+        "--images-subdir",
+        type=str,
+        default="images",
+        help="图片子目录名（位于 output-dir 下）",
+    )
+    parser.add_argument(
+        "--sample-every",
+        type=int,
+        default=0,
+        metavar="N",
+        help="全局按保存顺序计数，每第 N 张在 JSON 中 sample=true（0 表示全部 sample=false）",
+    )
+    parser.add_argument(
+        "--limit",
+        type=int,
+        default=0,
+        metavar="N",
+        help="最多生成 N 条记录（与 JSON 条目数一致），用于试跑检查格式；0 表示不限制",
+    )
+    parser.add_argument(
+        "--max-width",
+        type=int,
+        default=0,
+        metavar="PX",
+        help="输出 JPEG 最大宽度（像素），0=不限制（默认，保持原始分辨率）",
+    )
+    parser.add_argument(
+        "--max-height",
+        type=int,
+        default=0,
+        metavar="PX",
+        help="输出 JPEG 最大高度（像素），0=不限制（默认）。与 --max-width 同时生效时缩放到可放入矩形内",
+    )
+    parser.add_argument(
+        "--detect-bbox",
+        action="store_true",
+        help="用 Grounding DINO 检测人体并写 bbox 到 JSON（需 pip install transformers torch pillow）",
+    )
+    parser.add_argument(
+        "--dino-model-id",
+        type=str,
+        default="IDEA-Research/grounding-dino-base",
+        metavar="ID",
+        help="Grounding DINO HuggingFace 模型 ID",
+    )
+    parser.add_argument(
+        "--dino-prompt",
+        type=str,
+        default="person .",
+        metavar="TEXT",
+        help="Grounding DINO 检测 prompt（默认 'person .'）",
+    )
+    parser.add_argument(
+        "--dino-box-threshold",
+        type=float,
+        default=0.30,
+        metavar="F",
+        help="Grounding DINO box 置信度阈值（默认 0.30）",
+    )
+    parser.add_argument(
+        "--dino-text-threshold",
+        type=float,
+        default=0.25,
+        metavar="F",
+        help="Grounding DINO text 置信度阈值（默认 0.25）",
+    )
+    parser.add_argument(
+        "--save-vis",
+        action="store_true",
+        help="在 output-dir 下写入可视化图（默认子目录 vis/），与 images 同目录结构，文件名为 <原名>_vis.jpg",
+    )
+    parser.add_argument(
+        "--vis-subdir",
+        type=str,
+        default="vis",
+        help="可视化 JPEG 所在子目录名（位于 output-dir 下，默认 vis）",
+    )
+    parser.add_argument(
+        "--extract-backend",
+        type=str,
+        choices=("auto", "ffmpeg", "opencv"),
+        default="auto",
+        help="抽帧：auto=有 ffmpeg 则用 ffmpeg（推荐，HEVC 不易花屏）；"
+        "ffmpeg=必须可用 ffmpeg；opencv=顺序解码，无 ffmpeg 时可用但较慢",
+    )
+    parser.add_argument(
+        "--ffmpeg-bin",
+        type=str,
+        default="ffmpeg",
+        metavar="CMD",
+        help="ffmpeg 可执行文件名或绝对路径（默认 ffmpeg）",
+    )
+    parser.add_argument(
+        "--ffprobe-bin",
+        type=str,
+        default="ffprobe",
+        metavar="CMD",
+        help="ffprobe 可执行文件名（用于时长/帧率/分辨率；默认 ffprobe）",
+    )
+    parser.add_argument(
+        "--ffmpeg-fast-seek",
+        action="store_true",
+        help="快 seek：-ss 在 -i 之前，长视频抽帧快很多；默认精确 seek 从开头解码到目标时刻，故很慢",
+    )
+    parser.add_argument(
+        "--sample-midpoint",
+        action="store_true",
+        help="时间段内抽帧取中点；默认取「撕」区间前半段（半段内 3/4 分位）",
+    )
+    parser.add_argument(
+        "--tear-second-half",
+        action="store_true",
+        help="撕时间段内用整段后半 3/4 分位（旧默认）；与默认前半段二选一",
+    )
+    args = parser.parse_args()
+
+    # Reject negative numeric options up front (exit status 2).
+    if args.sample_every < 0:
+        print("--sample-every 须 >= 0", file=sys.stderr)
+        return 2
+    if args.limit < 0:
+        print("--limit 须 >= 0", file=sys.stderr)
+        return 2
+    if args.max_width < 0 or args.max_height < 0:
+        print("--max-width / --max-height 须 >= 0", file=sys.stderr)
+        return 2
+    # The detector is only constructed when --detect-bbox is given; any
+    # construction failure is reported and ends the run with status 2.
+    bbox_detector: Optional[GroundingDinoDetector] = None
+    if args.detect_bbox:
+        try:
+            _log("Grounding DINO bbox detection enabled")
+            _log(
+                f"model={args.dino_model_id}, prompt={args.dino_prompt!r}, "
+                f"box_threshold={args.dino_box_threshold}, "
+                f"text_threshold={args.dino_text_threshold}"
+            )
+            bbox_detector = GroundingDinoDetector(
+                model_id=args.dino_model_id,
+                prompt=args.dino_prompt,
+                box_threshold=args.dino_box_threshold,
+                text_threshold=args.dino_text_threshold,
+            )
+        except Exception as e:
+            print(
+                f"启用 --detect-bbox 失败: {type(e).__name__}: {e}\n"
+                "请确认已安装: pip install transformers torch pillow",
+                file=sys.stderr,
+            )
+            return 2
+
+    # Resolve the input/output roots and create the output directories.
+    data_root = _expand_root(args.data_root)
+    out_root = _expand_root(args.output_dir)
+    images_out = out_root / args.images_subdir
+    images_out.mkdir(parents=True, exist_ok=True)
+
+    vis_out_root: Optional[Path] = None
+    if args.save_vis:
+        vis_out_root = out_root / args.vis_subdir
+        vis_out_root.mkdir(parents=True, exist_ok=True)
+
+    records: list[ImageRecord] = []
+    # One-element list used as a mutable saved-image counter shared with process_session.
+    global_idx = [0]
+    total = 0
+    sessions = list(iter_leaf_session_dirs(data_root))
+    if not sessions:
+        # Only a warning: the run continues and still writes an (empty) JSON file.
+        print(f"未找到叶子会话目录（需同时含 mp4 与 xlsx）: {data_root}", file=sys.stderr)
+
+    if not shutil.which(args.ffprobe_bin):
+        _log(
+            f"未找到 {args.ffprobe_bin!r}，时长/帧率将仅用 OpenCV（HEVC 可能偏差）；"
+            "建议: conda install ffmpeg 或 apt install ffmpeg"
+        )
+    extract_frame_fn, extract_mode = make_extract_frame_fn(
+        args.extract_backend,
+        args.ffmpeg_bin,
+        args.ffprobe_bin,
+        accurate_seek=not args.ffmpeg_fast_seek,
+    )
+    _log(f"抽帧后端: {extract_mode}")
+    # --sample-midpoint takes precedence over --tear-second-half when both are given.
+    if args.sample_midpoint:
+        time_sample_mode = "midpoint"
+    elif args.tear_second_half:
+        time_sample_mode = "tear_second_half"
+    else:
+        time_sample_mode = "tear_first_half"
+    _log(
+        "时间段采样: "
+        + (
+            "中点（--sample-midpoint）"
+            if time_sample_mode == "midpoint"
+            else (
+                "撕区间后半段 3/4（--tear-second-half）"
+                if time_sample_mode == "tear_second_half"
+                else "撕区间前半段（默认，半段内 3/4 分位）"
+            )
+        )
+    )
+    if extract_mode.startswith("ffmpeg") and not args.ffmpeg_fast_seek:
+        _log(
+            "精确 seek（默认）在长视频、大时间戳时很慢：每次抽帧都会从文件开头解码到目标时刻。"
+            "若可接受略快 seek，请加 --ffmpeg-fast-seek 加速。"
+        )
+
+    # Process sessions in sorted order, stopping once the record limit is reached.
+    for sd in sorted(sessions):
+        if _limit_reached(records, args.limit):
+            break
+        n = process_session(
+            sd,
+            data_root,
+            images_out,
+            records,
+            global_idx,
+            args.sample_every,
+            args.limit,
+            args.max_width,
+            args.max_height,
+            bbox_detector,
+            vis_out_root,
+            extract_frame_fn=extract_frame_fn,
+            time_sample_mode=time_sample_mode,
+        )
+        total += n
+        print(f"{sd.relative_to(data_root)}: {n} 张")
+
+    # Serialise all records as a JSON array next to the images directory.
+    json_path = out_root / args.json_name
+    payload = [asdict(r) for r in records]
+    json_path.write_text(
+        json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8"
+    )
+    lim_note = f"（limit={args.limit}）" if args.limit > 0 else ""
+    vis_note = (
+        f"，可视化目录: {vis_out_root}"
+        if vis_out_root is not None
+        else ""
+    )
+    print(
+        f"共写入 {total} 张图片{lim_note}，JSON 条目 {len(records)}，元数据: {json_path}{vis_note}"
+    )
+    return 0
+
+
+# When run as a script, use main()'s return value as the process exit status.
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/code/repo_root.py b/code/repo_root.py
new file mode 100644
index 0000000..baa7e09
--- /dev/null
+++ b/code/repo_root.py
@@ -0,0 +1,6 @@
+"""Repository-root constant: this file must live in the ``code/`` root directory that contains ``dataset.py``."""
+from __future__ import annotations
+
+from pathlib import Path
+
+# Absolute, resolved path of the directory that contains this file.
+CODE_ROOT = Path(__file__).resolve().parent
diff --git a/code/video_clip_cls/infer_single_0506/run_segments_consumable_vote.py b/code/video_clip_cls/infer_single_0506/run_segments_consumable_vote.py
new file mode 100644
index 0000000..656a5ab
--- /dev/null
+++ b/code/video_clip_cls/infer_single_0506/run_segments_consumable_vote.py
@@ -0,0 +1,564 @@
+#!/usr/bin/env python3
+"""
+仅在「时间段 txt」内跑：人手检测 → **逐帧**好/坏门控（**top1 为 good 且 top1conf>阈值**，默认阈值 0.9）
+→ 仅通过的帧跑 41 类耗材分类；（可选）仅保留 **耗材 softmax 最大值 > --haocai-min-conf** 的帧；
+对保留帧的标签序列做 **滑动窗口多数票平滑**，再 **`consumable` 取平滑后序列众数**。
+
+**avg_softmax_*** ：仅对上述「高置信耗材帧」统计；类别为 softmax 均值分布前三；置信度为三档边际 softmax 在时间上的平均。
+
+不扫全片；每段从视频中按起止时间解码。
+
+用法（建议在 yolo 环境）:
+  python code/video_clip_cls/infer_single_0506/run_segments_consumable_vote.py \\
+    --segments .../03视频_segments_mutual_exclusive_score_gt_0.1.txt \\
+    --video .../03视频.mp4 \\
+    --out .../03视频_segments_consumables.txt
+"""
+
+from __future__ import annotations
+
+import argparse
+import sys
+from collections import Counter
+from pathlib import Path
+
+import cv2
+import numpy as np
+from ultralytics import YOLO
+
+# Walk up through this file's ancestor directories and put the first one that
+# contains both repo_root.py and dataset.py on sys.path, so that `repo_root`
+# can be imported right below.
+for _repo in Path(__file__).resolve().parents:
+    if (_repo / "repo_root.py").is_file() and (_repo / "dataset.py").is_file():
+        if str(_repo) not in sys.path:
+            sys.path.insert(0, str(_repo))
+        break
+else:
+    # The loop ended without `break`: no ancestor matched.
+    raise RuntimeError("未定位到仓库 code/ 根目录")
+
+from repo_root import CODE_ROOT  # noqa: E402
+
+
+def parse_segments_txt(path: Path) -> list[tuple[int, float, float]]:
+    """Parse a tab-separated segments file into ``(rank, start_sec, end_sec)`` rows.
+
+    Skipped lines: blank lines, a header line starting with ``rank``
+    (case-insensitive, leading whitespace ignored) and lines with fewer than
+    four tab-separated fields. Only the first three fields are used.
+
+    The file is decoded as ``utf-8-sig`` so that a leading UTF-8 byte-order
+    mark does not hide the ``rank`` header from the header check.
+
+    Raises:
+        ValueError: if the first three fields of a data line are not numeric.
+    """
+    rows: list[tuple[int, float, float]] = []
+    for raw in path.read_text(encoding="utf-8-sig").splitlines():
+        text = raw.strip()
+        if not text or text.lower().startswith("rank"):
+            continue
+        # Split the unstripped line so a trailing empty field still counts.
+        parts = raw.split("\t")
+        if len(parts) < 4:
+            continue
+        rank = int(parts[0].strip())
+        t0 = float(parts[1].strip())
+        t1 = float(parts[2].strip())
+        rows.append((rank, t0, t1))
+    return rows
+
+
+def collect_hand_boxes(det_model: YOLO, boxes) -> list[list[float]]:
+    """Return the ``xyxy`` coordinates of every box whose class name is exactly ``"hand"``.
+
+    Class ids are mapped to names through ``det_model.names``; ids missing
+    from that mapping are treated as non-hand.
+    """
+    id_to_name = det_model.names
+    return [
+        det.xyxy[0].tolist()
+        for det in boxes
+        if id_to_name.get(int(det.cls[0]), "") == "hand"
+    ]
+
+
+def pad_box(
+    xyxy: list[float], img_w: int, img_h: int, pad_ratio: float
+) -> tuple[int, int, int, int]:
+    """Grow a box on all four sides by ``pad_ratio`` of its own width/height.
+
+    The result is truncated to integers and clamped to ``[0, img_w]`` /
+    ``[0, img_h]``. The original docstring marks this helper as legacy, with
+    ``pad_box_bottom_only`` as its Phase2 replacement.
+    """
+    left, top, right, bottom = xyxy
+    dx = (right - left) * pad_ratio
+    dy = (bottom - top) * pad_ratio
+    x_min = max(0, int(left - dx))
+    y_min = max(0, int(top - dy))
+    x_max = min(img_w, int(right + dx))
+    y_max = min(img_h, int(bottom + dy))
+    return x_min, y_min, x_max, y_max
+
+
+def pad_box_bottom_only(
+    xyxy: list[float], img_w: int, img_h: int, bottom_ratio: float
+) -> tuple[int, int, int, int]:
+    """Extend a box downwards only: ``y2`` grows by ``box_height * bottom_ratio``.
+
+    Top, left and right edges are left where they are. All coordinates are
+    truncated to integers and clamped to the image bounds.
+    """
+    left, top, right, bottom = xyxy
+    extra = (bottom - top) * float(bottom_ratio)
+    x_min = max(0, int(left))
+    y_min = max(0, int(top))
+    x_max = min(img_w, int(right))
+    y_max = min(img_h, int(bottom + extra))
+    return x_min, y_min, x_max, y_max
+
+
+def largest_hand(hands: list[list[float]]) -> list[float]:
+    """Return the ``xyxy`` box with the largest area; the first one wins on ties.
+
+    Negative widths/heights count as zero. ``max`` raises ``ValueError`` when
+    ``hands`` is empty.
+    """
+    return max(
+        hands,
+        key=lambda b: max(0.0, b[2] - b[0]) * max(0.0, b[3] - b[1]),
+    )
+
+
+def _float_top1conf(pr) -> float:
+    """Return ``pr.top1conf`` as a Python float (0.0 when it is None).
+
+    Plain numbers and NumPy floats are converted directly; anything else is
+    unwrapped through ``detach().float().cpu().item()``.
+    """
+    conf = pr.top1conf
+    if conf is None:
+        return 0.0
+    if not isinstance(conf, (float, int, np.floating)):
+        conf = conf.detach().float().cpu().item()
+    return float(conf)
+
+
+def passes_good_gate_top1_conf(
+    gb_model: YOLO,
+    crop: np.ndarray,
+    gb_names: dict,
+    imgsz: int,
+    top1_conf_must_exceed: float,
+) -> bool:
+    """Good/bad gate: True only if the top-1 class is ``good`` and its confidence is strictly above the threshold.
+
+    An empty crop, or a prediction without ``probs``, fails the gate. The
+    class name is compared after stripping and lower-casing.
+    """
+    if crop.size == 0:
+        return False
+    probs = gb_model.predict(crop, imgsz=imgsz, verbose=False)[0].probs
+    if probs is None:
+        return False
+    predicted = str(gb_names.get(int(probs.top1), "")).strip().lower()
+    confidence = _float_top1conf(probs)
+    if predicted != "good":
+        return False
+    return confidence > top1_conf_must_exceed
+
+
+def haocai_softmax_probs(
+    cls_model: YOLO, crop: np.ndarray, imgsz: int, n_cls: int
+) -> np.ndarray | None:
+    """Classify ``crop`` and return a length-``n_cls`` probability vector.
+
+    The model output is truncated to ``n_cls`` entries, or zero-padded when
+    it is shorter. (``np.resize`` used to be applied here, which pads by
+    repeating the existing probabilities and so invented mass for classes the
+    model never scored.)
+
+    Values whose sum is within 0.08 of 1 are treated as probabilities and
+    renormalised; otherwise they are treated as logits and passed through a
+    softmax. Padding is added only after normalisation, so padded classes
+    always get probability 0.
+
+    Returns:
+        The probability vector, or None for an empty crop, a prediction
+        without ``probs`` data, or scores whose sum is <= 1e-12.
+    """
+    if crop.size == 0:
+        return None
+    r = cls_model.predict(crop, imgsz=imgsz, verbose=False)[0]
+    pr = r.probs
+    if pr is None or pr.data is None:
+        return None
+    raw = pr.data.detach().float().cpu().numpy().astype(np.float64).ravel()
+    k = min(raw.size, n_cls)
+    head = raw[:k].copy()
+    s = float(np.sum(head))
+    if s <= 1e-12:
+        return None
+    if abs(s - 1.0) > 0.08:
+        # Not normalised: apply a numerically stable softmax.
+        shifted = head - float(np.max(head))
+        e = np.exp(np.clip(shifted, -40.0, 40.0))
+        head = e / float(np.sum(e))
+    else:
+        head = head / s
+    if k == n_cls:
+        return head
+    # Model produced fewer scores than requested classes: pad with zeros.
+    out = np.zeros(n_cls, dtype=np.float64)
+    out[:k] = head
+    return out
+
+
+def _cls_name(names: dict, idx: int) -> str:
+    """Look up the class name for ``int(idx)``; fall back to ``str(idx)`` when it is unknown."""
+    key = int(idx)
+    fallback = str(idx)
+    return str(names.get(key, fallback))
+
+
+def mean_softmax_top3(
+    probs_list: list[np.ndarray], cls_names: dict
+) -> tuple[list[str], list[float]]:
+    """Summarise per-frame softmax vectors as three class names and three confidences.
+
+    Names: the per-class arithmetic mean over all frames is computed and the
+    three classes with the highest mean probability are reported.
+
+    Confidences (decoupled from the names): each frame's softmax is sorted in
+    descending order and the 1st/2nd/3rd largest values are averaged over
+    frames, i.e. a time average of the within-frame top-k probabilities.
+
+    Both lists always have three slots; missing ones are filled with "" and
+    0.0, including when ``probs_list`` is empty.
+    """
+    if not probs_list:
+        return [""] * 3, [0.0] * 3
+
+    stacked = np.stack(probs_list, axis=0)
+    mean_vec = np.mean(stacked, axis=0, dtype=np.float64)
+    ranking = np.argsort(-mean_vec)
+    names_out = [
+        _cls_name(cls_names, int(ranking[k])) if k < ranking.size else ""
+        for k in range(3)
+    ]
+
+    # Per-frame descending sort, then average each of the first three ranks over time.
+    descending = np.sort(stacked, axis=1)[:, ::-1]
+    width = descending.shape[1]
+    probs_out = [
+        float(np.mean(descending[:, k], dtype=np.float64)) if k < width else 0.0
+        for k in range(3)
+    ]
+    return names_out, probs_out
+
+
+def smooth_labels_majority(labels: list[str], window: int) -> list[str]:
+    """Smooth a time-ordered label sequence with a centred majority vote.
+
+    Each position is replaced by the most common label in the odd-length
+    window centred on it (an even ``window`` is widened by one). Windows are
+    clipped at the sequence ends. Ties go to whichever label
+    ``Counter.most_common`` lists first. With ``window <= 1`` or an empty
+    input, a copy of ``labels`` is returned.
+    """
+    if window <= 1 or not labels:
+        return list(labels)
+    # For odd w = 2h + 1 and for even window = 2h (widened to 2h + 1) the
+    # half-width is the same: window // 2.
+    reach = window // 2
+    return [
+        Counter(labels[max(0, pos - reach):pos + reach + 1]).most_common(1)[0][0]
+        for pos in range(len(labels))
+    ]
+
+
+def process_segment(
+    cap: cv2.VideoCapture,
+    det: YOLO,
+    gb: YOLO,
+    cls_m: YOLO,
+    *,
+    start_sec: float,
+    end_sec: float,
+    seek_margin_sec: float,
+    det_conf: float,
+    pad_ratio: float,
+    imgsz_det: int,
+    imgsz_cls: int,
+    frame_stride: int,
+    good_top1_conf_threshold: float,
+    haocai_min_conf: float,
+    smooth_label_window: int,
+    gb_names: dict,
+    cls_names: dict,
+) -> dict:
+    """Run hand detection -> good/bad gate -> consumable classification over one time segment.
+
+    Frames between ``start_sec`` and ``end_sec`` are decoded from ``cap``. On
+    every ``frame_stride``-th frame the largest detected hand is padded with
+    ``pad_box`` and cropped; crops that pass the good-frame gate are
+    classified, and predictions whose top probability is strictly above
+    ``haocai_min_conf`` are kept. The kept labels are smoothed with
+    ``smooth_labels_majority`` and the most common smoothed label is reported
+    as ``consumable``.
+
+    Returns:
+        A dict with the keys ``consumable``, ``n_hand_frames``,
+        ``n_gate_pass``, ``n_predictions``, ``top_vote_count`` and
+        ``avg_top{1,2,3}_cls`` / ``avg_top{1,2,3}_prob``. When no hand was
+        detected, or no frame yielded a kept prediction, ``consumable`` holds
+        an explanatory message and the ``avg_*`` fields are empty strings.
+    """
+    # HEVC / some mp4 files: seeking straight to the start tends to yield frames
+    # with broken references, so seek back by a margin first and then decode
+    # sequentially, discarding frames until the segment start is reached.
+    probe_from = float(max(0.0, start_sec - seek_margin_sec))
+    cap.set(cv2.CAP_PROP_POS_MSEC, probe_from * 1000.0)
+    synced_frame: np.ndarray | None = None
+    synced_t: float | None = None
+    # Tolerance (seconds) when deciding that the decoder has reached start_sec.
+    tol = 0.04
+    while True:
+        ok0, grab = cap.read()
+        if not ok0 or grab is None:
+            synced_frame, synced_t = None, None
+            break
+        t0 = float(cap.get(cv2.CAP_PROP_POS_MSEC)) / 1000.0
+        if t0 + tol >= start_sec:
+            synced_frame, synced_t = grab, t0
+            break
+
+    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+
+    # Length of the probability vector: largest class id in cls_names plus one.
+    n_cls_key_max = max(int(k) for k in cls_names.keys())
+    n_cls = n_cls_key_max + 1
+
+    n_hand_frames = 0
+    # Number of frames with top1 == good and top1conf > threshold (counted as soon
+    # as the gate passes, whether or not a softmax vector is obtained afterwards).
+    n_gate_pass = 0
+    cls_labels: list[str] = []
+    cls_prob_rows: list[np.ndarray] = []
+    frames_read_in_segment = 0
+
+    def one_frame(fr: np.ndarray, _t_abs: float) -> None:
+        """Process one decoded frame and update the counters / prediction lists."""
+        nonlocal frames_read_in_segment, n_hand_frames, n_gate_pass, cls_labels, cls_prob_rows
+        frames_read_in_segment += 1
+        # With a stride > 1 only the 1st, (1 + stride)-th, ... frames are inferred.
+        if frame_stride > 1 and (frames_read_in_segment - 1) % frame_stride != 0:
+            return
+
+        r0 = det.predict(
+            fr,
+            conf=det_conf,
+            imgsz=imgsz_det,
+            verbose=False,
+        )[0]
+        hands = collect_hand_boxes(det, r0.boxes) if r0.boxes else []
+        if not hands:
+            return
+
+        n_hand_frames += 1
+        # Only the largest hand box is used; it is padded on all four sides.
+        xyxy = largest_hand(hands)
+        x1, y1, x2, y2 = pad_box(xyxy, w, h, pad_ratio)
+        crop = fr[y1:y2, x1:x2]
+        ok_gate = passes_good_gate_top1_conf(
+            gb, crop, gb_names, imgsz_cls, good_top1_conf_threshold
+        )
+        if ok_gate:
+            n_gate_pass += 1
+            vec = haocai_softmax_probs(cls_m, crop, imgsz_cls, n_cls)
+            if vec is not None:
+                top_prob = float(np.max(vec))
+                # Keep only predictions strictly above the confidence floor.
+                if top_prob <= haocai_min_conf:
+                    return
+                cls_prob_rows.append(vec)
+                cls_labels.append(_cls_name(cls_names, int(np.argmax(vec))))
+
+    # The frame that ended the sync loop is the first frame of the segment,
+    # unless it already lies past the segment end (plus 0.08 s of slack).
+    if synced_frame is not None and synced_t is not None:
+        if synced_t <= end_sec + 0.08:
+            one_frame(synced_frame, synced_t)
+
+    while True:
+        ok, frame = cap.read()
+        if not ok or frame is None:
+            break
+        t = float(cap.get(cv2.CAP_PROP_POS_MSEC)) / 1000.0
+        if t > end_sec + 0.08:
+            break
+        if t + 1e-6 < start_sec:
+            continue
+        one_frame(frame, t)
+
+    # No hand detected anywhere in the segment.
+    if n_hand_frames == 0:
+        return {
+            "consumable": "（段内未检测到手部）",
+            "n_hand_frames": 0,
+            "n_gate_pass": 0,
+            "n_predictions": 0,
+            "top_vote_count": 0,
+            "avg_top1_cls": "",
+            "avg_top1_prob": "",
+            "avg_top2_cls": "",
+            "avg_top2_prob": "",
+            "avg_top3_cls": "",
+            "avg_top3_prob": "",
+        }
+
+    # Hands were seen, but no frame produced a kept consumable prediction.
+    if not cls_labels:
+        return {
+            "consumable": (
+                "（无满足条件的耗材帧：好帧置信度或未过门控"
+                + (
+                    "" if haocai_min_conf <= 0.0
+                    else "，或耗材 top1 softmax 不大于阈值"
+                )
+                + "）"
+            ),
+            "n_hand_frames": n_hand_frames,
+            "n_gate_pass": n_gate_pass,
+            "n_predictions": 0,
+            "top_vote_count": 0,
+            "avg_top1_cls": "",
+            "avg_top1_prob": "",
+            "avg_top2_cls": "",
+            "avg_top2_prob": "",
+            "avg_top3_cls": "",
+            "avg_top3_prob": "",
+        }
+
+    # Vote on the smoothed label sequence; softmax statistics use the raw kept vectors.
+    smoothed = smooth_labels_majority(cls_labels, smooth_label_window)
+    top_name, vote_n = Counter(smoothed).most_common(1)[0]
+    a1, ap1 = mean_softmax_top3(cls_prob_rows, cls_names)
+    return {
+        "consumable": top_name,
+        "n_hand_frames": n_hand_frames,
+        "n_gate_pass": n_gate_pass,
+        "n_predictions": len(cls_labels),
+        "top_vote_count": int(vote_n),
+        "avg_top1_cls": a1[0],
+        "avg_top1_prob": f"{ap1[0]:.6f}",
+        "avg_top2_cls": a1[1],
+        "avg_top2_prob": f"{ap1[1]:.6f}",
+        "avg_top3_cls": a1[2],
+        "avg_top3_prob": f"{ap1[2]:.6f}",
+    }
+
+
+def main() -> int:
+    """Command-line entry point: classify the consumable in every listed segment and write a TSV.
+
+    Returns:
+        0 after the output file has been written; 1 when the segments file,
+        the video or a model weight file is missing, when the segments file
+        yields no rows, or when the video cannot be opened.
+    """
+    ap = argparse.ArgumentParser(
+        description="手检 + 逐帧 top1=good 且 top1conf>阈值门控 + 耗材分类；段内众数"
+    )
+    ap.add_argument(
+        "--segments",
+        type=Path,
+        default=Path(__file__).resolve().parent
+        / "results"
+        / "03视频_segments_mutual_exclusive_score_gt_0.1.txt",
+    )
+    ap.add_argument(
+        "--video",
+        type=Path,
+        default=CODE_ROOT.parent
+        / "data/haocai/5月6号视频/5月6日第二次视频/03视频.mp4",
+    )
+    ap.add_argument(
+        "--hand-model",
+        type=Path,
+        default=CODE_ROOT
+        / "hand_detection/runs/hand_det_y11s_multiframe-better/weights/best.pt",
+    )
+    ap.add_argument(
+        "--goodbad-model",
+        type=Path,
+        default=CODE_ROOT
+        / "goodORbad_frame/runs/goodbad_frame_y11m_e50/weights/best.pt",
+    )
+    ap.add_argument(
+        "--haocai-model",
+        type=Path,
+        default=CODE_ROOT
+        / "haocai_classify/runs/haocai_cls_41cls_goodframe_lastest-0.95"
+        / "weights/best.pt",
+    )
+    ap.add_argument(
+        "--out",
+        type=Path,
+        default=Path(__file__).resolve().parent
+        / "results"
+        / "03视频_segments_consumables.txt",
+    )
+    ap.add_argument(
+        "--good-top1-conf-threshold",
+        type=float,
+        default=0.90,
+        dest="good_top1_conf_threshold",
+        help="逐帧：仅当 top1 为 good 且 top1conf **严格大于**该值时才跑耗材分类（默认对应 top1conf>0.9）",
+    )
+    ap.add_argument(
+        "--haocai-min-conf",
+        type=float,
+        default=0.0,
+        metavar="P",
+        help="耗材：仅 softmax 最大值 **严格大于** P 的帧计入标签与 softmax 统计（0 表示不按耗材置信度筛）",
+    )
+    ap.add_argument(
+        "--smooth-label-window",
+        type=int,
+        default=1,
+        metavar="W",
+        help="耗材标签平滑：长度为 W 的奇数滑动窗口内多数票（W≤1 不平滑）；众数取平滑后的序列",
+    )
+    ap.add_argument("--det-conf", type=float, default=0.5)
+    ap.add_argument("--pad-ratio", type=float, default=0.30)
+    ap.add_argument("--imgsz-det", type=int, default=640)
+    ap.add_argument("--imgsz-cls", type=int, default=224)
+    ap.add_argument(
+        "--frame-stride",
+        type=int,
+        default=1,
+        help=">1 时代码逐帧解码但每 N 帧推理一次（省算力，结论可能略粗糙）",
+    )
+    ap.add_argument(
+        "--seek-margin-sec",
+        type=float,
+        default=3.0,
+        help="HEVC 等非关键帧 seek 时往回多跳若干秒再解码到段起点，减轻花屏",
+    )
+    args = ap.parse_args()
+
+    # Check every input path before loading any model.
+    seg_path = args.segments.resolve()
+    vid_path = args.video.resolve()
+    if not seg_path.is_file():
+        print("找不到时间段文件:", seg_path, file=sys.stderr)
+        return 1
+    if not vid_path.is_file():
+        print("找不到视频:", vid_path, file=sys.stderr)
+        return 1
+    for pt, lab in (
+        (args.hand_model, "hand"),
+        (args.goodbad_model, "good/bad"),
+        (args.haocai_model, "haocai cls"),
+    ):
+        if not Path(pt).is_file():
+            print(f"缺少{lab} 权重:", pt, file=sys.stderr)
+            return 1
+
+    segments = parse_segments_txt(seg_path)
+    if not segments:
+        print("时间段为空:", seg_path, file=sys.stderr)
+        return 1
+
+    print("加载模型…", flush=True)
+    det = YOLO(str(args.hand_model))
+    gb = YOLO(str(args.goodbad_model))
+    cls_m = YOLO(str(args.haocai_model))
+    gb_names = gb.names
+    cls_names = cls_m.names
+
+    cap = cv2.VideoCapture(str(vid_path))
+    if not cap.isOpened():
+        print("无法打开视频:", vid_path, file=sys.stderr)
+        return 1
+
+    # Output is tab-separated; the first line is the header row.
+    sep = "\t"
+    out_lines = [
+        sep.join([
+            "rank",
+            "start_sec",
+            "end_sec",
+            "consumable",
+            "n_hand_frames",
+            "n_frames_top1_good_conf_gt_thresh",
+            "n_consumable_predictions",
+            "top_label_vote_count",
+            "avg_softmax_top1_cls",
+            "avg_softmax_top1_prob",
+            "avg_softmax_top2_cls",
+            "avg_softmax_top2_prob",
+            "avg_softmax_top3_cls",
+            "avg_softmax_top3_prob",
+        ])
+    ]
+
+    # The capture is released even if a segment raises.
+    try:
+        for rank, t0, t1 in segments:
+            print(f"段落 rank={rank} [{t0:.3f},{t1:.3f}]s …", flush=True)
+            info = process_segment(
+                cap,
+                det,
+                gb,
+                cls_m,
+                start_sec=t0,
+                end_sec=t1,
+                seek_margin_sec=args.seek_margin_sec,
+                det_conf=args.det_conf,
+                pad_ratio=args.pad_ratio,
+                imgsz_det=args.imgsz_det,
+                imgsz_cls=args.imgsz_cls,
+                # Stride and window are clamped to at least 1.
+                frame_stride=max(1, args.frame_stride),
+                good_top1_conf_threshold=args.good_top1_conf_threshold,
+                haocai_min_conf=args.haocai_min_conf,
+                smooth_label_window=max(1, args.smooth_label_window),
+                gb_names=gb_names,
+                cls_names=cls_names,
+            )
+            row = sep.join([
+                str(rank),
+                f"{t0:.6f}",
+                f"{t1:.6f}",
+                str(info["consumable"]),
+                str(info["n_hand_frames"]),
+                str(info["n_gate_pass"]),
+                str(info["n_predictions"]),
+                str(info["top_vote_count"]),
+                info["avg_top1_cls"],
+                info["avg_top1_prob"],
+                info["avg_top2_cls"],
+                info["avg_top2_prob"],
+                info["avg_top3_cls"],
+                info["avg_top3_prob"],
+            ])
+            out_lines.append(row)
+            print(
+                f"  -> {info['consumable']} "
+                f"(votes {info['top_vote_count']}/{info['n_predictions']}, "
+                f"goodgate {info['n_gate_pass']}/{info['n_hand_frames']} hand frames)",
+                flush=True,
+            )
+    finally:
+        cap.release()
+
+    # All rows are written at once, after every segment has been processed.
+    out_path = args.out.resolve()
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    out_path.write_text("\n".join(out_lines) + "\n", encoding="utf-8")
+    print("已写出:", out_path, flush=True)
+    return 0
+
+
+# When run as a script, use main()'s return value as the process exit status.
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/code/video_clip_cls/scripts/pipeline/__init__.py b/code/video_clip_cls/scripts/pipeline/__init__.py
new file mode 100644
index 0000000..709eed1
--- /dev/null
+++ b/code/video_clip_cls/scripts/pipeline/__init__.py
@@ -0,0 +1 @@
+"""Reusable sub-modules for multi-model processing within a single segment."""
diff --git a/code/video_clip_cls/scripts/pipeline/gap_adjacent_merge.py b/code/video_clip_cls/scripts/pipeline/gap_adjacent_merge.py
new file mode 100644
index 0000000..a4fe618
--- /dev/null
+++ b/code/video_clip_cls/scripts/pipeline/gap_adjacent_merge.py
@@ -0,0 +1,132 @@
+"""Merge adjacent successful segments whose gap is below a threshold; their pairs_h are concatenated and re-aggregated with aggregate_top3_votes."""
+from __future__ import annotations
+
+from dataclasses import replace
+from typing import Callable
+
+from run_haocai_actionformer_consumables_e2e import aggregate_top3_votes
+
+from .tear_gate_merge import E2eRow
+
+_GAP_EPS = 1e-9
+
+
+def span_key(t0: float, t1: float) -> tuple[float, float]:
+    return (round(float(t0), 6), round(float(t1), 6))
+
+
+def group_rows_by_gap(
+    rows: list[E2eRow],
+    max_gap_sec: float = 2.0,
+) -> list[list[E2eRow]]:
+    """左→右贪心分组；失败行单独成组且不跨组合并。"""
+    groups: list[list[E2eRow]] = []
+    i = 0
+    while i < len(rows):
+        row = rows[i]
+        if not row.is_success():
+            groups.append([row])
+            i += 1
+            continue
+        grp = [row]
+        j = i + 1
+        while j < len(rows):
+            nxt = rows[j]
+            if not nxt.is_success():
+                break
+            gap = float(nxt.start_sec) - float(grp[-1].end_sec)
+            if gap < float(max_gap_sec) - _GAP_EPS:
+                grp.append(nxt)
+                j += 1
+            else:
+                break
+        groups.append(grp)
+        i = j
+    return groups
+
+
+def e2e_row_from_pairs(
+    start_sec: float,
+    end_sec: float,
+    pairs: list[tuple[str, float]],
+    product_map: dict[str, str],
+    *,
+    rank: int = 0,
+) -> E2eRow:
+    names, confs = aggregate_top3_votes(pairs)
+    n1, n2, n3 = (names + ["", "", ""])[:3]
+    c1, c2, c3 = (confs + [0.0, 0.0, 0.0])[:3]
+    id1 = product_map.get(n1, "") if n1 else ""
+    id2 = product_map.get(n2, "") if n2 else ""
+    id3 = product_map.get(n3, "") if n3 else ""
+
+    def _cf(nm: str, c: float) -> str:
+        return f"{c:.6f}" if nm else ""
+
+    return E2eRow(
+        rank=rank,
+        start_sec=float(start_sec),
+        end_sec=float(end_sec),
+        id1=id1,
+        n1=n1,
+        c1=_cf(n1, c1),
+        id2=id2,
+        n2=n2,
+        c2=_cf(n2, c2),
+        id3=id3,
+        n3=n3,
+        c3=_cf(n3, c3),
+    )
+
+
+def merge_all_by_gap(
+    rows: list[E2eRow],
+    span_to_pairs: dict[tuple[float, float], list[tuple[str, float]]],
+    product_map: dict[str, str],
+    *,
+    max_gap_sec: float = 2.0,
+    log_fn: Callable[[str], None] | None = None,
+) -> list[E2eRow]:
+    """按 gap 分组合并；组内拼接 pairs_h 后重新 aggregate top3。"""
+    merged: list[E2eRow] = []
+    for grp in group_rows_by_gap(rows, max_gap_sec):
+        if len(grp) == 1:
+            merged.append(grp[0])
+            continue
+
+        all_pairs: list[tuple[str, float]] = []
+        pair_counts: list[int] = []
+        missing = False
+        for r in grp:
+            sk = span_key(r.start_sec, r.end_sec)
+            pairs = span_to_pairs.get(sk)
+            if pairs is None:
+                missing = True
+                break
+            pair_counts.append(len(pairs))
+            all_pairs.extend(pairs)
+
+        if missing or not all_pairs:
+            if log_fn and missing:
+                ranks = ",".join(str(r.rank) for r in grp)
+                log_fn(f"[gap_merge] 跳过合并 rank={ranks}（缺少 pairs_h 缓存）")
+            merged.extend(grp)
+            continue
+
+        out_row = e2e_row_from_pairs(
+            grp[0].start_sec,
+            grp[-1].end_sec,
+            all_pairs,
+            product_map,
+        )
+        if log_fn:
+            cnt_str = "+".join(str(n) for n in pair_counts)
+            ranks = "~".join(str(r.rank) for r in grp)
+            log_fn(
+                f"[gap_merge] 合并 rank={ranks} "
+                f"[{out_row.start_sec:.3f},{out_row.end_sec:.3f}] "
+                f"pairs 帧数 {cnt_str}={len(all_pairs)}"
+            )
+        merged.append(out_row)
+
+    return [replace(r, rank=i) for i, r in enumerate(merged, start=1)]
diff --git a/code/video_clip_cls/scripts/pipeline/hand_roi_merge.py b/code/video_clip_cls/scripts/pipeline/hand_roi_merge.py
new file mode 100644
index 0000000..f264130
--- /dev/null
+++ b/code/video_clip_cls/scripts/pipeline/hand_roi_merge.py
@@ -0,0 +1,131 @@
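+"""
+Hand detection box grouping: when at least two hands are detected they are merged into a single ROI; frames with fewer than two hands are skipped.
+
+Coordinate system: everything is in original-image pixel space (consistent with Ultralytics xyxy).
+Memory: only small crops made with .copy() of numpy slices are produced, to avoid holding long-lived references to the whole frame.
+"""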
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+import numpy as np
+
+
+@dataclass
+class HandMergeConfig:
+    """Criteria for merging two hand boxes into one enclosing ROI (OR-combined: any satisfied criterion merges)."""
+
+    # Merge when IoU is strictly greater than this value; the default 0 means any overlap (IoU > 0) merges.
+    merge_iou_gt: float = 0.0
+    # Merge when the Euclidean distance (pixels) between box centres does not exceed this value; None disables the check.
+    merge_center_dist_max_px: float | None = None
+    # Merge when the centre distance does not exceed frame_diag * this fraction; None disables it (diagonal = sqrt(W^2 + H^2)).
+    merge_center_dist_max_frac_diag: float | None = None
+
+
+def bbox_area_xyxy(b: list[float]) -> float:
+    x1, y1, x2, y2 = b
+    return max(0.0, x2 - x1) * max(0.0, y2 - y1)
+
+
+def bbox_iou_xyxy(a: list[float], b: list[float]) -> float:
+    """轴对齐框 IoU。"""
+    ax1, ay1, ax2, ay2 = a
+    bx1, by1, bx2, by2 = b
+    ix1 = max(ax1, bx1)
+    iy1 = max(ay1, by1)
+    ix2 = min(ax2, bx2)
+    iy2 = min(ay2, by2)
+    iw = max(0.0, ix2 - ix1)
+    ih = max(0.0, iy2 - iy1)
+    inter = iw * ih
+    if inter <= 0:
+        return 0.0
+    ua = bbox_area_xyxy(a) + bbox_area_xyxy(b) - inter
+    if ua <= 1e-12:
+        return 0.0
+    return inter / ua
+
+
+def bbox_center(xyxy: list[float]) -> tuple[float, float]:
+    x1, y1, x2, y2 = xyxy
+    return (0.5 * (x1 + x2), 0.5 * (y1 + y2))
+
+
+def bbox_center_distance(a: list[float], b: list[float]) -> float:
+    cx1, cy1 = bbox_center(a)
+    cx2, cy2 = bbox_center(b)
+    dx = cx1 - cx2
+    dy = cy1 - cy2
+    return float((dx * dx + dy * dy) ** 0.5)
+
+
+def union_xyxy(a: list[float], b: list[float]) -> list[float]:
+    """两框轴对齐最小外接矩形（仍在原图坐标）。"""
+    ax1, ay1, ax2, ay2 = a
+    bx1, by1, bx2, by2 = b
+    return [
+        min(ax1, bx1),
+        min(ay1, by1),
+        max(ax2, bx2),
+        max(ay2, by2),
+    ]
+
+
+def two_largest_hands(hands: list[list[float]]) -> tuple[list[float], list[float]]:
+    """按面积取最大的两只手（ hands 已非空且至少 2 个）。"""
+    sorted_h = sorted(hands, key=bbox_area_xyxy, reverse=True)
+    return sorted_h[0], sorted_h[1]
+
+
+def hands_should_merge(
+    h1: list[float],
+    h2: list[float],
+    cfg: HandMergeConfig,
+    frame_diag: float,
+) -> bool:
+    iou = bbox_iou_xyxy(h1, h2)
+    if iou > cfg.merge_iou_gt + 1e-12:
+        return True
+    d = bbox_center_distance(h1, h2)
+    if cfg.merge_center_dist_max_px is not None and d <= cfg.merge_center_dist_max_px + 1e-12:
+        return True
+    if (
+        cfg.merge_center_dist_max_frac_diag is not None
+        and d <= cfg.merge_center_dist_max_frac_diag * frame_diag + 1e-12
+    ):
+        return True
+    return False
+
+
+class HandRoiGrouper:
+    """根据配置把手框列表转为 1~2 张 ROI（带 padding 的裁剪图）。"""
+
+    def __init__(
+        self,
+        merge_cfg: HandMergeConfig,
+        pad_box_fn,
+        pad_ratio: float,
+    ) -> None:
+        self.merge_cfg = merge_cfg
+        self.pad_box_fn = pad_box_fn
+        self.pad_ratio = pad_ratio
+
+    def frame_to_rois(
+        self,
+        frame: np.ndarray,
+        hands: list[list[float]],
+    ) -> list[np.ndarray]:
+        """
+        从整帧与手框列表得到本帧用于分类的小图列表。
+        至少两只手：取面积最大的两只，合并外接框后 1 张；否则返回空（跳过该帧）。
+        """
+        h, w = frame.shape[:2]
+        if len(hands) < 2:
+            return []
+
+        h1, h2 = two_largest_hands(hands)
+        uni = union_xyxy(h1, h2)
+        x1, y1, x2, y2 = self.pad_box_fn(uni, w, h, self.pad_ratio)
+        crop = np.ascontiguousarray(frame[y1:y2, x1:x2].copy())
+        return [crop]
diff --git a/code/video_clip_cls/scripts/pipeline/segment_processor.py b/code/video_clip_cls/scripts/pipeline/segment_processor.py
new file mode 100644
index 0000000..3e3b7da
--- /dev/null
+++ b/code/video_clip_cls/scripts/pipeline/segment_processor.py
@@ -0,0 +1,916 @@
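+"""
+Streaming decode over a single segment's time range: multi-hand ROI -> good-frame gate -> consumable + film-tearing classification, collecting the vote samples.
+
+The whole segment is never loaded into memory; after each frame the full image and the ROI can be del'ed (the caller's loop is responsible for that).
+"""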
+from __future__ import annotations
+
+import gc
+import sys
+from collections import Counter
+from pathlib import Path
+from typing import Any, Callable
+
+import cv2
+import numpy as np
+
+# Walk up from this file to find the repository's code/ root, recognised by the
+# presence of both repo_root.py and dataset.py, and make it importable.
+for _repo in Path(__file__).resolve().parents:
+    if (_repo / "repo_root.py").is_file() and (_repo / "dataset.py").is_file():
+        CODE_ROOT = _repo
+        if str(_repo) not in sys.path:
+            sys.path.insert(0, str(_repo))
+        break
+else:
+    # for/else: reached only when no ancestor directory matched.
+    raise RuntimeError("未定位到仓库 code/ 根目录")
+
+# Put the scripts directory at the front of sys.path (used by the imports below).
+_SCRIPTS = CODE_ROOT / "video_clip_cls" / "scripts"
+if str(_SCRIPTS) not in sys.path:
+    sys.path.insert(0, str(_SCRIPTS))
+
+# Same for the infer_single_0506 directory.
+_INF = CODE_ROOT / "video_clip_cls" / "infer_single_0506"
+if str(_INF) not in sys.path:
+    sys.path.insert(0, str(_INF))
+
+import run_segments_consumable_vote as _rsv  # noqa: E402
+from run_haocai_actionformer_consumables_e2e import (  # noqa: E402
+    aggregate_top3_votes,
+    mask_probs_whitelist,
+)
+from ultralytics import YOLO  # noqa: E402
+
+from pipeline.hand_roi_merge import HandRoiGrouper, two_largest_hands, union_xyxy  # noqa: E402
+
+# Same wording as the in-segment failure return of run_haocai_actionformer_consumables_e2e; used by the Phase2 retry checks.
+REASON_NO_VALID_HAOCAI_FRAMES = "（无有效耗材帧：好帧/白名单/耗材置信度未全部满足）"
+# Streaming / offline TSV variant (no good/bad-frame gate).
+REASON_NO_VALID_HAOCAI_FRAMES_STREAM = "（无有效耗材帧：白名单/耗材置信度未满足）"
+
+# Local aliases for helpers that live in run_segments_consumable_vote.
+collect_hand_boxes = _rsv.collect_hand_boxes
+pad_box = _rsv.pad_box_bottom_only
+_cls_name = _rsv._cls_name
+
+
+def _float_top1conf(pr: Any) -> float:
+    tc = pr.top1conf
+    if tc is None:
+        return 0.0
+    if isinstance(tc, (float, int, np.floating)):
+        return float(tc)
+    return float(tc.detach().float().cpu().item())
+
+
+def passes_good_gate_top1_conf_kw(
+    gb_model: YOLO,
+    crop: np.ndarray,
+    gb_names: dict,
+    imgsz: int,
+    top1_conf_must_exceed: float,
+    predict_kw: dict[str, Any],
+) -> bool:
+    """与 run_segments_consumable_vote 一致，但向 predict 透传 half/device。"""
+    if crop.size == 0:
+        return False
+    r = gb_model.predict(crop, imgsz=imgsz, verbose=False, **predict_kw)[0]
+    pr = r.probs
+    if pr is None:
+        return False
+    tid = int(pr.top1)
+    label = str(gb_names.get(tid, "")).strip().lower()
+    conf = _float_top1conf(pr)
+    return label == "good" and conf > top1_conf_must_exceed
+
+
+def aggregate_top2_votes(
+    pairs: list[tuple[str, float]],
+) -> tuple[list[str], list[float]]:
+    """与 aggregate_top3 相同思想，取前二类及次数归一化置信度。"""
+    empty = (["", ""], [0.0, 0.0])
+    if not pairs:
+        return empty
+    cnt = Counter(p[0] for p in pairs)
+    ranked = sorted(cnt.items(), key=lambda x: (-x[1], x[0]))
+    top = ranked[:2]
+    if not top:
+        return empty
+    total = float(sum(c for _, c in top))
+    if total <= 0:
+        return empty
+    out_names: list[str] = ["", ""]
+    out_conf: list[float] = [0.0, 0.0]
+    for i, (nm, c) in enumerate(top):
+        out_names[i] = nm
+        out_conf[i] = float(c) / total
+    return out_names, out_conf
+
+
+def _clip_xyxy(box: np.ndarray, img_w: int, img_h: int) -> np.ndarray:
+    """
+    将 xyxy 框裁剪到图像边界，并保证 x2>x1, y2>y1。
+    """
+    x1, y1, x2, y2 = [float(v) for v in box]
+    x1 = max(0.0, min(x1, img_w - 1.0))
+    y1 = max(0.0, min(y1, img_h - 1.0))
+    x2 = max(0.0, min(x2, img_w - 1.0))
+    y2 = max(0.0, min(y2, img_h - 1.0))
+    if x2 <= x1:
+        x2 = min(img_w - 1.0, x1 + 1.0)
+    if y2 <= y1:
+        y2 = min(img_h - 1.0, y1 + 1.0)
+    return np.array([x1, y1, x2, y2], dtype=np.float32)
+
+
+def _fuse_hands_to_one_box(hands: list[list[float]], img_w: int, img_h: int) -> np.ndarray | None:
+    """
+    多手框融合为一个大框（x1,y1,x2,y2），用于段内时序平滑与短时补帧。
+    """
+    if not hands:
+        return None
+    arr = np.asarray(hands, dtype=np.float32)
+    if arr.ndim != 2 or arr.shape[1] < 4:
+        return None
+    x1 = float(np.min(arr[:, 0]))
+    y1 = float(np.min(arr[:, 1]))
+    x2 = float(np.max(arr[:, 2]))
+    y2 = float(np.max(arr[:, 3]))
+    fused = np.array([x1, y1, x2, y2], dtype=np.float32)
+    return _clip_xyxy(fused, img_w, img_h)
+
+
+def _crop_two_hands_union(
+    fr: np.ndarray,
+    hands: list[list[float]],
+    pad_ratio: float,
+) -> np.ndarray | None:
+    """至少两只手时取最大两只 union 并 pad；否则 None（跳过该帧）。"""
+    if len(hands) < 2:
+        return None
+    img_h, img_w = fr.shape[:2]
+    h1, h2 = two_largest_hands(hands)
+    uni = union_xyxy(h1, h2)
+    x1, y1, x2, y2 = pad_box(uni, img_w, img_h, pad_ratio)
+    return fr[y1:y2, x1:x2]
+
+
+class FineGrainedClassifier:
+    """好坏帧 / 耗材 / 撕膜：薄封装 Ultralytics cls.predict，便于统一 half/device。"""
+
+    def __init__(
+        self,
+        gb: YOLO,
+        cls_m: YOLO,
+        tear_m: YOLO,
+        *,
+        gb_names: dict,
+        cls_names: dict,
+        tear_names: dict,
+        imgsz_cls: int,
+        predict_kw: dict[str, Any],
+    ) -> None:
+        self.gb = gb
+        self.cls_m = cls_m
+        self.tear_m = tear_m
+        self.gb_names = gb_names
+        self.cls_names = cls_names
+        self.tear_names = tear_names
+        self.imgsz_cls = imgsz_cls
+        self.predict_kw = predict_kw
+
+    def passes_good(
+        self,
+        crop: np.ndarray,
+        good_top1_conf_threshold: float,
+    ) -> bool:
+        return passes_good_gate_top1_conf_kw(
+            self.gb,
+            crop,
+            self.gb_names,
+            self.imgsz_cls,
+            good_top1_conf_threshold,
+            self.predict_kw,
+        )
+
+    def haocai_label_top_prob(
+        self,
+        crop: np.ndarray,
+        n_cls: int,
+        allowed_class_idx: frozenset[int] | None,
+        haocai_min_conf: float,
+    ) -> tuple[str, float] | None:
+        if crop.size == 0:
+            return None
+        r = self.cls_m.predict(crop, imgsz=self.imgsz_cls, verbose=False, **self.predict_kw)[0]
+        pr = r.probs
+        if pr is None or pr.data is None:
+            return None
+        v = pr.data.detach().float().cpu().numpy().astype(np.float64).ravel()
+        if v.size < n_cls:
+            v = np.resize(v, n_cls)
+        v = v[:n_cls].copy()
+        s = float(np.sum(v))
+        if s <= 1e-12:
+            return None
+        if abs(s - 1.0) > 0.08:
+            v = v - float(np.max(v))
+            e = np.exp(np.clip(v, -40.0, 40.0))
+            vec_raw = e / float(np.sum(e))
+        else:
+            vec_raw = v / s
+        if allowed_class_idx is not None:
+            vec = mask_probs_whitelist(vec_raw, allowed_class_idx, n_cls)
+        else:
+            vec = vec_raw
+        if vec is None:
+            return None
+        top_prob = float(np.max(vec))
+        if top_prob <= haocai_min_conf:
+            return None
+        label = int(np.argmax(vec))
+        return _cls_name(self.cls_names, label), top_prob
+
+    def tear_label_top_conf(self, crop: np.ndarray) -> tuple[str, float] | None:
+        if crop.size == 0:
+            return None
+        r = self.tear_m.predict(crop, imgsz=self.imgsz_cls, verbose=False, **self.predict_kw)[0]
+        pr = r.probs
+        if pr is None:
+            return None
+        tid = int(pr.top1)
+        conf = _float_top1conf(pr)
+        return str(self.tear_names.get(tid, str(tid))).strip(), conf
+
+
+def _maybe_cuda_empty_cache(every: int, frame_idx: int) -> None:
+    if every <= 0:
+        return
+    if frame_idx % every != 0:
+        return
+    gc.collect()
+    try:
+        import torch
+
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+    except ImportError:
+        pass
+
+
+def process_segment_multi_hand_tear(
+    cap: cv2.VideoCapture,
+    det: YOLO,
+    fg: FineGrainedClassifier,
+    grouper: HandRoiGrouper,
+    *,
+    start_sec: float,
+    end_sec: float,
+    seek_margin_sec: float,
+    det_conf: float,
+    imgsz_det: int,
+    frame_stride: int,
+    good_top1_conf_threshold: float,
+    haocai_min_conf: float,
+    cls_names: dict,
+    allowed_class_idx: frozenset[int] | None,
+    tracking_alpha: float = 0.6,
+    tracking_max_lost_frames: int = 0,
+    empty_cache_every: int = 0,
+) -> dict[str, Any]:
+    """
+    Decode one segment from ``cap`` and collect consumable / tear votes per frame.
+
+    Seeking: the capture is positioned at ``start_sec - seek_margin_sec``
+    (not below 0) and read forward until a frame's timestamp is within 0.04 s
+    of ``start_sec``; reading then continues until a timestamp exceeds
+    ``end_sec + 0.08``.
+
+    Per frame (every ``frame_stride``-th frame, counted from the first frame
+    processed): detect hands, require at least two, ask ``grouper`` for ROIs,
+    and for each ROI that passes the good-frame gate run the consumable and
+    tear classifiers.
+
+    Returns a dict with ``ok``.  On failure it carries ``reason``; on success
+    it carries the aggregated ``top_names`` / ``top_confs`` and
+    ``tear_top_names`` / ``tear_top_confs``, the raw ``pairs_h`` / ``pairs_t``
+    and the frame counters.
+
+    NOTE(review): ``tracking_alpha`` and ``tracking_max_lost_frames`` are
+    accepted but not referenced anywhere in this body.
+    NOTE(review): the "no hands" result has no ``n_hand_frames`` key, unlike
+    the other two results - confirm callers do not index it.
+    """
+    probe_from = float(max(0.0, start_sec - seek_margin_sec))
+    cap.set(cv2.CAP_PROP_POS_MSEC, probe_from * 1000.0)
+    synced_frame: np.ndarray | None = None
+    synced_t: float | None = None
+    tol = 0.04
+    # Read forward from the seek point until the first frame at (or within tol of) start_sec.
+    while True:
+        ok0, grab = cap.read()
+        if not ok0 or grab is None:
+            synced_frame, synced_t = None, None
+            break
+        t0 = float(cap.get(cv2.CAP_PROP_POS_MSEC)) / 1000.0
+        if t0 + tol >= start_sec:
+            synced_frame, synced_t = grab, t0
+            break
+
+    # Class-vector length is derived from the largest class id in cls_names.
+    n_cls_key_max = max(int(k) for k in cls_names.keys())
+    n_cls = n_cls_key_max + 1
+
+    n_hand_frames = 0
+    n_gate_pass = 0
+    # pairs_h holds the per-segment consumable candidates (class name, confidence) that are
+    # aggregated at the end of the segment (presumably confidence-weighted; see aggregate_top3_votes).
+    # Only samples that passed the gates are recorded; the failure branch is still decided by emptiness.
+    pairs_h: list[tuple[str, float]] = []
+    pairs_t: list[tuple[str, float]] = []
+    frames_read_in_segment = 0
+
+    def one_frame(fr: np.ndarray) -> None:
+        # Process a single decoded frame, updating the counters and vote lists above.
+        nonlocal frames_read_in_segment, n_hand_frames, n_gate_pass, pairs_h, pairs_t
+        frames_read_in_segment += 1
+        idx_local = frames_read_in_segment
+        _maybe_cuda_empty_cache(empty_cache_every, idx_local)
+
+        # Stride: only the 1st, (1 + stride)-th, ... frames of the segment are analysed.
+        if frame_stride > 1 and (frames_read_in_segment - 1) % frame_stride != 0:
+            return
+
+        r0 = det.predict(fr, conf=det_conf, imgsz=imgsz_det, verbose=False, **fg.predict_kw)[0]
+        hands = collect_hand_boxes(det, r0.boxes) if r0.boxes else []
+        if len(hands) < 2:
+            return
+
+        n_hand_frames += 1
+        rois = grouper.frame_to_rois(fr, hands)
+        if not rois:
+            return
+        for crop in rois:
+            if not fg.passes_good(crop, good_top1_conf_threshold):
+                del crop
+                continue
+            n_gate_pass += 1
+            hc = fg.haocai_label_top_prob(
+                crop, n_cls, allowed_class_idx, haocai_min_conf
+            )
+            tr = fg.tear_label_top_conf(crop)
+            del crop
+            if hc is not None:
+                pairs_h.append(hc)
+            if tr is not None:
+                pairs_t.append(tr)
+
+    # The frame found by the sync loop is the first frame of the segment.
+    if synced_frame is not None and synced_t is not None and synced_t <= end_sec + 0.08:
+        one_frame(synced_frame)
+        del synced_frame
+        synced_frame = None
+
+    while True:
+        ok, frame = cap.read()
+        if not ok or frame is None:
+            break
+        t = float(cap.get(cv2.CAP_PROP_POS_MSEC)) / 1000.0
+        if t > end_sec + 0.08:
+            del frame
+            break
+        if t + 1e-6 < start_sec:
+            del frame
+            continue
+        one_frame(frame)
+        del frame
+
+    # Final cleanup once the segment has been decoded.
+    gc.collect()
+    if empty_cache_every > 0:
+        try:
+            import torch
+
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+        except ImportError:
+            pass
+
+    if n_hand_frames == 0:
+        return {"ok": False, "reason": "（段内未检测到手部）", "pairs_h": [], "pairs_t": [], "n_gate_pass": 0}
+    if not pairs_h:
+        return {
+            "ok": False,
+            "reason": REASON_NO_VALID_HAOCAI_FRAMES,
+            "pairs_h": [],
+            "pairs_t": pairs_t,
+            "n_hand_frames": n_hand_frames,
+            "n_gate_pass": n_gate_pass,
+        }
+
+    n1, c1 = aggregate_top3_votes(pairs_h)
+    t1, t2 = aggregate_top2_votes(pairs_t)
+    return {
+        "ok": True,
+        "top_names": n1,
+        "top_confs": c1,
+        "tear_top_names": t1,
+        "tear_top_confs": t2,
+        "pairs_h": pairs_h,
+        "pairs_t": pairs_t,
+        "n_hand_frames": n_hand_frames,
+        "n_gate_pass": n_gate_pass,
+        "n_valid_haocai": len(pairs_h),
+    }
+
+
+def process_segment_multi_hand_tear_with_gate_retries(
+    cap: cv2.VideoCapture,
+    det: YOLO,
+    fg: FineGrainedClassifier,
+    grouper: HandRoiGrouper,
+    *,
+    start_sec: float,
+    end_sec: float,
+    seek_margin_sec: float,
+    det_conf: float,
+    imgsz_det: int,
+    frame_stride: int,
+    good_top1_conf_threshold: float,
+    good_top1_retry_threshold: float,
+    haocai_min_conf: float,
+    haocai_min_conf_retry: float | None,
+    cls_names: dict,
+    allowed_class_idx: frozenset[int] | None,
+    empty_cache_every: int = 0,
+    log_fn: Callable[[str], None] | None = None,
+    log_prefix: str | None = None,
+    tracking_alpha: float = 0.6,
+    tracking_max_lost_frames: int = 0,
+) -> dict[str, Any]:
+    """
+    先跑段内推理；若仍为「无有效耗材帧」则：
+    1) 可放宽好帧 top1 阈值（good_top1_retry_threshold）再试；
+    2) 再放宽耗材置信阈值（haocai_min_conf_retry）再试。
+    log_fn / log_prefix：重试时各打一行（如 log_prefix='段落 rank=3: '）。
+    """
+
+    def run(good_thr: float, haocai_thr: float) -> dict[str, Any]:
+        return process_segment_multi_hand_tear(
+            cap,
+            det,
+            fg,
+            grouper,
+            start_sec=start_sec,
+            end_sec=end_sec,
+            seek_margin_sec=seek_margin_sec,
+            det_conf=det_conf,
+            imgsz_det=imgsz_det,
+            frame_stride=frame_stride,
+            tracking_alpha=tracking_alpha,
+            tracking_max_lost_frames=tracking_max_lost_frames,
+            good_top1_conf_threshold=good_thr,
+            haocai_min_conf=haocai_thr,
+            cls_names=cls_names,
+            allowed_class_idx=allowed_class_idx,
+            empty_cache_every=empty_cache_every,
+        )
+
+    good_thr = float(good_top1_conf_threshold)
+    haocai_thr = float(haocai_min_conf)
+    info = run(good_thr, haocai_thr)
+
+    rgb = float(good_top1_retry_threshold)
+    if (
+        not info.get("ok")
+        and str(info.get("reason", "")) == REASON_NO_VALID_HAOCAI_FRAMES
+        and rgb > 0
+        and rgb < good_thr - 1e-12
+    ):
+        if log_fn and log_prefix:
+            log_fn(
+                f"{log_prefix}以 good_top1_conf_threshold={rgb} 重试本段（无有效耗材帧）…"
+            )
+        good_thr = rgb
+        info = run(good_thr, haocai_thr)
+
+    if (
+        haocai_min_conf_retry is not None
+        and haocai_min_conf_retry > 1e-12
+        and haocai_min_conf_retry < haocai_thr - 1e-12
+    ):
+        if (
+            not info.get("ok")
+            and str(info.get("reason", "")) == REASON_NO_VALID_HAOCAI_FRAMES
+        ):
+            h2 = float(haocai_min_conf_retry)
+            if log_fn and log_prefix:
+                log_fn(
+                    f"{log_prefix}以 haocai_min_conf={h2} 重试本段（无有效耗材帧）…"
+                )
+            info = run(good_thr, h2)
+
+    return info
+
+
+class HaocaiOnlyClassifier:
+    """耗材分类（推流/TSV 离线）；可选好坏帧门控，无撕膜。"""
+
+    def __init__(
+        self,
+        cls_m: YOLO,
+        *,
+        cls_names: dict,
+        imgsz_cls: int,
+        predict_kw: dict[str, Any],
+        gb: YOLO | None = None,
+        gb_names: dict | None = None,
+    ) -> None:
+        self.cls_m = cls_m
+        self.cls_names = cls_names
+        self.imgsz_cls = imgsz_cls
+        self.predict_kw = predict_kw
+        self.gb = gb
+        self.gb_names = gb_names or {}
+
+    @property
+    def use_good_gate(self) -> bool:
+        return self.gb is not None
+
+    def passes_good(self, crop: np.ndarray, good_top1_conf_threshold: float) -> bool:
+        if self.gb is None:
+            return True
+        return passes_good_gate_top1_conf_kw(
+            self.gb,
+            crop,
+            self.gb_names,
+            self.imgsz_cls,
+            good_top1_conf_threshold,
+            self.predict_kw,
+        )
+
+    def haocai_label_top_prob(
+        self,
+        crop: np.ndarray,
+        n_cls: int,
+        allowed_class_idx: frozenset[int] | None,
+        haocai_min_conf: float,
+    ) -> tuple[str, float] | None:
+        if crop.size == 0:
+            return None
+        r = self.cls_m.predict(crop, imgsz=self.imgsz_cls, verbose=False, **self.predict_kw)[0]
+        pr = r.probs
+        if pr is None or pr.data is None:
+            return None
+        v = pr.data.detach().float().cpu().numpy().astype(np.float64).ravel()
+        if v.size < n_cls:
+            v = np.resize(v, n_cls)
+        v = v[:n_cls].copy()
+        s = float(np.sum(v))
+        if s <= 1e-12:
+            return None
+        if abs(s - 1.0) > 0.08:
+            v = v - float(np.max(v))
+            e = np.exp(np.clip(v, -40.0, 40.0))
+            vec_raw = e / float(np.sum(e))
+        else:
+            vec_raw = v / s
+        if allowed_class_idx is not None:
+            vec = mask_probs_whitelist(vec_raw, allowed_class_idx, n_cls)
+        else:
+            vec = vec_raw
+        if vec is None:
+            return None
+        top_prob = float(np.max(vec))
+        if top_prob <= haocai_min_conf:
+            return None
+        label = int(np.argmax(vec))
+        return _cls_name(self.cls_names, label), top_prob
+
+
+def _haocai_fail_reason(hc: HaocaiOnlyClassifier) -> str:
+    if hc.use_good_gate:
+        return REASON_NO_VALID_HAOCAI_FRAMES
+    return REASON_NO_VALID_HAOCAI_FRAMES_STREAM
+
+
+def process_segment_haocai_from_frames(
+    frames: list[tuple[float, np.ndarray]],
+    det: YOLO,
+    hc: HaocaiOnlyClassifier,
+    *,
+    start_sec: float,
+    end_sec: float,
+    det_conf: float,
+    pad_ratio: float,
+    imgsz_det: int,
+    frame_stride: int,
+    haocai_min_conf: float,
+    good_top1_conf_threshold: float = 0.9,
+    cls_names: dict,
+    allowed_class_idx: frozenset[int] | None,
+    predict_kw: dict[str, Any] | None = None,
+) -> dict[str, Any]:
+    """
+    对内存中的帧列表做耗材识别（手 → 可选好帧 → haocai），不含撕膜。
+    frames: [(t_sec, bgr), ...] 已按时间过滤到 [start_sec, end_sec]。
+    """
+    if not frames:
+        return {"ok": False, "reason": "（段内无帧）", "pairs": [], "n_gate_pass": 0}
+
+    pred_kw = dict(predict_kw or {})
+    n_cls_key_max = max(int(k) for k in cls_names.keys())
+    n_cls = n_cls_key_max + 1
+
+    n_hand_frames = 0
+    n_gate_pass = 0
+    pairs: list[tuple[str, float]] = []
+    frames_in_segment = 0
+
+    def one_frame(fr: np.ndarray) -> None:
+        nonlocal frames_in_segment, n_hand_frames, n_gate_pass, pairs
+        frames_in_segment += 1
+        if frame_stride > 1 and (frames_in_segment - 1) % frame_stride != 0:
+            return
+
+        r0 = det.predict(fr, conf=det_conf, imgsz=imgsz_det, verbose=False, **pred_kw)[0]
+        hands = collect_hand_boxes(det, r0.boxes) if r0.boxes else []
+        crop = _crop_two_hands_union(fr, hands, pad_ratio)
+        if crop is None:
+            return
+
+        n_hand_frames += 1
+        if hc.use_good_gate and not hc.passes_good(crop, good_top1_conf_threshold):
+            del crop
+            return
+        n_gate_pass += 1
+        label_prob = hc.haocai_label_top_prob(
+            crop, n_cls, allowed_class_idx, haocai_min_conf
+        )
+        del crop
+        if label_prob is not None:
+            pairs.append(label_prob)
+
+    lo = float(start_sec)
+    hi = float(end_sec)
+    for t, fr in frames:
+        if t + 1e-6 < lo:
+            continue
+        if t > hi + 0.08:
+            break
+        one_frame(fr)
+
+    if n_hand_frames == 0:
+        return {"ok": False, "reason": "（段内未检测到手部）", "pairs": [], "n_gate_pass": 0}
+    if not pairs:
+        return {
+            "ok": False,
+            "reason": _haocai_fail_reason(hc),
+            "pairs": [],
+            "n_hand_frames": n_hand_frames,
+            "n_gate_pass": n_gate_pass,
+        }
+
+    n1, c1 = aggregate_top3_votes(pairs)
+    return {
+        "ok": True,
+        "top_names": n1,
+        "top_confs": c1,
+        "pairs": pairs,
+        "n_hand_frames": n_hand_frames,
+        "n_gate_pass": n_gate_pass,
+        "n_valid_haocai": len(pairs),
+    }
+
+
+def process_segment_haocai_from_cap(
+    cap: cv2.VideoCapture,
+    det: YOLO,
+    hc: HaocaiOnlyClassifier,
+    *,
+    start_sec: float,
+    end_sec: float,
+    seek_margin_sec: float,
+    det_conf: float,
+    pad_ratio: float,
+    imgsz_det: int,
+    frame_stride: int,
+    haocai_min_conf: float,
+    good_top1_conf_threshold: float = 0.9,
+    cls_names: dict,
+    allowed_class_idx: frozenset[int] | None,
+    predict_kw: dict[str, Any] | None = None,
+) -> dict[str, Any]:
+    """从视频逐帧解码做耗材识别（手 → 可选好帧 → haocai），不含撕膜。"""
+    probe_from = float(max(0.0, start_sec - seek_margin_sec))
+    cap.set(cv2.CAP_PROP_POS_MSEC, probe_from * 1000.0)
+    synced_frame: np.ndarray | None = None
+    synced_t: float | None = None
+    tol = 0.04
+    while True:
+        ok0, grab = cap.read()
+        if not ok0 or grab is None:
+            synced_frame, synced_t = None, None
+            break
+        t0 = float(cap.get(cv2.CAP_PROP_POS_MSEC)) / 1000.0
+        if t0 + tol >= start_sec:
+            synced_frame, synced_t = grab, t0
+            break
+
+    pred_kw = dict(predict_kw or {})
+    n_cls_key_max = max(int(k) for k in cls_names.keys())
+    n_cls = n_cls_key_max + 1
+
+    n_hand_frames = 0
+    n_gate_pass = 0
+    pairs: list[tuple[str, float]] = []
+    frames_in_segment = 0
+
+    def one_frame(fr: np.ndarray) -> None:
+        nonlocal frames_in_segment, n_hand_frames, n_gate_pass, pairs
+        frames_in_segment += 1
+        if frame_stride > 1 and (frames_in_segment - 1) % frame_stride != 0:
+            return
+
+        img_h, img_w = fr.shape[:2]
+        r0 = det.predict(fr, conf=det_conf, imgsz=imgsz_det, verbose=False, **pred_kw)[0]
+        hands = collect_hand_boxes(det, r0.boxes) if r0.boxes else []
+        crop = _crop_two_hands_union(fr, hands, pad_ratio)
+        if crop is None:
+            return
+
+        n_hand_frames += 1
+        if hc.use_good_gate and not hc.passes_good(crop, good_top1_conf_threshold):
+            del crop
+            return
+        n_gate_pass += 1
+        label_prob = hc.haocai_label_top_prob(
+            crop, n_cls, allowed_class_idx, haocai_min_conf
+        )
+        del crop
+        if label_prob is not None:
+            pairs.append(label_prob)
+
+    lo = float(start_sec)
+    hi = float(end_sec)
+
+    if synced_frame is not None and synced_t is not None and synced_t <= hi + 0.08:
+        if synced_t + 1e-6 >= lo:
+            one_frame(synced_frame)
+
+    while True:
+        ok, fr = cap.read()
+        if not ok or fr is None:
+            break
+        t = float(cap.get(cv2.CAP_PROP_POS_MSEC)) / 1000.0
+        if t > hi + 0.08:
+            break
+        if t + 1e-6 < lo:
+            continue
+        one_frame(fr)
+
+    if n_hand_frames == 0:
+        return {"ok": False, "reason": "（段内未检测到手部）", "pairs": [], "n_gate_pass": 0}
+    if not pairs:
+        return {
+            "ok": False,
+            "reason": _haocai_fail_reason(hc),
+            "pairs": [],
+            "n_hand_frames": n_hand_frames,
+            "n_gate_pass": n_gate_pass,
+        }
+
+    n1, c1 = aggregate_top3_votes(pairs)
+    return {
+        "ok": True,
+        "top_names": n1,
+        "top_confs": c1,
+        "pairs": pairs,
+        "n_hand_frames": n_hand_frames,
+        "n_gate_pass": n_gate_pass,
+        "n_valid_haocai": len(pairs),
+    }
+
+
+def _apply_haocai_gate_retries(
+    run: Callable[[float, float], dict[str, Any]],
+    *,
+    hc: HaocaiOnlyClassifier,
+    good_top1_conf_threshold: float,
+    good_top1_retry_threshold: float,
+    haocai_min_conf: float,
+    haocai_min_conf_retry: float | None,
+    log_fn: Callable[[str], None] | None = None,
+    log_prefix: str | None = None,
+) -> dict[str, Any]:
+    """Run one segment pass and retry with looser thresholds while it finds no valid frame.
+
+    ``run(good_thr, haocai_thr)`` performs a pass. A retry is attempted only when the latest
+    result is not ok and its ``reason`` equals ``_haocai_fail_reason(hc)``: first with the
+    good-frame retry threshold (only when ``hc.use_good_gate``; it must be > 0), then with the
+    haocai retry confidence (must be > 1e-12). Each retry value must also lie strictly below
+    the value it replaces. Returns the dict produced by the last pass that was executed.
+    """
+    no_valid_reason = _haocai_fail_reason(hc)
+    gate_thr = float(good_top1_conf_threshold)
+    cls_thr = float(haocai_min_conf)
+
+    def still_failing(res: dict[str, Any]) -> bool:
+        # Only the "no valid consumable frame" failure is worth retrying.
+        return (not res.get("ok")) and str(res.get("reason", "")) == no_valid_reason
+
+    result = run(gate_thr, cls_thr)
+
+    if hc.use_good_gate:
+        relaxed_gate = float(good_top1_retry_threshold)
+        if still_failing(result) and 0 < relaxed_gate < gate_thr - 1e-12:
+            if log_fn and log_prefix:
+                log_fn(f"{log_prefix}以 good_top1_conf_threshold={relaxed_gate} 重试本段（无有效耗材帧）…")
+            gate_thr = relaxed_gate
+            result = run(gate_thr, cls_thr)
+
+    relaxed_cls = haocai_min_conf_retry
+    if relaxed_cls is not None and 1e-12 < relaxed_cls < cls_thr - 1e-12:
+        if still_failing(result):
+            relaxed_cls = float(relaxed_cls)
+            if log_fn and log_prefix:
+                log_fn(f"{log_prefix}以 haocai_min_conf={relaxed_cls} 重试本段（无有效耗材帧）…")
+            result = run(gate_thr, relaxed_cls)
+    return result
+
+
+def process_segment_haocai_from_frames_with_gate_retries(
+    frames: list[tuple[float, np.ndarray]],
+    det: YOLO,
+    hc: HaocaiOnlyClassifier,
+    *,
+    start_sec: float,
+    end_sec: float,
+    det_conf: float,
+    pad_ratio: float,
+    imgsz_det: int,
+    frame_stride: int,
+    haocai_min_conf: float,
+    haocai_min_conf_retry: float | None,
+    good_top1_conf_threshold: float = 0.9,
+    good_top1_retry_threshold: float = 0.5,
+    cls_names: dict,
+    allowed_class_idx: frozenset[int] | None,
+    predict_kw: dict[str, Any] | None = None,
+    log_fn: Callable[[str], None] | None = None,
+    log_prefix: str | None = None,
+) -> dict[str, Any]:
+    """Segment recognition over a list of stream frames, with threshold-relaxing retries.
+
+    Every pass calls ``process_segment_haocai_from_frames`` on ``frames`` with the current
+    (good-frame, haocai) thresholds; ``_apply_haocai_gate_retries`` decides whether the pass
+    is repeated with the retry thresholds. Returns that helper's result dict.
+    """
+    # Arguments that stay the same across every pass.
+    fixed: dict[str, Any] = {
+        "start_sec": start_sec,
+        "end_sec": end_sec,
+        "det_conf": det_conf,
+        "pad_ratio": pad_ratio,
+        "imgsz_det": imgsz_det,
+        "frame_stride": frame_stride,
+        "cls_names": cls_names,
+        "allowed_class_idx": allowed_class_idx,
+        "predict_kw": predict_kw,
+    }
+
+    def one_pass(gate_thr: float, cls_thr: float) -> dict[str, Any]:
+        return process_segment_haocai_from_frames(
+            frames, det, hc, haocai_min_conf=cls_thr, good_top1_conf_threshold=gate_thr, **fixed
+        )
+
+    return _apply_haocai_gate_retries(
+        one_pass, hc=hc, log_fn=log_fn, log_prefix=log_prefix,
+        good_top1_conf_threshold=good_top1_conf_threshold,
+        good_top1_retry_threshold=good_top1_retry_threshold,
+        haocai_min_conf=haocai_min_conf,
+        haocai_min_conf_retry=haocai_min_conf_retry,
+    )
+
+
+def process_segment_haocai_from_cap_with_gate_retries(
+    cap: cv2.VideoCapture,
+    det: YOLO,
+    hc: HaocaiOnlyClassifier,
+    *,
+    start_sec: float,
+    end_sec: float,
+    seek_margin_sec: float,
+    det_conf: float,
+    pad_ratio: float,
+    imgsz_det: int,
+    frame_stride: int,
+    haocai_min_conf: float,
+    haocai_min_conf_retry: float | None,
+    good_top1_conf_threshold: float = 0.9,
+    good_top1_retry_threshold: float = 0.5,
+    cls_names: dict,
+    allowed_class_idx: frozenset[int] | None,
+    predict_kw: dict[str, Any] | None = None,
+    log_fn: Callable[[str], None] | None = None,
+    log_prefix: str | None = None,
+) -> dict[str, Any]:
+    """Segment recognition decoded from an offline video capture, with gate retries.
+
+    Every pass calls ``process_segment_haocai_from_cap`` on ``cap`` with the current
+    (good-frame, haocai) thresholds; ``_apply_haocai_gate_retries`` decides whether the pass
+    is repeated with the retry thresholds. Returns that helper's result dict.
+    """
+    # Arguments that stay the same across every pass.
+    fixed: dict[str, Any] = {
+        "start_sec": start_sec,
+        "end_sec": end_sec,
+        "seek_margin_sec": seek_margin_sec,
+        "det_conf": det_conf,
+        "pad_ratio": pad_ratio,
+        "imgsz_det": imgsz_det,
+        "frame_stride": frame_stride,
+        "cls_names": cls_names,
+        "allowed_class_idx": allowed_class_idx,
+        "predict_kw": predict_kw,
+    }
+
+    def one_pass(gate_thr: float, cls_thr: float) -> dict[str, Any]:
+        return process_segment_haocai_from_cap(
+            cap, det, hc, haocai_min_conf=cls_thr, good_top1_conf_threshold=gate_thr, **fixed
+        )
+
+    return _apply_haocai_gate_retries(
+        one_pass, hc=hc, log_fn=log_fn, log_prefix=log_prefix,
+        good_top1_conf_threshold=good_top1_conf_threshold,
+        good_top1_retry_threshold=good_top1_retry_threshold,
+        haocai_min_conf=haocai_min_conf,
+        haocai_min_conf_retry=haocai_min_conf_retry,
+    )
diff --git a/code/video_clip_cls/scripts/pipeline/tear_gate_merge.py b/code/video_clip_cls/scripts/pipeline/tear_gate_merge.py
new file mode 100644
index 0000000..ab47b5c
--- /dev/null
+++ b/code/video_clip_cls/scripts/pipeline/tear_gate_merge.py
@@ -0,0 +1,350 @@
+"""
+相邻成功行若 top1 相同：在下一段开头 head_sec 内统计「撕膜」高置信帧数；
+>= tear_min_frames 视为两次耗材（不合并），否则合并为一段。
+
+main_pipeline 内：默认在门控窗口内 **手检 → 双手 ROI（与 Phase2 相同合并策略）→ 撕膜分类**；
+若未传入 det/grouper 则退化为 **整帧** 撕膜（与旧 pack merge 脚本一致）。
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, replace
+from typing import Any
+
+import cv2
+from ultralytics import YOLO
+
+from .hand_roi_merge import HandRoiGrouper
+
+try:
+    from run_segments_consumable_vote import collect_hand_boxes
+except ImportError:  # 脚本独立运行时无 path
+    collect_hand_boxes = None  # type: ignore[misc, assignment]
+
+
+@dataclass
+class E2eRow:
+    """One 12-column result row: rank, time span, and three (id, name, confidence) triples."""
+
+    rank: int
+    start_sec: float
+    end_sec: float
+    id1: str
+    n1: str
+    c1: str
+    id2: str
+    n2: str
+    c2: str
+    id3: str
+    n3: str
+    c3: str
+
+    def is_success(self) -> bool:
+        """Return True when the first name is non-blank and its confidence parses as a float."""
+        if self.n1.strip() == "":
+            return False
+        try:
+            float(self.c1.strip())
+        except ValueError:
+            return False
+        return True
+
+    def to_line12(self, rank: int) -> str:
+        """Serialize as a tab-separated line, writing ``rank`` in place of the stored rank.
+
+        Times are formatted with six decimals; the remaining columns are emitted verbatim.
+        """
+        # The instance itself is left untouched; only the emitted rank differs.
+        columns = [
+            str(rank),
+            f"{self.start_sec:.6f}",
+            f"{self.end_sec:.6f}",
+            self.id1, self.n1, self.c1,
+            self.id2, self.n2, self.c2,
+            self.id3, self.n3, self.c3,
+        ]
+        return "\t".join(columns)
+
+
+def parse_e2e_rows_from_body_lines(lines: list[str]) -> list[E2eRow]:
+    """Parse tab-separated body lines (header excluded) into ``E2eRow`` objects.
+
+    Blank lines are skipped. Each row is padded with empty strings or truncated to 12
+    columns. A trailing line terminator is removed first so that it cannot leak into the
+    last column when callers pass lines taken straight from a file.
+
+    Raises:
+        ValueError: if rank, start or end cannot be parsed; the message carries the line
+            number counted from 2, since line 1 is presumably the header (TODO confirm).
+    """
+    rows: list[E2eRow] = []
+    for lineno, raw in enumerate(lines, start=2):
+        if not raw.strip():
+            continue
+        # Drop only the terminator; whitespace inside the fields is preserved.
+        line = raw.rstrip("\r\n")
+        cols = (line.split("\t") + [""] * 12)[:12]
+        try:
+            rank = int(cols[0])
+            start = float(cols[1])
+            end = float(cols[2])
+        except ValueError as ex:
+            raise ValueError(f"第{lineno}行解析失败: {line[:80]}...") from ex
+        (id1, n1, c1, id2, n2, c2, id3, n3, c3) = cols[3:]
+        rows.append(
+            E2eRow(
+                rank=rank, start_sec=start, end_sec=end,
+                id1=id1, n1=n1, c1=c1, id2=id2, n2=n2, c2=c2, id3=id3, n3=n3, c3=c3,
+            )
+        )
+    return rows
+
+
+def tear_class_index(model: YOLO, class_name: str) -> int:
+    """Index of ``class_name`` in ``model.names`` (exact, then case-insensitive); ValueError if absent."""
+    names: dict[int, str] = model.names  # type: ignore[assignment]
+    wanted = class_name.strip()  # model names are compared stripped, so strip the query as well
+    exact = [int(k) for k, v in names.items() if str(v).strip() == wanted]
+    folded = {str(v).strip().lower(): int(k) for k, v in names.items()}
+    if not exact and wanted.lower() not in folded:
+        raise ValueError(f"模型中无类别「{class_name}」，names={names}")
+    return exact[0] if exact else folded[wanted.lower()]
+
+
+def count_tearing_frames(
+    cap: cv2.VideoCapture,
+    window_start: float,
+    window_end: float,
+    yolo: YOLO,
+    tear_cls: int,
+    tear_prob: float,
+    imgsz: int,
+    *,
+    predict_kw: dict[str, Any] | None = None,
+    det: YOLO | None = None,
+    grouper: HandRoiGrouper | None = None,
+    imgsz_det: int = 640,
+    det_conf: float = 0.5,
+) -> int:
+    """Count frames in ``[window_start, window_end)`` whose P(tear_cls) reaches ``tear_prob``.
+
+    With ``det`` and ``grouper`` given (and ``collect_hand_boxes`` importable) hands are detected
+    first and the tear classifier runs per ROI: a frame counts once as soon as any ROI reaches
+    the threshold, and frames without hands are skipped. Otherwise the whole frame is classified.
+
+    Args:
+        cap: capture; it is re-positioned to ``window_start`` (clamped at 0) and read forward.
+        window_start: window start in seconds (inclusive).
+        window_end: window end in seconds (exclusive).
+        yolo: tear classifier; ``tear_cls`` indexes its ``probs.data``.
+        tear_prob: probability threshold, compared with a 1e-12 tolerance.
+        imgsz: ``imgsz`` for the classifier; ``imgsz_det`` is the one for the hand detector.
+        predict_kw: extra ``predict`` keywords merged into the options of both models.
+        det_conf: ``conf`` passed to the hand detector.
+
+    Returns:
+        Number of frames counted as tearing.
+    """
+    tear_opts: dict[str, Any] = {"imgsz": imgsz, "verbose": False}
+    det_opts: dict[str, Any] = {"imgsz": imgsz_det, "verbose": False}
+    if predict_kw:
+        tear_opts.update(predict_kw)
+        det_opts.update(predict_kw)
+    hand_mode = not (det is None or grouper is None or collect_hand_boxes is None)
+
+    def is_tear(image: Any) -> bool:
+        # One classifier call; a result without ``probs`` never counts as a hit.
+        probs = yolo.predict(image, **tear_opts)[0].probs
+        return probs is not None and float(probs.data[tear_cls].item()) >= tear_prob - 1e-12
+
+    cap.set(cv2.CAP_PROP_POS_MSEC, max(0.0, window_start) * 1000.0)
+    hits = 0
+    while True:
+        ok, frame = cap.read()
+        if not ok or frame is None:
+            break
+        t = float(cap.get(cv2.CAP_PROP_POS_MSEC)) / 1000.0
+        if t >= window_end - 1e-9:
+            break
+        if t + 1e-6 < window_start:
+            continue  # the seek may land before the window; skip until inside it
+        if not hand_mode:
+            if is_tear(frame):
+                hits += 1
+            continue
+        # Hand-ROI mode: detect hands, build ROIs, classify each ROI until one hits.
+        found = det.predict(frame, conf=det_conf, **det_opts)[0]  # type: ignore[union-attr]
+        hands = collect_hand_boxes(det, found.boxes) if found.boxes else []  # type: ignore[arg-type]
+        if not hands:
+            continue
+        rois = grouper.frame_to_rois(frame, hands)  # type: ignore[union-attr]
+        if any(is_tear(roi) for roi in rois if roi is not None and roi.size != 0):
+            hits += 1
+    return hits
+
+
+def merge_two_segments(a: E2eRow, b: E2eRow) -> E2eRow:
+    """Fuse two rows that share a top-1 name into one row spanning ``a.start_sec..b.end_sec``.
+
+    Top-1 keeps ``a``'s stripped name, the larger of both confidences (six decimals) and the
+    first non-blank id of ``a`` then ``b``. Top-2/3 are re-picked from the second and third
+    entries of both rows: entries with a blank name, a blank or unparseable confidence, or
+    the top-1 name are ignored; the rest are ordered by confidence (descending, stable) and
+    the first two distinct names win. The returned row has ``rank=0``.
+
+    Raises:
+        ValueError: if either top-1 confidence is not a valid float.
+    """
+    # Top-1: name from ``a``, best confidence of the two, first available id.
+    top1_name = a.n1.strip()
+    top1_conf = max(float(a.c1.strip()), float(b.c1.strip()))
+    top1_id = a.id1.strip() or b.id1.strip()
+
+    # Gather every usable runner-up entry from both rows.
+    pool: list[tuple[str, float, str]] = []
+    for row in (a, b):
+        for raw_name, raw_conf, raw_id in (
+            (row.n2, row.c2, row.id2),
+            (row.n3, row.c3, row.id3),
+        ):
+            name, conf_text = raw_name.strip(), raw_conf.strip()
+            if not (name and conf_text):
+                continue
+            try:
+                conf = float(conf_text)
+            except ValueError:
+                continue
+            if name != top1_name:
+                pool.append((name, conf, raw_id.strip()))
+
+    # Best confidence first; dict insertion order remembers the pick order per name.
+    chosen: dict[str, tuple[float, str]] = {}
+    for name, conf, pid in sorted(pool, key=lambda item: -item[1]):
+        if name in chosen:
+            continue
+        chosen[name] = (conf, pid)
+        if len(chosen) >= 2:
+            break
+
+    slots = [(name, f"{conf:.6f}", pid) for name, (conf, pid) in chosen.items()]
+    slots += [("", "", "")] * (2 - len(slots))  # pad missing top-2 / top-3 with blanks
+    (n2, c2, id2), (n3, c3, id3) = slots
+
+    return E2eRow(
+        rank=0,
+        start_sec=a.start_sec,
+        end_sec=b.end_sec,
+        id1=top1_id,
+        n1=top1_name,
+        c1=f"{top1_conf:.6f}",
+        id2=id2, n2=n2, c2=c2,
+        id3=id3, n3=n3, c3=c3,
+    )
+
+
+def one_pass_merge(
+    rows: list[E2eRow],
+    cap: cv2.VideoCapture,
+    yolo: YOLO,
+    tear_cls: int,
+    *,
+    head_sec: float,
+    tear_prob: float,
+    tear_min_frames: int,
+    imgsz: int,
+    predict_kw: dict[str, Any] | None,
+    verbose: bool,
+    det: YOLO | None = None,
+    grouper: HandRoiGrouper | None = None,
+    imgsz_det: int = 640,
+    det_conf: float = 0.5,
+) -> tuple[list[E2eRow], bool]:
+    """Scan adjacent rows once, merging same-top-1 neighbours that show no tear evidence.
+
+    For each adjacent pair of successful rows with the same stripped top-1 name, tearing frames
+    are counted in ``[b.start_sec, min(b.start_sec + head_sec, b.end_sec))``. With at least
+    ``tear_min_frames`` hits the rows stay separate; otherwise they are merged via
+    ``merge_two_segments``. A row kept separate is still compared with its own successor,
+    so in a run ``a, b, c`` the pair ``(b, c)`` is evaluated even when ``(a, b)`` is kept.
+
+    Returns:
+        ``(rows_after_pass, changed)``; ``changed`` is True when at least one merge happened.
+    """
+    out: list[E2eRow] = []
+    i = 0
+    changed = False
+    while i < len(rows):
+        a = rows[i]
+        if i + 1 >= len(rows):
+            out.append(a)
+            break
+        b = rows[i + 1]
+        if not (a.is_success() and b.is_success() and a.n1.strip() == b.n1.strip()):
+            out.append(a)
+            i += 1
+            continue
+        # Window: the head of the second row, truncated at that row's end.
+        w0 = b.start_sec
+        w1 = min(b.start_sec + head_sec, b.end_sec)
+        n_high = count_tearing_frames(
+            cap, w0, w1, yolo, tear_cls, tear_prob, imgsz,
+            predict_kw=predict_kw, det=det, grouper=grouper,
+            imgsz_det=imgsz_det, det_conf=det_conf,
+        )
+        if verbose:
+            mode = "hand_roi" if det is not None and grouper is not None else "full_frame"
+            print(
+                f"[tear_gate:{mode}] 窗口 [{w0:.3f},{w1:.3f})s（下一段起点起 head_sec={head_sec:g}s，截断至本段 end） "
+                f"P(tearing)>={tear_prob} 计数={n_high} (保留两段需>={tear_min_frames})",
+                flush=True,
+            )
+        if n_high >= tear_min_frames:
+            # Keep both rows, but advance by one only: ``b`` must still be compared with the
+            # row after it. Emitting ``b`` here and jumping by two would skip that comparison.
+            out.append(a)
+            i += 1
+        else:
+            out.append(merge_two_segments(a, b))
+            changed = True
+            i += 2
+    return out, changed
+
+
+def merge_all(
+    rows: list[E2eRow],
+    cap: cv2.VideoCapture,
+    yolo: YOLO,
+    tear_cls: int,
+    *,
+    head_sec: float,
+    tear_prob: float,
+    tear_min_frames: int,
+    imgsz: int,
+    predict_kw: dict[str, Any] | None = None,
+    verbose: bool = False,
+    det: YOLO | None = None,
+    grouper: HandRoiGrouper | None = None,
+    imgsz_det: int = 640,
+    det_conf: float = 0.5,
+) -> list[E2eRow]:
+    """Repeat ``one_pass_merge`` until a pass reports no merge; return the final rows.
+
+    All keyword arguments are forwarded unchanged to every pass.
+    """
+    options: dict[str, Any] = {
+        "head_sec": head_sec,
+        "tear_prob": tear_prob,
+        "tear_min_frames": tear_min_frames,
+        "imgsz": imgsz,
+        "predict_kw": predict_kw,
+        "verbose": verbose,
+        "det": det,
+        "grouper": grouper,
+        "imgsz_det": imgsz_det,
+        "det_conf": det_conf,
+    }
+    current, merged_any = one_pass_merge(rows, cap, yolo, tear_cls, **options)
+    while merged_any:
+        # A merge produces a new row with new neighbours, so rescan until nothing changes.
+        current, merged_any = one_pass_merge(current, cap, yolo, tear_cls, **options)
+    return current
diff --git a/code/video_clip_cls/scripts/run_haocai_actionformer_consumables_e2e.py b/code/video_clip_cls/scripts/run_haocai_actionformer_consumables_e2e.py
new file mode 100644
index 0000000..95a80d5
--- /dev/null
+++ b/code/video_clip_cls/scripts/run_haocai_actionformer_consumables_e2e.py
@@ -0,0 +1,839 @@
+#!/usr/bin/env python3
+"""
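+Single-video end-to-end run: VideoSwin features → ActionFormer segmentation → score-guided boundary trimming + score filter →
+hand detection + good frame (> threshold) + whitelist masking + consumable (softmax max > threshold) → per segment, the confidences of the valid frames are summed per class name and the **three highest totals** are kept,
+then those three totals are **normalized** into the top1~3 confidences (the three sum to 1; missing classes are filled with 0).
+Product ids come from the Excel column 「产品编码」.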
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import pickle
+import shutil
+import subprocess
+import sys
+import tempfile
+import time
+from collections import defaultdict
+from pathlib import Path
+from typing import Any
+
+import cv2
+import numpy as np
+from ultralytics import YOLO
+
+for _repo in Path(__file__).resolve().parents:  # walk upward, starting at this script's directory
+    if (_repo / "repo_root.py").is_file() and (_repo / "dataset.py").is_file():  # both marker files present
+        if str(_repo) not in sys.path:
+            sys.path.insert(0, str(_repo))  # makes the ``repo_root`` import below resolvable
+        break
+else:  # reached only when no ancestor directory held both marker files
+    raise RuntimeError("未定位到仓库 code/ 根目录")
+
+from repo_root import CODE_ROOT  # noqa: E402
+
+# Single-folder bundle: run.py sets HAOCAI_E2E_BUNDLE to the unpacked root directory; weights and the Excel sheet then come from inside the bundle, and ActionFormer lives in <bundle>/actionformer_release
+_BUNDLE_ENV = os.environ.get("HAOCAI_E2E_BUNDLE", "").strip()
+_BUNDLE_ROOT: Path | None = Path(_BUNDLE_ENV).resolve() if _BUNDLE_ENV else None  # None when the variable is unset or blank
+
+if _BUNDLE_ROOT is not None:  # bundled layout: defaults live under <bundle>/data and <bundle>/models
+    _DEFAULT_EXCEL = _BUNDLE_ROOT / "data" / "视频中的商品信息表.xlsx"
+    _DEFAULT_AF_CKPT = _BUNDLE_ROOT / "models" / "actionformer_epoch_045.pth.tar"
+    _DEFAULT_HAND = _BUNDLE_ROOT / "models" / "hand_detect.pt"
+    _DEFAULT_GOODBAD = _BUNDLE_ROOT / "models" / "goodbad_frame.pt"
+    _DEFAULT_HAOCAI = _BUNDLE_ROOT / "models" / "haocai_classify.pt"
+else:  # repository layout: defaults are resolved relative to CODE_ROOT
+    _DEFAULT_EXCEL = CODE_ROOT.parent / "data/haocai/视频中的商品信息表.xlsx"
+    _DEFAULT_AF_CKPT = (
+        CODE_ROOT
+        / "video_clip_cls/runs/actionformer_ckpt/haocai_main_perspective_videoswin_haocai_main_perspective_videoswin/epoch_045.pth.tar"
+    )
+    _DEFAULT_HAND = CODE_ROOT / "hand_detection/runs/hand_det_y11s_multiframe-better/weights/best.pt"
+    _DEFAULT_GOODBAD = CODE_ROOT / "goodORbad_frame/runs/goodbad_frame_y11m_e50/weights/best.pt"
+    _DEFAULT_HAOCAI = (
+        CODE_ROOT / "haocai_classify/runs/haocai_cls_41cls_goodframe_lastest-0.95/weights/best.pt"
+    )
+
+
+def _actionformer_release_dir() -> Path:
+    """Return ``actionformer_release`` under the bundle root if one is set, else under ``CODE_ROOT``."""
+    base = CODE_ROOT if _BUNDLE_ROOT is None else _BUNDLE_ROOT
+    return base / "actionformer_release"
+
+
+# Consumable voting: reuse the clip-inference helpers (infer_single_0506 is a flat directory, not a package)
+_SYS_INSERT = str(CODE_ROOT / "video_clip_cls" / "infer_single_0506")
+if _SYS_INSERT not in sys.path:
+    sys.path.insert(0, _SYS_INSERT)  # must happen before the import on the next line
+import run_segments_consumable_vote as _rsv  # noqa: E402
+
+collect_hand_boxes = _rsv.collect_hand_boxes  # module-level aliases for the helpers used below
+haocai_softmax_probs = _rsv.haocai_softmax_probs
+largest_hand = _rsv.largest_hand
+pad_box = _rsv.pad_box_bottom_only  # note: the alias drops the "bottom_only" part of the helper's name
+passes_good_gate_top1_conf = _rsv.passes_good_gate_top1_conf
+_cls_name = _rsv._cls_name
+
+try:
+    import pandas as pd
+except ImportError as e:
+    raise SystemExit("需要 pandas / openpyxl 读取 Excel：pip install pandas openpyxl") from e
+
+# ---------- VideoSwin parameters, kept in line with training / the previously used infer setup ----------
+FEAT_STRIDE_FRAMES = 8  # passed as --feat-stride-frames and written as feat_stride in the infer YAML
+CLIP_LEN = 16  # passed as --clip-len; num_frames in the infer YAML is CLIP_LEN * FRAME_STRIDE
+FRAME_STRIDE = 1  # passed as --frame-stride
+INPUT_DIM = 768  # written as input_dim in the infer YAML
+
+
+def log(msg: str) -> None:  # Print ``msg`` to stdout behind a local [HH:MM:SS] stamp, flushing at once.
+    print("[" + time.strftime("%H:%M:%S") + f"] {msg}", flush=True)
+
+
+def load_product_code_map(excel_path: Path) -> dict[str, str]:
+    """Map product name -> product code from the first sheet of ``excel_path``.
+
+    Uses the ``商品名称`` / ``产品编码`` header columns, else re-reads headerless and takes columns 2 / 1.
+    Blank names are skipped; the first occurrence of a name wins; a missing code becomes ``""``.
+    """
+    name_col, code_col = "商品名称", "产品编码"
+    table = pd.read_excel(excel_path, sheet_name=0, header=0)
+    if code_col not in table.columns or name_col not in table.columns:
+        table = pd.read_excel(excel_path, sheet_name=0, header=None)
+        code_col, name_col = table.columns[1], table.columns[2]
+    mapping: dict[str, str] = {}
+    for _, record in table.iterrows():
+        name, code = record[name_col], record[code_col]
+        key = "" if pd.isna(name) else str(name).strip()
+        if key and key not in mapping:
+            mapping[key] = "" if pd.isna(code) else str(code)
+    return mapping
+
+
+def mask_probs_whitelist(
+    probs: np.ndarray,
+    allowed: frozenset[int],
+    n_cls: int,
+) -> np.ndarray | None:
+    """Zero every class outside ``allowed`` and renormalize the remainder to sum to 1.
+
+    ``probs`` is flattened and cut to ``n_cls`` entries; a shorter vector is padded with zeros
+    (``np.resize`` would repeat it cyclically and invent mass for the missing classes).
+    Returns None when the whitelisted classes carry (almost) no probability.
+    """
+    flat = np.asarray(probs, dtype=np.float64).ravel()[:n_cls]
+    keep = np.array([i for i in allowed if 0 <= i < flat.size], dtype=np.intp)
+    vec = np.zeros(n_cls, dtype=np.float64)
+    vec[keep] = flat[keep]  # classes beyond the end of ``probs`` stay at zero
+    total = float(vec.sum())
+    return None if total < 1e-12 else vec / total
+
+
+def allowed_indices_from_json_names(
+    allowed_names: list[str], cls_names: dict
+) -> frozenset[int] | None:
+    """Translate whitelist names into class indices; None means "no restriction" (all classes).
+
+    Names are stripped before matching and the first class id carrying a name wins. None is
+    returned for an empty whitelist and, after a logged warning, when nothing matches.
+    """
+    if not allowed_names:
+        return None
+    first_index: dict[str, int] = {}
+    for key, value in cls_names.items():
+        label = str(value).strip()
+        if label and label not in first_index:
+            first_index[label] = int(key)
+    wanted = (str(name).strip() for name in allowed_names)
+    matched = frozenset(first_index[name] for name in wanted if name in first_index)
+    if not matched:
+        log("警告: allowed_names 与模型类名无交集，白名单裁剪将不生效（等同全类）。")
+    return matched or None
+
+
+def load_whitelist_json(path: Path) -> list[str]:
+    """Read whitelist names from JSON: an ``{"allowed_names": [...]}`` object or a bare array."""
+    payload = json.loads(path.read_text(encoding="utf-8"))
+    if isinstance(payload, dict) and "allowed_names" in payload:
+        payload = payload["allowed_names"]
+    elif not isinstance(payload, list):
+        raise ValueError("白名单 JSON 应为 {\"allowed_names\": [...]} 或名称数组")
+    names = (str(item).strip() for item in payload)  # stringify and trim every entry
+    return [name for name in names if name]  # blank entries are dropped
+
+
+def run_feature_extraction(
+    *,
+    python_exe: str,
+    data_root: Path,
+    output_dir: Path,
+    meta_file: Path,
+    device: str,
+    batch_size: int,
+) -> None:
+    """Run ``extract_videoswin_features.py`` in a subprocess with ``--max-videos 1`` and wait.
+
+    The child runs with ``CODE_ROOT`` as working directory and inherits the environment, with
+    ``OPENCV_FFMPEG_LOGLEVEL`` defaulting to ``"8"`` when it is not already set.
+
+    Raises:
+        RuntimeError: if the script exits with a non-zero status.
+    """
+    script = CODE_ROOT / "video_clip_cls" / "extract_videoswin_features.py"
+    flags = {
+        "--data-root": str(data_root),
+        "--output-dir": str(output_dir),
+        "--meta-file": str(meta_file),
+        "--device": device,
+        "--clip-len": str(CLIP_LEN),
+        "--frame-stride": str(FRAME_STRIDE),
+        "--feat-stride-frames": str(FEAT_STRIDE_FRAMES),
+        "--batch-size": str(batch_size),
+        "--max-videos": "1",
+    }
+    cmd = [python_exe, str(script)]
+    for flag, value in flags.items():
+        cmd += [flag, value]
+    log("运行 VideoSwin 特征提取…")
+    child_env = dict(os.environ)
+    child_env.setdefault("OPENCV_FFMPEG_LOGLEVEL", "8")
+    done = subprocess.run(cmd, cwd=str(CODE_ROOT), env=child_env, check=False)
+    if done.returncode != 0:
+        raise RuntimeError(f"特征提取失败，exit={done.returncode}")
+
+
+def write_infer_json(
+    out_path: Path,
+    video_id: str,
+    duration: float,
+    fps: float,
+) -> None:
+    """Write a one-video annotation JSON (subset ``val``) to ``out_path`` as UTF-8.
+
+    The database holds one entry for ``video_id`` with its duration, fps and a single ``Action``
+    annotation whose segment is ``[0.0, min(1.0, duration)]``.
+    """
+    # Key order below is the order in which the keys appear in the written file.
+    entry = {"subset": "val", "duration": float(duration), "fps": float(fps)}
+    entry["annotations"] = [{"segment": [0.0, min(1.0, duration)], "label": "Action", "label_id": 0}]
+    payload = {
+        "version": "haocai_infer_single_v1",
+        "taxonomy": [{"nodeName": "Action", "nodeId": 0}],
+        "database": {video_id: entry},
+    }
+    text = json.dumps(payload, ensure_ascii=False, indent=2)
+    out_path.write_text(text, encoding="utf-8")
+
+
+def write_infer_yaml(out_path: Path, json_file: Path, feat_folder: Path) -> None:  # Write the inference YAML (presumably the config handed to ActionFormer's eval.py) to out_path.
+    jf = str(json_file.resolve())  # absolute path, embedded in double quotes in the YAML below
+    ff = str(feat_folder.resolve())  # absolute path, embedded in double quotes in the YAML below
+    nf = CLIP_LEN * FRAME_STRIDE  # value written as num_frames
+    text = f"""dataset_name: thumos
+devices: [0]
+train_split: ['train']
+val_split: ['val']
+
+dataset:
+  json_file: "{jf}"
+  feat_folder: "{ff}"
+  file_prefix: null
+  file_ext: ".npy"
+  num_classes: 1
+  input_dim: {INPUT_DIM}
+  feat_stride: {FEAT_STRIDE_FRAMES}
+  num_frames: {nf}
+  default_fps: null
+  downsample_rate: 1
+  trunc_thresh: 0.5
+  crop_ratio: [0.9, 1.0]
+  max_seq_len: 2304
+  force_upsampling: false
+
+model:
+  fpn_type: identity
+  max_buffer_len_factor: 6.0
+  n_mha_win_size: 19
+  n_head: 4
+  embd_dim: 256
+  fpn_dim: 256
+  head_dim: 256
+  use_abs_pe: false
+
+loader:
+  batch_size: 1
+  num_workers: 2
+
+test_cfg:
+  voting_thresh: 0.75
+  pre_nms_topk: 4000
+  max_seg_num: 600
+  min_score: 0.001
+  iou_threshold: 0.1
+  duration_thresh: 0.05
+  nms_method: soft
+  nms_sigma: 0.5
+  multiclass_nms: true
+"""
+    out_path.write_text(text, encoding="utf-8")  # the file is written as UTF-8
+
+
+def run_actionformer_eval(
+    *,
+    python_exe: str,
+    yaml_path: Path,
+    ckpt_path: Path,
+    copy_pkl_to: Path,
+) -> None:
+    """Run ActionFormer ``eval.py --saveonly``, then copy ``eval_results.pkl`` (next to the checkpoint) to ``copy_pkl_to``."""
+    release_dir = _actionformer_release_dir()
+    argv = [python_exe, str(release_dir / "eval.py"), str(yaml_path), str(ckpt_path), "--saveonly"]
+    log("运行 ActionFormer eval（saveonly）…")
+    exit_code = subprocess.run(argv, cwd=str(release_dir), check=False).returncode
+    if exit_code != 0:
+        raise RuntimeError(f"ActionFormer eval 失败，exit={exit_code}")
+    produced = ckpt_path.parent / "eval_results.pkl"
+    if not produced.is_file():
+        raise FileNotFoundError(f"未找到输出: {produced}")
+    shutil.copy2(produced, copy_pkl_to)
+    log(f"已复制 eval_results.pkl -> {copy_pkl_to}")
+
+
+def segments_overlap(s0: float, e0: float, s1: float, e1: float) -> bool:
+    """Return True when ``[s0, e0]`` and ``[s1, e1]`` share more than 1e-6 of length."""
+    return min(e0, e1) - max(s0, s1) > 1e-6
+
+
+def greedy_mutual_exclusive(
+    items: list[tuple[float, float, float]],
+) -> list[tuple[float, float, float]]:
+    """Greedily keep the best-scoring ``(t_start, t_end, score)`` items that do not overlap.
+
+    Items are visited by descending score; one overlapping a kept item is dropped whole. Output is sorted by start.
+    """
+    kept: list[tuple[float, float, float]] = []
+    for s, e, sc in sorted(items, key=lambda it: -it[2]):
+        if not any(segments_overlap(s, e, ks, ke) for ks, ke, _ in kept):
+            kept.append((s, e, sc))
+    return sorted(kept, key=lambda it: it[0])
+
+
+_INTERVAL_EPS = 1e-6  # tolerance used in the interval length / overlap comparisons below
+_IOU_NMS_THRESHOLD = 0.4  # default ``iou_threshold`` of hybrid_nms_and_trimming
+_HYBRID_MIN_LEN = 1.5  # default ``min_len`` of hybrid_nms_and_trimming
+
+
+def segment_iou_1d(s0: float, e0: float, s1: float, e1: float) -> float:
+    """IoU of two time intervals; 0.0 when the overlap or the covered span is <= ``_INTERVAL_EPS``."""
+    # Clamp at zero so that disjoint intervals take the early exit.
+    overlap = max(0.0, min(e0, e1) - max(s0, s1))
+    if overlap <= _INTERVAL_EPS:
+        return 0.0
+    # For overlapping intervals the covered span is exactly the union length.
+    span = max(e0, e1) - min(s0, s1)
+    return 0.0 if span <= _INTERVAL_EPS else overlap / span
+
+
+def _subtract_interval(
+    s: float, e: float, ps: float, pe: float
+) -> list[tuple[float, float]]:
+    """Remove blocker ``[ps, pe]`` from ``[s, e]``; returns the 0-2 pieces that remain.
+
+    A blocker overlapping by at most ``_INTERVAL_EPS`` leaves ``[s, e]`` untouched.
+    """
+    if min(e, pe) - max(s, ps) <= _INTERVAL_EPS:
+        return [(s, e)]
+    left = [(s, min(e, ps))] if ps - s > _INTERVAL_EPS else []
+    right = [(max(s, pe), e)] if e - pe > _INTERVAL_EPS else []
+    return left + right
+
+
+def hybrid_nms_and_trimming(
+    items: list[tuple[float, float, float]],
+    iou_threshold: float = _IOU_NMS_THRESHOLD,
+    min_len: float = _HYBRID_MIN_LEN,
+) -> list[tuple[float, float, float]]:
+    """Post-process scored segments: IoU NMS, then boundary trimming, then a length filter.
+
+    Candidates are visited by descending score. One whose IoU with any kept segment exceeds
+    ``iou_threshold`` is dropped; otherwise every kept segment is cut out of it and each
+    remaining piece of at least ``min_len`` is kept with the candidate's score. The result
+    is sorted by start time.
+    """
+    kept: list[tuple[float, float, float]] = []
+    for s, e, sc in sorted(items, key=lambda it: -it[2]):
+        if e - s <= _INTERVAL_EPS:
+            continue  # degenerate (near zero-length) candidate
+        if any(
+            segment_iou_1d(s, e, ks, ke) > iou_threshold + _INTERVAL_EPS
+            for ks, ke, _ in kept
+        ):
+            continue  # near-duplicate of a better-scoring kept segment
+        pieces: list[tuple[float, float]] = [(s, e)]  # may split while kept segments are cut out
+        for ks, ke, _ in kept:
+            pieces = [part for fs, fe in pieces for part in _subtract_interval(fs, fe, ks, ke)]
+            if not pieces:
+                break
+        kept.extend((fs, fe, sc) for fs, fe in pieces if fe - fs >= min_len - _INTERVAL_EPS)
+    kept.sort(key=lambda it: it[0])
+    return kept
+
+
+def parse_actionformer_pkl(
+    pkl_path: Path, video_id: str
+) -> list[tuple[float, float, float]]:
+    """Load the ``(t_start, t_end, score)`` triples of ``video_id`` from a result pickle.
+
+    Reads the keys ``video-id``, ``t-start``, ``t-end``, ``score``; bytes ids are decoded as UTF-8.
+    """
+    # NOTE: pickle.load can execute arbitrary code; only feed this files produced by our own eval run.
+    with pkl_path.open("rb") as fh:
+        data: dict[str, Any] = pickle.load(fh)
+    ids = data["video-id"]
+    starts = np.asarray(data["t-start"]).reshape(-1)
+    ends = np.asarray(data["t-end"]).reshape(-1)
+    scores = np.asarray(data["score"]).reshape(-1)
+    segments: list[tuple[float, float, float]] = []
+    for i, raw_id in enumerate(np.asarray(ids).reshape(-1)):
+        text = raw_id.decode("utf-8", errors="replace") if isinstance(raw_id, bytes) else str(raw_id)
+        if text == video_id:
+            segments.append((float(starts[i]), float(ends[i]), float(scores[i])))
+    return segments
+
+
+def aggregate_top3_votes(
+    pairs: list[tuple[str, float]],
+) -> tuple[list[str], list[float]]:
+    """Rank classes by summed per-frame confidence and return the top three.
+
+    ``pairs`` holds ``(class name, confidence)`` per valid frame. Confidences are added up per
+    class; classes are ordered by that total (descending, ties broken by name). The totals
+    of the (up to) three leaders are normalized to sum to 1.
+
+    Returns:
+        ``(names, confs)``, both of length 3; unused slots are ``""`` / ``0.0``. All slots are
+        empty when ``pairs`` is empty or the leaders' total is not positive.
+    """
+    names: list[str] = ["", "", ""]
+    confs: list[float] = [0.0, 0.0, 0.0]
+    if not pairs:
+        return names, confs
+
+    # Accumulate one confidence total per class name.
+    totals: dict[str, float] = {}
+    for label, conf in pairs:
+        totals[label] = totals.get(label, 0.0) + float(conf)
+
+    # Highest total first; the name is the tie-breaker so the order is deterministic.
+    leaders = sorted(totals.items(), key=lambda kv: (-kv[1], kv[0]))[:3]
+    norm = float(sum(total for _, total in leaders))
+    if norm <= 0:
+        return names, confs
+    # Normalize over the leaders only, so the reported confidences sum to 1.
+    for slot, (label, total) in enumerate(leaders):
+        names[slot] = label
+        confs[slot] = float(total) / norm
+    return names, confs
+
+
+def process_segment_e2e(  # One segment: hand detection -> good-frame gate -> (whitelisted) consumable classification -> top-3 aggregation.
+    cap: cv2.VideoCapture,
+    det: YOLO,
+    gb: YOLO,
+    cls_m: YOLO,
+    *,
+    start_sec: float,
+    end_sec: float,
+    seek_margin_sec: float,
+    det_conf: float,
+    pad_ratio: float,
+    imgsz_det: int,
+    imgsz_cls: int,
+    frame_stride: int,
+    good_top1_conf_threshold: float,
+    haocai_min_conf: float,
+    gb_names: dict,
+    cls_names: dict,
+    allowed_class_idx: frozenset[int] | None,
+) -> dict[str, Any]:
+    probe_from = float(max(0.0, start_sec - seek_margin_sec))  # seek a little before the segment, never below 0
+    cap.set(cv2.CAP_PROP_POS_MSEC, probe_from * 1000.0)
+    synced_frame: np.ndarray | None = None
+    synced_t: float | None = None
+    tol = 0.04  # seconds of tolerance when matching the first frame to start_sec
+    while True:  # read forward until a frame at (or within tol before) start_sec shows up
+        ok0, grab = cap.read()
+        if not ok0 or grab is None:
+            synced_frame, synced_t = None, None
+            break
+        t0 = float(cap.get(cv2.CAP_PROP_POS_MSEC)) / 1000.0
+        if t0 + tol >= start_sec:
+            synced_frame, synced_t = grab, t0
+            break
+
+    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+
+    n_cls_key_max = max(int(k) for k in cls_names.keys())  # raises ValueError when cls_names is empty
+    n_cls = n_cls_key_max + 1  # class count derived from the largest key of cls_names
+
+    n_hand_frames = 0  # analysed frames in which at least one hand was found
+    n_gate_pass = 0  # of those, frames that passed the good-frame gate
+    pairs: list[tuple[str, float]] = []  # (class name, top probability) per valid frame
+    frames_read_in_segment = 0
+
+    def one_frame(fr: np.ndarray) -> None:
+        nonlocal frames_read_in_segment, n_hand_frames, n_gate_pass, pairs
+        frames_read_in_segment += 1
+        if frame_stride > 1 and (frames_read_in_segment - 1) % frame_stride != 0:  # analyse only every frame_stride-th frame
+            return
+
+        r0 = det.predict(fr, conf=det_conf, imgsz=imgsz_det, verbose=False)[0]
+        hands = collect_hand_boxes(det, r0.boxes) if r0.boxes else []
+        if not hands:
+            return
+
+        n_hand_frames += 1
+        xyxy = largest_hand(hands)  # presumably the largest detected hand box; verify in run_segments_consumable_vote
+        x1, y1, x2, y2 = pad_box(xyxy, w, h, pad_ratio)
+        crop = fr[y1:y2, x1:x2]
+        if not passes_good_gate_top1_conf(
+            gb, crop, gb_names, imgsz_cls, good_top1_conf_threshold
+        ):
+            return
+        n_gate_pass += 1
+        vec_raw = haocai_softmax_probs(cls_m, crop, imgsz_cls, n_cls)
+        if vec_raw is None:
+            return
+        if allowed_class_idx is not None:
+            vec = mask_probs_whitelist(vec_raw, allowed_class_idx, n_cls)
+        else:
+            vec = vec_raw
+        if vec is None:
+            return
+        top_prob = float(np.max(vec))
+        if top_prob <= haocai_min_conf:  # the probability must be strictly above the threshold
+            return
+        label = int(np.argmax(vec))
+        pairs.append((_cls_name(cls_names, label), top_prob))
+
+    if synced_frame is not None and synced_t is not None and synced_t <= end_sec + 0.08:  # the frame found while syncing is handled first
+        one_frame(synced_frame)
+
+    while True:
+        ok, frame = cap.read()
+        if not ok or frame is None:
+            break
+        t = float(cap.get(cv2.CAP_PROP_POS_MSEC)) / 1000.0
+        if t > end_sec + 0.08:  # 0.08 s of slack past end_sec
+            break
+        if t + 1e-6 < start_sec:  # still before the segment
+            continue
+        one_frame(frame)
+
+    if n_hand_frames == 0:
+        return {"ok": False, "reason": "（段内未检测到手部）", "pairs": [], "n_gate_pass": 0}
+    if not pairs:
+        return {
+            "ok": False,
+            "reason": "（无有效耗材帧：好帧/白名单/耗材置信度未全部满足）",
+            "pairs": [],
+            "n_hand_frames": n_hand_frames,
+            "n_gate_pass": n_gate_pass,
+        }
+
+    n1, c1 = aggregate_top3_votes(pairs)  # top-3 names and their normalized confidences
+    return {
+        "ok": True,
+        "top_names": n1,
+        "top_confs": c1,
+        "pairs": pairs,
+        "n_hand_frames": n_hand_frames,
+        "n_gate_pass": n_gate_pass,
+        "n_valid": len(pairs),
+    }
+
+
+def duration_fps_from_meta(meta: dict, video_id: str) -> tuple[float, float]:
+    """Return ``(duration_sec, fps)`` for *video_id* from the extraction metadata.
+
+    Reads ``meta["videos"][video_id]`` and derives the duration as
+    ``total_frames / fps``.  Falls back to ``(300.0, 25.0)`` when the entry is
+    missing or empty, or when either value is not positive.
+    """
+    fallback = (300.0, 25.0)
+    entry = meta.get("videos", {}).get(video_id, {})
+    if not entry:
+        return fallback
+    frame_rate = float(entry.get("fps", 25.0))
+    frame_total = int(entry.get("total_frames", 0))
+    if not (frame_total > 0 and frame_rate > 0):
+        return fallback
+    return frame_total / frame_rate, frame_rate
+
+
+def main() -> int:
+    """Run the single-video pipeline: ActionFormer segments, then per-segment recognition.
+
+    Steps, in order: validate the input files, prepare a work directory, run
+    ``run_feature_extraction`` and ``run_actionformer_eval``, keep segments
+    whose score is above ``--af-min-score`` and pass them through
+    ``greedy_mutual_exclusive``, call ``process_segment_e2e`` for each segment
+    and write one tab-separated row per segment to ``--out``.
+
+    Every output row has the same 12 columns as the header.  For a segment
+    without a result the failure reason is stored in the ``top1_name`` column
+    and all other result columns are left blank.
+
+    Returns:
+        0 on success; 1 when the video, Excel sheet, whitelist JSON, a model
+        file or the extracted feature file is missing, or when the video
+        cannot be opened.
+    """
+    ap = argparse.ArgumentParser(description="ActionFormer 划段 + 耗材端到端（单视频）")
+    ap.add_argument("--video", type=Path, required=True, help="输入 MP4")
+    ap.add_argument("--whitelist-json", type=Path, required=True, help='{"allowed_names":["..."]}')
+    ap.add_argument(
+        "--excel",
+        type=Path,
+        default=_DEFAULT_EXCEL,
+        help="商品名称→产品编码",
+    )
+    ap.add_argument("--out", type=Path, required=True, help="输出制表符 TXT")
+    ap.add_argument(
+        "--work-dir",
+        type=Path,
+        default=None,
+        help="工作目录（默认临时目录；加 --keep-work-dir 可保留）",
+    )
+    ap.add_argument("--keep-work-dir", action="store_true")
+    ap.add_argument(
+        "--actionformer-ckpt",
+        type=Path,
+        default=_DEFAULT_AF_CKPT,
+    )
+    ap.add_argument(
+        "--hand-model",
+        type=Path,
+        default=_DEFAULT_HAND,
+    )
+    ap.add_argument(
+        "--goodbad-model",
+        type=Path,
+        default=_DEFAULT_GOODBAD,
+    )
+    ap.add_argument(
+        "--haocai-model",
+        type=Path,
+        default=_DEFAULT_HAOCAI,
+    )
+    ap.add_argument("--good-top1-conf-threshold", type=float, default=0.9)
+    ap.add_argument("--haocai-min-conf", type=float, default=0.8)
+    ap.add_argument("--af-min-score", type=float, default=0.1, help="划段保留 score 下限（不含等于）")
+    ap.add_argument("--det-conf", type=float, default=0.5)
+    ap.add_argument("--pad-ratio", type=float, default=0.30)
+    ap.add_argument("--imgsz-det", type=int, default=640)
+    ap.add_argument("--imgsz-cls", type=int, default=224)
+    ap.add_argument("--frame-stride", type=int, default=1)
+    ap.add_argument("--seek-margin-sec", type=float, default=3.0)
+    ap.add_argument("--feat-batch-size", type=int, default=1)
+    ap.add_argument("--device", type=str, default="cuda")
+    ap.add_argument(
+        "--python",
+        type=str,
+        default=sys.executable,
+        help="子进程 Python（建议 conda yolo 环境的 python）",
+    )
+    args = ap.parse_args()
+
+    # Fail fast on missing inputs before any expensive work starts.
+    video_path = args.video.resolve()
+    if not video_path.is_file():
+        log(f"找不到视频: {video_path}")
+        return 1
+    if not args.excel.is_file():
+        log(f"找不到 Excel: {args.excel}")
+        return 1
+    if not args.whitelist_json.is_file():
+        log(f"找不到白名单 JSON: {args.whitelist_json}")
+        return 1
+    for p, name in (
+        (args.actionformer_ckpt, "ActionFormer ckpt"),
+        (args.hand_model, "hand"),
+        (args.goodbad_model, "goodbad"),
+        (args.haocai_model, "haocai"),
+    ):
+        if not Path(p).is_file():
+            log(f"缺少{name}: {p}")
+            return 1
+
+    # Work directory: an explicit --work-dir, a kept temporary directory
+    # (--keep-work-dir), or a temporary directory removed on exit.
+    stem = video_path.stem
+    tmp_ctx: tempfile.TemporaryDirectory | None = None
+    if args.work_dir is not None:
+        work = Path(args.work_dir).resolve()
+        work.mkdir(parents=True, exist_ok=True)
+    elif args.keep_work_dir:
+        work = Path(tempfile.mkdtemp(prefix="haocai_e2e_"))
+        log(f"工作目录（保留）: {work}")
+    else:
+        tmp_ctx = tempfile.TemporaryDirectory(prefix="haocai_e2e_")
+        work = Path(tmp_ctx.name)
+
+    try:
+        product_map = load_product_code_map(args.excel.resolve())
+        allowed_names = load_whitelist_json(args.whitelist_json.resolve())
+
+        inp = work / "input"
+        feat_dir = work / "features"
+        inp.mkdir(parents=True, exist_ok=True)
+        feat_dir.mkdir(parents=True, exist_ok=True)
+
+        # Feature extraction is pointed at a directory that holds only this
+        # video, so copy it there unless it already is that file.
+        single_video = inp / video_path.name
+        if single_video.resolve() != video_path.resolve():
+            shutil.copy2(video_path, single_video)
+
+        meta_path = feat_dir / "meta.json"
+        run_feature_extraction(
+            python_exe=args.python,
+            data_root=inp,
+            output_dir=feat_dir,
+            meta_file=meta_path,
+            device=args.device,
+            batch_size=max(1, args.feat_batch_size),
+        )
+
+        meta = json.loads(meta_path.read_text(encoding="utf-8"))
+        duration, fps = duration_fps_from_meta(meta, stem)
+        if stem not in meta.get("videos", {}):
+            # Fallback: the metadata has no entry keyed by the file stem, so
+            # estimate fps and duration from the video itself via OpenCV.
+            log("meta 中未找到 video_id=stem，使用 OpenCV 估 duration…")
+            cap0 = cv2.VideoCapture(str(video_path))
+            if cap0.isOpened():
+                fps = float(cap0.get(cv2.CAP_PROP_FPS)) or fps
+                nfr = int(cap0.get(cv2.CAP_PROP_FRAME_COUNT))
+                cap0.release()
+                if fps > 0 and nfr > 0:
+                    duration = nfr / fps
+
+        npy_path = feat_dir / f"{stem}.npy"
+        if not npy_path.is_file():
+            log(f"特征文件不存在: {npy_path}")
+            return 1
+
+        json_path = work / "infer_single.json"
+        write_infer_json(json_path, stem, duration, fps)
+
+        yaml_path = work / "infer_single.yaml"
+        write_infer_yaml(yaml_path, json_path.resolve(), feat_dir.resolve())
+
+        pkl_dest = work / "eval_results.pkl"
+        run_actionformer_eval(
+            python_exe=args.python,
+            yaml_path=yaml_path.resolve(),
+            ckpt_path=args.actionformer_ckpt.resolve(),
+            copy_pkl_to=pkl_dest,
+        )
+
+        # Strictly-greater comparison: a score equal to the threshold is dropped.
+        raw_segs = parse_actionformer_pkl(pkl_dest, stem)
+        raw_segs = [(s, e, sc) for s, e, sc in raw_segs if sc > args.af_min_score]
+        segs = greedy_mutual_exclusive(raw_segs)
+        log(f"ActionFormer 候选 {len(raw_segs)} -> 互斥后 {len(segs)} 段（score>{args.af_min_score}）")
+
+        log("加载 YOLO 模型…")
+        det = YOLO(str(args.hand_model))
+        gb = YOLO(str(args.goodbad_model))
+        cls_m = YOLO(str(args.haocai_model))
+        gb_names = gb.names
+        cls_names = cls_m.names
+        allowed_idx = allowed_indices_from_json_names(allowed_names, cls_names)
+
+        cap = cv2.VideoCapture(str(video_path))
+        if not cap.isOpened():
+            log("无法打开视频")
+            return 1
+
+        sep = "\t"
+        columns = [
+            "rank",
+            "start_sec",
+            "end_sec",
+            "product_id_top1",
+            "top1_name",
+            "top1_conf",
+            "product_id_top2",
+            "top2_name",
+            "top2_conf",
+            "product_id_top3",
+            "top3_name",
+            "top3_conf",
+        ]
+        lines_out = [sep.join(columns)]
+
+        try:
+            for rank, (t0, t1, af_sc) in enumerate(segs, start=1):
+                log(f"段落 rank={rank} [{t0:.3f},{t1:.3f}] score={af_sc:.4f} …")
+                info = process_segment_e2e(
+                    cap,
+                    det,
+                    gb,
+                    cls_m,
+                    start_sec=t0,
+                    end_sec=t1,
+                    seek_margin_sec=args.seek_margin_sec,
+                    det_conf=args.det_conf,
+                    pad_ratio=args.pad_ratio,
+                    imgsz_det=args.imgsz_det,
+                    imgsz_cls=args.imgsz_cls,
+                    frame_stride=max(1, args.frame_stride),
+                    good_top1_conf_threshold=args.good_top1_conf_threshold,
+                    haocai_min_conf=args.haocai_min_conf,
+                    gb_names=gb_names,
+                    cls_names=cls_names,
+                    allowed_class_idx=allowed_idx,
+                )
+                if not info.get("ok"):
+                    # Failed segment: the reason goes into the top1_name
+                    # column.  Pad from the header length so the row never has
+                    # more or fewer fields than the header.
+                    failed = [
+                        str(rank),
+                        f"{t0:.6f}",
+                        f"{t1:.6f}",
+                        "",
+                        str(info.get("reason", "")),
+                    ]
+                    failed += [""] * (len(columns) - len(failed))
+                    lines_out.append(sep.join(failed))
+                    continue
+
+                n1, n2, n3 = info["top_names"]
+                c1, c2, c3 = info["top_confs"]
+                id1 = product_map.get(n1, "") if n1 else ""
+                id2 = product_map.get(n2, "") if n2 else ""
+                id3 = product_map.get(n3, "") if n3 else ""
+                for nm, pid in ((n1, id1), (n2, id2), (n3, id3)):
+                    if nm and not pid:
+                        log(f"警告: 商品表无名称「{nm}」，产品编码置空。")
+
+                lines_out.append(
+                    sep.join(
+                        [
+                            str(rank),
+                            f"{t0:.6f}",
+                            f"{t1:.6f}",
+                            id1,
+                            n1,
+                            f"{c1:.6f}" if n1 else "",
+                            id2,
+                            n2,
+                            f"{c2:.6f}" if n2 else "",
+                            id3,
+                            n3,
+                            f"{c3:.6f}" if n3 else "",
+                        ]
+                    )
+                )
+        finally:
+            cap.release()
+
+        args.out.parent.mkdir(parents=True, exist_ok=True)
+        args.out.write_text("\n".join(lines_out) + "\n", encoding="utf-8")
+        log(f"已写出: {args.out.resolve()}")
+        if args.work_dir is not None or args.keep_work_dir:
+            log(f"工作目录: {work}")
+    finally:
+        # Only the auto-created temporary directory is removed; an explicit or
+        # kept work directory is left on disk.
+        if tmp_ctx is not None:
+            tmp_ctx.cleanup()
+
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/configs/default_config.yaml b/configs/default_config.yaml
new file mode 100644
index 0000000..d775bb6
--- /dev/null
+++ b/configs/default_config.yaml
@@ -0,0 +1,99 @@
+# 篮子接触分段 + 段内耗材识别
+# 入口: main_basket.py / main_basket_stream.py / main_segments_offline.py
+
+io:
+  video: input/sample.mp4
+  excel: input/视频中的商品信息表.xlsx
+  out: output/result.txt
+  # false：段内耗材分类不做 Excel/JSON 白名单裁剪（全 41 类）；Excel 仍用于 product_id 映射
+  use_whitelist: false
+  # use_whitelist=true 时：null 从 excel C 列读白名单；或指定 {"allowed_names":[...]} JSON
+  whitelist_json: null
+
+weights:
+  hand: weights/hand_detect.pt
+  goodbad: weights/goodbad_frame.pt
+  haocai: weights/haocai_classify.pt
+
+runtime:
+  work_dir: null
+  keep_work_dir: false
+  python: null
+
+device:
+  type: cuda
+  half: false
+
+# 段内：手检 → 双手 ROI → 好坏帧门控 → 耗材分类
+phase2:
+  seek_margin_sec: 3.0
+  frame_stride: 1
+  det_conf: 0.6
+  # 双手 union 紧框后仅向下延伸（相对框高）
+  pad_bottom_ratio: 0.5
+  imgsz_det: 1920
+  # 段内需检测到至少两只手才裁 ROI（取最大两只 union）；merge_* 已不使用
+  merge_iou_gt: 0.0
+  merge_center_dist_max_px: null
+  merge_center_dist_max_frac_diag: null
+  tracking_alpha: 0.6
+  tracking_max_lost_frames: 0
+
+classification:
+  imgsz_cls: 224
+  good_top1_conf_threshold: 0.8
+  good_top1_retry_threshold: 0.6
+  haocai_min_conf: 0.8
+  haocai_min_conf_retry: 0.5
+  empty_cache_every: 0
+
+gap_merge:
+  enabled: true
+  max_gap_sec: 2.0
+
+output:
+  legacy_12_col_only: true
+
+doctor_identity:
+  enabled: true
+  checkpoint: doctor_identity_package/doctor_info.pth
+  labels_csv: doctor_identity_package/labels.csv
+  pose_min_detection_confidence: 0.30
+  min_identity_confidence: 0.00
+  middle_seconds: 10.0
+  sample_fps: 3.0
+  pad_frac: 0.15
+
+# 篮子接触分段（main_basket.py / main_basket_stream.py）
+basket:
+  det_conf: 0.6
+  contact_iou_threshold: 0.05
+  contact_iou_on: 0.03
+  contact_iou_off: 0.01
+  confirm_seconds: 0.1
+  cooldown_seconds: 3.0
+  segment_start_offset_sec: 1.0
+  segment_end_offset_sec: 6.0
+  min_segment_sec: 4.0
+  scan_frame_stride: 1
+  roi_frame: first
+  save_roi_json: null
+  load_roi_json: null
+  skip_roi_select: false
+  roi_backend: tkinter
+
+# 推流实时识别（main_basket_stream.py）
+# 接触判定 / 手检 imgsz / 好坏帧 / 耗材阈值：与离线共用 basket + phase2 + classification
+# 段内推理：本地 MP4 回源 4K + phase2.imgsz_det=1920（与离线一致）；RTSP/缓存 fallback 时 JPEG 宽≤1920
+stream:
+  rtsp: null
+  ring_buffer_sec: 10.0
+  cache_max_width: 1920
+  jpeg_quality: 85
+  fps: 25.0
+  # 段窗口与 basket 一致：[contact+1, contact+6]，时长 5s
+  segment_start_offset_sec: 1.0
+  segment_end_offset_sec: 6.0
+  min_segment_sec: 4.0
+  infer_source: file
+  infer_fallback: cache
diff --git a/doctor_identity_package/.mediapipe_models/pose_landmarker_lite.task b/doctor_identity_package/.mediapipe_models/pose_landmarker_lite.task
new file mode 100644
index 0000000..09576a9
Binary files /dev/null and b/doctor_identity_package/.mediapipe_models/pose_landmarker_lite.task differ
diff --git a/doctor_identity_package/doctor_info.pth b/doctor_identity_package/doctor_info.pth
new file mode 100644
index 0000000..d29b396
Binary files /dev/null and b/doctor_identity_package/doctor_info.pth differ
diff --git a/doctor_identity_package/infer_doctor_from_video.py b/doctor_identity_package/infer_doctor_from_video.py
new file mode 100644
index 0000000..22f9b3b
--- /dev/null
+++ b/doctor_identity_package/infer_doctor_from_video.py
@@ -0,0 +1,321 @@
+#!/usr/bin/env python3
+"""Infer doctor identity from one MP4 video.
+
+Pipeline:
+1) Take middle N seconds from input video.
+2) Run MediaPipe Pose to detect human bbox.
+3) Keep the best crop (largest bbox area).
+4) Run doctor ReID checkpoint classification head.
+5) Output one final doctor identity.
+"""
+
+from __future__ import annotations
+
+import argparse
+import csv
+import sys
+import urllib.request
+from pathlib import Path
+
+import cv2
+import mediapipe as mp
+import numpy as np
+import torch
+from PIL import Image
+from torchvision import transforms
+
+# Allow importing local training model definition when running directly.
+THIS_DIR = Path(__file__).resolve().parent
+if str(THIS_DIR) not in sys.path:
+    sys.path.insert(0, str(THIS_DIR))
+
+from train_reid_contrastive import ReIDEmbedModel  # noqa: E402
+
+# Short aliases for the MediaPipe Tasks classes used in this script.
+BaseOptions = mp.tasks.BaseOptions
+PoseLandmarker = mp.tasks.vision.PoseLandmarker
+PoseLandmarkerOptions = mp.tasks.vision.PoseLandmarkerOptions
+VisionRunningMode = mp.tasks.vision.RunningMode
+
+# Download URL and local file name of the pose landmarker model; used by
+# _ensure_pose_lite_model when no usable local copy exists.
+POSE_LITE_URL = (
+    "https://storage.googleapis.com/mediapipe-models/pose_landmarker/"
+    "pose_landmarker_lite/float16/1/pose_landmarker_lite.task"
+)
+POSE_LITE_NAME = "pose_landmarker_lite.task"
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse the command-line options of the doctor-identity script.
+
+    ``--video`` is required.  All other options have defaults; the checkpoint
+    and the labels CSV default to files located next to this script.
+    """
+    cli = argparse.ArgumentParser(
+        description="Input mp4 -> middle 10s pose crop -> doctor identity",
+    )
+    cli.add_argument("--video", type=Path, required=True, help="input .mp4 path")
+    # (flag, type, default, help) for each optional argument, in help order.
+    optional_specs = (
+        ("--checkpoint", Path, THIS_DIR / "doctor_info.pth", "doctor checkpoint path (.pth)"),
+        ("--labels-csv", Path, THIS_DIR / "labels.csv", "person_id to doctor name mapping csv"),
+        ("--middle-seconds", float, 10.0, "window length around video center in seconds"),
+        ("--sample-fps", float, 3.0, "sampling fps inside the middle window"),
+        ("--pad-frac", float, 0.15, "bbox padding ratio"),
+        ("--save-crop", Path, None, "optional path to save best cropped person image"),
+    )
+    for flag, value_type, default_value, help_text in optional_specs:
+        cli.add_argument(flag, type=value_type, default=default_value, help=help_text)
+    return cli.parse_args()
+
+
+def _ensure_pose_lite_model(model_dir: Path) -> Path:
+    """Return the path of the pose landmarker model, downloading it if needed.
+
+    An existing file larger than 10 000 bytes is treated as a usable copy and
+    returned as is.  Otherwise the model is fetched from ``POSE_LITE_URL``
+    into a ``.part`` file and moved onto the final path only after the
+    download completed, so an interrupted download cannot leave a truncated
+    file that passes the size check on the next run.
+
+    Args:
+        model_dir: Directory holding the model file; created when missing.
+
+    Raises:
+        OSError: Propagated from the download or the file operations
+            (``urllib.error.URLError`` is a subclass).
+    """
+    model_dir.mkdir(parents=True, exist_ok=True)
+    model_path = model_dir / POSE_LITE_NAME
+    if model_path.is_file() and model_path.stat().st_size > 10_000:
+        return model_path
+    print(f"[info] Downloading MediaPipe Pose model -> {model_path}", flush=True)
+    partial_path = model_path.with_name(model_path.name + ".part")
+    try:
+        urllib.request.urlretrieve(POSE_LITE_URL, partial_path)
+        partial_path.replace(model_path)
+    finally:
+        # No-op after a successful move; removes the leftover on failure.
+        partial_path.unlink(missing_ok=True)
+    return model_path
+
+
+def bbox_from_normalized_pose_landmarks(
+    w: int,
+    h: int,
+    landmark_list,
+) -> tuple[int, int, int, int] | None:
+    """Return the pixel box ``(x1, y1, x2, y2)`` enclosing all landmarks.
+
+    Each landmark's ``x`` is scaled by the image width ``w`` and its ``y`` by
+    the image height ``h``; the extremes are truncated with ``int``.  Returns
+    ``None`` when there are no landmarks.
+    """
+    if not landmark_list:
+        return None
+    pixel_points = [(float(pt.x) * w, float(pt.y) * h) for pt in landmark_list]
+    if not pixel_points:
+        return None
+    cols, rows = zip(*pixel_points)
+    return int(min(cols)), int(min(rows)), int(max(cols)), int(max(rows))
+
+
+def expand_bbox_with_padding(
+    x1: int,
+    y1: int,
+    x2: int,
+    y2: int,
+    image_w: int,
+    image_h: int,
+    pad_frac: float,
+) -> tuple[int, int, int, int]:
+    """Grow a box around its centre by ``pad_frac`` and clamp it to the image.
+
+    Width and height (each at least 1) are multiplied by ``1 + pad_frac``, the
+    corners are rounded and clipped to ``[0, image_w] x [0, image_h]``.  When
+    clipping leaves an empty box, a box at the image origin of at most 1x1
+    pixel is returned instead.
+    """
+    center_x = (x1 + x2) / 2.0
+    center_y = (y1 + y2) / 2.0
+    padded_w = max(1, x2 - x1) * (1.0 + pad_frac)
+    padded_h = max(1, y2 - y1) * (1.0 + pad_frac)
+    half_w = padded_w / 2.0
+    half_h = padded_h / 2.0
+
+    left = max(0, int(round(center_x - half_w)))
+    top = max(0, int(round(center_y - half_h)))
+    right = min(image_w, int(round(center_x + half_w)))
+    bottom = min(image_h, int(round(center_y + half_h)))
+
+    if right > left and bottom > top:
+        return left, top, right, bottom
+    return 0, 0, min(1, image_w), min(1, image_h)
+
+
+def sample_middle_timestamps(duration_sec: float, middle_seconds: float, sample_fps: float) -> list[float]:
+    """Return evenly spaced timestamps inside a window centred on the video.
+
+    The window is ``middle_seconds`` long, centred at ``duration_sec / 2`` and
+    clipped to ``[0, duration_sec]``.  Timestamps start at the window start
+    and advance by ``1 / sample_fps``; the window end itself is excluded.
+    Returns an empty list when any argument is not positive.
+    """
+    if sample_fps <= 0 or middle_seconds <= 0 or duration_sec <= 0:
+        return []
+    midpoint = duration_sec / 2.0
+    half_window = middle_seconds / 2.0
+    window_start = max(0.0, midpoint - half_window)
+    window_end = min(duration_sec, midpoint + half_window)
+    interval = 1.0 / sample_fps
+
+    # Small epsilon keeps a timestamp that lands on the window end out.
+    limit = window_end - 1e-6
+    stamps = []
+    cursor = window_start
+    while cursor < limit:
+        stamps.append(cursor)
+        cursor += interval
+    return stamps
+
+
+def pick_best_person_crop(
+    video_path: Path,
+    landmarker: PoseLandmarker,
+    middle_seconds: float,
+    sample_fps: float,
+    pad_frac: float,
+) -> np.ndarray:
+    """Return the largest padded person crop found in the video's middle window.
+
+    Frames are sampled at the timestamps from ``sample_middle_timestamps``.
+    For every pose the landmarker reports, the landmark bounding box is padded
+    by ``pad_frac`` and the crop with the largest area over all sampled frames
+    is kept.
+
+    Raises:
+        RuntimeError: The video cannot be opened, the middle window yields no
+            timestamps, or no person is detected in any sampled frame.
+    """
+    best_area = -1
+    best_crop: np.ndarray | None = None
+
+    cap = cv2.VideoCapture(str(video_path))
+    # try/finally so the capture is released even when frame conversion or
+    # the landmarker raises part-way through the loop.
+    try:
+        if not cap.isOpened():
+            raise RuntimeError(f"Cannot open video: {video_path}")
+
+        fps = float(cap.get(cv2.CAP_PROP_FPS) or 0.0)
+        frame_count = float(cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0.0)
+        duration_sec = frame_count / fps if fps > 1e-6 else 0.0
+        timestamps = sample_middle_timestamps(duration_sec, middle_seconds, sample_fps)
+        if not timestamps:
+            raise RuntimeError("No valid timestamps from middle window.")
+
+        for ts in timestamps:
+            cap.set(cv2.CAP_PROP_POS_MSEC, ts * 1000.0)
+            ok, frame = cap.read()
+            if not ok or frame is None:
+                continue
+            h, w = frame.shape[:2]
+            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+            mp_img = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb)
+            res = landmarker.detect(mp_img)
+            if not res.pose_landmarks:
+                continue
+
+            for lmk in res.pose_landmarks:
+                box = bbox_from_normalized_pose_landmarks(w, h, lmk)
+                if box is None:
+                    continue
+                ex1, ey1, ex2, ey2 = expand_bbox_with_padding(*box, w, h, pad_frac=pad_frac)
+                crop = frame[ey1:ey2, ex1:ex2]
+                if crop.size == 0:
+                    continue
+                area = int((ex2 - ex1) * (ey2 - ey1))
+                if area > best_area:
+                    best_area = area
+                    # Copy: the slice is a view into a frame that is replaced
+                    # on the next iteration.
+                    best_crop = crop.copy()
+    finally:
+        cap.release()
+
+    if best_crop is None:
+        raise RuntimeError("No person detected in the middle window.")
+    return best_crop
+
+
+def build_label_to_pid(pid_to_label: dict) -> dict[int, str]:
+    """Invert a ``pid -> label`` mapping into ``int(label) -> str(pid)``.
+
+    Entries whose label cannot be converted with ``int`` are skipped.  When
+    several pids share a label, the last one iterated wins.
+    """
+    inverted: dict[int, str] = {}
+    for person_key, class_label in pid_to_label.items():
+        try:
+            class_index = int(class_label)
+        except (TypeError, ValueError):
+            pass
+        else:
+            inverted[class_index] = str(person_key)
+    return inverted
+
+
+def load_name_mapping(labels_csv: Path) -> dict[str, str]:
+    """Load the ``person_id`` -> doctor-name mapping from the labels CSV.
+
+    Reads the ``person_id`` and ``医生姓名`` columns (UTF-8, optional BOM).  For
+    each ``person_id`` the first row with both values non-empty wins.
+
+    Returns:
+        The mapping, or an empty dict when the file does not exist.
+    """
+    if not labels_csv.is_file():
+        return {}
+    mapping: dict[str, str] = {}
+    with labels_csv.open("r", encoding="utf-8-sig", newline="") as f:
+        for row in csv.DictReader(f):
+            # DictReader fills the missing trailing fields of a short row with
+            # None; ``or ""`` stops those from becoming the string "None",
+            # which would register a bogus name and shadow later valid rows.
+            pid = (row.get("person_id") or "").strip()
+            name = (row.get("医生姓名") or "").strip()
+            if pid and name:
+                mapping.setdefault(pid, name)
+    return mapping
+
+
+def run_inference(crop_bgr: np.ndarray, checkpoint_path: Path) -> tuple[str, float]:
+    """Classify one person crop and return ``(person_id, confidence)``.
+
+    Loads the checkpoint, rebuilds ``ReIDEmbedModel`` from its ``num_classes``
+    and ``model_state`` entries, runs the crop (BGR, converted to RGB and
+    resized to 256x128) through the model and maps the arg-max class back to a
+    person id via the checkpoint's ``pid_to_label`` table.  The confidence is
+    the softmax probability of the predicted class.
+
+    Raises:
+        FileNotFoundError: The checkpoint file does not exist.
+        RuntimeError: ``pid_to_label`` is not a dict, or the predicted class
+            has no entry in it.
+    """
+    if not checkpoint_path.is_file():
+        raise FileNotFoundError(f"Checkpoint not found: {checkpoint_path}")
+
+    use_cuda = torch.cuda.is_available()
+    device = torch.device("cuda" if use_cuda else "cpu")
+    # NOTE(review): weights_only=False unpickles arbitrary objects, so this
+    # must only ever be pointed at trusted checkpoint files.
+    state = torch.load(checkpoint_path, map_location=device, weights_only=False)
+    class_count = int(state["num_classes"])
+    pid_table = state.get("pid_to_label", {})
+    if not isinstance(pid_table, dict):
+        raise RuntimeError("Checkpoint missing valid pid_to_label dict.")
+
+    net = ReIDEmbedModel(num_classes=class_count, feat_dim=512).to(device)
+    net.load_state_dict(state["model_state"])
+    net.eval()
+
+    # Resize + ImageNet mean/std normalisation.
+    resize = transforms.Resize((256, 128))
+    to_tensor = transforms.ToTensor()
+    normalize = transforms.Normalize(
+        mean=[0.485, 0.456, 0.406],
+        std=[0.229, 0.224, 0.225],
+    )
+    preprocess = transforms.Compose([resize, to_tensor, normalize])
+
+    pil_crop = Image.fromarray(cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2RGB))
+    batch = preprocess(pil_crop).unsqueeze(0).to(device)
+
+    with torch.no_grad():
+        _, logits = net(batch)
+        probs = torch.softmax(logits, dim=1)
+        pred_label = int(torch.argmax(probs, dim=1).item())
+        conf = float(probs[0, pred_label].item())
+
+    raw_pid = build_label_to_pid(pid_table).get(pred_label)
+    if raw_pid is None:
+        raise RuntimeError(f"Predicted label {pred_label} not found in pid mapping.")
+    return raw_pid, conf
+
+
+def main() -> int:
+    """Command-line entry point: print the doctor identified in one video.
+
+    Picks the best person crop from the middle of the video, optionally saves
+    it, classifies it and prints the doctor name (or only the id when the
+    labels CSV has no name for it).
+
+    Returns:
+        0 on success, 2 when the video file does not exist, 1 on any other
+        error (the message is printed to stderr).
+    """
+    args = parse_args()
+    if not args.video.is_file():
+        print(f"[error] video not found: {args.video}", file=sys.stderr)
+        return 2
+
+    try:
+        model_path = _ensure_pose_lite_model(THIS_DIR / ".mediapipe_models")
+        landmarker = PoseLandmarker.create_from_options(
+            PoseLandmarkerOptions(
+                base_options=BaseOptions(model_asset_path=str(model_path)),
+                running_mode=VisionRunningMode.IMAGE,
+                min_pose_detection_confidence=0.3,
+            )
+        )
+        try:
+            person_crop = pick_best_person_crop(
+                video_path=args.video,
+                landmarker=landmarker,
+                middle_seconds=args.middle_seconds,
+                sample_fps=args.sample_fps,
+                pad_frac=args.pad_frac,
+            )
+        finally:
+            # The landmarker is only needed for crop selection.
+            landmarker.close()
+
+        crop_target = args.save_crop
+        if crop_target is not None:
+            crop_target.parent.mkdir(parents=True, exist_ok=True)
+            cv2.imwrite(str(crop_target), person_crop)
+
+        raw_pid, conf = run_inference(person_crop, args.checkpoint)
+        doctor_name = load_name_mapping(args.labels_csv).get(str(raw_pid), "")
+
+        if doctor_name:
+            summary = f"doctor={doctor_name} (id={raw_pid}, conf={conf:.4f})"
+        else:
+            summary = f"doctor_id={raw_pid} (conf={conf:.4f})"
+        print(summary)
+    except Exception as exc:  # noqa: BLE001
+        print(f"[error] {exc}", file=sys.stderr)
+        return 1
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/doctor_identity_package/labels.csv b/doctor_identity_package/labels.csv
new file mode 100644
index 0000000..c447e5f
--- /dev/null
+++ b/doctor_identity_package/labels.csv
@@ -0,0 +1,760 @@
+﻿filename,person_id,person_id_file,医生姓名,camera_id,global_index
+24502_c1_s1_00001.jpg,24502,24502,钟光喜,1,1
+24502_c1_s1_00002.jpg,24502,24502,钟光喜,1,2
+24502_c1_s1_00003.jpg,24502,24502,钟光喜,1,3
+24502_c1_s1_00004.jpg,24502,24502,钟光喜,1,4
+24502_c1_s1_00005.jpg,24502,24502,钟光喜,1,5
+24502_c1_s1_00006.jpg,24502,24502,钟光喜,1,6
+24502_c1_s1_00007.jpg,24502,24502,钟光喜,1,7
+24502_c1_s1_00008.jpg,24502,24502,钟光喜,1,8
+24502_c1_s1_00009.jpg,24502,24502,钟光喜,1,9
+24502_c1_s1_00010.jpg,24502,24502,钟光喜,1,10
+24502_c1_s1_00011.jpg,24502,24502,钟光喜,1,11
+24502_c1_s1_00012.jpg,24502,24502,钟光喜,1,12
+24502_c1_s1_00013.jpg,24502,24502,钟光喜,1,13
+24502_c1_s1_00014.jpg,24502,24502,钟光喜,1,14
+24502_c1_s1_00015.jpg,24502,24502,钟光喜,1,15
+24502_c1_s1_00016.jpg,24502,24502,钟光喜,1,16
+24502_c1_s1_00017.jpg,24502,24502,钟光喜,1,17
+24502_c1_s1_00018.jpg,24502,24502,钟光喜,1,18
+24502_c1_s1_00019.jpg,24502,24502,钟光喜,1,19
+24502_c1_s1_00020.jpg,24502,24502,钟光喜,1,20
+24502_c1_s1_00021.jpg,24502,24502,钟光喜,1,21
+24502_c1_s1_00022.jpg,24502,24502,钟光喜,1,22
+24502_c1_s1_00023.jpg,24502,24502,钟光喜,1,23
+24502_c1_s1_00024.jpg,24502,24502,钟光喜,1,24
+24502_c1_s1_00025.jpg,24502,24502,钟光喜,1,25
+24502_c1_s1_00026.jpg,24502,24502,钟光喜,1,26
+24502_c1_s1_00027.jpg,24502,24502,钟光喜,1,27
+24502_c1_s1_00028.jpg,24502,24502,钟光喜,1,28
+24502_c1_s1_00029.jpg,24502,24502,钟光喜,1,29
+24502_c1_s1_00030.jpg,24502,24502,钟光喜,1,30
+24502_c1_s1_00031.jpg,24502,24502,钟光喜,1,31
+24502_c1_s1_00032.jpg,24502,24502,钟光喜,1,32
+24502_c1_s1_00033.jpg,24502,24502,钟光喜,1,33
+24502_c1_s1_00034.jpg,24502,24502,钟光喜,1,34
+24502_c1_s1_00035.jpg,24502,24502,钟光喜,1,35
+24502_c1_s1_00036.jpg,24502,24502,钟光喜,1,36
+24502_c1_s1_00037.jpg,24502,24502,钟光喜,1,37
+24502_c1_s1_00038.jpg,24502,24502,钟光喜,1,38
+24502_c1_s1_00039.jpg,24502,24502,钟光喜,1,39
+24502_c1_s1_00040.jpg,24502,24502,钟光喜,1,40
+24502_c1_s1_00041.jpg,24502,24502,钟光喜,1,41
+24502_c1_s1_00042.jpg,24502,24502,钟光喜,1,42
+24502_c1_s1_00043.jpg,24502,24502,钟光喜,1,43
+24502_c1_s1_00044.jpg,24502,24502,钟光喜,1,44
+24502_c1_s1_00045.jpg,24502,24502,钟光喜,1,45
+24502_c2_s1_00046.jpg,24502,24502,钟光喜,2,46
+24502_c2_s1_00047.jpg,24502,24502,钟光喜,2,47
+24502_c2_s1_00048.jpg,24502,24502,钟光喜,2,48
+24502_c2_s1_00049.jpg,24502,24502,钟光喜,2,49
+24502_c2_s1_00050.jpg,24502,24502,钟光喜,2,50
+24502_c2_s1_00051.jpg,24502,24502,钟光喜,2,51
+24502_c2_s1_00052.jpg,24502,24502,钟光喜,2,52
+24502_c2_s1_00053.jpg,24502,24502,钟光喜,2,53
+24502_c2_s1_00054.jpg,24502,24502,钟光喜,2,54
+24502_c2_s1_00055.jpg,24502,24502,钟光喜,2,55
+24502_c2_s1_00056.jpg,24502,24502,钟光喜,2,56
+24502_c2_s1_00057.jpg,24502,24502,钟光喜,2,57
+24502_c2_s1_00058.jpg,24502,24502,钟光喜,2,58
+24502_c2_s1_00059.jpg,24502,24502,钟光喜,2,59
+24502_c2_s1_00060.jpg,24502,24502,钟光喜,2,60
+24502_c2_s1_00061.jpg,24502,24502,钟光喜,2,61
+24502_c2_s1_00062.jpg,24502,24502,钟光喜,2,62
+24502_c2_s1_00063.jpg,24502,24502,钟光喜,2,63
+24502_c2_s1_00064.jpg,24502,24502,钟光喜,2,64
+24502_c2_s1_00065.jpg,24502,24502,钟光喜,2,65
+24502_c2_s1_00066.jpg,24502,24502,钟光喜,2,66
+24502_c2_s1_00067.jpg,24502,24502,钟光喜,2,67
+24502_c2_s1_00068.jpg,24502,24502,钟光喜,2,68
+24502_c2_s1_00069.jpg,24502,24502,钟光喜,2,69
+24502_c2_s1_00070.jpg,24502,24502,钟光喜,2,70
+24502_c2_s1_00071.jpg,24502,24502,钟光喜,2,71
+24502_c2_s1_00072.jpg,24502,24502,钟光喜,2,72
+24502_c2_s1_00073.jpg,24502,24502,钟光喜,2,73
+24502_c2_s1_00074.jpg,24502,24502,钟光喜,2,74
+24502_c2_s1_00075.jpg,24502,24502,钟光喜,2,75
+24502_c2_s1_00076.jpg,24502,24502,钟光喜,2,76
+24502_c2_s1_00077.jpg,24502,24502,钟光喜,2,77
+24502_c2_s1_00078.jpg,24502,24502,钟光喜,2,78
+24502_c2_s1_00079.jpg,24502,24502,钟光喜,2,79
+24502_c2_s1_00080.jpg,24502,24502,钟光喜,2,80
+24502_c2_s1_00081.jpg,24502,24502,钟光喜,2,81
+24502_c2_s1_00082.jpg,24502,24502,钟光喜,2,82
+24502_c2_s1_00083.jpg,24502,24502,钟光喜,2,83
+24502_c2_s1_00084.jpg,24502,24502,钟光喜,2,84
+24502_c2_s1_00085.jpg,24502,24502,钟光喜,2,85
+24502_c2_s1_00086.jpg,24502,24502,钟光喜,2,86
+24502_c2_s1_00087.jpg,24502,24502,钟光喜,2,87
+24502_c2_s1_00088.jpg,24502,24502,钟光喜,2,88
+24502_c2_s1_00089.jpg,24502,24502,钟光喜,2,89
+24502_c2_s1_00090.jpg,24502,24502,钟光喜,2,90
+24502_c2_s1_00091.jpg,24502,24502,钟光喜,2,91
+24502_c2_s1_00092.jpg,24502,24502,钟光喜,2,92
+24502_c2_s1_00093.jpg,24502,24502,钟光喜,2,93
+24502_c2_s1_00094.jpg,24502,24502,钟光喜,2,94
+24502_c2_s1_00095.jpg,24502,24502,钟光喜,2,95
+24502_c2_s1_00096.jpg,24502,24502,钟光喜,2,96
+24502_c2_s1_00097.jpg,24502,24502,钟光喜,2,97
+24502_c2_s1_00098.jpg,24502,24502,钟光喜,2,98
+24502_c2_s1_00099.jpg,24502,24502,钟光喜,2,99
+24502_c2_s1_00100.jpg,24502,24502,钟光喜,2,100
+24502_c2_s1_00101.jpg,24502,24502,钟光喜,2,101
+24502_c2_s1_00102.jpg,24502,24502,钟光喜,2,102
+24502_c3_s1_00103.jpg,24502,24502,钟光喜,3,103
+24502_c3_s1_00104.jpg,24502,24502,钟光喜,3,104
+24502_c3_s1_00105.jpg,24502,24502,钟光喜,3,105
+24502_c3_s1_00106.jpg,24502,24502,钟光喜,3,106
+24502_c3_s1_00107.jpg,24502,24502,钟光喜,3,107
+24502_c3_s1_00108.jpg,24502,24502,钟光喜,3,108
+24502_c3_s1_00109.jpg,24502,24502,钟光喜,3,109
+24502_c3_s1_00110.jpg,24502,24502,钟光喜,3,110
+24502_c3_s1_00111.jpg,24502,24502,钟光喜,3,111
+24502_c3_s1_00112.jpg,24502,24502,钟光喜,3,112
+24502_c3_s1_00113.jpg,24502,24502,钟光喜,3,113
+24502_c3_s1_00114.jpg,24502,24502,钟光喜,3,114
+24502_c3_s1_00115.jpg,24502,24502,钟光喜,3,115
+24502_c3_s1_00116.jpg,24502,24502,钟光喜,3,116
+24502_c3_s1_00117.jpg,24502,24502,钟光喜,3,117
+24502_c3_s1_00118.jpg,24502,24502,钟光喜,3,118
+24502_c3_s1_00119.jpg,24502,24502,钟光喜,3,119
+24502_c3_s1_00120.jpg,24502,24502,钟光喜,3,120
+24502_c3_s1_00121.jpg,24502,24502,钟光喜,3,121
+24502_c3_s1_00122.jpg,24502,24502,钟光喜,3,122
+24502_c3_s1_00123.jpg,24502,24502,钟光喜,3,123
+24502_c3_s1_00124.jpg,24502,24502,钟光喜,3,124
+24502_c3_s1_00125.jpg,24502,24502,钟光喜,3,125
+24502_c3_s1_00126.jpg,24502,24502,钟光喜,3,126
+24502_c3_s1_00127.jpg,24502,24502,钟光喜,3,127
+24502_c3_s1_00128.jpg,24502,24502,钟光喜,3,128
+24502_c3_s1_00129.jpg,24502,24502,钟光喜,3,129
+24502_c3_s1_00130.jpg,24502,24502,钟光喜,3,130
+24502_c3_s1_00131.jpg,24502,24502,钟光喜,3,131
+24502_c3_s1_00132.jpg,24502,24502,钟光喜,3,132
+24502_c3_s1_00133.jpg,24502,24502,钟光喜,3,133
+24502_c3_s1_00134.jpg,24502,24502,钟光喜,3,134
+24502_c3_s1_00135.jpg,24502,24502,钟光喜,3,135
+24502_c3_s1_00136.jpg,24502,24502,钟光喜,3,136
+24502_c3_s1_00137.jpg,24502,24502,钟光喜,3,137
+24502_c3_s1_00138.jpg,24502,24502,钟光喜,3,138
+24502_c3_s1_00139.jpg,24502,24502,钟光喜,3,139
+24502_c3_s1_00140.jpg,24502,24502,钟光喜,3,140
+24502_c3_s1_00141.jpg,24502,24502,钟光喜,3,141
+24502_c3_s1_00142.jpg,24502,24502,钟光喜,3,142
+24502_c3_s1_00143.jpg,24502,24502,钟光喜,3,143
+24502_c3_s1_00144.jpg,24502,24502,钟光喜,3,144
+24502_c3_s1_00145.jpg,24502,24502,钟光喜,3,145
+24502_c3_s1_00146.jpg,24502,24502,钟光喜,3,146
+24503_c1_s1_00147.jpg,24503,24503,付玉峰,1,147
+24503_c1_s1_00148.jpg,24503,24503,付玉峰,1,148
+24503_c1_s1_00149.jpg,24503,24503,付玉峰,1,149
+24503_c1_s1_00150.jpg,24503,24503,付玉峰,1,150
+24503_c1_s1_00151.jpg,24503,24503,付玉峰,1,151
+24503_c1_s1_00152.jpg,24503,24503,付玉峰,1,152
+24503_c1_s1_00153.jpg,24503,24503,付玉峰,1,153
+24503_c1_s1_00154.jpg,24503,24503,付玉峰,1,154
+24503_c1_s1_00155.jpg,24503,24503,付玉峰,1,155
+24503_c1_s1_00156.jpg,24503,24503,付玉峰,1,156
+24503_c1_s1_00157.jpg,24503,24503,付玉峰,1,157
+24503_c1_s1_00158.jpg,24503,24503,付玉峰,1,158
+24503_c1_s1_00159.jpg,24503,24503,付玉峰,1,159
+24503_c1_s1_00160.jpg,24503,24503,付玉峰,1,160
+24503_c1_s1_00161.jpg,24503,24503,付玉峰,1,161
+24503_c1_s1_00162.jpg,24503,24503,付玉峰,1,162
+24503_c1_s1_00163.jpg,24503,24503,付玉峰,1,163
+24503_c1_s1_00164.jpg,24503,24503,付玉峰,1,164
+24503_c1_s1_00165.jpg,24503,24503,付玉峰,1,165
+24503_c1_s1_00166.jpg,24503,24503,付玉峰,1,166
+24503_c1_s1_00167.jpg,24503,24503,付玉峰,1,167
+24503_c1_s1_00168.jpg,24503,24503,付玉峰,1,168
+24503_c1_s1_00169.jpg,24503,24503,付玉峰,1,169
+24503_c1_s1_00170.jpg,24503,24503,付玉峰,1,170
+24503_c1_s1_00171.jpg,24503,24503,付玉峰,1,171
+24503_c1_s1_00172.jpg,24503,24503,付玉峰,1,172
+24503_c1_s1_00173.jpg,24503,24503,付玉峰,1,173
+24503_c1_s1_00174.jpg,24503,24503,付玉峰,1,174
+24503_c1_s1_00175.jpg,24503,24503,付玉峰,1,175
+24503_c1_s1_00176.jpg,24503,24503,付玉峰,1,176
+24503_c1_s1_00177.jpg,24503,24503,付玉峰,1,177
+24503_c1_s1_00178.jpg,24503,24503,付玉峰,1,178
+24503_c1_s1_00179.jpg,24503,24503,付玉峰,1,179
+24503_c1_s1_00180.jpg,24503,24503,付玉峰,1,180
+24503_c1_s1_00181.jpg,24503,24503,付玉峰,1,181
+24503_c1_s1_00182.jpg,24503,24503,付玉峰,1,182
+24503_c1_s1_00183.jpg,24503,24503,付玉峰,1,183
+24503_c1_s1_00184.jpg,24503,24503,付玉峰,1,184
+24503_c1_s1_00185.jpg,24503,24503,付玉峰,1,185
+24503_c1_s1_00186.jpg,24503,24503,付玉峰,1,186
+24503_c1_s1_00187.jpg,24503,24503,付玉峰,1,187
+24503_c2_s1_00188.jpg,24503,24503,付玉峰,2,188
+24503_c2_s1_00189.jpg,24503,24503,付玉峰,2,189
+24503_c2_s1_00190.jpg,24503,24503,付玉峰,2,190
+24503_c2_s1_00191.jpg,24503,24503,付玉峰,2,191
+24503_c2_s1_00192.jpg,24503,24503,付玉峰,2,192
+24503_c2_s1_00193.jpg,24503,24503,付玉峰,2,193
+24503_c2_s1_00194.jpg,24503,24503,付玉峰,2,194
+24503_c2_s1_00195.jpg,24503,24503,付玉峰,2,195
+24503_c2_s1_00196.jpg,24503,24503,付玉峰,2,196
+24503_c2_s1_00197.jpg,24503,24503,付玉峰,2,197
+24503_c2_s1_00198.jpg,24503,24503,付玉峰,2,198
+24503_c2_s1_00199.jpg,24503,24503,付玉峰,2,199
+24503_c2_s1_00200.jpg,24503,24503,付玉峰,2,200
+24503_c2_s1_00201.jpg,24503,24503,付玉峰,2,201
+24503_c2_s1_00202.jpg,24503,24503,付玉峰,2,202
+24503_c2_s1_00203.jpg,24503,24503,付玉峰,2,203
+24503_c2_s1_00204.jpg,24503,24503,付玉峰,2,204
+24503_c2_s1_00205.jpg,24503,24503,付玉峰,2,205
+24503_c2_s1_00206.jpg,24503,24503,付玉峰,2,206
+24503_c2_s1_00207.jpg,24503,24503,付玉峰,2,207
+24503_c2_s1_00208.jpg,24503,24503,付玉峰,2,208
+24503_c2_s1_00209.jpg,24503,24503,付玉峰,2,209
+24503_c2_s1_00210.jpg,24503,24503,付玉峰,2,210
+24503_c2_s1_00211.jpg,24503,24503,付玉峰,2,211
+24503_c2_s1_00212.jpg,24503,24503,付玉峰,2,212
+24503_c2_s1_00213.jpg,24503,24503,付玉峰,2,213
+24503_c2_s1_00214.jpg,24503,24503,付玉峰,2,214
+24503_c2_s1_00215.jpg,24503,24503,付玉峰,2,215
+24503_c2_s1_00216.jpg,24503,24503,付玉峰,2,216
+24503_c2_s1_00217.jpg,24503,24503,付玉峰,2,217
+24503_c2_s1_00218.jpg,24503,24503,付玉峰,2,218
+24503_c2_s1_00219.jpg,24503,24503,付玉峰,2,219
+24503_c2_s1_00220.jpg,24503,24503,付玉峰,2,220
+24503_c2_s1_00221.jpg,24503,24503,付玉峰,2,221
+24503_c2_s1_00222.jpg,24503,24503,付玉峰,2,222
+24503_c2_s1_00223.jpg,24503,24503,付玉峰,2,223
+24503_c2_s1_00224.jpg,24503,24503,付玉峰,2,224
+24503_c2_s1_00225.jpg,24503,24503,付玉峰,2,225
+24503_c2_s1_00226.jpg,24503,24503,付玉峰,2,226
+24503_c2_s1_00227.jpg,24503,24503,付玉峰,2,227
+24503_c2_s1_00228.jpg,24503,24503,付玉峰,2,228
+24503_c2_s1_00229.jpg,24503,24503,付玉峰,2,229
+24503_c2_s1_00230.jpg,24503,24503,付玉峰,2,230
+24503_c2_s1_00231.jpg,24503,24503,付玉峰,2,231
+24503_c2_s1_00232.jpg,24503,24503,付玉峰,2,232
+24503_c2_s1_00233.jpg,24503,24503,付玉峰,2,233
+24503_c2_s1_00234.jpg,24503,24503,付玉峰,2,234
+24503_c2_s1_00235.jpg,24503,24503,付玉峰,2,235
+24503_c2_s1_00236.jpg,24503,24503,付玉峰,2,236
+24503_c2_s1_00237.jpg,24503,24503,付玉峰,2,237
+24503_c2_s1_00238.jpg,24503,24503,付玉峰,2,238
+24503_c2_s1_00239.jpg,24503,24503,付玉峰,2,239
+24503_c2_s1_00240.jpg,24503,24503,付玉峰,2,240
+24503_c2_s1_00241.jpg,24503,24503,付玉峰,2,241
+24503_c2_s1_00242.jpg,24503,24503,付玉峰,2,242
+24503_c2_s1_00243.jpg,24503,24503,付玉峰,2,243
+24503_c2_s1_00244.jpg,24503,24503,付玉峰,2,244
+24503_c3_s1_00245.jpg,24503,24503,付玉峰,3,245
+24503_c3_s1_00246.jpg,24503,24503,付玉峰,3,246
+24503_c3_s1_00247.jpg,24503,24503,付玉峰,3,247
+24503_c3_s1_00248.jpg,24503,24503,付玉峰,3,248
+24503_c3_s1_00249.jpg,24503,24503,付玉峰,3,249
+24503_c3_s1_00250.jpg,24503,24503,付玉峰,3,250
+24503_c3_s1_00251.jpg,24503,24503,付玉峰,3,251
+24503_c3_s1_00252.jpg,24503,24503,付玉峰,3,252
+24503_c3_s1_00253.jpg,24503,24503,付玉峰,3,253
+24503_c3_s1_00254.jpg,24503,24503,付玉峰,3,254
+24503_c3_s1_00255.jpg,24503,24503,付玉峰,3,255
+24503_c3_s1_00256.jpg,24503,24503,付玉峰,3,256
+24503_c3_s1_00257.jpg,24503,24503,付玉峰,3,257
+24503_c3_s1_00258.jpg,24503,24503,付玉峰,3,258
+24503_c3_s1_00259.jpg,24503,24503,付玉峰,3,259
+24503_c3_s1_00260.jpg,24503,24503,付玉峰,3,260
+24503_c3_s1_00261.jpg,24503,24503,付玉峰,3,261
+24503_c3_s1_00262.jpg,24503,24503,付玉峰,3,262
+24503_c3_s1_00263.jpg,24503,24503,付玉峰,3,263
+24503_c3_s1_00264.jpg,24503,24503,付玉峰,3,264
+24503_c3_s1_00265.jpg,24503,24503,付玉峰,3,265
+24503_c3_s1_00266.jpg,24503,24503,付玉峰,3,266
+24503_c3_s1_00267.jpg,24503,24503,付玉峰,3,267
+24503_c3_s1_00268.jpg,24503,24503,付玉峰,3,268
+24503_c3_s1_00269.jpg,24503,24503,付玉峰,3,269
+24503_c3_s1_00270.jpg,24503,24503,付玉峰,3,270
+24503_c3_s1_00271.jpg,24503,24503,付玉峰,3,271
+24503_c3_s1_00272.jpg,24503,24503,付玉峰,3,272
+24503_c3_s1_00273.jpg,24503,24503,付玉峰,3,273
+24503_c3_s1_00274.jpg,24503,24503,付玉峰,3,274
+24503_c3_s1_00275.jpg,24503,24503,付玉峰,3,275
+24503_c3_s1_00276.jpg,24503,24503,付玉峰,3,276
+24503_c3_s1_00277.jpg,24503,24503,付玉峰,3,277
+24503_c3_s1_00278.jpg,24503,24503,付玉峰,3,278
+24503_c3_s1_00279.jpg,24503,24503,付玉峰,3,279
+24503_c3_s1_00280.jpg,24503,24503,付玉峰,3,280
+24503_c3_s1_00281.jpg,24503,24503,付玉峰,3,281
+24503_c3_s1_00282.jpg,24503,24503,付玉峰,3,282
+24503_c3_s1_00283.jpg,24503,24503,付玉峰,3,283
+24503_c3_s1_00284.jpg,24503,24503,付玉峰,3,284
+24503_c3_s1_00285.jpg,24503,24503,付玉峰,3,285
+24503_c3_s1_00286.jpg,24503,24503,付玉峰,3,286
+24503_c3_s1_00287.jpg,24503,24503,付玉峰,3,287
+24503_c3_s1_00288.jpg,24503,24503,付玉峰,3,288
+24503_c3_s1_00289.jpg,24503,24503,付玉峰,3,289
+24503_c3_s1_00290.jpg,24503,24503,付玉峰,3,290
+24503_c3_s1_00291.jpg,24503,24503,付玉峰,3,291
+24503_c3_s1_00292.jpg,24503,24503,付玉峰,3,292
+24503_c3_s1_00293.jpg,24503,24503,付玉峰,3,293
+24503_c3_s1_00294.jpg,24503,24503,付玉峰,3,294
+24503_c3_s1_00295.jpg,24503,24503,付玉峰,3,295
+24503_c3_s1_00296.jpg,24503,24503,付玉峰,3,296
+24503_c3_s1_00297.jpg,24503,24503,付玉峰,3,297
+24503_c3_s1_00298.jpg,24503,24503,付玉峰,3,298
+24504_c1_s1_00299.jpg,24504,24504,李树华,1,299
+24504_c1_s1_00300.jpg,24504,24504,李树华,1,300
+24504_c1_s1_00301.jpg,24504,24504,李树华,1,301
+24504_c1_s1_00302.jpg,24504,24504,李树华,1,302
+24504_c1_s1_00303.jpg,24504,24504,李树华,1,303
+24504_c1_s1_00304.jpg,24504,24504,李树华,1,304
+24504_c1_s1_00305.jpg,24504,24504,李树华,1,305
+24504_c1_s1_00306.jpg,24504,24504,李树华,1,306
+24504_c1_s1_00307.jpg,24504,24504,李树华,1,307
+24504_c1_s1_00308.jpg,24504,24504,李树华,1,308
+24504_c1_s1_00309.jpg,24504,24504,李树华,1,309
+24504_c1_s1_00310.jpg,24504,24504,李树华,1,310
+24504_c1_s1_00311.jpg,24504,24504,李树华,1,311
+24504_c1_s1_00312.jpg,24504,24504,李树华,1,312
+24504_c1_s1_00313.jpg,24504,24504,李树华,1,313
+24504_c1_s1_00314.jpg,24504,24504,李树华,1,314
+24504_c1_s1_00315.jpg,24504,24504,李树华,1,315
+24504_c1_s1_00316.jpg,24504,24504,李树华,1,316
+24504_c1_s1_00317.jpg,24504,24504,李树华,1,317
+24504_c1_s1_00318.jpg,24504,24504,李树华,1,318
+24504_c1_s1_00319.jpg,24504,24504,李树华,1,319
+24504_c1_s1_00320.jpg,24504,24504,李树华,1,320
+24504_c1_s1_00321.jpg,24504,24504,李树华,1,321
+24504_c1_s1_00322.jpg,24504,24504,李树华,1,322
+24504_c1_s1_00323.jpg,24504,24504,李树华,1,323
+24504_c1_s1_00324.jpg,24504,24504,李树华,1,324
+24504_c1_s1_00325.jpg,24504,24504,李树华,1,325
+24504_c1_s1_00326.jpg,24504,24504,李树华,1,326
+24504_c1_s1_00327.jpg,24504,24504,李树华,1,327
+24504_c1_s1_00328.jpg,24504,24504,李树华,1,328
+24504_c1_s1_00329.jpg,24504,24504,李树华,1,329
+24504_c1_s1_00330.jpg,24504,24504,李树华,1,330
+24504_c1_s1_00331.jpg,24504,24504,李树华,1,331
+24504_c1_s1_00332.jpg,24504,24504,李树华,1,332
+24504_c1_s1_00333.jpg,24504,24504,李树华,1,333
+24504_c1_s1_00334.jpg,24504,24504,李树华,1,334
+24504_c1_s1_00335.jpg,24504,24504,李树华,1,335
+24504_c1_s1_00336.jpg,24504,24504,李树华,1,336
+24504_c1_s1_00337.jpg,24504,24504,李树华,1,337
+24504_c1_s1_00338.jpg,24504,24504,李树华,1,338
+24504_c1_s1_00339.jpg,24504,24504,李树华,1,339
+24504_c1_s1_00340.jpg,24504,24504,李树华,1,340
+24504_c2_s1_00341.jpg,24504,24504,李树华,2,341
+24504_c2_s1_00342.jpg,24504,24504,李树华,2,342
+24504_c2_s1_00343.jpg,24504,24504,李树华,2,343
+24504_c2_s1_00344.jpg,24504,24504,李树华,2,344
+24504_c2_s1_00345.jpg,24504,24504,李树华,2,345
+24504_c2_s1_00346.jpg,24504,24504,李树华,2,346
+24504_c2_s1_00347.jpg,24504,24504,李树华,2,347
+24504_c2_s1_00348.jpg,24504,24504,李树华,2,348
+24504_c2_s1_00349.jpg,24504,24504,李树华,2,349
+24504_c2_s1_00350.jpg,24504,24504,李树华,2,350
+24504_c2_s1_00351.jpg,24504,24504,李树华,2,351
+24504_c2_s1_00352.jpg,24504,24504,李树华,2,352
+24504_c2_s1_00353.jpg,24504,24504,李树华,2,353
+24504_c2_s1_00354.jpg,24504,24504,李树华,2,354
+24504_c2_s1_00355.jpg,24504,24504,李树华,2,355
+24504_c2_s1_00356.jpg,24504,24504,李树华,2,356
+24504_c2_s1_00357.jpg,24504,24504,李树华,2,357
+24504_c2_s1_00358.jpg,24504,24504,李树华,2,358
+24504_c2_s1_00359.jpg,24504,24504,李树华,2,359
+24504_c2_s1_00360.jpg,24504,24504,李树华,2,360
+24504_c2_s1_00361.jpg,24504,24504,李树华,2,361
+24504_c2_s1_00362.jpg,24504,24504,李树华,2,362
+24504_c2_s1_00363.jpg,24504,24504,李树华,2,363
+24504_c2_s1_00364.jpg,24504,24504,李树华,2,364
+24504_c2_s1_00365.jpg,24504,24504,李树华,2,365
+24504_c2_s1_00366.jpg,24504,24504,李树华,2,366
+24504_c2_s1_00367.jpg,24504,24504,李树华,2,367
+24504_c2_s1_00368.jpg,24504,24504,李树华,2,368
+24504_c2_s1_00369.jpg,24504,24504,李树华,2,369
+24504_c2_s1_00370.jpg,24504,24504,李树华,2,370
+24504_c2_s1_00371.jpg,24504,24504,李树华,2,371
+24504_c2_s1_00372.jpg,24504,24504,李树华,2,372
+24504_c2_s1_00373.jpg,24504,24504,李树华,2,373
+24504_c2_s1_00374.jpg,24504,24504,李树华,2,374
+24504_c2_s1_00375.jpg,24504,24504,李树华,2,375
+24504_c2_s1_00376.jpg,24504,24504,李树华,2,376
+24504_c2_s1_00377.jpg,24504,24504,李树华,2,377
+24504_c2_s1_00378.jpg,24504,24504,李树华,2,378
+24504_c2_s1_00379.jpg,24504,24504,李树华,2,379
+24504_c2_s1_00380.jpg,24504,24504,李树华,2,380
+24504_c2_s1_00381.jpg,24504,24504,李树华,2,381
+24504_c2_s1_00382.jpg,24504,24504,李树华,2,382
+24504_c3_s1_00383.jpg,24504,24504,李树华,3,383
+24504_c3_s1_00384.jpg,24504,24504,李树华,3,384
+24504_c3_s1_00385.jpg,24504,24504,李树华,3,385
+24504_c3_s1_00386.jpg,24504,24504,李树华,3,386
+24504_c3_s1_00387.jpg,24504,24504,李树华,3,387
+24504_c3_s1_00388.jpg,24504,24504,李树华,3,388
+24504_c3_s1_00389.jpg,24504,24504,李树华,3,389
+24504_c3_s1_00390.jpg,24504,24504,李树华,3,390
+24504_c3_s1_00391.jpg,24504,24504,李树华,3,391
+24504_c3_s1_00392.jpg,24504,24504,李树华,3,392
+24504_c3_s1_00393.jpg,24504,24504,李树华,3,393
+24504_c3_s1_00394.jpg,24504,24504,李树华,3,394
+24504_c3_s1_00395.jpg,24504,24504,李树华,3,395
+24504_c3_s1_00396.jpg,24504,24504,李树华,3,396
+24504_c3_s1_00397.jpg,24504,24504,李树华,3,397
+24504_c3_s1_00398.jpg,24504,24504,李树华,3,398
+24504_c3_s1_00399.jpg,24504,24504,李树华,3,399
+24504_c3_s1_00400.jpg,24504,24504,李树华,3,400
+24504_c3_s1_00401.jpg,24504,24504,李树华,3,401
+24504_c3_s1_00402.jpg,24504,24504,李树华,3,402
+24504_c3_s1_00403.jpg,24504,24504,李树华,3,403
+24504_c3_s1_00404.jpg,24504,24504,李树华,3,404
+24504_c3_s1_00405.jpg,24504,24504,李树华,3,405
+24504_c3_s1_00406.jpg,24504,24504,李树华,3,406
+24504_c3_s1_00407.jpg,24504,24504,李树华,3,407
+24504_c3_s1_00408.jpg,24504,24504,李树华,3,408
+24504_c3_s1_00409.jpg,24504,24504,李树华,3,409
+24504_c3_s1_00410.jpg,24504,24504,李树华,3,410
+24504_c3_s1_00411.jpg,24504,24504,李树华,3,411
+24504_c3_s1_00412.jpg,24504,24504,李树华,3,412
+24504_c3_s1_00413.jpg,24504,24504,李树华,3,413
+24504_c3_s1_00414.jpg,24504,24504,李树华,3,414
+24504_c3_s1_00415.jpg,24504,24504,李树华,3,415
+24504_c3_s1_00416.jpg,24504,24504,李树华,3,416
+24504_c3_s1_00417.jpg,24504,24504,李树华,3,417
+24504_c3_s1_00418.jpg,24504,24504,李树华,3,418
+24504_c3_s1_00419.jpg,24504,24504,李树华,3,419
+24504_c3_s1_00420.jpg,24504,24504,李树华,3,420
+24505_c1_s1_00421.jpg,24505,24505,刘杰,1,421
+24505_c1_s1_00422.jpg,24505,24505,刘杰,1,422
+24505_c1_s1_00423.jpg,24505,24505,刘杰,1,423
+24505_c1_s1_00424.jpg,24505,24505,刘杰,1,424
+24505_c1_s1_00425.jpg,24505,24505,刘杰,1,425
+24505_c1_s1_00426.jpg,24505,24505,刘杰,1,426
+24505_c1_s1_00427.jpg,24505,24505,刘杰,1,427
+24505_c1_s1_00428.jpg,24505,24505,刘杰,1,428
+24505_c1_s1_00429.jpg,24505,24505,刘杰,1,429
+24505_c1_s1_00430.jpg,24505,24505,刘杰,1,430
+24505_c1_s1_00431.jpg,24505,24505,刘杰,1,431
+24505_c1_s1_00432.jpg,24505,24505,刘杰,1,432
+24505_c1_s1_00433.jpg,24505,24505,刘杰,1,433
+24505_c1_s1_00434.jpg,24505,24505,刘杰,1,434
+24505_c1_s1_00435.jpg,24505,24505,刘杰,1,435
+24505_c1_s1_00436.jpg,24505,24505,刘杰,1,436
+24505_c1_s1_00437.jpg,24505,24505,刘杰,1,437
+24505_c1_s1_00438.jpg,24505,24505,刘杰,1,438
+24505_c1_s1_00439.jpg,24505,24505,刘杰,1,439
+24505_c1_s1_00440.jpg,24505,24505,刘杰,1,440
+24505_c1_s1_00441.jpg,24505,24505,刘杰,1,441
+24505_c1_s1_00442.jpg,24505,24505,刘杰,1,442
+24505_c1_s1_00443.jpg,24505,24505,刘杰,1,443
+24505_c1_s1_00444.jpg,24505,24505,刘杰,1,444
+24505_c1_s1_00445.jpg,24505,24505,刘杰,1,445
+24505_c1_s1_00446.jpg,24505,24505,刘杰,1,446
+24505_c1_s1_00447.jpg,24505,24505,刘杰,1,447
+24505_c1_s1_00448.jpg,24505,24505,刘杰,1,448
+24505_c1_s1_00449.jpg,24505,24505,刘杰,1,449
+24505_c1_s1_00450.jpg,24505,24505,刘杰,1,450
+24505_c1_s1_00451.jpg,24505,24505,刘杰,1,451
+24505_c1_s1_00452.jpg,24505,24505,刘杰,1,452
+24505_c1_s1_00453.jpg,24505,24505,刘杰,1,453
+24505_c1_s1_00454.jpg,24505,24505,刘杰,1,454
+24505_c1_s1_00455.jpg,24505,24505,刘杰,1,455
+24505_c1_s1_00456.jpg,24505,24505,刘杰,1,456
+24505_c1_s1_00457.jpg,24505,24505,刘杰,1,457
+24505_c1_s1_00458.jpg,24505,24505,刘杰,1,458
+24505_c1_s1_00459.jpg,24505,24505,刘杰,1,459
+24505_c1_s1_00460.jpg,24505,24505,刘杰,1,460
+24505_c1_s1_00461.jpg,24505,24505,刘杰,1,461
+24505_c1_s1_00462.jpg,24505,24505,刘杰,1,462
+24505_c1_s1_00463.jpg,24505,24505,刘杰,1,463
+24505_c1_s1_00464.jpg,24505,24505,刘杰,1,464
+24505_c1_s1_00465.jpg,24505,24505,刘杰,1,465
+24505_c1_s1_00466.jpg,24505,24505,刘杰,1,466
+24505_c1_s1_00467.jpg,24505,24505,刘杰,1,467
+24505_c1_s1_00468.jpg,24505,24505,刘杰,1,468
+24505_c1_s1_00469.jpg,24505,24505,刘杰,1,469
+24505_c1_s1_00470.jpg,24505,24505,刘杰,1,470
+24505_c1_s1_00471.jpg,24505,24505,刘杰,1,471
+24505_c1_s1_00472.jpg,24505,24505,刘杰,1,472
+24505_c1_s1_00473.jpg,24505,24505,刘杰,1,473
+24505_c1_s1_00474.jpg,24505,24505,刘杰,1,474
+24505_c1_s1_00475.jpg,24505,24505,刘杰,1,475
+24505_c1_s1_00476.jpg,24505,24505,刘杰,1,476
+24505_c1_s1_00477.jpg,24505,24505,刘杰,1,477
+24505_c1_s1_00478.jpg,24505,24505,刘杰,1,478
+24505_c1_s1_00479.jpg,24505,24505,刘杰,1,479
+24505_c1_s1_00480.jpg,24505,24505,刘杰,1,480
+24505_c1_s1_00481.jpg,24505,24505,刘杰,1,481
+24505_c1_s1_00482.jpg,24505,24505,刘杰,1,482
+24505_c1_s1_00483.jpg,24505,24505,刘杰,1,483
+24505_c2_s1_00484.jpg,24505,24505,刘杰,2,484
+24505_c2_s1_00485.jpg,24505,24505,刘杰,2,485
+24505_c2_s1_00486.jpg,24505,24505,刘杰,2,486
+24505_c2_s1_00487.jpg,24505,24505,刘杰,2,487
+24505_c2_s1_00488.jpg,24505,24505,刘杰,2,488
+24505_c2_s1_00489.jpg,24505,24505,刘杰,2,489
+24505_c2_s1_00490.jpg,24505,24505,刘杰,2,490
+24505_c2_s1_00491.jpg,24505,24505,刘杰,2,491
+24505_c2_s1_00492.jpg,24505,24505,刘杰,2,492
+24505_c2_s1_00493.jpg,24505,24505,刘杰,2,493
+24505_c2_s1_00494.jpg,24505,24505,刘杰,2,494
+24505_c2_s1_00495.jpg,24505,24505,刘杰,2,495
+24505_c2_s1_00496.jpg,24505,24505,刘杰,2,496
+24505_c2_s1_00497.jpg,24505,24505,刘杰,2,497
+24505_c2_s1_00498.jpg,24505,24505,刘杰,2,498
+24505_c2_s1_00499.jpg,24505,24505,刘杰,2,499
+24505_c2_s1_00500.jpg,24505,24505,刘杰,2,500
+24505_c2_s1_00501.jpg,24505,24505,刘杰,2,501
+24505_c2_s1_00502.jpg,24505,24505,刘杰,2,502
+24505_c2_s1_00503.jpg,24505,24505,刘杰,2,503
+24505_c2_s1_00504.jpg,24505,24505,刘杰,2,504
+24505_c2_s1_00505.jpg,24505,24505,刘杰,2,505
+24505_c2_s1_00506.jpg,24505,24505,刘杰,2,506
+24505_c2_s1_00507.jpg,24505,24505,刘杰,2,507
+24505_c2_s1_00508.jpg,24505,24505,刘杰,2,508
+24505_c2_s1_00509.jpg,24505,24505,刘杰,2,509
+24505_c2_s1_00510.jpg,24505,24505,刘杰,2,510
+24505_c2_s1_00511.jpg,24505,24505,刘杰,2,511
+24505_c2_s1_00512.jpg,24505,24505,刘杰,2,512
+24505_c2_s1_00513.jpg,24505,24505,刘杰,2,513
+24505_c2_s1_00514.jpg,24505,24505,刘杰,2,514
+24505_c2_s1_00515.jpg,24505,24505,刘杰,2,515
+24505_c2_s1_00516.jpg,24505,24505,刘杰,2,516
+24505_c2_s1_00517.jpg,24505,24505,刘杰,2,517
+24505_c2_s1_00518.jpg,24505,24505,刘杰,2,518
+24505_c2_s1_00519.jpg,24505,24505,刘杰,2,519
+24505_c2_s1_00520.jpg,24505,24505,刘杰,2,520
+24505_c2_s1_00521.jpg,24505,24505,刘杰,2,521
+24505_c2_s1_00522.jpg,24505,24505,刘杰,2,522
+24505_c2_s1_00523.jpg,24505,24505,刘杰,2,523
+24505_c2_s1_00524.jpg,24505,24505,刘杰,2,524
+24505_c2_s1_00525.jpg,24505,24505,刘杰,2,525
+24505_c2_s1_00526.jpg,24505,24505,刘杰,2,526
+24505_c2_s1_00527.jpg,24505,24505,刘杰,2,527
+24505_c2_s1_00528.jpg,24505,24505,刘杰,2,528
+24505_c2_s1_00529.jpg,24505,24505,刘杰,2,529
+24505_c2_s1_00530.jpg,24505,24505,刘杰,2,530
+24505_c2_s1_00531.jpg,24505,24505,刘杰,2,531
+24505_c2_s1_00532.jpg,24505,24505,刘杰,2,532
+24505_c2_s1_00533.jpg,24505,24505,刘杰,2,533
+24505_c2_s1_00534.jpg,24505,24505,刘杰,2,534
+24505_c2_s1_00535.jpg,24505,24505,刘杰,2,535
+24505_c2_s1_00536.jpg,24505,24505,刘杰,2,536
+24505_c2_s1_00537.jpg,24505,24505,刘杰,2,537
+24505_c2_s1_00538.jpg,24505,24505,刘杰,2,538
+24505_c2_s1_00539.jpg,24505,24505,刘杰,2,539
+24505_c2_s1_00540.jpg,24505,24505,刘杰,2,540
+24505_c2_s1_00541.jpg,24505,24505,刘杰,2,541
+24505_c2_s1_00542.jpg,24505,24505,刘杰,2,542
+24505_c2_s1_00543.jpg,24505,24505,刘杰,2,543
+24505_c2_s1_00544.jpg,24505,24505,刘杰,2,544
+24505_c2_s1_00545.jpg,24505,24505,刘杰,2,545
+24505_c2_s1_00546.jpg,24505,24505,刘杰,2,546
+24505_c2_s1_00547.jpg,24505,24505,刘杰,2,547
+24505_c2_s1_00548.jpg,24505,24505,刘杰,2,548
+24505_c2_s1_00549.jpg,24505,24505,刘杰,2,549
+24505_c2_s1_00550.jpg,24505,24505,刘杰,2,550
+24505_c2_s1_00551.jpg,24505,24505,刘杰,2,551
+24505_c2_s1_00552.jpg,24505,24505,刘杰,2,552
+24505_c2_s1_00553.jpg,24505,24505,刘杰,2,553
+24505_c2_s1_00554.jpg,24505,24505,刘杰,2,554
+24505_c2_s1_00555.jpg,24505,24505,刘杰,2,555
+24505_c3_s1_00556.jpg,24505,24505,刘杰,3,556
+24505_c3_s1_00557.jpg,24505,24505,刘杰,3,557
+24505_c3_s1_00558.jpg,24505,24505,刘杰,3,558
+24505_c3_s1_00559.jpg,24505,24505,刘杰,3,559
+24505_c3_s1_00560.jpg,24505,24505,刘杰,3,560
+24505_c3_s1_00561.jpg,24505,24505,刘杰,3,561
+24505_c3_s1_00562.jpg,24505,24505,刘杰,3,562
+24505_c3_s1_00563.jpg,24505,24505,刘杰,3,563
+24505_c3_s1_00564.jpg,24505,24505,刘杰,3,564
+24505_c3_s1_00565.jpg,24505,24505,刘杰,3,565
+24505_c3_s1_00566.jpg,24505,24505,刘杰,3,566
+24505_c3_s1_00567.jpg,24505,24505,刘杰,3,567
+24505_c3_s1_00568.jpg,24505,24505,刘杰,3,568
+24505_c3_s1_00569.jpg,24505,24505,刘杰,3,569
+24505_c3_s1_00570.jpg,24505,24505,刘杰,3,570
+24505_c3_s1_00571.jpg,24505,24505,刘杰,3,571
+24505_c3_s1_00572.jpg,24505,24505,刘杰,3,572
+24505_c3_s1_00573.jpg,24505,24505,刘杰,3,573
+24505_c3_s1_00574.jpg,24505,24505,刘杰,3,574
+24505_c3_s1_00575.jpg,24505,24505,刘杰,3,575
+24505_c3_s1_00576.jpg,24505,24505,刘杰,3,576
+24505_c3_s1_00577.jpg,24505,24505,刘杰,3,577
+24505_c3_s1_00578.jpg,24505,24505,刘杰,3,578
+24505_c3_s1_00579.jpg,24505,24505,刘杰,3,579
+24505_c3_s1_00580.jpg,24505,24505,刘杰,3,580
+24505_c3_s1_00581.jpg,24505,24505,刘杰,3,581
+24505_c3_s1_00582.jpg,24505,24505,刘杰,3,582
+24505_c3_s1_00583.jpg,24505,24505,刘杰,3,583
+24505_c3_s1_00584.jpg,24505,24505,刘杰,3,584
+24505_c3_s1_00585.jpg,24505,24505,刘杰,3,585
+24505_c3_s1_00586.jpg,24505,24505,刘杰,3,586
+24505_c3_s1_00587.jpg,24505,24505,刘杰,3,587
+24505_c3_s1_00588.jpg,24505,24505,刘杰,3,588
+24505_c3_s1_00589.jpg,24505,24505,刘杰,3,589
+24505_c3_s1_00590.jpg,24505,24505,刘杰,3,590
+24505_c3_s1_00591.jpg,24505,24505,刘杰,3,591
+24505_c3_s1_00592.jpg,24505,24505,刘杰,3,592
+24505_c3_s1_00593.jpg,24505,24505,刘杰,3,593
+24505_c3_s1_00594.jpg,24505,24505,刘杰,3,594
+24505_c3_s1_00595.jpg,24505,24505,刘杰,3,595
+24505_c3_s1_00596.jpg,24505,24505,刘杰,3,596
+24505_c3_s1_00597.jpg,24505,24505,刘杰,3,597
+24505_c3_s1_00598.jpg,24505,24505,刘杰,3,598
+24505_c3_s1_00599.jpg,24505,24505,刘杰,3,599
+24505_c3_s1_00600.jpg,24505,24505,刘杰,3,600
+24505_c3_s1_00601.jpg,24505,24505,刘杰,3,601
+24505_c3_s1_00602.jpg,24505,24505,刘杰,3,602
+24505_c3_s1_00603.jpg,24505,24505,刘杰,3,603
+24505_c3_s1_00604.jpg,24505,24505,刘杰,3,604
+24505_c3_s1_00605.jpg,24505,24505,刘杰,3,605
+24505_c3_s1_00606.jpg,24505,24505,刘杰,3,606
+24505_c3_s1_00607.jpg,24505,24505,刘杰,3,607
+24505_c3_s1_00608.jpg,24505,24505,刘杰,3,608
+24505_c3_s1_00609.jpg,24505,24505,刘杰,3,609
+24505_c3_s1_00610.jpg,24505,24505,刘杰,3,610
+24505_c3_s1_00611.jpg,24505,24505,刘杰,3,611
+24505_c3_s1_00612.jpg,24505,24505,刘杰,3,612
+24505_c3_s1_00613.jpg,24505,24505,刘杰,3,613
+24505_c3_s1_00614.jpg,24505,24505,刘杰,3,614
+24505_c3_s1_00615.jpg,24505,24505,刘杰,3,615
+24505_c3_s1_00616.jpg,24505,24505,刘杰,3,616
+24505_c3_s1_00617.jpg,24505,24505,刘杰,3,617
+24505_c3_s1_00618.jpg,24505,24505,刘杰,3,618
+24505_c3_s1_00619.jpg,24505,24505,刘杰,3,619
+24505_c3_s1_00620.jpg,24505,24505,刘杰,3,620
+24505_c3_s1_00621.jpg,24505,24505,刘杰,3,621
+24505_c3_s1_00622.jpg,24505,24505,刘杰,3,622
+24506_c1_s1_00623.jpg,24506,24506,黄伟斌,1,623
+24506_c1_s1_00624.jpg,24506,24506,黄伟斌,1,624
+24506_c1_s1_00625.jpg,24506,24506,黄伟斌,1,625
+24506_c1_s1_00626.jpg,24506,24506,黄伟斌,1,626
+24506_c1_s1_00627.jpg,24506,24506,黄伟斌,1,627
+24506_c1_s1_00628.jpg,24506,24506,黄伟斌,1,628
+24506_c1_s1_00629.jpg,24506,24506,黄伟斌,1,629
+24506_c1_s1_00630.jpg,24506,24506,黄伟斌,1,630
+24506_c1_s1_00631.jpg,24506,24506,黄伟斌,1,631
+24506_c1_s1_00632.jpg,24506,24506,黄伟斌,1,632
+24506_c1_s1_00633.jpg,24506,24506,黄伟斌,1,633
+24506_c1_s1_00634.jpg,24506,24506,黄伟斌,1,634
+24506_c1_s1_00635.jpg,24506,24506,黄伟斌,1,635
+24506_c1_s1_00636.jpg,24506,24506,黄伟斌,1,636
+24506_c1_s1_00637.jpg,24506,24506,黄伟斌,1,637
+24506_c1_s1_00638.jpg,24506,24506,黄伟斌,1,638
+24506_c1_s1_00639.jpg,24506,24506,黄伟斌,1,639
+24506_c1_s1_00640.jpg,24506,24506,黄伟斌,1,640
+24506_c1_s1_00641.jpg,24506,24506,黄伟斌,1,641
+24506_c1_s1_00642.jpg,24506,24506,黄伟斌,1,642
+24506_c1_s1_00643.jpg,24506,24506,黄伟斌,1,643
+24506_c1_s1_00644.jpg,24506,24506,黄伟斌,1,644
+24506_c1_s1_00645.jpg,24506,24506,黄伟斌,1,645
+24506_c1_s1_00646.jpg,24506,24506,黄伟斌,1,646
+24506_c1_s1_00647.jpg,24506,24506,黄伟斌,1,647
+24506_c1_s1_00648.jpg,24506,24506,黄伟斌,1,648
+24506_c1_s1_00649.jpg,24506,24506,黄伟斌,1,649
+24506_c1_s1_00650.jpg,24506,24506,黄伟斌,1,650
+24506_c1_s1_00651.jpg,24506,24506,黄伟斌,1,651
+24506_c1_s1_00652.jpg,24506,24506,黄伟斌,1,652
+24506_c1_s1_00653.jpg,24506,24506,黄伟斌,1,653
+24506_c1_s1_00654.jpg,24506,24506,黄伟斌,1,654
+24506_c1_s1_00655.jpg,24506,24506,黄伟斌,1,655
+24506_c1_s1_00656.jpg,24506,24506,黄伟斌,1,656
+24506_c1_s1_00657.jpg,24506,24506,黄伟斌,1,657
+24506_c1_s1_00658.jpg,24506,24506,黄伟斌,1,658
+24506_c2_s1_00659.jpg,24506,24506,黄伟斌,2,659
+24506_c2_s1_00660.jpg,24506,24506,黄伟斌,2,660
+24506_c2_s1_00661.jpg,24506,24506,黄伟斌,2,661
+24506_c2_s1_00662.jpg,24506,24506,黄伟斌,2,662
+24506_c2_s1_00663.jpg,24506,24506,黄伟斌,2,663
+24506_c2_s1_00664.jpg,24506,24506,黄伟斌,2,664
+24506_c2_s1_00665.jpg,24506,24506,黄伟斌,2,665
+24506_c2_s1_00666.jpg,24506,24506,黄伟斌,2,666
+24506_c2_s1_00667.jpg,24506,24506,黄伟斌,2,667
+24506_c2_s1_00668.jpg,24506,24506,黄伟斌,2,668
+24506_c2_s1_00669.jpg,24506,24506,黄伟斌,2,669
+24506_c2_s1_00670.jpg,24506,24506,黄伟斌,2,670
+24506_c2_s1_00671.jpg,24506,24506,黄伟斌,2,671
+24506_c2_s1_00672.jpg,24506,24506,黄伟斌,2,672
+24506_c2_s1_00673.jpg,24506,24506,黄伟斌,2,673
+24506_c2_s1_00674.jpg,24506,24506,黄伟斌,2,674
+24506_c2_s1_00675.jpg,24506,24506,黄伟斌,2,675
+24506_c2_s1_00676.jpg,24506,24506,黄伟斌,2,676
+24506_c2_s1_00677.jpg,24506,24506,黄伟斌,2,677
+24506_c2_s1_00678.jpg,24506,24506,黄伟斌,2,678
+24506_c2_s1_00679.jpg,24506,24506,黄伟斌,2,679
+24506_c2_s1_00680.jpg,24506,24506,黄伟斌,2,680
+24506_c2_s1_00681.jpg,24506,24506,黄伟斌,2,681
+24506_c2_s1_00682.jpg,24506,24506,黄伟斌,2,682
+24506_c2_s1_00683.jpg,24506,24506,黄伟斌,2,683
+24506_c2_s1_00684.jpg,24506,24506,黄伟斌,2,684
+24506_c2_s1_00685.jpg,24506,24506,黄伟斌,2,685
+24506_c2_s1_00686.jpg,24506,24506,黄伟斌,2,686
+24506_c2_s1_00687.jpg,24506,24506,黄伟斌,2,687
+24506_c2_s1_00688.jpg,24506,24506,黄伟斌,2,688
+24506_c2_s1_00689.jpg,24506,24506,黄伟斌,2,689
+24506_c2_s1_00690.jpg,24506,24506,黄伟斌,2,690
+24506_c2_s1_00691.jpg,24506,24506,黄伟斌,2,691
+24506_c2_s1_00692.jpg,24506,24506,黄伟斌,2,692
+24506_c2_s1_00693.jpg,24506,24506,黄伟斌,2,693
+24506_c2_s1_00694.jpg,24506,24506,黄伟斌,2,694
+24506_c2_s1_00695.jpg,24506,24506,黄伟斌,2,695
+24506_c2_s1_00696.jpg,24506,24506,黄伟斌,2,696
+24506_c2_s1_00697.jpg,24506,24506,黄伟斌,2,697
+24506_c2_s1_00698.jpg,24506,24506,黄伟斌,2,698
+24506_c2_s1_00699.jpg,24506,24506,黄伟斌,2,699
+24506_c2_s1_00700.jpg,24506,24506,黄伟斌,2,700
+24506_c2_s1_00701.jpg,24506,24506,黄伟斌,2,701
+24506_c2_s1_00702.jpg,24506,24506,黄伟斌,2,702
+24506_c2_s1_00703.jpg,24506,24506,黄伟斌,2,703
+24506_c2_s1_00704.jpg,24506,24506,黄伟斌,2,704
+24506_c2_s1_00705.jpg,24506,24506,黄伟斌,2,705
+24506_c2_s1_00706.jpg,24506,24506,黄伟斌,2,706
+24506_c2_s1_00707.jpg,24506,24506,黄伟斌,2,707
+24506_c2_s1_00708.jpg,24506,24506,黄伟斌,2,708
+24506_c2_s1_00709.jpg,24506,24506,黄伟斌,2,709
+24506_c2_s1_00710.jpg,24506,24506,黄伟斌,2,710
+24506_c2_s1_00711.jpg,24506,24506,黄伟斌,2,711
+24506_c2_s1_00712.jpg,24506,24506,黄伟斌,2,712
+24506_c2_s1_00713.jpg,24506,24506,黄伟斌,2,713
+24506_c2_s1_00714.jpg,24506,24506,黄伟斌,2,714
+24506_c2_s1_00715.jpg,24506,24506,黄伟斌,2,715
+24506_c2_s1_00716.jpg,24506,24506,黄伟斌,2,716
+24506_c2_s1_00717.jpg,24506,24506,黄伟斌,2,717
+24506_c2_s1_00718.jpg,24506,24506,黄伟斌,2,718
+24506_c3_s1_00719.jpg,24506,24506,黄伟斌,3,719
+24506_c3_s1_00720.jpg,24506,24506,黄伟斌,3,720
+24506_c3_s1_00721.jpg,24506,24506,黄伟斌,3,721
+24506_c3_s1_00722.jpg,24506,24506,黄伟斌,3,722
+24506_c3_s1_00723.jpg,24506,24506,黄伟斌,3,723
+24506_c3_s1_00724.jpg,24506,24506,黄伟斌,3,724
+24506_c3_s1_00725.jpg,24506,24506,黄伟斌,3,725
+24506_c3_s1_00726.jpg,24506,24506,黄伟斌,3,726
+24506_c3_s1_00727.jpg,24506,24506,黄伟斌,3,727
+24506_c3_s1_00728.jpg,24506,24506,黄伟斌,3,728
+24506_c3_s1_00729.jpg,24506,24506,黄伟斌,3,729
+24506_c3_s1_00730.jpg,24506,24506,黄伟斌,3,730
+24506_c3_s1_00731.jpg,24506,24506,黄伟斌,3,731
+24506_c3_s1_00732.jpg,24506,24506,黄伟斌,3,732
+24506_c3_s1_00733.jpg,24506,24506,黄伟斌,3,733
+24506_c3_s1_00734.jpg,24506,24506,黄伟斌,3,734
+24506_c3_s1_00735.jpg,24506,24506,黄伟斌,3,735
+24506_c3_s1_00736.jpg,24506,24506,黄伟斌,3,736
+24506_c3_s1_00737.jpg,24506,24506,黄伟斌,3,737
+24506_c3_s1_00738.jpg,24506,24506,黄伟斌,3,738
+24506_c3_s1_00739.jpg,24506,24506,黄伟斌,3,739
+24506_c3_s1_00740.jpg,24506,24506,黄伟斌,3,740
+24506_c3_s1_00741.jpg,24506,24506,黄伟斌,3,741
+24506_c3_s1_00742.jpg,24506,24506,黄伟斌,3,742
+24506_c3_s1_00743.jpg,24506,24506,黄伟斌,3,743
+24506_c3_s1_00744.jpg,24506,24506,黄伟斌,3,744
+24506_c3_s1_00745.jpg,24506,24506,黄伟斌,3,745
+24506_c3_s1_00746.jpg,24506,24506,黄伟斌,3,746
+24506_c3_s1_00747.jpg,24506,24506,黄伟斌,3,747
+24506_c3_s1_00748.jpg,24506,24506,黄伟斌,3,748
+24506_c3_s1_00749.jpg,24506,24506,黄伟斌,3,749
+24506_c3_s1_00750.jpg,24506,24506,黄伟斌,3,750
+24506_c3_s1_00751.jpg,24506,24506,黄伟斌,3,751
+24506_c3_s1_00752.jpg,24506,24506,黄伟斌,3,752
+24506_c3_s1_00753.jpg,24506,24506,黄伟斌,3,753
+24506_c3_s1_00754.jpg,24506,24506,黄伟斌,3,754
+24506_c3_s1_00755.jpg,24506,24506,黄伟斌,3,755
+24506_c3_s1_00756.jpg,24506,24506,黄伟斌,3,756
+24506_c3_s1_00757.jpg,24506,24506,黄伟斌,3,757
+24506_c3_s1_00758.jpg,24506,24506,黄伟斌,3,758
+24506_c3_s1_00759.jpg,24506,24506,黄伟斌,3,759
diff --git a/input/.gitkeep b/input/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/input/视频中的商品信息表.xlsx b/input/视频中的商品信息表.xlsx
new file mode 100644
index 0000000..b2471db
Binary files /dev/null and b/input/视频中的商品信息表.xlsx differ
diff --git a/main_basket.py b/main_basket.py
new file mode 100644
index 0000000..5f16748
--- /dev/null
+++ b/main_basket.py
@@ -0,0 +1,148 @@
+#!/usr/bin/env python3
+"""篮子接触分段入口：OpenCV 框选篮子 → 手篮接触上升沿 → Phase2（跳过 ActionFormer）。"""
+from __future__ import annotations
+
+import argparse
+import os
+import sys
+from pathlib import Path
+
+PACK_ROOT = Path(__file__).resolve().parent
+sys.path.insert(0, str(PACK_ROOT / "src"))
+
+from paths import ensure_code_on_path
+
+ensure_code_on_path(PACK_ROOT)
+
+from config import load_run_config
+from orchestrator import run_basket_pipeline
+
+
+def main() -> int:
+    """Command-line entry point for the offline basket-contact pipeline.
+
+    Parses the arguments, loads the YAML run config, copies every explicitly
+    given option onto it and hands the config to ``run_basket_pipeline``.
+    Interactive ROI selection is always forced on: the ROI-loading fields of
+    the config are reset before the pipeline starts.
+
+    Returns:
+        1 when the config file is missing; otherwise the pipeline's return
+        value converted to ``int``.
+    """
+    # setdefault: a log level already present in the environment wins.
+    os.environ.setdefault("OPENCV_FFMPEG_LOGLEVEL", "8")
+
+    # Optional tuning flags as (flag, value type, help text). They all default
+    # to None, which means "keep the value produced by load_run_config".
+    tuning_flags = (
+        (
+            "--det-conf",
+            float,
+            "篮子扫描手部检测置信度（默认读 yaml basket.det_conf）",
+        ),
+        (
+            "--contact-iou-on",
+            float,
+            "篮子接触 IoU 进入阈值（默认读 yaml basket.contact_iou_on）",
+        ),
+        (
+            "--contact-iou-off",
+            float,
+            "篮子接触 IoU 退出阈值（默认读 yaml basket.contact_iou_off）",
+        ),
+        (
+            "--confirm-seconds",
+            float,
+            "连续接触确认时长（秒，默认 0.4）",
+        ),
+        (
+            "--cooldown-seconds",
+            float,
+            "触发后绝对冷却时长（秒，默认 5.0）",
+        ),
+        (
+            "--contact-iou-threshold",
+            float,
+            "手框与篮子 IoU 阈值（默认读 yaml basket.contact_iou_threshold）",
+        ),
+        (
+            "--segment-start-offset-sec",
+            float,
+            "段起点相对接触时刻偏移（秒，默认 1 → contact+1）",
+        ),
+        (
+            "--segment-end-offset-sec",
+            float,
+            "段终点相对接触时刻偏移（秒，默认 5 → contact+5）",
+        ),
+        (
+            "--min-segment-sec",
+            float,
+            "截断后段长不足此值则丢弃（秒，默认 4.0；0 表示不过滤）",
+        ),
+        (
+            "--scan-frame-stride",
+            int,
+            "全片接触扫描帧步长（默认 1）",
+        ),
+    )
+
+    parser = argparse.ArgumentParser(
+        description="手术室耗材篮子接触分段主流程（跳过 ActionFormer）"
+    )
+    parser.add_argument("--video", type=Path, required=True, help="输入 MP4")
+    parser.add_argument(
+        "--excel",
+        type=Path,
+        required=True,
+        help="商品表 Excel（C 列白名单 + 产品编码）",
+    )
+    parser.add_argument("--out", type=Path, required=True, help="输出 TSV")
+    parser.add_argument(
+        "--config",
+        type=Path,
+        default=PACK_ROOT / "configs" / "default_config.yaml",
+        help="继承 weights / phase2 / classification / doctor / basket 的 YAML",
+    )
+    parser.add_argument(
+        "--save-basket-roi",
+        type=Path,
+        default=None,
+        help="框选后将 ROI 保存为 JSON（可选；每次运行仍会先弹窗标框）",
+    )
+
+    # Registered after the path options, in table order (this fixes their --help order).
+    for flag, value_type, help_text in tuning_flags:
+        parser.add_argument(flag, type=value_type, default=None, help=help_text)
+    cli = parser.parse_args()
+
+    config_file = cli.config.resolve()
+    if not config_file.is_file():
+        print("找不到配置:", config_file, file=sys.stderr)
+        return 1
+
+    run_cfg = load_run_config(PACK_ROOT, config_file)
+    for field in ("video", "excel", "out"):
+        setattr(run_cfg, field, getattr(cli, field).resolve())
+
+    # Every run draws the ROI in a first-frame popup; it is never reused from
+    # JSON / yaml.
+    run_cfg.basket_load_roi_json = None
+    run_cfg.basket_skip_roi_select = False
+    if cli.save_basket_roi is not None:
+        run_cfg.basket_save_roi_json = cli.save_basket_roi.resolve()
+
+    # argparse stores "--det-conf" as cli.det_conf; the matching config field
+    # is run_cfg.basket_det_conf (same rule for every tuning flag).
+    for flag, value_type, _help in tuning_flags:
+        dest = flag.lstrip("-").replace("-", "_")
+        override = getattr(cli, dest)
+        if override is not None:
+            setattr(run_cfg, f"basket_{dest}", value_type(override))
+
+    return int(run_basket_pipeline(run_cfg))
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/main_basket_stream.py b/main_basket_stream.py
new file mode 100644
index 0000000..79fb9bb
--- /dev/null
+++ b/main_basket_stream.py
@@ -0,0 +1,116 @@
+#!/usr/bin/env python3
+"""推流篮子耗材识别：弹窗框选 ROI → RTSP 逐帧触发 → 缓存 [contact+1,contact+6] → 耗材识别。"""
+from __future__ import annotations
+
+import argparse
+import os
+import sys
+from pathlib import Path
+
+PACK_ROOT = Path(__file__).resolve().parent
+sys.path.insert(0, str(PACK_ROOT / "src"))
+
+from paths import ensure_code_on_path
+
+ensure_code_on_path(PACK_ROOT)
+
+from config import load_run_config
+from stream_orchestrator import run_stream_pipeline
+
+
+def main() -> int:
+    """Command-line entry point for the streaming basket pipeline.
+
+    Parses the arguments, loads the YAML run config, picks the stream source
+    (``--rtsp`` first, then ``stream_rtsp`` from the config), applies the
+    explicitly given overrides and calls ``run_stream_pipeline``.
+
+    Returns:
+        1 when the config file is missing or no stream source is configured;
+        otherwise the pipeline's return value converted to ``int``.
+    """
+    os.environ.setdefault("OPENCV_FFMPEG_LOGLEVEL", "8")
+
+    # Optional float overrides as (flag, run-config attribute, help text).
+    stream_flags = (
+        (
+            "--segment-start-offset-sec",
+            "stream_segment_start_offset_sec",
+            "段起点相对 contact 偏移（默认读 yaml，与 basket 一致 → contact+1）",
+        ),
+        (
+            "--segment-end-offset-sec",
+            "stream_segment_end_offset_sec",
+            "段终点相对 contact 偏移（默认读 yaml，与 basket 一致 → contact+6，窗口 5s）",
+        ),
+        (
+            "--min-segment-sec",
+            "stream_min_segment_sec",
+            "段长不足此值则丢弃（默认 4.0）",
+        ),
+        (
+            "--ring-buffer-sec",
+            "stream_ring_buffer_sec",
+            "帧环形缓存时长（秒，默认 15）",
+        ),
+        (
+            "--stream-fps",
+            "stream_fps",
+            "RTSP 无 FPS 元数据时的假定帧率（默认 25）",
+        ),
+    )
+
+    parser = argparse.ArgumentParser(description="推流篮子耗材识别（无撕膜）")
+    parser.add_argument(
+        "--rtsp", type=str, default=None, help="RTSP/摄像头 URL；本地 mp4 也可用于测试"
+    )
+    parser.add_argument(
+        "--excel", type=Path, required=True, help="商品表 Excel（C 列白名单 + 产品编码）"
+    )
+    parser.add_argument("--out", type=Path, required=True, help="输出 TSV（实时追加）")
+    parser.add_argument(
+        "--config",
+        type=Path,
+        default=PACK_ROOT / "configs" / "default_config.yaml",
+        help="配置文件",
+    )
+    parser.add_argument(
+        "--save-basket-roi",
+        type=Path,
+        default=None,
+        help="框选后将 ROI 保存为 JSON（可选；每次运行仍会先弹窗标框）",
+    )
+    for flag, _attr, help_text in stream_flags:
+        parser.add_argument(flag, type=float, default=None, help=help_text)
+    cli = parser.parse_args()
+
+    config_file = cli.config.resolve()
+    if not config_file.is_file():
+        print("找不到配置:", config_file, file=sys.stderr)
+        return 1
+
+    run_cfg = load_run_config(PACK_ROOT, config_file)
+    run_cfg.excel = cli.excel.resolve()
+    run_cfg.out = cli.out.resolve()
+
+    source = cli.rtsp or getattr(run_cfg, "stream_rtsp", None)
+    if not source:
+        print("请指定 --rtsp 或在 yaml stream.rtsp 中配置", file=sys.stderr)
+        return 1
+    run_cfg.stream_rtsp = str(source)
+
+    # Every run draws the ROI in a popup; it is never reused from JSON / yaml.
+    run_cfg.basket_load_roi_json = None
+    run_cfg.basket_skip_roi_select = False
+    if cli.save_basket_roi is not None:
+        run_cfg.basket_save_roi_json = cli.save_basket_roi.resolve()
+    for flag, attr, _help in stream_flags:
+        override = getattr(cli, flag.lstrip("-").replace("-", "_"))
+        if override is not None:
+            setattr(run_cfg, attr, float(override))
+
+    return int(run_stream_pipeline(run_cfg))
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/main_segments_offline.py b/main_segments_offline.py
new file mode 100644
index 0000000..c6c44f8
--- /dev/null
+++ b/main_segments_offline.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python3
+"""按结果 TSV 时间段对离线视频做手检 → 耗材分类（跳过分段与撕膜，无好坏帧门控）。"""
+from __future__ import annotations
+
+import argparse
+import os
+import sys
+from pathlib import Path
+
+PACK_ROOT = Path(__file__).resolve().parent
+sys.path.insert(0, str(PACK_ROOT / "src"))
+
+from paths import ensure_code_on_path
+
+ensure_code_on_path(PACK_ROOT)
+
+from config import load_run_config
+from segments_offline_orchestrator import run_segments_offline_pipeline
+
+
+def main() -> int:
+    """Command-line entry point for recognition over the time segments listed in a TSV.
+
+    Returns:
+        1 when the config file is missing; otherwise the return value of
+        ``run_segments_offline_pipeline`` converted to ``int``.
+    """
+    os.environ.setdefault("OPENCV_FFMPEG_LOGLEVEL", "8")
+    parser = argparse.ArgumentParser(
+        description="TSV 时间段 → 离线视频段内耗材识别（无 ActionFormer / 无篮子分段 / 无撕膜）"
+    )
+    required_paths = (
+        ("--video", "输入 MP4"),
+        ("--segments-tsv", "含 start_sec/end_sec 的结果 TSV（如推流输出）"),
+        ("--excel", "商品表 Excel（C 列白名单 + 产品编码）"),
+        ("--out", "输出 TSV"),
+    )
+    for flag, help_text in required_paths:
+        parser.add_argument(flag, type=Path, required=True, help=help_text)
+    parser.add_argument(
+        "--config",
+        type=Path,
+        default=PACK_ROOT / "configs" / "default_config.yaml",
+        help="配置文件",
+    )
+    parser.add_argument(
+        "--skip-empty-segments",
+        action="store_true",
+        help="跳过 TSV 中 top1_name 为空或为失败文案的行",
+    )
+    cli = parser.parse_args()
+
+    config_file = cli.config.resolve()
+    if not config_file.is_file():
+        print("找不到配置:", config_file, file=sys.stderr)
+        return 1
+
+    run_cfg = load_run_config(PACK_ROOT, config_file)
+    run_cfg.video = cli.video.resolve()
+    run_cfg.excel = cli.excel.resolve()
+    run_cfg.out = cli.out.resolve()
+    run_cfg.segments_tsv = cli.segments_tsv.resolve()
+    run_cfg.segments_skip_empty = bool(cli.skip_empty_segments)
+
+    return int(run_segments_offline_pipeline(run_cfg))
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/output/.gitkeep b/output/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..37c6276
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,14 @@
+# 篮子离线 + 推流 — 运行依赖
+# 1. 先按 https://pytorch.org 安装与 CUDA 匹配的 torch / torchvision
+# 2. pip install -r requirements.txt
+
+torch>=2.0.0
+torchvision>=0.15.0
+ultralytics>=8.0.0
+opencv-python>=4.8.0
+numpy>=1.23.0
+pandas>=2.0.0
+openpyxl>=3.1.0
+PyYAML>=6.0
+Pillow>=10.0.0
+mediapipe>=0.10.0
diff --git a/scripts/remux_hevc.sh b/scripts/remux_hevc.sh
new file mode 100755
index 0000000..c1ba2c7
--- /dev/null
+++ b/scripts/remux_hevc.sh
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+# Convert an HEVC main-view MP4 to H.264 for VideoSwin feature extraction and OpenCV decoding.
+# Usage:
+#   ./scripts/remux_hevc.sh /path/to/source.mp4 [output.mp4]
+# Without an explicit output, writes to input/remuxed/<stem>_h264.mp4
+
+set -euo pipefail
+
+ROOT="$(cd "$(dirname "$0")/.." && pwd)"
+SRC="${1:?用法: remux_hevc.sh <source.mp4> [output.mp4]}"
+STEM="$(basename "${SRC%.*}")"
+OUT="${2:-${ROOT}/input/remuxed/${STEM}_h264.mp4}"
+
+mkdir -p "$(dirname "$OUT")"
+echo "[remux] ${SRC} -> ${OUT}"
+ffmpeg -y -i "$SRC" -c:v libx264 -preset ultrafast -crf 23 -an "$OUT"  # re-encode video with libx264; -an: no audio; -y: overwrite
+echo "[done] ${OUT}"
diff --git a/setup.sh b/setup.sh
new file mode 100755
index 0000000..3ea6a85
--- /dev/null
+++ b/setup.sh
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+set -euo pipefail
+cd "$(dirname "$0")"  # run from the directory that contains this script
+
+echo "=== 环境检查 ==="
+if ! python3 -c "import tkinter" 2>/dev/null; then  # warn only; the script continues
+  echo "警告: 未检测到 python3-tk，框选篮子 ROI 会失败。"
+  echo "  Ubuntu/Debian: sudo apt install python3-tk"
+fi
+
+if command -v conda >/dev/null 2>&1; then  # conda found: only print the recommended commands
+  echo "检测到 conda。推荐: conda activate yolo && pip install -r requirements.txt"
+else  # no conda: create ./.venv and install the requirements into it
+  echo "使用 venv 安装..."
+  python3 -m venv .venv
+  # shellcheck disable=SC1091
+  source .venv/bin/activate
+  pip install -U pip
+  pip install -r requirements.txt
+fi
+
+echo ""
+echo "=== 权重检查 ==="
+for w in hand_detect.pt goodbad_frame.pt haocai_classify.pt; do  # report present/missing only
+  test -f "weights/$w" && echo "  OK weights/$w" || echo "  缺失 weights/$w"
+done
+test -f doctor_identity_package/doctor_info.pth && echo "  OK doctor_info.pth" || echo "  缺失 doctor_info.pth"
+test -f input/视频中的商品信息表.xlsx && echo "  OK Excel" || echo "  缺失 Excel"
+
+echo ""
+echo "安装说明见 README.md"
diff --git a/src/__init__.py b/src/__init__.py
new file mode 100644
index 0000000..ebf7288
--- /dev/null
+++ b/src/__init__.py
@@ -0,0 +1 @@
+# pack 5.11 src package
diff --git a/src/action_trigger_logic.py b/src/action_trigger_logic.py
new file mode 100644
index 0000000..7cd35d6
--- /dev/null
+++ b/src/action_trigger_logic.py
@@ -0,0 +1,120 @@
+"""手篮接触 ActionTriggerLogic：帧防抖 + 上升沿 + 绝对冷却三道锁。"""
+from __future__ import annotations
+
+from pipeline.hand_roi_merge import bbox_iou_xyxy
+
+
+def max_hand_basket_iou(
+    hand_boxes: list[list[float]], basket_xyxy: list[float]
+) -> float:
+    """Return the largest IoU between any hand box and the basket; 0.0 when there are no hands."""
+    if hand_boxes:
+        basket_box = list(map(float, basket_xyxy))
+        return max(bbox_iou_xyxy(hand, basket_box) for hand in hand_boxes)
+    return 0.0
+
+
+def resolve_contact_iou_thresholds(
+    *,
+    contact_iou_threshold: float | None = None,
+    contact_iou_on: float | None = None,
+    contact_iou_off: float | None = None,
+) -> tuple[float, float]:
+    """Resolve the (on, off) IoU hysteresis pair from the legacy or explicit thresholds."""
+    legacy = 0.05 if contact_iou_threshold is None else float(contact_iou_threshold)
+    iou_on = legacy if contact_iou_on is None else float(contact_iou_on)
+    iou_off = max(legacy * 0.6, 0.01) if contact_iou_off is None else float(contact_iou_off)
+    if iou_off >= iou_on:
+        iou_off = max(iou_on - 0.02, 0.01)
+    if iou_off >= iou_on:  # the 0.01 floor overshot a tiny "on": use half of it, never above on
+        iou_off = min(iou_on, max(iou_on * 0.5, 0.0))
+    return iou_on, iou_off
+
+
+class ActionTriggerLogic:
+    """
+    Action-trigger state machine driven by the hand/basket IoU.
+
+    Three locks guard every Start event:
+    1. Frame debounce - contact (with on/off hysteresis) must hold for confirm_frames frames in a row
+    2. Rising edge - one contact session fires Start at most once
+    3. Absolute cooldown - every signal within cooldown_seconds after a trigger is ignored
+    """
+
+    def __init__(
+        self,
+        fps: float = 25,
+        confirm_seconds: float = 0.4,
+        cooldown_seconds: float = 5.0,
+        threshold_on: float = 0.08,
+        threshold_off: float = 0.03,
+    ) -> None:
+        self.fps = float(fps)
+        self.confirm_seconds = float(confirm_seconds)
+        self.cooldown_seconds = float(cooldown_seconds)
+        self.threshold_on = float(threshold_on)
+        self.threshold_off = float(threshold_off)
+        if self.threshold_off >= self.threshold_on:
+            self.threshold_off = max(self.threshold_on - 0.02, 0.01)
+        if self.threshold_off >= self.threshold_on:
+            # The 0.01 floor overshot a tiny "on"; keep off <= on so weak contacts do not flap.
+            self.threshold_off = min(self.threshold_on, max(self.threshold_on * 0.5, 0.0))
+
+        # Debounce length in frames; at least one frame is always required.
+        self._confirm_frames = max(1, int(round(self.confirm_seconds * self.fps)))
+        self.reset()
+
+    def reset(self) -> None:
+        """Clear the internal state (use when switching video or basket)."""
+        self._overlap_counter = 0
+        self._debounce_start_t: float | None = None
+        self._hysteresis_inside = False
+        self._armed = True
+        self._last_trigger_t = float("-inf")
+
+    def _is_contacting(self, current_iou: float) -> bool:
+        """Hysteresis test: enter above threshold_on, stay while above threshold_off."""
+        limit = self.threshold_off if self._hysteresis_inside else self.threshold_on
+        return current_iou > limit + 1e-12
+
+    def step_iou(self, current_timestamp: float, current_iou: float) -> float | None:
+        """Advance with a precomputed IoU (for unit tests); return the Start timestamp or None."""
+        t = float(current_timestamp)
+        iou = float(current_iou)
+
+        # Lock 3: in cooldown only the debounce run is cleared; other flags are left as they are.
+        if t - self._last_trigger_t < self.cooldown_seconds - 1e-12:
+            self._overlap_counter = 0
+            self._debounce_start_t = None
+            return None
+
+        if self._is_contacting(iou):
+            if self._overlap_counter == 0:
+                self._debounce_start_t = t
+            self._overlap_counter += 1
+            self._hysteresis_inside = True
+        else:
+            # Contact released: drop the run and re-arm for the next rising edge.
+            self._overlap_counter = 0
+            self._debounce_start_t = None
+            self._hysteresis_inside = False
+            self._armed = True
+
+        if self._overlap_counter >= self._confirm_frames and self._armed:
+            self._armed = False
+            self._last_trigger_t = t
+            # Report when the confirmed run began, not the frame that confirmed it.
+            return t if self._debounce_start_t is None else self._debounce_start_t
+
+        return None
+
+    def process_frame(
+        self,
+        current_timestamp: float,
+        hand_boxes: list[list[float]],
+        basket_box: tuple[float, float, float, float] | list[float],
+    ) -> float | None:
+        """Process one frame; any single hand may trigger. Returns the Start timestamp or None."""
+        basket = [float(v) for v in basket_box]
+        current_iou = max_hand_basket_iou(hand_boxes, basket)
+        return self.step_iou(current_timestamp, current_iou)
diff --git a/src/actionformer_utils.py b/src/actionformer_utils.py
new file mode 100644
index 0000000..0b02459
--- /dev/null
+++ b/src/actionformer_utils.py
@@ -0,0 +1,96 @@
+"""Phase1：VideoSwin 特征 + ActionFormer 时段（与仓库 main_pipeline.ActionSegmenter 一致）。"""
+from __future__ import annotations
+
+import json
+import shutil
+from pathlib import Path
+from typing import Any
+
+import cv2
+
+import run_haocai_actionformer_consumables_e2e as e2e
+from pack_utils import log
+
+
+class ActionSegmenter:
+    @staticmethod
+    def build_segments(
+        *,
+        video_path: Path,
+        stem: str,
+        work: Path,
+        actionformer_ckpt: Path,
+        af_min_score: float,
+        af_min_seg_seconds: float,
+        python_exe: str,
+        feat_batch_size: int,
+        device: str,
+    ) -> list[tuple[float, float, float]]:
+        """Extract features, run ActionFormer and return (start, end, score) segments.
+
+        Raises FileNotFoundError when the feature file ``<stem>.npy`` is missing.
+        """
+        inp = work / "input"
+        feat_dir = work / "features"
+        inp.mkdir(parents=True, exist_ok=True)
+        feat_dir.mkdir(parents=True, exist_ok=True)
+        source = video_path.resolve()
+        for stale in inp.glob("*.mp4"):
+            # Never delete the input itself when it already lives in the staging directory.
+            if stale.resolve() != source:
+                stale.unlink(missing_ok=True)
+        single_video = inp / video_path.name
+        if single_video.resolve() != source:
+            shutil.copy2(video_path, single_video)
+        meta_path = feat_dir / "meta.json"
+        e2e.run_feature_extraction(
+            python_exe=python_exe,
+            data_root=inp,
+            output_dir=feat_dir,
+            meta_file=meta_path,
+            device=device,
+            batch_size=max(1, feat_batch_size),
+        )
+
+        meta = json.loads(meta_path.read_text(encoding="utf-8"))
+        duration, fps = e2e.duration_fps_from_meta(meta, stem)
+        if stem not in meta.get("videos", {}):
+            log("meta 中未找到 video_id=stem，使用 OpenCV 估 duration…")
+            cap0 = cv2.VideoCapture(str(video_path))
+            if cap0.isOpened():
+                fps = float(cap0.get(cv2.CAP_PROP_FPS)) or fps
+                nfr = int(cap0.get(cv2.CAP_PROP_FRAME_COUNT))
+                cap0.release()
+                if fps > 0 and nfr > 0:
+                    duration = nfr / fps
+
+        npy_path = feat_dir / f"{stem}.npy"
+        if not npy_path.is_file():
+            raise FileNotFoundError(f"特征文件不存在: {npy_path}")
+        json_path = work / "infer_single.json"
+        e2e.write_infer_json(json_path, stem, duration, fps)
+        yaml_path = work / "infer_single.yaml"
+        e2e.write_infer_yaml(yaml_path, json_path.resolve(), feat_dir.resolve())
+        pkl_dest = work / "eval_results.pkl"
+        e2e.run_actionformer_eval(
+            python_exe=python_exe,
+            yaml_path=yaml_path.resolve(),
+            ckpt_path=actionformer_ckpt.resolve(),
+            copy_pkl_to=pkl_dest,
+        )
+        raw_segs = e2e.parse_actionformer_pkl(pkl_dest, stem)
+        raw_segs = [(s, e, sc) for s, e, sc in raw_segs if sc > af_min_score]
+        segs = e2e.greedy_mutual_exclusive(raw_segs)
+        n_exclusive = len(segs)
+        min_seg = float(af_min_seg_seconds)
+        if min_seg > 0:
+            segs = [(s, e, sc) for s, e, sc in segs if (e - s) >= min_seg - 1e-9]
+            log(
+                f"ActionFormer 候选 {len(raw_segs)} -> 互斥后 {n_exclusive} 段 -> "
+                f"剔除短于 {min_seg:g}s 后 {len(segs)} 段（score>{af_min_score}）"
+            )
+        else:
+            log(
+                f"ActionFormer 候选 {len(raw_segs)} -> 互斥后 {n_exclusive} 段（score>{af_min_score}）"
+            )
+        return segs
diff --git a/src/basket_segmenter.py b/src/basket_segmenter.py
new file mode 100644
index 0000000..e757f7c
--- /dev/null
+++ b/src/basket_segmenter.py
@@ -0,0 +1,616 @@
+"""篮子 ROI 交互选取 + 手篮接触上升沿扫描 → 固定窗口段列表。"""
+from __future__ import annotations
+
+import json
+import subprocess
+from pathlib import Path
+from typing import Any, Callable
+
+import cv2
+from ultralytics import YOLO
+
+from action_trigger_logic import ActionTriggerLogic, resolve_contact_iou_thresholds
+from pipeline.hand_roi_merge import bbox_iou_xyxy
+from run_segments_consumable_vote import collect_hand_boxes
+
+
+def _roi_xyxy_from_select(x: int, y: int, w: int, h: int) -> list[float]:
+    if w <= 0 or h <= 0:  # an empty box means nothing usable was selected
+        raise ValueError("未框选有效区域（宽高须 > 0）")
+    return [float(edge) for edge in (x, y, x + w, y + h)]  # (x, y, w, h) -> xyxy
+
+
+def _read_frame_at(cap: cv2.VideoCapture, *, mode: str | float) -> tuple[Any, float]:
+    """Read the ROI reference frame; return (frame, position in seconds).
+
+    ``mode``: seconds, "first" or "middle". Falls back to the first frame (0.0).
+    """
+    fps = float(cap.get(cv2.CAP_PROP_FPS) or 25.0)
+    n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0)
+    if isinstance(mode, (int, float)):
+        t_sec = float(mode)
+    elif mode == "first":
+        t_sec = 0.0
+    elif mode == "middle":
+        duration = n_frames / fps if n_frames > 0 and fps > 0 else 0.0
+        t_sec = max(0.0, duration * 0.5)
+    else:
+        raise ValueError(f"未知 roi_frame 模式: {mode!r}")
+    cap.set(cv2.CAP_PROP_POS_MSEC, t_sec * 1000.0)
+    ok, frame = cap.read()
+    if ok and frame is not None:
+        return frame, float(cap.get(cv2.CAP_PROP_POS_MSEC)) / 1000.0
+    cap.set(cv2.CAP_PROP_POS_FRAMES, 0)  # the timed read failed: retry from frame 0
+    ok, frame = cap.read()
+    if not ok or frame is None:
+        raise RuntimeError("无法从视频读取用于框选 ROI 的帧")
+    return frame, 0.0
+
+
+def save_basket_roi_json(path: Path, roi: list[float], *, video_path: Path | None = None) -> None:
+    """Write the basket ROI (plus the resolved video path, when given) as UTF-8 JSON."""
+    extra = {} if video_path is None else {"video": str(video_path.resolve())}
+    record = {"basket_xyxy": list(map(float, roi)), **extra}
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(f"{json.dumps(record, ensure_ascii=False, indent=2)}\n", encoding="utf-8")
+
+
+def load_basket_roi_json(path: Path) -> list[float]:
+    """Read ``basket_xyxy`` (a 4-element list) from a ROI JSON file and return it as floats."""
+    box = json.loads(path.read_text(encoding="utf-8")).get("basket_xyxy")
+    if isinstance(box, list) and len(box) == 4:
+        return list(map(float, box))
+    raise ValueError(f"无效的篮子 ROI JSON: {path}")
+
+
+def _scale_frame_for_display(frame, max_display_px: int) -> tuple[Any, float]:
+    """Shrink ``frame`` so its longer side is at most ``max_display_px``.
+    Returns (display frame, scale); a frame that already fits is returned as-is with 1.0.
+    """
+    src_h, src_w = frame.shape[:2]
+    longest = max(src_w, src_h)
+    if longest <= max_display_px:
+        return frame, 1.0
+    scale = max_display_px / float(longest)
+    target = (int(round(src_w * scale)), int(round(src_h * scale)))
+    disp = cv2.resize(frame, target, interpolation=cv2.INTER_AREA)
+    print(
+        f"[basket] 4K 预览缩放 scale={scale:.4f} "
+        f"({src_w}x{src_h} -> {disp.shape[1]}x{disp.shape[0]})"
+    )
+    return disp, scale
+
+
+def _select_basket_roi_tkinter(
+    disp_bgr,
+    *,
+    t_sec: float,
+    title: str,
+) -> tuple[float, float, float, float]:
+    """Tkinter popup: hold the left button and drag to draw a box, then press the top "confirm" button."""
+    import tkinter as tk
+    from tkinter import messagebox
+
+    from PIL import Image, ImageTk
+
+    rgb = cv2.cvtColor(disp_bgr, cv2.COLOR_BGR2RGB)
+    pil = Image.fromarray(rgb)
+    img_w, img_h = pil.size
+
+    root = tk.Tk()
+    root.title(title)
+    root.attributes("-topmost", True)
+    root.after(300, lambda: root.attributes("-topmost", False))  # topmost only for the first 300 ms
+
+    sw = int(root.winfo_screenwidth() or 1920)
+    sh = int(root.winfo_screenheight() or 1080)
+    # Leave room for the top instructions + buttons and the window border; the canvas must fit the usable screen height
+    max_canvas_w = max(640, sw - 48)
+    max_canvas_h = max(360, sh - 220)
+    ui_scale = min(max_canvas_w / img_w, max_canvas_h / img_h, 1.0)
+    show_w = int(round(img_w * ui_scale))
+    show_h = int(round(img_h * ui_scale))
+    if (show_w, show_h) != (img_w, img_h):
+        pil = pil.resize((show_w, show_h), Image.Resampling.LANCZOS)
+
+    state: dict[str, float | None] = {"x1": None, "y1": None, "x2": None, "y2": None}
+    start: dict[str, int | None] = {"x": None, "y": None}
+    rect_holder: dict[str, int | None] = {"id": None}
+    cancelled = {"v": False}
+
+    def to_disp_coords(x1: float, y1: float, x2: float, y2: float) -> tuple[float, float, float, float]:
+        inv = 1.0 / ui_scale  # undo the on-screen shrink: canvas pixels -> disp_bgr pixels
+        return x1 * inv, y1 * inv, x2 * inv, y2 * inv
+
+    def on_confirm() -> None:
+        if state["x1"] is None:
+            messagebox.showwarning(title, "请先在图片上按住左键拖动，框选篮子区域")
+            return
+        root.quit()
+        root.destroy()
+
+    def on_cancel() -> None:
+        cancelled["v"] = True
+        root.quit()
+        root.destroy()
+
+    top = tk.Frame(root, padx=12, pady=8)
+    top.pack(side=tk.TOP, fill=tk.X)
+
+    tk.Label(
+        top,
+        text=(
+            f"参考帧 t={t_sec:.2f}s  |  按住左键在图片上拖动画框  |  完成后点【确认】或按 Enter"
+        ),
+        font=("", 12),
+        justify=tk.LEFT,
+        anchor=tk.W,
+    ).pack(fill=tk.X)
+
+    status = tk.Label(top, text="尚未框选", font=("", 11), fg="gray", anchor=tk.W)
+    status.pack(fill=tk.X, pady=(4, 8))
+
+    btn_frame = tk.Frame(top)
+    btn_frame.pack(fill=tk.X)
+    confirm_btn = tk.Button(
+        btn_frame,
+        text="确认",
+        command=on_confirm,
+        font=("", 15, "bold"),
+        width=14,
+        height=1,
+        bg="#4CAF50",
+        fg="white",
+        activebackground="#43A047",
+    )
+    confirm_btn.pack(side=tk.LEFT, padx=(0, 10))
+    tk.Button(
+        btn_frame,
+        text="取消",
+        command=on_cancel,
+        font=("", 14),
+        width=12,
+    ).pack(side=tk.LEFT)
+
+    photo = ImageTk.PhotoImage(pil)
+    canvas = tk.Canvas(
+        root,
+        width=show_w,
+        height=show_h,
+        cursor="crosshair",
+        highlightthickness=1,
+        highlightbackground="#cccccc",
+    )
+    canvas.pack(side=tk.TOP, padx=10, pady=(0, 10))
+    canvas.create_image(0, 0, anchor=tk.NW, image=photo)
+
+    def on_press(event: tk.Event) -> None:
+        start["x"], start["y"] = int(event.x), int(event.y)
+        if rect_holder["id"] is not None:
+            canvas.delete(rect_holder["id"])
+        rect_holder["id"] = canvas.create_rectangle(
+            event.x, event.y, event.x, event.y, outline="red", width=3
+        )
+        status.config(text="正在框选…（松开左键完成矩形）", fg="orange")
+
+    def on_drag(event: tk.Event) -> None:
+        if rect_holder["id"] is not None and start["x"] is not None and start["y"] is not None:
+            canvas.coords(rect_holder["id"], start["x"], start["y"], event.x, event.y)
+
+    def on_release(event: tk.Event) -> None:
+        if start["x"] is None or start["y"] is None:
+            return
+        x1, y1 = min(start["x"], event.x), min(start["y"], event.y)
+        x2, y2 = max(start["x"], event.x), max(start["y"], event.y)
+        if x2 - x1 < 8 or y2 - y1 < 8:  # reject boxes under 8 canvas pixels on either side
+            status.config(text="框太小，请重新按住左键拖动", fg="red")
+            state["x1"] = state["y1"] = state["x2"] = state["y2"] = None
+            return
+        dx1, dy1, dx2, dy2 = to_disp_coords(float(x1), float(y1), float(x2), float(y2))
+        state["x1"], state["y1"], state["x2"], state["y2"] = dx1, dy1, dx2, dy2
+        status.config(
+            text=f"已框选 {int(dx2 - dx1)}×{int(dy2 - dy1)} 像素 — 请点击上方绿色【确认】或按 Enter",
+            fg="green",
+        )
+
+    canvas.bind("<ButtonPress-1>", on_press)
+    canvas.bind("<B1-Motion>", on_drag)
+    canvas.bind("<ButtonRelease-1>", on_release)
+    root.bind("<Return>", lambda _e: on_confirm())
+    root.bind("<Escape>", lambda _e: on_cancel())
+    confirm_btn.focus_set()
+
+    # Center the window and keep it no larger than the screen
+    win_w = min(sw - 20, show_w + 24)
+    win_h = min(sh - 20, show_h + 180)
+    x0 = max(0, (sw - win_w) // 2)
+    y0 = max(0, (sh - win_h) // 2)
+    root.geometry(f"{win_w}x{win_h}+{x0}+{y0}")
+    root.minsize(min(win_w, 720), min(win_h, 480))
+
+    print("[basket] 已打开框选窗口：顶部有绿色【确认】按钮；拖框后点确认或按 Enter")
+    root.mainloop()
+
+    if cancelled["v"]:
+        raise ValueError("用户取消框选")
+    if state["x1"] is None or state["x2"] is None or state["y1"] is None or state["y2"] is None:
+        raise ValueError("未框选有效区域：请按住左键拖动画出矩形后点【确认】")
+    x1, y1, x2, y2 = state["x1"], state["y1"], state["x2"], state["y2"]
+    return float(x1), float(y1), float(x2 - x1), float(y2 - y1)  # (x, y, w, h) in disp_bgr pixels
+
+
+def _select_basket_roi_matplotlib(
+    disp_bgr,
+    *,
+    t_sec: float,
+    title: str,
+) -> tuple[float, float, float, float]:
+    """Select the ROI in a matplotlib window; closing the window confirms. Returns (x, y, w, h)."""
+    import matplotlib
+
+    matplotlib.use("TkAgg")
+    import matplotlib.pyplot as plt
+    from matplotlib.widgets import RectangleSelector
+
+    rgb = cv2.cvtColor(disp_bgr, cv2.COLOR_BGR2RGB)
+    h, w = rgb.shape[:2]
+    fig_w = min(16.0, max(8.0, w / 120.0))
+    fig_h = min(9.0, max(4.5, h / 120.0))
+    fig, ax = plt.subplots(figsize=(fig_w, fig_h))
+    ax.imshow(rgb)
+    ax.set_title(
+        f"{title}\n参考帧 t={t_sec:.2f}s | 鼠标左键拖框 | 可拖拽调整 | 关闭窗口确认",
+        fontsize=11,
+    )
+    ax.axis("off")
+
+    box: dict[str, float | None] = {"x1": None, "y1": None, "x2": None, "y2": None}
+
+    def onselect(eclick, erelease) -> None:
+        if any(v is None for v in (eclick.xdata, erelease.xdata, eclick.ydata, erelease.ydata)):
+            return
+        box["x1"] = float(min(eclick.xdata, erelease.xdata))
+        box["y1"] = float(min(eclick.ydata, erelease.ydata))
+        box["x2"] = float(max(eclick.xdata, erelease.xdata))
+        box["y2"] = float(max(eclick.ydata, erelease.ydata))
+
+    # Keep a reference until the function returns: matplotlib documents that a selector
+    # without one may be garbage-collected and stop responding to the mouse.
+    selector = RectangleSelector(
+        ax,
+        onselect,
+        useblit=False,
+        button=[1],
+        minspanx=10,
+        minspany=10,
+        spancoords="data",
+        interactive=True,
+    )
+    fig.canvas.manager.set_window_title(title)
+    plt.tight_layout()
+    print("[basket] 已打开 matplotlib 框选窗口：按住左键拖动画框，关闭窗口确认")
+    plt.show()
+
+    if box["x1"] is None or box["x2"] is None or box["y1"] is None or box["y2"] is None:
+        raise ValueError("未框选有效区域：请用鼠标拖出一个矩形后关闭窗口")
+    x1, y1, x2, y2 = box["x1"], box["y1"], box["x2"], box["y2"]
+    if x2 - x1 < 1 or y2 - y1 < 1:
+        raise ValueError("框选区域过小，请重新运行并框选篮子")
+    return x1, y1, x2 - x1, y2 - y1
+
+
+def _select_basket_roi_opencv(disp_bgr, *, title: str) -> tuple[float, float, float, float]:
+    """Run cv2.selectROI on ``disp_bgr`` and return the pick as float (x, y, w, h).
+
+    The named window and every other OpenCV window are destroyed before returning.
+    """
+    cv2.namedWindow(title, cv2.WINDOW_NORMAL)
+    cv2.resizeWindow(title, *disp_bgr.shape[1::-1])  # shape is (h, w, ...): pass (w, h)
+    x, y, w, h = map(float, cv2.selectROI(title, disp_bgr, showCrosshair=True, fromCenter=False))
+    cv2.destroyWindow(title)
+    cv2.destroyAllWindows()
+    return x, y, w, h
+
+
+def select_basket_roi(
+    video_path: Path,
+    *,
+    roi_frame: str | float = "middle",
+    window_title: str = "框选耗材篮子",
+    max_display_px: int = 1920,
+    roi_backend: str = "tkinter",
+) -> list[float]:
+    """Select the basket ROI in a popup (default backend: tkinter); returns xyxy clamped to the frame."""
+    cap = cv2.VideoCapture(str(video_path))
+    if not cap.isOpened():
+        raise RuntimeError(f"无法打开视频: {video_path}")
+    try:
+        frame, t_sec = _read_frame_at(cap, mode=roi_frame)
+        disp, scale = _scale_frame_for_display(frame, max_display_px)
+        backend = str(roi_backend).strip().lower()
+        if backend == "tkinter":
+            rx, ry, rw, rh = _select_basket_roi_tkinter(disp, t_sec=t_sec, title=window_title)
+        elif backend == "matplotlib":
+            rx, ry, rw, rh = _select_basket_roi_matplotlib(disp, t_sec=t_sec, title=window_title)
+        elif backend == "opencv":
+            print(f"[basket] 框选参考帧 t={t_sec:.2f}s，Enter/Space 确认，Esc 取消")
+            rx, ry, rw, rh = _select_basket_roi_opencv(disp, title=window_title)
+        else:
+            raise ValueError(f"未知 roi_backend: {roi_backend!r}，可选 tkinter / matplotlib / opencv")
+        if scale != 1.0:
+            rx, ry, rw, rh = rx / scale, ry / scale, rw / scale, rh / scale
+        roi = _roi_xyxy_from_select(int(round(rx)), int(round(ry)), int(round(rw)), int(round(rh)))
+        frame_h, frame_w = (float(v) for v in frame.shape[:2])  # clamp: a drag may end off-image
+        roi = [max(roi[0], 0.0), max(roi[1], 0.0), min(roi[2], frame_w), min(roi[3], frame_h)]
+        if roi[2] <= roi[0] or roi[3] <= roi[1]:
+            raise ValueError("未框选有效区域（宽高须 > 0）")
+        print(f"[basket] 篮子 ROI xyxy={roi}")
+        return roi
+    finally:
+        cap.release()
+        cv2.destroyAllWindows()
+
+
+def hands_contact_basket(
+    hand_boxes: list[list[float]],
+    basket_xyxy: list[float],
+    iou_threshold: float,
+) -> bool:
+    """Return True when any hand box has IoU with the basket strictly above the threshold."""
+    # The tiny epsilon keeps an IoU equal to the threshold from counting as contact.
+    limit = float(iou_threshold) + 1e-12
+    return any(bbox_iou_xyxy(box, basket_xyxy) > limit for box in hand_boxes)
+
+
+
+def filter_near_contact_starts(
+    starts: list[float],
+    min_interval_sec: float,
+    *,
+    log_fn: Callable[[str], None] | None = None,
+) -> list[float]:
+    """
+    Collapse contact rising edges that are too close together in time.
+
+    Edges are visited in ascending order; an edge closer than ``min_interval_sec``
+    to the most recently kept edge is dropped (and reported through ``log_fn``
+    when given). This suppresses repeated triggers caused by hand-box jitter,
+    e.g. 71.0s and 71.9s. A non-positive interval or an empty input returns a
+    plain copy of ``starts`` in its original order.
+    """
+    min_gap = float(min_interval_sec)
+    if not starts or min_gap <= 0:
+        return list(starts)
+
+    survivors: list[float] = []
+    for moment in sorted(starts):
+        too_close = bool(survivors) and moment - survivors[-1] < min_gap - 1e-9
+        if not too_close:
+            survivors.append(moment)
+            continue
+        if log_fn:
+            since_last = moment - survivors[-1]
+            log_fn(
+                f"[basket] 忽略近距离上升沿 t={moment:.3f}s "
+                f"（距上次 {since_last:.3f}s < {min_gap:g}s）"
+            )
+    return survivors
+
+
+def video_duration_sec(cap: cv2.VideoCapture) -> float:
+    """Return the duration of an opened capture in seconds.
+
+    Uses frame count / FPS when both are positive (FPS falls back to 25.0 when
+    the capture reports none). Otherwise it seeks to the end of the stream,
+    reads the position in milliseconds, and restores the previous position.
+    """
+    frame_rate = float(cap.get(cv2.CAP_PROP_FPS) or 25.0)
+    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0)
+    has_frame_metadata = total_frames > 0 and frame_rate > 0
+    if has_frame_metadata:
+        return total_frames / frame_rate
+
+    # Fallback: jump to the end, read the timestamp, then seek back.
+    saved_pos_ms = float(cap.get(cv2.CAP_PROP_POS_MSEC) or 0.0)
+    cap.set(cv2.CAP_PROP_POS_AVI_RATIO, 1.0)
+    tail_ms = float(cap.get(cv2.CAP_PROP_POS_MSEC) or 0.0)
+    cap.set(cv2.CAP_PROP_POS_MSEC, saved_pos_ms)
+    return max(0.0, tail_ms / 1000.0)
+
+
+def warn_if_hevc(video_path: Path) -> None:
+    """Print a warning when ffprobe reports the first video stream as HEVC/H.265.
+
+    This is an advisory, best-effort check: if ffprobe cannot be launched
+    (not installed, not executable, ...) or prints nothing usable, the
+    function returns silently and never raises for that reason.
+    """
+    cmd = [
+        "ffprobe",
+        "-v",
+        "error",
+        "-select_streams",
+        "v:0",
+        "-show_entries",
+        "stream=codec_name",
+        "-of",
+        "default=nw=1",
+        str(video_path),
+    ]
+    try:
+        out = subprocess.run(cmd, capture_output=True, text=True, check=False)
+    except OSError:
+        # Covers FileNotFoundError (ffprobe missing) as well as PermissionError etc.
+        return
+
+    # Inspect every output line rather than assuming exactly one
+    # "codec_name=..." entry, so repeated entries cannot hide an HEVC stream.
+    codecs = {
+        line.split("=", 1)[-1].strip().lower()
+        for line in (out.stdout or "").splitlines()
+        if line.strip()
+    }
+    if codecs & {"hevc", "h265"}:
+        print(
+            "[basket] 警告: 检测到 HEVC 编码，VideoSwin 不受影响但 OpenCV 解码可能不稳定；"
+            "建议先运行 scripts/remux_hevc.sh 转 H.264"
+        )
+
+
+def scan_contact_segments(
+    video_path: Path,
+    det_model: YOLO | str | Path,
+    basket_xyxy: list[float],
+    *,
+    contact_iou_threshold: float = 0.05,
+    contact_iou_on: float | None = None,
+    contact_iou_off: float | None = None,
+    confirm_seconds: float = 0.4,
+    cooldown_seconds: float = 5.0,
+    segment_start_offset_sec: float = 1.0,
+    segment_end_offset_sec: float = 5.0,
+    min_segment_sec: float = 4.0,
+    scan_frame_stride: int = 1,
+    det_conf: float = 0.6,
+    imgsz_det: int = 640,
+    device: str = "cuda",
+    half: bool = False,
+    log_fn: Callable[[str], None] | None = print,
+) -> list[tuple[float, float, float]]:
+    """
+    Scan the whole video for hand-basket contact events and build one segment per event.
+
+    Every ``scan_frame_stride``-th frame is run through the hand detector and fed
+    to ``ActionTriggerLogic.process_frame``; each event time it returns is a contact
+    start. Starts closer together than ``cooldown_seconds`` are then merged by
+    ``filter_near_contact_starts``. Each remaining start yields the segment
+    [contact + segment_start_offset_sec, contact + segment_end_offset_sec], with
+    the end clipped to the video duration when that duration is known. Segments
+    shorter than ``min_segment_sec`` after clipping are dropped.
+
+    Returns:
+        A list of ``(start_sec, end_sec, score)`` tuples; score is always 1.0.
+
+    Raises:
+        ValueError: ``segment_end_offset_sec`` is not greater than
+            ``segment_start_offset_sec``.
+        RuntimeError: the video cannot be opened.
+    """
+    iou_on, iou_off = resolve_contact_iou_thresholds(
+        contact_iou_threshold=contact_iou_threshold,
+        contact_iou_on=contact_iou_on,
+        contact_iou_off=contact_iou_off,
+    )
+    model = det_model if isinstance(det_model, YOLO) else YOLO(str(det_model))
+    predict_kw: dict[str, Any] = {"device": device}
+    if half:
+        predict_kw["half"] = True
+
+    stride = max(1, int(scan_frame_stride))
+    t_start_off = float(segment_start_offset_sec)
+    t_end_off = float(segment_end_offset_sec)
+    if t_end_off <= t_start_off + 1e-9:
+        raise ValueError(
+            f"segment_end_offset_sec ({t_end_off}) 须大于 segment_start_offset_sec ({t_start_off})"
+        )
+    basket = [float(v) for v in basket_xyxy]
+
+    cap = cv2.VideoCapture(str(video_path))
+    if not cap.isOpened():
+        raise RuntimeError(f"无法打开视频: {video_path}")
+
+    starts: list[float] = []
+    frame_idx = 0
+
+    # Everything that needs the open capture runs inside the try block so the
+    # capture is released even if trigger construction or duration probing fails.
+    try:
+        fps = float(cap.get(cv2.CAP_PROP_FPS) or 25.0)
+        trigger = ActionTriggerLogic(
+            fps=fps,
+            confirm_seconds=float(confirm_seconds),
+            cooldown_seconds=float(cooldown_seconds),
+            threshold_on=iou_on,
+            threshold_off=iou_off,
+        )
+
+        duration = video_duration_sec(cap)
+        # video_duration_sec may seek; rewind before the sequential scan.
+        cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
+
+        while True:
+            ok, frame = cap.read()
+            if not ok or frame is None:
+                break
+            frame_idx += 1
+            # Frames are still decoded sequentially; only detection is skipped.
+            if stride > 1 and (frame_idx - 1) % stride != 0:
+                continue
+
+            t_sec = float(cap.get(cv2.CAP_PROP_POS_MSEC)) / 1000.0
+            r0 = model.predict(frame, conf=det_conf, imgsz=imgsz_det, verbose=False, **predict_kw)[0]
+            hands = collect_hand_boxes(model, r0.boxes) if r0.boxes else []
+            event_t = trigger.process_frame(t_sec, hands, basket)
+            if event_t is not None:
+                starts.append(event_t)
+                if log_fn:
+                    log_fn(f"[basket] 接触上升沿 t={event_t:.3f}s")
+    finally:
+        cap.release()
+
+    starts = filter_near_contact_starts(
+        starts, float(cooldown_seconds), log_fn=log_fn
+    )
+
+    segs: list[tuple[float, float, float]] = []
+    min_seg = float(min_segment_sec)
+    for t_contact in starts:
+        t0 = t_contact + t_start_off
+        t1 = t_contact + t_end_off
+        # Only clip when the duration is known (positive).
+        if duration > 0:
+            t1 = min(t1, duration)
+        if t1 <= t0 + 1e-9:
+            continue
+        seg_len = t1 - t0
+        if min_seg > 0 and seg_len < min_seg - 1e-9:
+            if log_fn:
+                log_fn(
+                    f"[basket] 丢弃截断短段 [{t0:.3f}, {t1:.3f}] "
+                    f"时长 {seg_len:.3f}s < {min_seg:g}s"
+                )
+            continue
+        segs.append((t0, t1, 1.0))
+
+    # Reported in the summary log only.
+    confirm_frames = max(1, int(round(float(confirm_seconds) * fps)))
+    if log_fn:
+        log_fn(
+            f"[basket] 扫描完成: {len(segs)} 段 "
+            f"([contact+{t_start_off:g}, contact+{t_end_off:g}]s, "
+            f"IoU on>{iou_on:g} off<={iou_off:g}, "
+            f"confirm={float(confirm_seconds):g}s (~{confirm_frames} frames), "
+            f"cooldown={float(cooldown_seconds):g}s"
+            + (f", min_segment>={min_seg:g}s" if min_seg > 0 else "")
+            + ")"
+        )
+    return segs
+
+
+def build_segments_from_basket(
+    video_path: Path,
+    hand_model: Path,
+    *,
+    basket_roi_json: Path | None = None,
+    save_roi_json: Path | None = None,
+    skip_roi_select: bool = False,
+    roi_frame: str | float = "middle",
+    roi_backend: str = "tkinter",
+    contact_iou_threshold: float = 0.05,
+    contact_iou_on: float | None = None,
+    contact_iou_off: float | None = None,
+    confirm_seconds: float = 0.4,
+    cooldown_seconds: float = 5.0,
+    segment_start_offset_sec: float = 1.0,
+    segment_end_offset_sec: float = 5.0,
+    min_segment_sec: float = 4.0,
+    scan_frame_stride: int = 1,
+    det_conf: float = 0.6,
+    imgsz_det: int = 640,
+    device: str = "cuda",
+    half: bool = False,
+    log_fn: Callable[[str], None] | None = print,
+) -> tuple[list[tuple[float, float, float]], list[float]]:
+    """Resolve the basket ROI, then scan the video for contact segments.
+
+    The ROI is loaded from ``basket_roi_json`` when that file exists; otherwise
+    it is selected interactively, unless ``skip_roi_select`` forbids that. The
+    ROI is written to ``save_roi_json`` when given.
+
+    Returns:
+        ``(segments, basket_xyxy)``.
+
+    Raises:
+        ValueError: ``skip_roi_select`` is set but no ROI JSON file is available.
+    """
+    warn_if_hevc(video_path)
+
+    roi_file_available = basket_roi_json is not None and basket_roi_json.is_file()
+    if not roi_file_available and skip_roi_select:
+        raise ValueError("skip_roi_select 需要有效的 --basket-roi-json")
+
+    if roi_file_available:
+        roi = load_basket_roi_json(basket_roi_json)
+        if log_fn:
+            log_fn(f"[basket] 从 JSON 加载 ROI: {basket_roi_json}")
+    else:
+        roi = select_basket_roi(video_path, roi_frame=roi_frame, roi_backend=roi_backend)
+
+    if save_roi_json is not None:
+        save_basket_roi_json(save_roi_json, roi, video_path=video_path)
+        if log_fn:
+            log_fn(f"[basket] ROI 已保存: {save_roi_json}")
+
+    # Scan settings are forwarded unchanged.
+    scan_options = dict(
+        contact_iou_threshold=contact_iou_threshold,
+        contact_iou_on=contact_iou_on,
+        contact_iou_off=contact_iou_off,
+        confirm_seconds=confirm_seconds,
+        cooldown_seconds=cooldown_seconds,
+        segment_start_offset_sec=segment_start_offset_sec,
+        segment_end_offset_sec=segment_end_offset_sec,
+        min_segment_sec=min_segment_sec,
+        scan_frame_stride=scan_frame_stride,
+        det_conf=det_conf,
+        imgsz_det=imgsz_det,
+        device=device,
+        half=half,
+        log_fn=log_fn,
+    )
+    segs = scan_contact_segments(video_path, hand_model, roi, **scan_options)
+    return segs, roi
diff --git a/src/config.py b/src/config.py
new file mode 100644
index 0000000..9b80ced
--- /dev/null
+++ b/src/config.py
@@ -0,0 +1,172 @@
+"""加载 configs/*.yaml，解析为运行参数 Namespace。"""
+from __future__ import annotations
+
+import sys
+from argparse import Namespace
+from pathlib import Path
+from typing import Any
+
+import yaml
+
+
+def _rel(pack_root: Path, raw: str | None) -> Path | None:
+    """Resolve a configured path: absolute paths as-is, relative ones against ``pack_root``.
+
+    Returns ``None`` when ``raw`` is ``None``.
+    """
+    if raw is None:
+        return None
+    candidate = Path(raw)
+    anchored = candidate if candidate.is_absolute() else pack_root / candidate
+    return anchored.resolve()
+
+
+def load_run_config(pack_root: Path, config_path: Path) -> Namespace:
+    """Load a run-configuration YAML file and flatten it into a ``Namespace``.
+
+    Sections ``io``, ``phase2`` and ``classification`` are required. All other
+    sections are optional, and an optional section that is absent or empty
+    (a header with no children parses as ``None``) is treated as ``{}``.
+    Path-valued options are resolved against ``pack_root`` by ``_rel``.
+
+    Raises:
+        ValueError: the YAML document is empty or its top level is not a mapping.
+        KeyError: a required section or required key is missing.
+    """
+    pack_root = pack_root.resolve()
+    data = yaml.safe_load(config_path.read_text(encoding="utf-8"))
+    if not isinstance(data, dict):
+        raise ValueError(f"配置文件为空或顶层不是映射: {config_path}")
+
+    def _optional(name: str) -> dict[str, Any]:
+        # `section:` with every child commented out loads as None, not {}.
+        return data.get(name) or {}
+
+    io = data["io"]
+    w = _optional("weights")
+    rt = _optional("runtime")
+    dev = _optional("device")
+    p2 = data["phase2"]
+    cl = data["classification"]
+    gm = _optional("gap_merge")
+    outopt = _optional("output")
+    did = _optional("doctor_identity")
+    bk = _optional("basket")
+    st = _optional("stream")
+
+    # Fall back to the running interpreter when no Python executable is configured.
+    py = rt.get("python")
+    python_exe = sys.executable if py is None or str(py).strip() == "" else str(py)
+
+    whitelist_raw = io.get("whitelist_json")
+    whitelist_path = _rel(pack_root, whitelist_raw) if whitelist_raw else None
+
+    work_raw = rt.get("work_dir")
+    work_dir = _rel(pack_root, work_raw) if work_raw else None
+
+    doctor_ckpt_raw = did.get("checkpoint", "doctor_identity_package/doctor_info.pth")
+    doctor_labels_raw = did.get("labels_csv", "doctor_identity_package/labels.csv")
+
+    basket_save_raw = bk.get("save_roi_json")
+    basket_load_raw = bk.get("load_roi_json")
+    # roi_frame is either a number or a keyword string such as "middle".
+    basket_roi_frame = bk.get("roi_frame", "middle")
+    if isinstance(basket_roi_frame, (int, float)):
+        basket_roi_frame = float(basket_roi_frame)
+    else:
+        basket_roi_frame = str(basket_roi_frame)
+
+    # Hysteresis thresholds: explicit on/off values win; otherwise they are
+    # derived from the single legacy threshold. "off" is forced below "on".
+    legacy_contact_iou = float(bk.get("contact_iou_threshold", 0.05))
+    on_raw = bk.get("contact_iou_on")
+    off_raw = bk.get("contact_iou_off")
+    basket_contact_iou_on = float(on_raw) if on_raw is not None else legacy_contact_iou
+    basket_contact_iou_off = (
+        float(off_raw) if off_raw is not None else max(legacy_contact_iou * 0.6, 0.01)
+    )
+    if basket_contact_iou_off >= basket_contact_iou_on:
+        basket_contact_iou_off = max(basket_contact_iou_on - 0.02, 0.01)
+
+    # pad_bottom_ratio supersedes the older pad_ratio key; both outputs share the value.
+    pad_bottom = float(p2.get("pad_bottom_ratio", p2.get("pad_ratio", 0.5)))
+
+    # Not used by the basket/stream entry points by default; main.py (ActionFormer)
+    # or tear-merge can configure these separately in the YAML.
+    actionformer_raw = w.get("actionformer")
+    tear_raw = w.get("tear")
+    p1 = _optional("phase1")
+    tm = _optional("tear_merge")
+
+    return Namespace(
+        video=_rel(pack_root, io["video"]),
+        excel=_rel(pack_root, io["excel"]),
+        out=_rel(pack_root, io["out"]),
+        whitelist_json=whitelist_path,
+        use_whitelist=bool(io.get("use_whitelist", True)),
+        work_dir=work_dir,
+        keep_work_dir=bool(rt.get("keep_work_dir", False)),
+        python=python_exe,
+        actionformer_ckpt=_rel(pack_root, actionformer_raw) if actionformer_raw else None,
+        hand_model=_rel(pack_root, w["hand"]),
+        goodbad_model=_rel(pack_root, w["goodbad"]),
+        haocai_model=_rel(pack_root, w["haocai"]),
+        tear_model=_rel(pack_root, tear_raw) if tear_raw else None,
+        device=str(dev.get("type", "cuda")),
+        half=bool(dev.get("half", False)),
+        af_min_score=float(p1.get("af_min_score", 0.1)),
+        af_min_seg_seconds=float(p1.get("af_min_seg_seconds", 2.0)),
+        feat_batch_size=int(p1.get("feat_batch_size", 1)),
+        seek_margin_sec=float(p2["seek_margin_sec"]),
+        frame_stride=int(p2["frame_stride"]),
+        det_conf=float(p2["det_conf"]),
+        pad_bottom_ratio=pad_bottom,
+        pad_ratio=pad_bottom,
+        imgsz_det=int(p2["imgsz_det"]),
+        merge_iou_gt=float(p2["merge_iou_gt"]),
+        merge_center_dist_max_px=(
+            float(p2["merge_center_dist_max_px"])
+            if p2.get("merge_center_dist_max_px") is not None
+            else None
+        ),
+        merge_center_dist_max_frac_diag=(
+            float(p2["merge_center_dist_max_frac_diag"])
+            if p2.get("merge_center_dist_max_frac_diag") is not None
+            else None
+        ),
+        tracking_alpha=float(p2.get("tracking_alpha", 0.6)),
+        tracking_max_lost_frames=int(p2.get("tracking_max_lost_frames", 0)),
+        imgsz_cls=int(cl["imgsz_cls"]),
+        good_top1_conf_threshold=float(cl["good_top1_conf_threshold"]),
+        good_top1_retry_threshold=float(cl["good_top1_retry_threshold"]),
+        haocai_min_conf=float(cl["haocai_min_conf"]),
+        haocai_min_conf_retry=float(cl["haocai_min_conf_retry"]),
+        empty_cache_every=int(cl.get("empty_cache_every", 0)),
+        legacy_12_col_only=bool(outopt.get("legacy_12_col_only", True)),
+        merge_adjacent_tear=bool(tm.get("merge_adjacent_tear", False)),
+        tear_merge_weights=_rel(pack_root, tm["tear_merge_weights"])
+        if tm.get("tear_merge_weights")
+        else None,
+        tear_merge_class=str(tm.get("tear_merge_class", "tearing")),
+        tear_merge_head_sec=float(tm.get("tear_merge_head_sec", 3.0)),
+        tear_merge_prob=float(tm.get("tear_merge_prob", 0.9)),
+        tear_merge_min_frames=int(tm.get("tear_merge_min_frames", 6)),
+        tear_merge_verbose=bool(tm.get("tear_merge_verbose", False)),
+        tear_merge_full_frame=bool(tm.get("tear_merge_full_frame", False)),
+        gap_merge_enabled=bool(gm.get("enabled", False)),
+        gap_merge_max_gap_sec=float(gm.get("max_gap_sec", 2.0)),
+        doctor_identity_enabled=bool(did.get("enabled", True)),
+        doctor_identity_checkpoint=_rel(pack_root, doctor_ckpt_raw),
+        doctor_identity_labels_csv=_rel(pack_root, doctor_labels_raw),
+        doctor_identity_pose_min_detection_confidence=float(
+            did.get("pose_min_detection_confidence", 0.3)
+        ),
+        doctor_identity_min_identity_confidence=float(did.get("min_identity_confidence", 0.0)),
+        doctor_identity_middle_seconds=float(did.get("middle_seconds", 10.0)),
+        doctor_identity_sample_fps=float(did.get("sample_fps", 3.0)),
+        doctor_identity_pad_frac=float(did.get("pad_frac", 0.15)),
+        basket_det_conf=float(bk.get("det_conf", p2["det_conf"])),
+        basket_contact_iou_threshold=legacy_contact_iou,
+        basket_contact_iou_on=basket_contact_iou_on,
+        basket_contact_iou_off=basket_contact_iou_off,
+        basket_confirm_seconds=float(bk.get("confirm_seconds", 0.4)),
+        basket_cooldown_seconds=float(bk.get("cooldown_seconds", 5.0)),
+        basket_segment_start_offset_sec=float(bk.get("segment_start_offset_sec", 1.0)),
+        basket_segment_end_offset_sec=float(bk.get("segment_end_offset_sec", 5.0)),
+        basket_min_segment_sec=float(bk.get("min_segment_sec", 4.0)),
+        basket_scan_frame_stride=int(bk.get("scan_frame_stride", 1)),
+        basket_roi_frame=basket_roi_frame,
+        basket_save_roi_json=_rel(pack_root, basket_save_raw) if basket_save_raw else None,
+        basket_load_roi_json=_rel(pack_root, basket_load_raw) if basket_load_raw else None,
+        basket_skip_roi_select=bool(bk.get("skip_roi_select", False)),
+        basket_roi_backend=str(bk.get("roi_backend", "tkinter")),
+        stream_rtsp=st.get("rtsp"),
+        stream_ring_buffer_sec=float(st.get("ring_buffer_sec", 10.0)),
+        stream_fps=float(st.get("fps", 25.0)),
+        stream_cache_max_width=int(st.get("cache_max_width", 1920)),
+        stream_jpeg_quality=int(st.get("jpeg_quality", 85)),
+        # Stream offsets fall back to the basket section before their own defaults.
+        stream_segment_start_offset_sec=float(
+            st.get("segment_start_offset_sec", bk.get("segment_start_offset_sec", 1.0))
+        ),
+        stream_segment_end_offset_sec=float(
+            st.get("segment_end_offset_sec", bk.get("segment_end_offset_sec", 6.0))
+        ),
+        stream_min_segment_sec=float(
+            st.get("min_segment_sec", bk.get("min_segment_sec", 4.0))
+        ),
+        stream_infer_source=str(st.get("infer_source", "file")).strip().lower(),
+        stream_infer_fallback=str(st.get("infer_fallback", "cache")).strip().lower(),
+    )
diff --git a/src/excel_segments.py b/src/excel_segments.py
new file mode 100644
index 0000000..a5f8bdd
--- /dev/null
+++ b/src/excel_segments.py
@@ -0,0 +1,154 @@
+"""从 Excel 时间段列加载段列表，供 debug 主流程替代 ActionFormer。"""
+from __future__ import annotations
+
+import re
+from pathlib import Path
+from typing import List, Tuple
+
+import cv2
+import pandas as pd
+
+from pack_utils import log
+
+
+def parse_mm_ss_to_seconds(value: str) -> float:
+    """Convert a legacy ``mm.ss`` string (or a plain integer-seconds string) to seconds.
+
+    With a dot, the part before it is minutes and the part after it is whole
+    seconds (an empty part counts as 0). Without a dot the text is integer seconds.
+
+    Raises:
+        ValueError: the value is empty, not integral, or its seconds part is >= 60.
+    """
+    text = str(value).strip()
+    if not text:
+        raise ValueError("empty time value")
+
+    minute_part, dot, second_part = text.partition(".")
+    if not dot:
+        return float(int(text))
+
+    minutes = int(minute_part) if minute_part else 0
+    seconds = int(second_part) if second_part else 0
+    if seconds >= 60:
+        raise ValueError(f"invalid mm.ss seconds >= 60: {text}")
+    return float(minutes * 60 + seconds)
+
+
+def _is_legacy_mm_dot_ss(token: str) -> bool:
+    """Tell whether ``token`` looks like legacy ``mm.ss``: digits, a dot, then 1-2 digits."""
+    whole, dot, frac = token.partition(".")
+    return bool(dot) and whole.isdigit() and frac.isdigit() and 1 <= len(frac) <= 2
+
+
+def parse_time_token(t: str) -> float:
+    """Parse a single time token into seconds.
+
+    Accepted forms: ``h:m:s`` or ``m:s`` (ASCII or full-width colon), legacy
+    ``mm.ss`` (see ``_is_legacy_mm_dot_ss``), or a plain number of seconds.
+
+    Raises:
+        ValueError: the token is empty or cannot be interpreted.
+    """
+    token = str(t).strip().replace("：", ":")
+    if not token:
+        raise ValueError("empty token")
+
+    if ":" not in token:
+        if _is_legacy_mm_dot_ss(token):
+            return parse_mm_ss_to_seconds(token)
+        return float(token)
+
+    fields = [float(piece) for piece in token.split(":")]
+    if len(fields) == 3:
+        hours, minutes, seconds = fields
+        return hours * 3600.0 + minutes * 60.0 + seconds
+    if len(fields) == 2:
+        minutes, seconds = fields
+        return minutes * 60.0 + seconds
+    raise ValueError(f"bad colon time: {token}")
+
+
+def parse_cell_to_segments_v2(cell: object) -> List[Tuple[float, float]]:
+    """Parse one cell holding zero or more "start-end" ranges into ``(start, end)`` seconds.
+
+    Ranges may be separated by ``;``, ``,``, newlines or their full-width forms;
+    each endpoint is parsed by ``parse_time_token`` (colon, mm.ss or plain seconds).
+    Besides the ASCII hyphen, common dash/tilde look-alikes (－ — – − ~ ～) are
+    accepted as the start/end separator. Chunks that cannot be parsed, and
+    ranges whose end is not greater than their start, are skipped silently.
+    An empty or NaN cell yields an empty list.
+    """
+    if cell is None or (isinstance(cell, float) and pd.isna(cell)):
+        return []
+    text = str(cell).strip()
+    if not text:
+        return []
+    text = (
+        text.replace("；", ";")
+        .replace("，", ",")
+        .replace("、", ",")
+        .replace("\n", ";")
+        .replace("：", ":")
+        .replace(" ", "")
+    )
+    # Cells typed with a CJK input method often use a full-width dash or a
+    # tilde between start and end; normalise them so the range is not dropped.
+    for dash_like in ("－", "—", "–", "−", "~", "～"):
+        text = text.replace(dash_like, "-")
+
+    chunks = re.split(r"[;,]+", text)
+    segments: List[Tuple[float, float]] = []
+    for ch in chunks:
+        if not ch:
+            continue
+        # Non-greedy left side: split at the first hyphen.
+        m = re.match(r"^(.+?)\-(.+)$", ch)
+        if not m:
+            continue
+        left, right = m.group(1), m.group(2)
+        try:
+            s = parse_time_token(left)
+            e = parse_time_token(right)
+        except (ValueError, TypeError):
+            continue
+        if e > s:
+            segments.append((s, e))
+    return segments
+
+
+def _video_duration_sec(video_path: Path | None) -> float | None:
+    """Return the video duration in seconds (frame count / FPS), or ``None`` if unavailable.
+
+    ``None`` is returned when no path is given, the file cannot be opened, or
+    the capture reports a non-positive FPS or frame count.
+    """
+    if video_path is None:
+        return None
+    cap = cv2.VideoCapture(str(video_path))
+    if not cap.isOpened():
+        return None
+    frame_rate = float(cap.get(cv2.CAP_PROP_FPS)) or 0.0
+    frame_total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    cap.release()
+    return frame_total / frame_rate if frame_rate > 0 and frame_total > 0 else None
+
+
+def load_segments_from_excel_column_i(
+    excel_path: Path,
+    *,
+    col_index: int = 8,
+    sheet_name: int | str = 0,
+    video_path: Path | None = None,
+    default_score: float = 1.0,
+) -> list[tuple[float, float, float]]:
+    """
+    Collect the time ranges from one Excel column across all rows.
+
+    The column at ``col_index`` (default 8, i.e. column I) is used when the sheet
+    has that many columns; otherwise the first column whose header contains
+    「时间段」 is used. When ``video_path`` yields a duration, every range is
+    clipped to [0, duration] and ranges left empty are discarded. The result is
+    sorted by (start, end) and returned as ``(start, end, default_score)`` tuples.
+
+    Raises:
+        ValueError: the sheet has too few columns and no 「时间段」 column.
+    """
+    excel_path = excel_path.resolve()
+    df = pd.read_excel(excel_path, sheet_name=sheet_name, header=0)
+
+    if df.shape[1] > col_index:
+        time_series = df.iloc[:, col_index]
+        time_col_name = str(df.columns[col_index])
+    else:
+        named_cols = [c for c in df.columns if "时间段" in str(c)]
+        if not named_cols:
+            raise ValueError(
+                f"Excel 列数不足且未找到含「时间段」的列: {excel_path} (cols={df.shape[1]})"
+            )
+        time_col_name = str(named_cols[0])
+        time_series = df[time_col_name]
+
+    duration = _video_duration_sec(video_path)
+    clipped: list[tuple[float, float]] = []
+    invalid_cnt = 0
+
+    for cell in time_series.tolist():
+        for seg_start, seg_end in parse_cell_to_segments_v2(cell):
+            if duration is None:
+                lo, hi = seg_start, seg_end
+            else:
+                lo = max(0.0, min(seg_start, duration))
+                hi = max(0.0, min(seg_end, duration))
+            if hi <= lo:
+                # Clipping left nothing of this range.
+                invalid_cnt += 1
+                continue
+            clipped.append((lo, hi))
+
+    score = float(default_score)
+    segs_out = [(lo, hi, score) for lo, hi in sorted(clipped)]
+
+    summary = (
+        f"[debug] Excel 时间段列「{time_col_name}」(index={col_index}) "
+        f"→ {len(segs_out)} 段"
+    )
+    if invalid_cnt:
+        summary += f"，丢弃无效 {invalid_cnt} 段"
+    log(summary)
+    if duration is not None:
+        log(f"[debug] 视频时长 {duration:.3f}s，段已裁剪到 [0, duration]")
+
+    return segs_out
diff --git a/src/orchestrator.py b/src/orchestrator.py
new file mode 100644
index 0000000..dc25b8b
--- /dev/null
+++ b/src/orchestrator.py
@@ -0,0 +1,532 @@
+"""主流程编排：与仓库 main_pipeline.PipelineManager 逻辑一致，参数来自 YAML（SimpleNamespace）。"""
+from __future__ import annotations
+
+import importlib.util
+import tempfile
+from argparse import Namespace
+from pathlib import Path
+from typing import Any
+
+import cv2
+import run_haocai_actionformer_consumables_e2e as e2e
+from actionformer_utils import ActionSegmenter
+from excel_segments import load_segments_from_excel_column_i
+from pipeline.hand_roi_merge import HandMergeConfig, HandRoiGrouper
+from pipeline.segment_processor import (
+    HaocaiOnlyClassifier,
+    process_segment_haocai_from_cap_with_gate_retries,
+)
+from pipeline.gap_adjacent_merge import merge_all_by_gap
+from pipeline.tear_gate_merge import (
+    merge_all,
+    parse_e2e_rows_from_body_lines,
+    tear_class_index,
+)
+from run_segments_consumable_vote import pad_box_bottom_only as _pad_box
+from ultralytics import YOLO
+
+from basket_segmenter import build_segments_from_basket
+from pack_utils import load_allowed_names_from_excel, log, resolve_allowed_class_idx
+from stream_orchestrator import _haocai_infer_kwargs
+
+
+def _load_doctor_module(script_path: Path) -> Any:
+    """Import the doctor-identity script from its file path and return the module object.
+
+    Raises:
+        RuntimeError: no import spec or loader can be built for ``script_path``.
+    """
+    spec = importlib.util.spec_from_file_location("doctor_identity_runtime", script_path)
+    loader = None if spec is None else spec.loader
+    if loader is None:
+        raise RuntimeError(f"无法加载医生识别脚本: {script_path}")
+    loaded = importlib.util.module_from_spec(spec)
+    loader.exec_module(loaded)
+    return loaded
+
+
+def _infer_doctor_text(args: Namespace, video_path: Path) -> str:
+    """Return a human-readable doctor-identity result for the video.
+
+    Problems are reported in the returned text rather than raised: a disabled
+    feature, a missing checkpoint / label CSV / inference script, or any
+    exception thrown while running the doctor-identity code.
+    """
+    enabled = bool(getattr(args, "doctor_identity_enabled", True))
+    if not enabled:
+        return "未启用"
+
+    checkpoint = Path(args.doctor_identity_checkpoint).resolve()
+    labels_csv = Path(args.doctor_identity_labels_csv).resolve()
+    if not checkpoint.is_file():
+        return f"识别失败（缺少权重: {checkpoint}）"
+    if not labels_csv.is_file():
+        return f"识别失败（缺少标签映射: {labels_csv}）"
+
+    # The inference script sits in doctor_identity_package/ two levels above this file.
+    package_dir = Path(__file__).resolve().parent.parent / "doctor_identity_package"
+    script_path = package_dir / "infer_doctor_from_video.py"
+    if not script_path.is_file():
+        return f"识别失败（缺少脚本: {script_path}）"
+
+    try:
+        doctor = _load_doctor_module(script_path)
+        pose_model = doctor._ensure_pose_lite_model(script_path.parent / ".mediapipe_models")
+        pose_options = doctor.PoseLandmarkerOptions(
+            base_options=doctor.BaseOptions(model_asset_path=str(pose_model)),
+            running_mode=doctor.VisionRunningMode.IMAGE,
+            min_pose_detection_confidence=float(
+                args.doctor_identity_pose_min_detection_confidence
+            ),
+        )
+        landmarker = doctor.PoseLandmarker.create_from_options(pose_options)
+        try:
+            best_crop = doctor.pick_best_person_crop(
+                video_path=video_path,
+                landmarker=landmarker,
+                middle_seconds=float(args.doctor_identity_middle_seconds),
+                sample_fps=float(args.doctor_identity_sample_fps),
+                pad_frac=float(args.doctor_identity_pad_frac),
+            )
+        finally:
+            landmarker.close()
+
+        raw_pid, conf = doctor.run_inference(best_crop, checkpoint)
+        min_conf = float(args.doctor_identity_min_identity_confidence)
+        names_by_id = doctor.load_name_mapping(labels_csv)
+        doctor_name = names_by_id.get(str(raw_pid), "")
+        # Low-confidence results are still returned, just flagged.
+        suffix = " [低置信度]" if conf < min_conf else ""
+        if not doctor_name:
+            return f"doctor_id={raw_pid} (conf={conf:.4f}){suffix}"
+        return f"{doctor_name} (id={raw_pid}, conf={conf:.4f}){suffix}"
+    except Exception as exc:  # noqa: BLE001
+        return f"识别失败（{exc}）"
+
+
+def _resolve_allowed_names(args: Namespace, excel_path: Path) -> list[str] | None:
+    """Work out which consumable names are allowed.
+
+    Returns ``[]`` when the whitelist is disabled, the names from the whitelist
+    JSON when one is configured, otherwise the names read from the Excel file.
+    Returns ``None`` (after logging) when the configured whitelist JSON is missing.
+    """
+    if not getattr(args, "use_whitelist", True):
+        return []
+
+    whitelist = args.whitelist_json
+    if whitelist is None:
+        return load_allowed_names_from_excel(excel_path)
+    if whitelist.is_file():
+        return e2e.load_whitelist_json(whitelist.resolve())
+
+    log(f"找不到白名单 JSON: {args.whitelist_json}")
+    return None
+
+
+def _validate_phase2_weights(args: Namespace, *, require_actionformer: bool) -> bool:
+    """Check that every weight file needed for this run exists; log the first one missing.
+
+    The hand, good/bad-frame and consumable models are always required. The
+    ActionFormer checkpoint is required when ``require_actionformer`` is set.
+    With tear merging enabled, the tear model and the tear-merge weights
+    (falling back to the tear model) are required too.
+
+    Returns:
+        ``True`` when all required files exist, ``False`` otherwise.
+    """
+    # Read the flag once so a Namespace without it means "disabled" at both
+    # checks below, rather than raising AttributeError at the second one.
+    merge_tear = bool(getattr(args, "merge_adjacent_tear", False))
+
+    checks: list[tuple[Any, str]] = [
+        (args.hand_model, "手部检测"),
+        (args.goodbad_model, "好坏帧"),
+        (args.haocai_model, "耗材分类"),
+    ]
+    if require_actionformer:
+        checks.insert(0, (args.actionformer_ckpt, "ActionFormer ckpt"))
+    if merge_tear:
+        checks.append((args.tear_model, "撕膜分类"))
+    for p, lab in checks:
+        if p is None or not Path(p).is_file():
+            log(f"缺少{lab}: {p}")
+            return False
+    if merge_tear:
+        # tear_model was verified above, so the fallback is never None here.
+        tmw = Path(getattr(args, "tear_merge_weights", None) or args.tear_model).resolve()
+        if not tmw.is_file():
+            log(f"撕膜门控需要权重文件: {tmw}")
+            return False
+    return True
+
+
+def _filter_segments_by_min_length(
+    segs: list[tuple[float, float, float]], min_seg_seconds: float
+) -> list[tuple[float, float, float]]:
+    """Keep only segments at least ``min_seg_seconds`` long (with a 1e-9 tolerance).
+
+    A non-positive minimum disables filtering and returns ``segs`` itself.
+    """
+    if min_seg_seconds <= 0:
+        return segs
+    long_enough: list[tuple[float, float, float]] = []
+    for start, end, score in segs:
+        if (end - start) >= min_seg_seconds - 1e-9:
+            long_enough.append((start, end, score))
+    return long_enough
+
+
+class PipelineManager:
+    def __init__(self, args: Namespace) -> None:
+        self.args = args
+
+    def run(self) -> int:
+        args = self.args
+        video_path = args.video.resolve()
+        if not video_path.is_file():
+            log(f"找不到视频: {video_path}")
+            return 1
+        excel_path = args.excel.resolve()
+        if not excel_path.is_file():
+            log(f"找不到 Excel: {excel_path}")
+            return 1
+
+        allowed_names = _resolve_allowed_names(args, excel_path)
+        if allowed_names is None:
+            return 1
+        if not _validate_phase2_weights(args, require_actionformer=True):
+            return 1
+
+        stem = video_path.stem
+        tmp_ctx: tempfile.TemporaryDirectory | None = None
+        if args.work_dir is not None:
+            work = Path(args.work_dir).resolve()
+            work.mkdir(parents=True, exist_ok=True)
+        elif args.keep_work_dir:
+            work = Path(tempfile.mkdtemp(prefix="main_pipeline_"))
+            log(f"工作目录（保留）: {work}")
+        else:
+            tmp_ctx = tempfile.TemporaryDirectory(prefix="main_pipeline_")
+            work = Path(tmp_ctx.name)
+
+        try:
+            product_map = e2e.load_product_code_map(excel_path)
+            segs = ActionSegmenter.build_segments(
+                video_path=video_path,
+                stem=stem,
+                work=work,
+                actionformer_ckpt=args.actionformer_ckpt,
+                af_min_score=args.af_min_score,
+                af_min_seg_seconds=args.af_min_seg_seconds,
+                python_exe=args.python,
+                feat_batch_size=args.feat_batch_size,
+                device=args.device,
+            )
+            return self._run_phase2_and_write(
+                segs,
+                video_path=video_path,
+                excel_path=excel_path,
+                allowed_names=allowed_names,
+                product_map=product_map,
+                work_dir_log=work if args.work_dir is not None or args.keep_work_dir else None,
+            )
+        finally:
+            if tmp_ctx is not None:
+                tmp_ctx.cleanup()
+
+    def _run_phase2_and_write(  # Phase 2: classify every segment, optionally merge rows, append doctor info, write the TSV to args.out.
+        self,
+        segs: list[tuple[float, float, float]],  # (start_sec, end_sec, score) per segment
+        *,
+        video_path: Path,
+        excel_path: Path,
+        allowed_names: list[str],  # NOTE(review): not referenced anywhere in this method body
+        product_map: dict[str, str],  # class name -> product id, read with .get(name, "")
+        work_dir_log: Path | None = None,  # only echoed in the final log line when not None
+    ) -> int:  # 0 after the output file is written; 1 if resolve_allowed_class_idx raises FileNotFoundError or the video cannot be opened
+        args = self.args
+
+        predict_kw: dict[str, Any] = {"device": args.device}  # shared by the classifier wrapper, the infer kwargs and tear merge
+        if args.half:
+            predict_kw["half"] = True
+
+        log("Phase2：加载 YOLO（手 / 好坏帧 / 耗材）…")
+        det = YOLO(str(args.hand_model))  # hand detector
+        gb = YOLO(str(args.goodbad_model))  # good/bad-frame model
+        cls_m = YOLO(str(args.haocai_model))  # consumable classifier
+
+        cls_names = cls_m.names
+        hc = HaocaiOnlyClassifier(
+            cls_m,
+            cls_names=cls_names,
+            imgsz_cls=int(args.imgsz_cls),
+            predict_kw=predict_kw,
+            gb=gb,
+            gb_names=gb.names,
+        )
+        infer_kw = _haocai_infer_kwargs(args, cls_names, None, predict_kw)  # NOTE(review): helper not visible here; its result is splatted into every per-segment call
+
+        try:
+            allowed_idx = resolve_allowed_class_idx(args, excel_path, cls_names)
+        except FileNotFoundError as exc:  # only this exception type becomes exit code 1; anything else propagates
+            log(str(exc))
+            return 1
+        infer_kw["allowed_class_idx"] = allowed_idx
+        if getattr(args, "use_whitelist", True):
+            log(f"白名单启用，{len(allowed_idx or ())} 个类参与投票")  # allowed_idx may be None/empty -> reported as 0
+        else:
+            log("白名单已关闭，使用全 41 类")  # the class count 41 is hard-coded in this message
+
+        cap = cv2.VideoCapture(str(video_path))
+        if not cap.isOpened():
+            log("无法打开视频")
+            return 1
+
+        sep = "\t"
+        base_cols = [
+            "rank",
+            "start_sec",
+            "end_sec",
+            "product_id_top1",
+            "top1_name",
+            "top1_conf",
+            "product_id_top2",
+            "top2_name",
+            "top2_conf",
+            "product_id_top3",
+            "top3_name",
+            "top3_conf",
+        ]
+        ext_cols = ["tear_top1_name", "tear_top2_name"]
+        header = sep.join(base_cols if args.legacy_12_col_only else base_cols + ext_cols)  # 12 columns in legacy mode, 14 otherwise
+        lines_out = [header]
+        span_to_cells: dict[tuple[float, float], list[str]] = {}  # span key -> row cells without the rank cell
+        span_to_pairs: dict[tuple[float, float], list[tuple[str, float]]] = {}  # span key -> info["pairs"] ([] for failed segments)
+
+        def span_key(t0: float, t1: float) -> tuple[float, float]:  # dict key for a segment: both ends rounded to 6 decimals
+            return (round(float(t0), 6), round(float(t1), 6))
+
+        def infer_one(rank: int, t0: float, t1: float) -> str:  # classify one segment, fill both caches, return its TSV line
+            info = process_segment_haocai_from_cap_with_gate_retries(
+                cap,
+                det,
+                hc,
+                start_sec=t0,
+                end_sec=t1,
+                seek_margin_sec=float(args.seek_margin_sec),
+                log_fn=log,
+                log_prefix=f"段落 rank={rank}: ",
+                **infer_kw,
+            )
+            if not info.get("ok"):
+                reason = str(info.get("reason", ""))
+                span_to_pairs[span_key(t0, t1)] = []
+                row = [  # failure row: the reason text sits in the top1_name column, every other result cell is blank
+                    str(rank),
+                    f"{t0:.6f}",
+                    f"{t1:.6f}",
+                    "",
+                    reason,
+                    "",
+                    "",
+                    "",
+                    "",
+                    "",
+                    "",
+                    "",
+                ]
+                if not args.legacy_12_col_only:
+                    row.extend(["", ""])
+                span_to_cells[span_key(t0, t1)] = row[1:]  # drop the rank so the row can be re-ranked later
+                return sep.join(row)
+
+            n1, n2, n3 = info["top_names"]
+            c1, c2, c3 = info["top_confs"]
+            id1 = product_map.get(n1, "") if n1 else ""
+            id2 = product_map.get(n2, "") if n2 else ""
+            id3 = product_map.get(n3, "") if n3 else ""
+            for nm, pid in ((n1, id1), (n2, id2), (n3, id3)):
+                if nm and not pid:  # a name was predicted but has no product id in the map
+                    log(f"警告: 商品表无名称「{nm}」，产品编码置空。")
+
+            row = [
+                str(rank),
+                f"{t0:.6f}",
+                f"{t1:.6f}",
+                id1,
+                n1,
+                f"{c1:.6f}" if n1 else "",  # confidence is blanked whenever the matching name is empty
+                id2,
+                n2,
+                f"{c2:.6f}" if n2 else "",
+                id3,
+                n3,
+                f"{c3:.6f}" if n3 else "",
+            ]
+            if not args.legacy_12_col_only:
+                row.extend(["", ""])  # tear_top1_name / tear_top2_name are always written empty here
+            span_to_cells[span_key(t0, t1)] = row[1:]
+            span_to_pairs[span_key(t0, t1)] = list(info.get("pairs") or [])
+            return sep.join(row)
+
+        try:  # the capture is released in the finally clause whatever happens below
+            for rank, (t0, t1, af_sc) in enumerate(segs, start=1):
+                log(f"段落 rank={rank} [{t0:.3f},{t1:.3f}] score={af_sc:.4f} …")
+                lines_out.append(infer_one(rank, t0, t1))
+
+            if args.merge_adjacent_tear:
+                log("撕膜门控：合并相邻同 top1 成功段…")
+                if args.tear_model is None or not Path(args.tear_model).is_file():  # missing weights: skip merging, keep the rows as they are
+                    log(f"缺少撕膜分类权重，跳过 tear_merge: {args.tear_model}")
+                else:
+                    tw_path = (args.tear_merge_weights or args.tear_model).resolve()  # explicit merge weights take precedence over tear_model
+                    tear_gate_m = YOLO(str(tw_path))
+                    tidx = tear_class_index(tear_gate_m, args.tear_merge_class)
+                    merge_cfg = HandMergeConfig(
+                        merge_iou_gt=args.merge_iou_gt,
+                        merge_center_dist_max_px=args.merge_center_dist_max_px,
+                        merge_center_dist_max_frac_diag=args.merge_center_dist_max_frac_diag,
+                    )
+                    grouper = HandRoiGrouper(
+                        merge_cfg, pad_box_fn=_pad_box, pad_ratio=args.pad_ratio
+                    )
+                    body_lines = lines_out[1:]  # everything except the header
+                    e2e_rows = parse_e2e_rows_from_body_lines(body_lines)
+                    mg_det = det if not args.tear_merge_full_frame else None  # detector and grouper are both passed as None in full-frame mode
+                    mg_grouper = grouper if not args.tear_merge_full_frame else None
+                    merged_rows = merge_all(
+                        e2e_rows,
+                        cap,
+                        tear_gate_m,
+                        tidx,
+                        head_sec=float(args.tear_merge_head_sec),
+                        tear_prob=float(args.tear_merge_prob),
+                        tear_min_frames=int(args.tear_merge_min_frames),
+                        imgsz=int(args.imgsz_cls),
+                        predict_kw=predict_kw,
+                        verbose=bool(args.tear_merge_verbose),
+                        det=mg_det,
+                        grouper=mg_grouper,
+                        imgsz_det=int(args.imgsz_det),
+                        det_conf=float(args.det_conf),
+                    )
+                    lines_out = [header]  # rebuild the body from the merged rows, renumbering ranks from 1
+                    for j, er in enumerate(merged_rows, start=1):
+                        sk = span_key(er.start_sec, er.end_sec)
+                        if sk in span_to_cells:  # span already inferred: reuse its cached cells under the new rank
+                            lines_out.append(sep.join([str(j)] + span_to_cells[sk]))
+                        else:  # span not in the cache: run a full inference over it
+                            log(
+                                f"[tear_merge] 合并窗段全量重推理 rank={j} "
+                                f"[{er.start_sec:.3f},{er.end_sec:.3f}]"
+                            )
+                            lines_out.append(infer_one(j, er.start_sec, er.end_sec))
+
+            if getattr(args, "gap_merge_enabled", False):
+                log("相邻 gap 合并…")
+                body_lines = lines_out[1:]
+                e2e_rows = parse_e2e_rows_from_body_lines(body_lines)
+                gap_merged = merge_all_by_gap(
+                    e2e_rows,
+                    span_to_pairs,
+                    product_map,
+                    max_gap_sec=float(args.gap_merge_max_gap_sec),
+                    log_fn=log,
+                )
+                lines_out = [header]
+                for er in gap_merged:
+                    lines_out.append(er.to_line12(er.rank))  # NOTE(review): header may carry the 2 ext columns when legacy_12_col_only is false; confirm to_line12 emits a matching column count
+        finally:
+            cap.release()
+
+        log("医生识别：开始执行…")
+        doctor_text = _infer_doctor_text(args, video_path)
+        log(f"医生识别：{doctor_text}")
+        lines_out.append(f"医生信息：{doctor_text}")  # appended after the table as one extra non-tabular line
+
+        args.out.parent.mkdir(parents=True, exist_ok=True)
+        args.out.write_text("\n".join(lines_out) + "\n", encoding="utf-8")
+        log(f"已写出: {args.out.resolve()}")
+        if work_dir_log is not None:
+            log(f"工作目录: {work_dir_log}")
+
+        return 0
+
+
+class DebugPipelineManager(PipelineManager):
+    """Debug variant: skips ActionFormer and takes the segment list from an Excel time-range column."""
+
+    def run(self) -> int:
+        # Returns 0 when phase 2 completes, 1 on any missing input or empty segment list.
+        opts = self.args
+        video = opts.video.resolve()
+        if not video.is_file():
+            log(f"找不到视频: {video}")
+            return 1
+        excel = opts.excel.resolve()
+        if not excel.is_file():
+            log(f"找不到 Excel: {excel}")
+            return 1
+
+        log("[debug] 使用 Excel 时间段，跳过 ActionFormer")
+        # Adjacent tear-film merging is switched off on the shared args object.
+        opts.merge_adjacent_tear = False
+        log("[debug] 跳过撕膜相邻段合并（merge_adjacent_tear=false）")
+
+        names = _resolve_allowed_names(opts, excel)
+        if names is None or not _validate_phase2_weights(opts, require_actionformer=False):
+            return 1
+
+        # The Excel column holding the time ranges is configurable (default index 8).
+        segments = load_segments_from_excel_column_i(
+            excel,
+            col_index=int(getattr(opts, "excel_time_col_index", 8)),
+            video_path=video,
+        )
+        if not segments:
+            log("Excel 未解析到任何有效时间段")
+            return 1
+
+        # Apply the minimum-length filter (af_min_seg_seconds, default 0.0).
+        shortest = float(getattr(opts, "af_min_seg_seconds", 0.0))
+        segments = _filter_segments_by_min_length(segments, shortest)
+        if not segments:
+            log(f"最短段过滤（>={shortest:g}s）后无剩余段")
+            return 1
+
+        return self._run_phase2_and_write(
+            segments,
+            video_path=video,
+            excel_path=excel,
+            allowed_names=names,
+            product_map=e2e.load_product_code_map(excel),
+        )
+
+
+class BasketPipelineManager(PipelineManager):
+    """Skips ActionFormer: box-selected basket ROI + hand-basket contact rising edges -> fixed-window segments."""
+
+    def run(self) -> int:
+        # Returns 0 when phase 2 completes, 1 on a missing input or when no segment is produced.
+        opts = self.args
+        video = opts.video.resolve()
+        if not video.is_file():
+            log(f"找不到视频: {video}")
+            return 1
+        excel = opts.excel.resolve()
+        if not excel.is_file():
+            log(f"找不到 Excel: {excel}")
+            return 1
+
+        log("[basket] 使用篮子接触分段，跳过 ActionFormer")
+        opts.merge_adjacent_tear = False
+        log("[basket] 跳过撕膜相邻段合并（merge_adjacent_tear=false）")
+
+        names = _resolve_allowed_names(opts, excel)
+        if names is None or not _validate_phase2_weights(opts, require_actionformer=False):
+            return 1
+
+        def opt(name, fallback):
+            return getattr(opts, name, fallback)
+
+        roi_out = opt("basket_save_roi_json", None)
+        segments, _ = build_segments_from_basket(
+            video,
+            Path(opts.hand_model),
+            basket_roi_json=None,
+            save_roi_json=Path(roi_out) if roi_out else None,
+            skip_roi_select=False,
+            roi_frame=opt("basket_roi_frame", "middle"),
+            roi_backend=str(opt("basket_roi_backend", "tkinter")),
+            contact_iou_threshold=float(opt("basket_contact_iou_threshold", 0.05)),
+            contact_iou_on=float(opt("basket_contact_iou_on", 0.08)),
+            contact_iou_off=float(opt("basket_contact_iou_off", 0.03)),
+            confirm_seconds=float(opt("basket_confirm_seconds", 0.4)),
+            cooldown_seconds=float(opt("basket_cooldown_seconds", 5.0)),
+            segment_start_offset_sec=float(opt("basket_segment_start_offset_sec", 1.0)),
+            segment_end_offset_sec=float(opt("basket_segment_end_offset_sec", 5.0)),
+            min_segment_sec=float(opt("basket_min_segment_sec", 4.0)),
+            scan_frame_stride=int(opt("basket_scan_frame_stride", 1)),
+            det_conf=float(opt("basket_det_conf", opts.det_conf)),
+            imgsz_det=int(opts.imgsz_det),
+            device=str(opts.device),
+            half=bool(opts.half),
+            log_fn=log,
+        )
+        if not segments:
+            log("未检测到任何手篮接触上升沿，退出")
+            return 1
+
+        return self._run_phase2_and_write(
+            segments,
+            video_path=video,
+            excel_path=excel,
+            allowed_names=names,
+            product_map=e2e.load_product_code_map(excel),
+        )
+
+
+def run_pipeline(args: Namespace) -> int:  # Build a PipelineManager from the parsed args and return the int status of its run().
+    return PipelineManager(args).run()
+
+
+def run_debug_pipeline(args: Namespace) -> int:  # Same entry point for DebugPipelineManager (segments come from Excel); returns run()'s int status.
+    return DebugPipelineManager(args).run()
+
+
+def run_basket_pipeline(args: Namespace) -> int:  # Same entry point for BasketPipelineManager (basket-contact segments); returns run()'s int status.
+    return BasketPipelineManager(args).run()
diff --git a/src/pack_utils.py b/src/pack_utils.py
new file mode 100644
index 0000000..91feec5
--- /dev/null
+++ b/src/pack_utils.py
@@ -0,0 +1,50 @@
+from __future__ import annotations
+
+import time
+from argparse import Namespace
+from pathlib import Path
+
+
+def log(msg: str) -> None:  # Print msg to stdout behind an [HH:MM:SS] local-time prefix, flushing immediately.
+    print("[{}] {}".format(time.strftime("%H:%M:%S"), msg), flush=True)
+
+
+def resolve_allowed_class_idx(
+    args: Namespace,
+    excel_path: Path,
+    cls_names: dict,
+) -> frozenset[int] | None:
+    """Return the class indices allowed to vote; None means no pruning (every class votes)."""
+    if not getattr(args, "use_whitelist", True):
+        return None
+    import run_haocai_actionformer_consumables_e2e as e2e  # imported only once a whitelist is actually needed
+    # Read defensively, like use_whitelist above: a Namespace without this option falls back to Excel.
+    whitelist_json = getattr(args, "whitelist_json", None)
+    if whitelist_json is None:
+        allowed_names = load_allowed_names_from_excel(excel_path)  # names come from column C of the product sheet
+    else:
+        wpath = Path(whitelist_json)
+        if not wpath.is_file():
+            raise FileNotFoundError(f"找不到白名单 JSON: {wpath}")
+        allowed_names = e2e.load_whitelist_json(wpath.resolve())
+    return e2e.allowed_indices_from_json_names(allowed_names, cls_names)
+
+
+def load_allowed_names_from_excel(excel_path: Path) -> list[str]:
+    """
+    Return the distinct product names found in column C of the workbook's first sheet.
+
+    Empty cells, blank strings and the literal "商品名称" are skipped; first-seen order is
+    preserved. Raises ValueError when the sheet has fewer than three columns.
+    """
+    import pandas as pd
+
+    sheet = pd.read_excel(excel_path, sheet_name=0, header=0)
+    if sheet.shape[1] < 3:
+        raise ValueError(f"Excel 至少需要 C 列（第 3 列）: {excel_path}")
+
+    stripped = (
+        str(cell).strip() for cell in sheet.iloc[:, 2] if not pd.isna(cell)
+    )
+    wanted = (name for name in stripped if name and name != "商品名称")
+    # dict.fromkeys removes duplicates while keeping insertion order.
+    return list(dict.fromkeys(wanted))
diff --git a/src/paths.py b/src/paths.py
new file mode 100644
index 0000000..122866d
--- /dev/null
+++ b/src/paths.py
@@ -0,0 +1,23 @@
+"""pack/5.11：将 vendor code 根目录加入 sys.path（顺序与 main_pipeline 一致）。"""
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+
+def ensure_code_on_path(pack_root: Path) -> Path:
+    """
+    pack_root: root directory of the pack. Puts the vendored code directories on sys.path.
+    Returns CODE_ROOT (pack_root / 'code', resolved); raises FileNotFoundError without repo_root.py.
+    """
+    code_root = (pack_root / "code").resolve()
+    if not code_root.joinpath("repo_root.py").is_file():
+        raise FileNotFoundError(f"缺少 vendor code 根: {code_root}")
+
+    clip_cls = code_root / "video_clip_cls"
+    # Each missing entry is inserted at index 0, so the entry handled last ends up first.
+    for entry in map(str, (clip_cls / "infer_single_0506", clip_cls / "scripts", code_root)):
+        if entry in sys.path:
+            continue
+        sys.path.insert(0, entry)
+    return code_root
diff --git a/src/segments_offline_orchestrator.py b/src/segments_offline_orchestrator.py
new file mode 100644
index 0000000..a6a58e7
--- /dev/null
+++ b/src/segments_offline_orchestrator.py
@@ -0,0 +1,159 @@
+"""按 TSV 时间段对离线视频做手检 → 好帧门控 → 耗材识别（无分段、无撕膜）。"""
+from __future__ import annotations
+
+import gc
+from argparse import Namespace
+from pathlib import Path
+from typing import Any
+
+import cv2
+import run_haocai_actionformer_consumables_e2e as e2e
+from pipeline.segment_processor import (
+    HaocaiOnlyClassifier,
+    process_segment_haocai_from_cap_with_gate_retries,
+)
+from ultralytics import YOLO
+
+from pack_utils import log, resolve_allowed_class_idx
+from stream_orchestrator import (
+    _format_result_row,
+    _maybe_free_gpu,
+    _resolve_haocai_min_conf_retry,
+)
+from tsv_segments import load_segments_from_result_tsv
+
+
+def _validate_haocai_weights(args: Namespace) -> bool:
+    """Return True when the hand, good/bad-frame and consumable weight files all exist.
+    The first missing file is logged and False is returned immediately."""
+    weights = {"手部检测": args.hand_model, "好坏帧": args.goodbad_model, "耗材分类": args.haocai_model}
+    for label, weight in weights.items():
+        if Path(weight).is_file():
+            continue
+        log(f"缺少{label}: {weight}")
+        return False
+    return True
+
+
+def run_segments_offline_pipeline(args: Namespace) -> int:  # Re-run per-segment recognition for the time ranges of a result TSV; returns 0 on success, 1 on failure.
+    video_path = Path(args.video).resolve()
+    if not video_path.is_file():
+        log(f"找不到视频: {video_path}")
+        return 1
+
+    excel_path = Path(args.excel).resolve()
+    if not excel_path.is_file():
+        log(f"找不到 Excel: {excel_path}")
+        return 1
+
+    tsv_path = Path(args.segments_tsv).resolve()
+    if not tsv_path.is_file():
+        log(f"找不到时间段 TSV: {tsv_path}")
+        return 1
+
+    if not _validate_haocai_weights(args):  # hand / good-bad / consumable weight files must all exist
+        return 1
+
+    segs = load_segments_from_result_tsv(
+        tsv_path,
+        skip_empty_top1=bool(getattr(args, "segments_skip_empty", False)),
+    )
+    if not segs:
+        log("TSV 未解析到任何有效时间段")
+        return 1
+
+    product_map = e2e.load_product_code_map(excel_path)
+    out_path = Path(args.out).resolve()
+    out_path.parent.mkdir(parents=True, exist_ok=True)  # output directory is created before any model is loaded
+
+    predict_kw: dict[str, Any] = {"device": args.device}  # shared predict kwargs for the classifier wrapper and the segment call
+    if args.half:
+        predict_kw["half"] = True
+
+    log("[segments-offline] 加载 YOLO（手 / 好坏帧 / 耗材）…")
+    det = YOLO(str(args.hand_model))  # hand detector
+    gb = YOLO(str(args.goodbad_model))  # good/bad-frame model
+    cls_m = YOLO(str(args.haocai_model))  # consumable classifier
+    hc = HaocaiOnlyClassifier(
+        cls_m,
+        cls_names=cls_m.names,
+        imgsz_cls=int(args.imgsz_cls),
+        predict_kw=predict_kw,
+        gb=gb,
+        gb_names=gb.names,
+    )
+    try:
+        allowed_idx = resolve_allowed_class_idx(args, excel_path, cls_m.names)
+    except FileNotFoundError as exc:  # only a missing whitelist JSON is turned into exit code 1; other exceptions propagate
+        log(str(exc))
+        return 1
+    if getattr(args, "use_whitelist", True):
+        log(f"[segments-offline] 白名单启用，{len(allowed_idx or ())} 个类参与投票")
+    else:
+        log("[segments-offline] 白名单已关闭，使用全 41 类")  # the class count 41 is hard-coded in this message
+
+    cap = cv2.VideoCapture(str(video_path))
+    if not cap.isOpened():
+        log("无法打开视频")
+        return 1
+
+    header = "\t".join(  # NOTE(review): always the 12 base columns, while rows below follow args.legacy_12_col_only; confirm the column counts agree
+        [
+            "rank",
+            "start_sec",
+            "end_sec",
+            "product_id_top1",
+            "top1_name",
+            "top1_conf",
+            "product_id_top2",
+            "top2_name",
+            "top2_conf",
+            "product_id_top3",
+            "top3_name",
+            "top3_conf",
+        ]
+    )
+    lines_out = [header]
+
+    try:  # the capture is released in the finally clause even if a segment raises
+        for rank, (t0, t1, _sc) in enumerate(segs, start=1):  # the third tuple element is unused here
+            log(f"[segments-offline] rank={rank} [{t0:.3f},{t1:.3f}] …")
+            info = process_segment_haocai_from_cap_with_gate_retries(
+                cap,
+                det,
+                hc,
+                start_sec=t0,
+                end_sec=t1,
+                seek_margin_sec=float(args.seek_margin_sec),
+                det_conf=float(args.det_conf),
+                pad_ratio=float(args.pad_ratio),
+                imgsz_det=int(args.imgsz_det),
+                frame_stride=max(1, int(args.frame_stride)),  # stride is clamped to at least 1
+                haocai_min_conf=float(args.haocai_min_conf),
+                haocai_min_conf_retry=_resolve_haocai_min_conf_retry(args),
+                good_top1_conf_threshold=float(args.good_top1_conf_threshold),
+                good_top1_retry_threshold=float(args.good_top1_retry_threshold),
+                cls_names=cls_m.names,
+                allowed_class_idx=allowed_idx,
+                predict_kw=predict_kw,
+                log_fn=log,
+                log_prefix=f"[segments-offline] rank={rank}: ",
+            )
+            lines_out.append(
+                _format_result_row(
+                    rank,
+                    t0,
+                    t1,
+                    info,
+                    product_map,
+                    legacy_12_col=bool(args.legacy_12_col_only),
+                )
+            )
+            _maybe_free_gpu()  # called once after every segment
+    finally:
+        cap.release()
+        gc.collect()
+
+    out_path.write_text("\n".join(lines_out) + "\n", encoding="utf-8")  # written only after every segment finished without raising
+    log(f"[segments-offline] 完成，共 {len(segs)} 段，结果: {out_path}")
+    return 0
diff --git a/src/stream_basket_session.py b/src/stream_basket_session.py
new file mode 100644
index 0000000..e7cac85
--- /dev/null
+++ b/src/stream_basket_session.py
@@ -0,0 +1,149 @@
+"""推流篮子会话：逐帧手部检测 + ActionTriggerLogic + 待识别片段队列。"""
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any, Callable
+
+import numpy as np
+from ultralytics import YOLO
+
+from action_trigger_logic import ActionTriggerLogic
+from run_segments_consumable_vote import collect_hand_boxes
+from stream_frame_buffer import FrameRingBuffer
+
+
+@dataclass
+class CachedClip:
+    """A clip whose window is fully buffered and ready for consumable recognition (decoded frames; release explicitly after use)."""
+    contact_t: float
+    start_sec: float
+    end_sec: float
+    frames: list[tuple[float, np.ndarray]]
+
+    @property
+    def duration_sec(self) -> float:  # window length in seconds, never negative
+        span = self.end_sec - self.start_sec
+        return span if span > 0.0 else 0.0
+
+
+@dataclass
+class _PendingClip:  # A triggered window that is still waiting for the stream time to reach end_sec.
+    contact_t: float  # timestamp returned by the trigger for the contact event
+    start_sec: float  # window start: contact_t + segment start offset
+    end_sec: float  # window end: contact_t + segment end offset
+
+
+class StreamBasketSession:
+    """
+    Streaming basket session; for every frame handed to push_frame:
+    1. hand detection + ActionTriggerLogic -> optional start (contact) event
+    2. the frame is written to the ring buffer
+    3. poll_ready_clips returns the clips whose [start+off0, start+off1] window is complete
+    """
+
+    def __init__(
+        self,
+        basket_xyxy: list[float],  # basket ROI, passed unchanged to the trigger on every frame
+        hand_model: YOLO,
+        trigger: ActionTriggerLogic,
+        *,
+        segment_start_offset_sec: float = 1.0,  # window start relative to the contact time
+        segment_end_offset_sec: float = 6.0,  # window end relative to the contact time
+        min_segment_sec: float = 4.0,  # windows shorter than this are dropped in poll_ready_clips
+        ring_buffer_sec: float = 10.0,
+        fps: float = 25.0,
+        cache_max_width: int = 1920,
+        jpeg_quality: int = 85,
+        det_conf: float = 0.6,
+        imgsz_det: int = 640,
+        predict_kw: dict[str, Any] | None = None,  # extra kwargs for hand_model.predict (copied)
+        log_fn: Callable[[str], None] | None = None,  # optional logger; nothing is logged when None
+    ) -> None:
+        self.basket_xyxy = [float(v) for v in basket_xyxy]
+        self.hand_model = hand_model
+        self.trigger = trigger
+        self.segment_start_offset = float(segment_start_offset_sec)
+        self.segment_end_offset = float(segment_end_offset_sec)
+        self.min_segment_sec = float(min_segment_sec)
+        self.det_conf = float(det_conf)
+        self.imgsz_det = int(imgsz_det)
+        self.predict_kw = dict(predict_kw or {})
+        self.log_fn = log_fn
+
+        self.buffer = FrameRingBuffer(
+            max_seconds=ring_buffer_sec,
+            fps=fps,
+            cache_max_width=cache_max_width,
+            jpeg_quality=jpeg_quality,
+        )
+        self._pending: list[_PendingClip] = []  # triggered windows not yet emitted or dropped
+        self._current_t = 0.0  # timestamp of the most recent frame seen by push_frame
+
+    def push_frame(self, t_sec: float, frame: np.ndarray) -> float | None:
+        """Process one frame; return the contact timestamp when a start is triggered, else None."""
+        t = float(t_sec)
+        self._current_t = t
+
+        r0 = self.hand_model.predict(
+            frame,
+            conf=self.det_conf,
+            imgsz=self.imgsz_det,
+            verbose=False,
+            **self.predict_kw,
+        )[0]
+        hands = collect_hand_boxes(self.hand_model, r0.boxes) if r0.boxes else []
+        start_t = self.trigger.process_frame(t, hands, self.basket_xyxy)
+
+        self.buffer.append(t, frame)
+        self.buffer.prune_before(t - self.buffer.max_seconds)  # keep only the last max_seconds of frames
+
+        if start_t is not None:
+            contact = float(start_t)
+            seg0 = contact + self.segment_start_offset
+            seg1 = contact + self.segment_end_offset
+            self._pending.append(
+                _PendingClip(contact_t=contact, start_sec=seg0, end_sec=seg1)
+            )
+            if self.log_fn:
+                self.log_fn(f"[stream] 接触上升沿 t={contact:.3f}s → 窗口 [{seg0:.3f}, {seg1:.3f}]s")
+            return contact
+
+        return None
+
+    def poll_ready_clips(self) -> list[CachedClip]:
+        """Return the clips whose window is complete at the current time and meets the minimum duration."""
+        ready: list[CachedClip] = []
+        still_pending: list[_PendingClip] = []
+
+        for pc in self._pending:
+            if self._current_t + 1e-6 < pc.end_sec:  # window end not reached yet: keep waiting
+                still_pending.append(pc)
+                continue
+
+            duration = pc.end_sec - pc.start_sec
+            if duration + 1e-9 < self.min_segment_sec:
+                if self.log_fn:
+                    self.log_fn(
+                        f"[stream] 丢弃短段 [{pc.start_sec:.3f},{pc.end_sec:.3f}] "
+                        f"时长 {duration:.3f}s < {self.min_segment_sec:g}s"
+                    )
+                continue
+            frames = self.buffer.slice_decoded(pc.start_sec, pc.end_sec)  # decode only after the duration check, so dropped windows cost no JPEG decoding
+            if not frames:
+                if self.log_fn:
+                    self.log_fn(
+                        f"[stream] 丢弃空段 [{pc.start_sec:.3f},{pc.end_sec:.3f}]（缓存无帧）"
+                    )
+                continue
+
+            ready.append(
+                CachedClip(
+                    contact_t=pc.contact_t,
+                    start_sec=pc.start_sec,
+                    end_sec=pc.end_sec,
+                    frames=frames,
+                )
+            )
+
+        self._pending = still_pending  # emitted and dropped windows are both removed
+        return ready
diff --git a/src/stream_frame_buffer.py b/src/stream_frame_buffer.py
new file mode 100644
index 0000000..ac30823
--- /dev/null
+++ b/src/stream_frame_buffer.py
@@ -0,0 +1,99 @@
+"""推流帧环形缓存：JPEG 压缩存储，按时间戳截取片段。"""
+from __future__ import annotations
+
+from collections import deque
+from dataclasses import dataclass
+
+import cv2
+import numpy as np
+
+
+def encode_frame_for_cache(
+    frame: np.ndarray,
+    *,
+    max_width: int = 1920,
+    jpeg_quality: int = 85,
+) -> bytes:
+    """JPEG-encode a frame after shrinking it to at most max_width pixels wide.
+    A non-positive max_width disables resizing. Raises RuntimeError if encoding fails.
+    """
+    height, width = frame.shape[:2]
+    limit = int(max_width)
+    scaled = frame
+    if 0 < limit < width:
+        ratio = limit / float(width)
+        target_size = (int(round(width * ratio)), int(round(height * ratio)))
+        scaled = cv2.resize(frame, target_size, interpolation=cv2.INTER_AREA)
+    quality_params = [int(cv2.IMWRITE_JPEG_QUALITY), int(jpeg_quality)]
+    success, jpeg_buf = cv2.imencode(".jpg", scaled, quality_params)
+    if not success:
+        raise RuntimeError("JPEG 编码失败")
+    return jpeg_buf.tobytes()
+
+
+def decode_cached_frame(data: bytes) -> np.ndarray:
+    # Decode cached JPEG bytes back into a colour image; raises RuntimeError when decoding fails.
+    decoded = cv2.imdecode(np.frombuffer(data, dtype=np.uint8), cv2.IMREAD_COLOR)
+    if decoded is not None:
+        return decoded
+    raise RuntimeError("JPEG 解码失败")
+
+
+@dataclass(frozen=True)
+class TimestampedFrame:  # One immutable ring-buffer entry.
+    t_sec: float  # frame timestamp, as passed to FrameRingBuffer.append
+    frame_jpeg: bytes  # JPEG bytes produced by encode_frame_for_cache
+
+
+class FrameRingBuffer:
+    """
+    Keeps the (t, jpeg) pairs from the most recent max_seconds.
+    A 5 s window @25 fps of 1280-wide JPEGs is ~25-50 MB, far below the GBs of raw 4K BGR.
+    """
+
+    def __init__(
+        self,
+        *,
+        max_seconds: float = 10.0,
+        fps: float = 25.0,
+        cache_max_width: int = 1280,
+        jpeg_quality: int = 85,
+    ) -> None:
+        self.max_seconds = max(1.0, float(max_seconds))
+        self.fps = max(1.0, float(fps))
+        self.cache_max_width = int(cache_max_width)
+        self.jpeg_quality = int(jpeg_quality)
+        # Frame-count cap: nominal window length plus 8 spare slots, never fewer than 32.
+        nominal = int(round(self.max_seconds * self.fps))
+        self._items: deque[TimestampedFrame] = deque(maxlen=max(32, nominal + 8))
+        self._latest_t = 0.0
+
+    @property
+    def latest_t(self) -> float:
+        return self._latest_t
+
+    def append(self, t_sec: float, frame: np.ndarray) -> None:
+        # The latest timestamp is recorded before the frame is encoded and stored.
+        stamp = self._latest_t = float(t_sec)
+        encoded = encode_frame_for_cache(
+            frame, max_width=self.cache_max_width, jpeg_quality=self.jpeg_quality
+        )
+        self._items.append(TimestampedFrame(t_sec=stamp, frame_jpeg=encoded))
+
+    def prune_before(self, t_min: float) -> None:
+        # Pop from the oldest end while entries are older than t_min (1e-9 tolerance).
+        threshold = float(t_min) - 1e-9
+        entries = self._items
+        while entries and entries[0].t_sec < threshold:
+            entries.popleft()
+
+    def slice_decoded(self, t0: float, t1: float) -> list[tuple[float, np.ndarray]]:
+        """Return the decoded frames with t0 <= t <= t1 (decoded on demand; discard after use)."""
+        lo, hi = float(t0), float(t1)
+        if hi < lo:
+            lo, hi = hi, lo
+        return [
+            (entry.t_sec, decode_cached_frame(entry.frame_jpeg))
+            for entry in self._items
+            if lo - 1e-6 <= entry.t_sec <= hi + 1e-6
+        ]
diff --git a/src/stream_orchestrator.py b/src/stream_orchestrator.py
new file mode 100644
index 0000000..1424b26
--- /dev/null
+++ b/src/stream_orchestrator.py
@@ -0,0 +1,462 @@
+"""RTSP 推流篮子耗材识别编排（无撕膜模型 / 无 tear_merge）。"""
+from __future__ import annotations
+
+import gc
+import time
+from argparse import Namespace
+from pathlib import Path
+from typing import Any
+
+import cv2
+import run_haocai_actionformer_consumables_e2e as e2e
+from action_trigger_logic import ActionTriggerLogic
+from pipeline.segment_processor import (
+    HaocaiOnlyClassifier,
+    process_segment_haocai_from_cap_with_gate_retries,
+    process_segment_haocai_from_frames_with_gate_retries,
+)
+from ultralytics import YOLO
+
+from basket_segmenter import (
+    _roi_xyxy_from_select,
+    _scale_frame_for_display,
+    _select_basket_roi_tkinter,
+    save_basket_roi_json,
+)
+from pack_utils import log, resolve_allowed_class_idx
+from stream_basket_session import CachedClip, StreamBasketSession
+
+
+def _validate_stream_weights(args: Namespace) -> bool:  # True only if all three weight files exist
+    for p, lab in (  # (weight path, label used in the log message)
+        (args.hand_model, "手部检测"),
+        (args.goodbad_model, "好坏帧"),
+        (args.haocai_model, "耗材分类"),
+    ):
+        if not Path(p).is_file():
+            log(f"缺少{lab}: {p}")  # report the first missing file and stop
+            return False
+    return True
+
+
+def _resolve_basket_roi(
+    args: Namespace,
+    first_frame,
+    *,
+    t_sec: float = 0.0,
+) -> list[float]:  # interactively selected basket ROI, logged below as xyxy
+    backend = str(getattr(args, "basket_roi_backend", "tkinter")).strip().lower()
+    if backend != "tkinter":  # other backends are only reported; tkinter is used regardless
+        log(f"[stream] 推流框选暂仅支持 tkinter，当前 {backend!r} 将回退 tkinter")
+    disp, scale = _scale_frame_for_display(first_frame, 1920)  # presumably caps the preview at 1920 px — verify in basket_segmenter
+    log("[stream] 请在弹窗中框选篮子 ROI…")
+    rx, ry, rw, rh = _select_basket_roi_tkinter(
+        disp, t_sec=t_sec, title="框选耗材篮子（推流）"
+    )
+    if scale != 1.0:  # undo the display scaling on the selected rectangle
+        rx, ry, rw, rh = rx / scale, ry / scale, rw / scale, rh / scale
+    roi = _roi_xyxy_from_select(int(round(rx)), int(round(ry)), int(round(rw)), int(round(rh)))
+    log(f"[stream] 篮子 ROI xyxy={roi}")
+
+    save_json = getattr(args, "basket_save_roi_json", None)
+    if save_json is not None:  # persisting the ROI is optional
+        save_basket_roi_json(Path(save_json), roi)
+        log(f"[stream] ROI 已保存: {save_json}")
+    return roi
+
+
+def _format_result_row(
+    rank: int,
+    t0: float,
+    t1: float,
+    info: dict[str, Any],
+    product_map: dict[str, str],
+    *,
+    legacy_12_col: bool,
+) -> str:  # one tab-separated result line, without a trailing newline
+    sep = "\t"
+    if not info.get("ok"):  # failed segment: only rank, times and the reason are filled in
+        reason = str(info.get("reason", ""))
+        row = [
+            str(rank),
+            f"{t0:.6f}",
+            f"{t1:.6f}",
+            "",
+            reason,  # occupies the same position as the first name in a successful row
+            "",
+            "",
+            "",
+            "",
+            "",
+            "",
+            "",
+        ]
+        if not legacy_12_col:  # the non-legacy layout carries two extra empty trailing cells
+            row.extend(["", ""])
+        return sep.join(row)
+
+    n1, n2, n3 = info["top_names"]
+    c1, c2, c3 = info["top_confs"]
+    id1 = product_map.get(n1, "") if n1 else ""  # product code looked up by class name; empty when unknown
+    id2 = product_map.get(n2, "") if n2 else ""
+    id3 = product_map.get(n3, "") if n3 else ""
+    for nm, pid in ((n1, id1), (n2, id2), (n3, id3)):
+        if nm and not pid:  # a predicted name that has no product code only triggers a warning
+            log(f"警告: 商品表无名称「{nm}」，产品编码置空。")
+
+    row = [
+        str(rank),
+        f"{t0:.6f}",
+        f"{t1:.6f}",
+        id1,
+        n1,
+        f"{c1:.6f}" if n1 else "",  # the confidence cell is blank whenever the name is empty
+        id2,
+        n2,
+        f"{c2:.6f}" if n2 else "",
+        id3,
+        n3,
+        f"{c3:.6f}" if n3 else "",
+    ]
+    if not legacy_12_col:  # same two empty trailing cells as in the failure row
+        row.extend(["", ""])
+    return sep.join(row)
+
+
+def _maybe_free_gpu() -> None:  # best-effort cleanup: run the GC and empty the CUDA cache when torch + CUDA exist
+    gc.collect()
+    try:
+        import torch
+
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+    except ImportError:  # torch is treated as optional here
+        pass
+
+
+def _resolve_haocai_min_conf_retry(args: Namespace) -> float | None:  # retry threshold, or None when retrying is disabled
+    h_retry = getattr(args, "haocai_min_conf_retry", None)
+    if h_retry is None:
+        return None
+    h_retry = float(h_retry)
+    if h_retry <= 0:  # non-positive values disable the retry
+        return None
+    if h_retry >= float(args.haocai_min_conf) - 1e-12:  # a retry threshold must be below the primary threshold
+        return None
+    return h_retry
+
+
+def _haocai_infer_kwargs(
+    args: Namespace,
+    cls_names: dict,
+    allowed_idx: frozenset[int] | None,
+    predict_kw: dict[str, Any],
+) -> dict[str, Any]:  # keyword arguments that _infer_clip forwards to either segment processor
+    return {
+        "det_conf": float(args.det_conf),
+        "pad_ratio": float(args.pad_ratio),
+        "imgsz_det": int(args.imgsz_det),
+        "frame_stride": max(1, int(args.frame_stride)),  # stride is clamped to at least 1
+        "haocai_min_conf": float(args.haocai_min_conf),
+        "haocai_min_conf_retry": _resolve_haocai_min_conf_retry(args),  # None when no valid retry threshold is set
+        "good_top1_conf_threshold": float(args.good_top1_conf_threshold),
+        "good_top1_retry_threshold": float(args.good_top1_retry_threshold),
+        "cls_names": cls_names,
+        "allowed_class_idx": allowed_idx,
+        "predict_kw": predict_kw,
+    }
+
+
+def _use_file_infer_for_stream(args: Namespace, *, is_file: bool) -> bool:
+    """Whether segment inference should re-read the local source file rather than use cached frames."""
+    if not is_file:  # only a local file can be re-read
+        return False
+    mode = str(getattr(args, "stream_infer_source", "file")).strip().lower()  # defaults to "file" when unset
+    return mode in ("file", "auto", "source")
+
+
+def _infer_clip(
+    clip: CachedClip,
+    *,
+    det: YOLO,
+    hc: HaocaiOnlyClassifier,
+    cap: cv2.VideoCapture | None,
+    use_file_infer: bool,
+    args: Namespace,
+    cls_names: dict,
+    allowed_idx: frozenset[int] | None,
+    predict_kw: dict[str, Any],
+    rank: int | None = None,
+) -> dict[str, Any]:  # whatever result dict the chosen segment processor returns
+    log_prefix = f"[stream] rank={rank}: " if rank is not None else "[stream] "
+    infer_kw = _haocai_infer_kwargs(args, cls_names, allowed_idx, predict_kw)
+    try:
+        if use_file_infer and cap is not None:  # re-read the segment from the source capture
+            return process_segment_haocai_from_cap_with_gate_retries(
+                cap,
+                det,
+                hc,
+                start_sec=clip.start_sec,
+                end_sec=clip.end_sec,
+                seek_margin_sec=float(args.seek_margin_sec),
+                log_fn=log,
+                log_prefix=log_prefix,
+                **infer_kw,
+            )
+        return process_segment_haocai_from_frames_with_gate_retries(  # otherwise use the clip's own frames
+            clip.frames,
+            det,
+            hc,
+            start_sec=clip.start_sec,
+            end_sec=clip.end_sec,
+            log_fn=log,
+            log_prefix=log_prefix,
+            **infer_kw,
+        )
+    finally:  # runs on success and on error: empty the clip's frame list, then free memory
+        clip.frames.clear()
+        _maybe_free_gpu()
+
+
+class StreamBasketOrchestrator:
+    """Drive basket-consumable recognition over an RTSP stream or a local file.
+
+    Frames are pushed one by one into a ``StreamBasketSession``; every clip the
+    session reports as ready is classified and appended to the output TSV.
+    """
+
+    def __init__(self, args: Namespace) -> None:
+        self.args = args
+
+    def run(self) -> int:
+        """Run the streaming pipeline until the source ends or the user interrupts.
+
+        Validates the inputs, loads the three YOLO models, opens the source,
+        asks for the basket ROI on the first frame, then reads frames and
+        writes one TSV row per recognised segment.
+
+        Returns:
+            1 when a required input is missing or the source cannot be opened
+            or read; 0 otherwise.
+        """
+        args = self.args
+        source = str(getattr(args, "stream_rtsp", "") or getattr(args, "rtsp", "")).strip()
+        if not source:
+            log("缺少推流地址：--rtsp 或 yaml stream.rtsp")
+            return 1
+
+        excel_path = Path(args.excel).resolve()
+        if not excel_path.is_file():
+            log(f"找不到 Excel: {excel_path}")
+            return 1
+        if not _validate_stream_weights(args):
+            return 1
+
+        product_map = e2e.load_product_code_map(excel_path)
+        out_path = Path(args.out).resolve()
+        out_path.parent.mkdir(parents=True, exist_ok=True)
+
+        predict_kw: dict[str, Any] = {"device": args.device}
+        if args.half:
+            predict_kw["half"] = True
+
+        log("[stream] 加载 YOLO（手 / 好坏帧 / 耗材）…")
+        det = YOLO(str(args.hand_model))
+        gb = YOLO(str(args.goodbad_model))
+        cls_m = YOLO(str(args.haocai_model))
+        cls_names = cls_m.names
+        hc = HaocaiOnlyClassifier(
+            cls_m,
+            cls_names=cls_names,
+            imgsz_cls=int(args.imgsz_cls),
+            predict_kw=predict_kw,
+            gb=gb,
+            gb_names=gb.names,
+        )
+        try:
+            allowed_idx = resolve_allowed_class_idx(args, excel_path, cls_names)
+        except FileNotFoundError as exc:
+            log(str(exc))
+            return 1
+        if getattr(args, "use_whitelist", True):
+            log(f"[stream] 白名单启用，{len(allowed_idx or ())} 个类参与投票")
+        else:
+            log("[stream] 白名单已关闭，使用全 41 类")
+
+        cap = cv2.VideoCapture(source)
+        if not cap.isOpened():
+            log(f"[stream] 无法打开流: {source}")
+            return 1
+
+        is_file = Path(source).is_file()
+        use_file_infer = _use_file_infer_for_stream(args, is_file=is_file)
+        infer_cap: cv2.VideoCapture | None = None
+        if use_file_infer:
+            infer_cap = cv2.VideoCapture(source)
+            if not infer_cap.isOpened():
+                log("[stream] 无法打开回源推理用 VideoCapture，回退 JPEG 缓存识别")
+                use_file_infer = False
+                infer_cap = None
+        fps = float(cap.get(cv2.CAP_PROP_FPS) or 0.0)
+        if fps <= 1e-3:
+            fps = float(getattr(args, "stream_fps", 25.0))
+
+        ok0, first = cap.read()
+        if not ok0 or first is None:
+            log("[stream] 无法读取首帧")
+            # infer_cap may already be open at this point; release it as well.
+            cap.release()
+            if infer_cap is not None:
+                infer_cap.release()
+            return 1
+
+        t0 = float(cap.get(cv2.CAP_PROP_POS_MSEC)) / 1000.0 if is_file else 0.0
+        basket_roi = _resolve_basket_roi(args, first, t_sec=t0)
+
+        seg_start_off = float(
+            getattr(
+                args,
+                "stream_segment_start_offset_sec",
+                getattr(args, "basket_segment_start_offset_sec", 1.0),
+            )
+        )
+        seg_end_off = float(
+            getattr(
+                args,
+                "stream_segment_end_offset_sec",
+                getattr(args, "basket_segment_end_offset_sec", 6.0),
+            )
+        )
+        ring_sec = float(getattr(args, "stream_ring_buffer_sec", 10.0))
+        cache_max_w = int(getattr(args, "stream_cache_max_width", 1920))
+        jpeg_q = int(getattr(args, "stream_jpeg_quality", 85))
+
+        trigger = ActionTriggerLogic(
+            fps=fps,
+            confirm_seconds=float(getattr(args, "basket_confirm_seconds", 0.12)),
+            cooldown_seconds=float(getattr(args, "basket_cooldown_seconds", 2.5)),
+            threshold_on=float(getattr(args, "basket_contact_iou_on", 0.04)),
+            threshold_off=float(getattr(args, "basket_contact_iou_off", 0.02)),
+        )
+
+        session = StreamBasketSession(
+            basket_roi,
+            det,
+            trigger,
+            segment_start_offset_sec=seg_start_off,
+            segment_end_offset_sec=seg_end_off,
+            min_segment_sec=float(getattr(args, "stream_min_segment_sec", 4.0)),
+            ring_buffer_sec=ring_sec,
+            fps=fps,
+            cache_max_width=cache_max_w,
+            jpeg_quality=jpeg_q,
+            det_conf=float(getattr(args, "basket_det_conf", args.det_conf)),
+            imgsz_det=int(args.imgsz_det),
+            predict_kw=predict_kw,
+            log_fn=log,
+        )
+
+        log(
+            f"[stream] 帧缓存: ring={ring_sec:g}s, jpeg≤{cache_max_w}px q={jpeg_q} "
+            f"（4K 原始帧不入缓存，防 OOM）"
+        )
+        if use_file_infer:
+            log("[stream] 段内识别: 回源本地文件 4K（infer_source=file，与 TSV 离线一致）")
+        else:
+            fallback = str(getattr(args, "stream_infer_fallback", "cache"))
+            log(f"[stream] 段内识别: JPEG 缓存帧（infer_fallback={fallback}）")
+
+        # Start a fresh result file that holds only the 12-column header.
+        header = "\t".join(
+            [
+                "rank",
+                "start_sec",
+                "end_sec",
+                "product_id_top1",
+                "top1_name",
+                "top1_conf",
+                "product_id_top2",
+                "top2_name",
+                "top2_conf",
+                "product_id_top3",
+                "top3_name",
+                "top3_conf",
+            ]
+        )
+        out_path.write_text(header + "\n", encoding="utf-8")
+        rank = 0
+
+        def process_ready() -> None:
+            nonlocal rank
+            for clip in session.poll_ready_clips():
+                rank += 1
+                log(
+                    f"[stream] 识别 rank={rank} [{clip.start_sec:.3f},{clip.end_sec:.3f}] "
+                    f"({len(clip.frames)} 帧)…"
+                )
+                info = _infer_clip(
+                    clip,
+                    det=det,
+                    hc=hc,
+                    cap=infer_cap,
+                    use_file_infer=use_file_infer,
+                    args=args,
+                    cls_names=cls_names,
+                    allowed_idx=allowed_idx,
+                    predict_kw=predict_kw,
+                    rank=rank,
+                )
+                line = _format_result_row(
+                    rank,
+                    clip.start_sec,
+                    clip.end_sec,
+                    info,
+                    product_map,
+                    legacy_12_col=bool(args.legacy_12_col_only),
+                )
+                with out_path.open("a", encoding="utf-8") as f:
+                    f.write(line + "\n")
+                log(f"[stream] rank={rank} 已写入")
+
+        session.push_frame(t0, first)
+        process_ready()
+        frame_idx = 1
+
+        log(f"[stream] 开始读流: {source} (fps≈{fps:g})")
+        try:
+            try:
+                while True:
+                    ok, frame = cap.read()
+                    if not ok or frame is None:
+                        # End of file stops the loop; a live source is retried after a pause.
+                        if is_file:
+                            break
+                        time.sleep(0.02)
+                        continue
+
+                    # Files use the capture position; live sources derive time from the frame count.
+                    if is_file:
+                        t_sec = float(cap.get(cv2.CAP_PROP_POS_MSEC)) / 1000.0
+                    else:
+                        t_sec = frame_idx / fps
+
+                    session.push_frame(t_sec, frame)
+                    del frame
+                    process_ready()
+                    frame_idx += 1
+            except KeyboardInterrupt:
+                log("[stream] 用户中断")
+            # Final flush. It must run before the captures are released below,
+            # because _infer_clip hands infer_cap to the file-based processor.
+            process_ready()
+        finally:
+            cap.release()
+            if infer_cap is not None:
+                infer_cap.release()
+
+        log(f"[stream] 结束，共 {rank} 段，结果: {out_path}")
+        return 0
+
+
+def run_stream_pipeline(args: Namespace) -> int:  # entry point: build the orchestrator and return its exit code
+    return StreamBasketOrchestrator(args).run()
diff --git a/src/tsv_segments.py b/src/tsv_segments.py
new file mode 100644
index 0000000..5480708
--- /dev/null
+++ b/src/tsv_segments.py
@@ -0,0 +1,58 @@
+"""从推流/离线结果 TSV 加载时间段列表。"""
+from __future__ import annotations
+
+from pathlib import Path
+
+from pack_utils import log
+
+
+def load_segments_from_result_tsv(
+    tsv_path: Path,
+    *,
+    skip_empty_top1: bool = False,
+) -> list[tuple[float, float, float]]:
+    """Load (start, end, score) segments from a result TSV; score is always 1.0.
+
+    Rows that are too short, unparsable or have end <= start are counted and skipped.
+    skip_empty_top1 also skips rows whose top1_name is missing, empty or starts with "（".
+    Raises ValueError when the header lacks start_sec or end_sec.
+    """
+    tsv_path = tsv_path.resolve()
+    text = tsv_path.read_text(encoding="utf-8")
+    lines = [ln for ln in text.splitlines() if ln.strip()]
+    if len(lines) < 2:
+        log(f"[segments] TSV 无数据行: {tsv_path}")
+        return []
+
+    header = lines[0].split("\t")
+    col = {name.strip(): i for i, name in enumerate(header)}
+    for req in ("start_sec", "end_sec"):
+        if req not in col:
+            raise ValueError(f"TSV 缺少列 {req!r}: {tsv_path}")
+
+    start_idx, end_idx, top1_idx = col["start_sec"], col["end_sec"], col.get("top1_name")
+    segs: list[tuple[float, float, float]] = []
+    skipped = 0
+
+    for ln in lines[1:]:
+        parts = ln.split("\t")
+        try:
+            t0, t1 = float(parts[start_idx].strip()), float(parts[end_idx].strip())
+        except (IndexError, ValueError):  # row too short for a time column, or not numeric
+            skipped += 1
+            continue
+        if t1 <= t0:
+            skipped += 1
+            continue
+        if skip_empty_top1 and top1_idx is not None:
+            # A row that ends before the top1_name column has no name at all.
+            name = parts[top1_idx].strip() if len(parts) > top1_idx else ""
+            if not name or name.startswith("（"):
+                skipped += 1
+                continue
+        segs.append((t0, t1, 1.0))
+
+    log(f"[segments] 从 TSV 加载 {len(segs)} 段: {tsv_path}")
+    if skipped:
+        log(f"[segments] 跳过无效/空行 {skipped} 条")
+    return segs
diff --git a/tests/test_action_trigger_logic.py b/tests/test_action_trigger_logic.py
new file mode 100644
index 0000000..f89a7b8
--- /dev/null
+++ b/tests/test_action_trigger_logic.py
@@ -0,0 +1,174 @@
+"""ActionTriggerLogic 合成 IoU 序列单元测试。"""
+from __future__ import annotations
+
+import sys
+import unittest
+from pathlib import Path
+
+PACK_ROOT = Path(__file__).resolve().parents[1]
+sys.path.insert(0, str(PACK_ROOT / "src"))
+
+from paths import ensure_code_on_path  # noqa: E402
+
+ensure_code_on_path(PACK_ROOT)
+
+from action_trigger_logic import (  # noqa: E402
+    ActionTriggerLogic,
+    max_hand_basket_iou,
+    resolve_contact_iou_thresholds,
+)
+
+
+class TestResolveContactIouThresholds(unittest.TestCase):
+    def test_legacy_threshold_derives_off(self) -> None:  # only the single legacy threshold is supplied
+        on, off = resolve_contact_iou_thresholds(contact_iou_threshold=0.05)
+        self.assertAlmostEqual(on, 0.05)
+        self.assertAlmostEqual(off, 0.03)  # the off threshold is derived; 0.03 is expected for 0.05
+
+    def test_explicit_on_off(self) -> None:  # explicit on/off values are expected back unchanged
+        on, off = resolve_contact_iou_thresholds(
+            contact_iou_on=0.08, contact_iou_off=0.03
+        )
+        self.assertAlmostEqual(on, 0.08)
+        self.assertAlmostEqual(off, 0.03)
+
+
+class TestActionTriggerLogic(unittest.TestCase):
+    BASKET = [0.0, 0.0, 100.0, 100.0]  # basket box shared by the process_frame / IoU tests
+
+    def _run_iou_sequence(
+        self,
+        ious: list[float],
+        *,
+        dt: float = 0.04,
+        **trigger_kw,
+    ) -> list[float]:  # every non-None value returned by step_iou, in order
+        trigger = ActionTriggerLogic(fps=25, **trigger_kw)
+        events: list[float] = []
+        for i, iou in enumerate(ious):
+            event_t = trigger.step_iou(i * dt, iou)  # sample i is stamped i * dt seconds
+            if event_t is not None:
+                events.append(event_t)
+        return events
+
+    def test_oscillation_before_trigger_emits_once(self) -> None:
+        ious = [0.09, 0.02, 0.09, 0.02, 0.09, 0.09, 0.09, 0.09, 0.09, 0.09]  # flips across both thresholds, then stays high
+        events = self._run_iou_sequence(
+            ious,
+            confirm_seconds=0.12,
+            cooldown_seconds=5.0,
+            threshold_on=0.08,
+            threshold_off=0.03,
+        )
+        self.assertEqual(len(events), 1)
+
+    def test_long_dwell_emits_once(self) -> None:
+        ious = [0.0] * 2 + [0.10] * 30  # one uninterrupted contact lasting 30 samples
+        events = self._run_iou_sequence(
+            ious,
+            confirm_seconds=0.12,
+            cooldown_seconds=5.0,
+            threshold_on=0.08,
+            threshold_off=0.03,
+        )
+        self.assertEqual(len(events), 1)
+
+    def test_cooldown_suppresses_second_event(self) -> None:
+        ious = (  # with dt=0.2 the second contact starts at 2.2 s, inside the 5 s cooldown
+            [0.0] * 2
+            + [0.10] * 3
+            + [0.0] * 6
+            + [0.10] * 3
+            + [0.10] * 5
+        )
+        events = self._run_iou_sequence(
+            ious,
+            dt=0.2,
+            confirm_seconds=0.12,
+            cooldown_seconds=5.0,
+            threshold_on=0.08,
+            threshold_off=0.03,
+        )
+        self.assertEqual(len(events), 1)
+
+    def test_real_second_contact_after_gap(self) -> None:
+        ious = (  # with dt=0.2 the second contact starts at 7.0 s, 6.6 s after the first one began
+            [0.0] * 2
+            + [0.10] * 3
+            + [0.0] * 30
+            + [0.10] * 3
+        )
+        events = self._run_iou_sequence(
+            ious,
+            dt=0.2,
+            confirm_seconds=0.12,
+            cooldown_seconds=5.0,
+            threshold_on=0.08,
+            threshold_off=0.03,
+        )
+        self.assertEqual(len(events), 2)
+
+    def test_start_timestamp_is_streak_first_frame(self) -> None:
+        dt = 0.04
+        trigger = ActionTriggerLogic(
+            fps=25,
+            confirm_seconds=0.12,
+            cooldown_seconds=5.0,
+            threshold_on=0.08,
+            threshold_off=0.03,
+        )
+        events: list[float] = []
+        for i in range(8):  # contact is present from the very first sample
+            event_t = trigger.step_iou(i * dt, 0.10)
+            if event_t is not None:
+                events.append(event_t)
+        self.assertEqual(len(events), 1)
+        self.assertAlmostEqual(events[0], 0.0, places=3)  # the event carries the time of the streak's first sample
+
+    def test_multi_hand_any_high_iou_triggers(self) -> None:
+        trigger = ActionTriggerLogic(
+            fps=25,
+            confirm_seconds=0.12,
+            cooldown_seconds=5.0,
+            threshold_on=0.08,
+            threshold_off=0.03,
+        )
+        low_hand = [80.0, 80.0, 90.0, 90.0]  # 10x10 box; 0.01 against BASKET if standard box IoU is used
+        high_hand = [0.0, 0.0, 50.0, 50.0]  # 50x50 box; 0.25 against BASKET if standard box IoU is used
+        dt = 0.04
+        events: list[float] = []
+        for i in range(5):
+            hands = [low_hand, high_hand] if i >= 1 else [low_hand]  # the large hand appears from frame 1 on
+            event_t = trigger.process_frame(i * dt, hands, self.BASKET)
+            if event_t is not None:
+                events.append(event_t)
+        self.assertEqual(len(events), 1)
+
+    def test_max_hand_basket_iou_picks_best(self) -> None:
+        low = [80.0, 80.0, 90.0, 90.0]
+        high = [0.0, 0.0, 50.0, 50.0]
+        iou = max_hand_basket_iou([low, high], self.BASKET)
+        self.assertGreater(iou, 0.08)  # only the larger box can exceed this value
+
+    def test_reset_clears_state(self) -> None:
+        trigger = ActionTriggerLogic(
+            fps=25,
+            confirm_seconds=0.12,
+            cooldown_seconds=5.0,
+            threshold_on=0.08,
+            threshold_off=0.03,
+        )
+        for i in range(2):  # begin a streak, then discard it with reset()
+            trigger.step_iou(i * 0.04, 0.10)
+        trigger.reset()
+        events: list[float] = []
+        for i in range(5):  # replay from t=0 on the reset trigger
+            event_t = trigger.step_iou(i * 0.04, 0.10)
+            if event_t is not None:
+                events.append(event_t)
+        self.assertEqual(len(events), 1)
+        self.assertAlmostEqual(events[0], 0.0, places=3)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_stream_basket.py b/tests/test_stream_basket.py
new file mode 100644
index 0000000..fc037b5
--- /dev/null
+++ b/tests/test_stream_basket.py
@@ -0,0 +1,84 @@
+"""推流帧缓存单元测试。"""
+from __future__ import annotations
+
+import sys
+import unittest
+from pathlib import Path
+from unittest.mock import MagicMock
+
+import numpy as np
+
+PACK_ROOT = Path(__file__).resolve().parents[1]
+sys.path.insert(0, str(PACK_ROOT / "src"))
+
+from paths import ensure_code_on_path  # noqa: E402
+
+ensure_code_on_path(PACK_ROOT)
+
+from stream_frame_buffer import (  # noqa: E402
+    FrameRingBuffer,
+    decode_cached_frame,
+    encode_frame_for_cache,
+)
+from stream_basket_session import StreamBasketSession  # noqa: E402
+
+
+class TestFrameRingBuffer(unittest.TestCase):
+    def test_jpeg_roundtrip(self) -> None:
+        raw = np.random.randint(0, 255, (1080, 1920, 3), dtype=np.uint8)  # NOTE(review): unseeded noise, so the size check below is not deterministic
+        data = encode_frame_for_cache(raw, max_width=1280)
+        self.assertLess(len(data), raw.nbytes // 4)  # encoded size must be under a quarter of the raw bytes
+        back = decode_cached_frame(data)
+        self.assertEqual(back.ndim, 3)
+        self.assertLessEqual(back.shape[1], 1280)  # decoded width honours max_width
+
+    def test_slice_inclusive(self) -> None:
+        buf = FrameRingBuffer(max_seconds=10.0, fps=10.0, cache_max_width=640)
+        for i in range(50):  # 50 frames stamped 0.0, 0.1, ..., 4.9
+            f = np.zeros((64, 64, 3), dtype=np.uint8)
+            buf.append(i * 0.1, f)
+        part = buf.slice_decoded(2.0, 2.5)
+        ts = [p[0] for p in part]
+        self.assertTrue(all(2.0 - 1e-6 <= t <= 2.5 + 1e-6 for t in ts))
+        self.assertGreaterEqual(len(ts), 5)  # lower bound only, so float rounding at the window edges is tolerated
+
+
+class TestStreamBasketSession(unittest.TestCase):
+    def test_pending_clip_ready_after_window(self) -> None:
+        trigger = MagicMock()
+        trigger.process_frame = MagicMock(  # mocked trigger: returns 2.0 only for the frame stamped t == 2.0
+            side_effect=lambda t, _h, _b: 2.0 if abs(t - 2.0) < 1e-9 else None
+        )
+        hand_model = MagicMock()
+
+        session = StreamBasketSession(
+            [0, 0, 100, 100],
+            hand_model,
+            trigger,
+            segment_start_offset_sec=2.0,
+            segment_end_offset_sec=8.0,
+            min_segment_sec=4.0,
+            ring_buffer_sec=10.0,
+            fps=25.0,
+            cache_max_width=640,
+        )
+
+        frame = np.zeros((64, 64, 3), dtype=np.uint8)
+        contact_t = None
+        for i in range(260):  # 260 frames at 0.04 s spacing: t = 0.00 .. 10.36
+            t = i * 0.04
+            start = session.push_frame(t, frame)
+            if start is not None and contact_t is None:  # remember only the first reported contact
+                contact_t = start
+
+        self.assertAlmostEqual(contact_t, 2.0, places=3)
+        clips = session.poll_ready_clips()
+        self.assertGreaterEqual(len(clips), 1)
+        clip = clips[0]
+        self.assertAlmostEqual(clip.start_sec, 4.0, places=2)  # contact 2.0 + start offset 2.0
+        self.assertAlmostEqual(clip.end_sec, 10.0, places=2)  # contact 2.0 + end offset 8.0
+        self.assertGreater(len(clip.frames), 0)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/weights/goodbad_frame.pt b/weights/goodbad_frame.pt
new file mode 100644
index 0000000..fa8b56b
Binary files /dev/null and b/weights/goodbad_frame.pt differ
diff --git a/weights/hand_detect.pt b/weights/hand_detect.pt
new file mode 100644
index 0000000..082a181
Binary files /dev/null and b/weights/hand_detect.pt differ
diff --git a/weights/haocai_classify.pt b/weights/haocai_classify.pt
new file mode 100644
index 0000000..3671ac2
Binary files /dev/null and b/weights/haocai_classify.pt differ