update minio port

2026-05-22 09:35:41 +08:00
parent 153c91f8ff
commit 62b14d7386
22 changed files with 1256 additions and 1170 deletions
--- a/backend/app/algo_host/init.py
+++ b/backend/app/algo_host/init.py
@@ -0,0 +1,24 @@
+"""Thin orchestration around algorithm_subprocesses reference bundles (no in-process inference)."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from app.algo_host.result_adapter import ReferenceDoctorInfo
+
+if TYPE_CHECKING:
+    from app.algo_host.batch_service import BatchAlgorithmService, BatchRunResult
+
+__all__ = ["BatchAlgorithmService", "BatchRunResult", "ReferenceDoctorInfo"]
+
+
+def __getattr__(name: str):
+    if name == "BatchAlgorithmService":
+        from app.algo_host.batch_service import BatchAlgorithmService
+
+        return BatchAlgorithmService
+    if name == "BatchRunResult":
+        from app.algo_host.batch_service import BatchRunResult
+
+        return BatchRunResult
+    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
--- a/backend/app/algo_host/batch_service.py
+++ b/backend/app/algo_host/batch_service.py
@@ -0,0 +1,250 @@
+"""Offline batch orchestration: spawn 5.15 main.py and optional visualization subprocess."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from pathlib import Path
+
+from loguru import logger
+
+from app.algo_host.bundle import default_reference_bundle_dir, resolve_reference_bundle_dir
+from app.algo_host.job_workspace import prepare_batch_job
+from app.algo_host.result_adapter import (
+    ReferenceDoctorInfo,
+    candidate_cache_key,
+    is_reference_result_complete,
+    parse_reference_doctor_info,
+    parse_reference_tsv,
+    resolve_reference_candidates,
+    sha256_file,
+)
+from app.algo_host.subprocess_runner import run_batch_main, run_visualization_script
+from app.algo_host.transcode import (
+    ensure_batch_pipeline_input_video,
+    is_browser_compatible_mp4,
+    is_readable_mp4,
+    transcode_visualization_for_browser,
+)
+from app.domain.consumption import SurgeryConsumptionStored
+from app.services.video_batch_cleanup import (
+    RAW_VISUALIZATION_FILENAME,
+    purge_visualization_artifacts,
+    purge_visualization_pending,
+    visualization_output_path,
+    visualization_pending_input_path,
+    visualization_pending_result_path,
+)
+
+
+@dataclass(frozen=True)
+class BatchRunResult:
+    video_sha256: str
+    candidate_cache_key: str
+    input_path: Path
+    work_dir: Path
+    output_path: Path
+    details: list[SurgeryConsumptionStored]
+    reused_cache: bool
+    doctor: ReferenceDoctorInfo | None = None
+    visualization_path: Path | None = None
+
+
+def default_batch_root_dir() -> Path:
+    repo_root = Path(__file__).resolve().parents[2]
+    return repo_root / "logs" / "video_batch"
+
+
+class BatchAlgorithmService:
+    def __init__(
+        self,
+        *,
+        bundle_dir: Path | None = None,
+        root_dir: Path | None = None,
+    ) -> None:
+        self._bundle_dir_override = bundle_dir
+        self._root_dir = root_dir or default_batch_root_dir()
+
+    @property
+    def bundle_dir(self) -> Path:
+        if self._bundle_dir_override is not None:
+            return Path(self._bundle_dir_override).expanduser().resolve()
+        return default_reference_bundle_dir()
+
+    @property
+    def root_dir(self) -> Path:
+        return self._root_dir
+
+    def _generate_visualization(
+        self,
+        *,
+        bundle_dir: Path,
+        video_path: Path,
+        result_path: Path,
+        output_video_path: Path,
+    ) -> Path | None:
+        raw_video_path = output_video_path.with_name(RAW_VISUALIZATION_FILENAME)
+        script_path = bundle_dir / "visualize_result_video.py"
+        if not script_path.is_file():
+            logger.warning("reference visualization script not found: {}", script_path)
+            return None
+        if not video_path.is_file() or not result_path.is_file():
+            return None
+        if output_video_path.is_file() and is_browser_compatible_mp4(output_video_path):
+            return output_video_path
+        if raw_video_path.is_file() and not is_readable_mp4(raw_video_path):
+            raw_video_path.unlink(missing_ok=True)
+        if output_video_path.is_file() and not is_browser_compatible_mp4(output_video_path):
+            output_video_path.unlink(missing_ok=True)
+        if raw_video_path.is_file() and is_readable_mp4(raw_video_path):
+            logger.info(
+                "reusing existing visualization source for transcode: {}",
+                raw_video_path,
+            )
+            if transcode_visualization_for_browser(raw_video_path, output_video_path):
+                return output_video_path
+            logger.warning(
+                "transcode from existing source failed; regenerating visualization: {}",
+                raw_video_path,
+            )
+            raw_video_path.unlink(missing_ok=True)
+        try:
+            run_visualization_script(
+                bundle_dir=bundle_dir,
+                video_path=video_path,
+                result_path=result_path,
+                raw_output_video_path=raw_video_path,
+            )
+        except RuntimeError as exc:
+            logger.error("reference visualization failed: {}", exc)
+            return None
+        if not is_readable_mp4(raw_video_path):
+            logger.error("reference visualization produced unreadable mp4: {}", raw_video_path)
+            return None
+        if transcode_visualization_for_browser(raw_video_path, output_video_path):
+            purge_visualization_artifacts(output_video_path.parent)
+            return output_video_path
+        logger.error("reference visualization transcode to browser mp4 failed: {}", output_video_path)
+        return None
+
+    def finalize_visualization(
+        self,
+        *,
+        surgery_id: str,
+        video_path: Path | None = None,
+        result_path: Path | None = None,
+    ) -> Path | None:
+        logger.info(
+            "video batch visualization starting for surgery_id={} (visualize_result_video.py)",
+            surgery_id,
+        )
+        video_path = (video_path or visualization_pending_input_path(self._root_dir, surgery_id)).resolve()
+        result_path = (result_path or visualization_pending_result_path(self._root_dir, surgery_id)).resolve()
+        output_video_path = visualization_output_path(self._root_dir, surgery_id)
+        if not is_reference_result_complete(result_path):
+            logger.warning("skip visualization: incomplete result {}", result_path)
+            purge_visualization_pending(self._root_dir, surgery_id)
+            return None
+        if not video_path.is_file():
+            logger.warning("skip visualization: missing staged video {}", video_path)
+            purge_visualization_pending(self._root_dir, surgery_id)
+            return None
+        bundle_dir = resolve_reference_bundle_dir(self._bundle_dir_override)
+        output_video_path.parent.mkdir(parents=True, exist_ok=True)
+        vis_path = self._generate_visualization(
+            bundle_dir=bundle_dir,
+            video_path=video_path,
+            result_path=result_path,
+            output_video_path=output_video_path,
+        )
+        purge_visualization_pending(self._root_dir, surgery_id)
+        if vis_path is not None:
+            logger.info(
+                "video batch visualization complete for surgery_id={} ({})",
+                surgery_id,
+                vis_path,
+            )
+        else:
+            logger.warning("video batch visualization failed for surgery_id={}", surgery_id)
+        return vis_path
+
+    def latest_visualization_path(self, surgery_id: str) -> Path | None:
+        path = visualization_output_path(self._root_dir, surgery_id)
+        if path.is_file() and path.stat().st_size > 0 and is_browser_compatible_mp4(path):
+            return path
+        return None
+
+    def run(
+        self,
+        *,
+        surgery_id: str,
+        uploaded_video_path: Path,
+        original_filename: str = "video.mp4",
+        candidate_consumables: list[str] | None = None,
+        include_visualization: bool = False,
+    ) -> BatchRunResult:
+        del original_filename, include_visualization
+        bundle_dir = resolve_reference_bundle_dir(self._bundle_dir_override)
+        uploaded_video_path = uploaded_video_path.resolve()
+        digest = sha256_file(uploaded_video_path)
+        candidates = resolve_reference_candidates(candidate_consumables)
+        candidate_key = candidate_cache_key(candidates)
+
+        surgery_input_dir = self._root_dir / surgery_id / "input"
+        surgery_input_dir.mkdir(parents=True, exist_ok=True)
+        surgery_input = surgery_input_dir / f"{digest[:12]}.mp4"
+        ensure_batch_pipeline_input_video(
+            source_path=uploaded_video_path,
+            dest_path=surgery_input,
+        )
+
+        cache_dir = self._root_dir / "cache" / digest / candidate_key
+        job = prepare_batch_job(
+            bundle_dir=self._bundle_dir_override,
+            cache_dir=cache_dir,
+            uploaded_video_path=uploaded_video_path,
+            candidate_consumables=candidates,
+        )
+
+        reused_cache = job.output_path.is_file() and is_reference_result_complete(job.output_path)
+        if reused_cache:
+            logger.info(
+                "reference batch cache hit digest={} candidate_key={} ({})",
+                digest[:12],
+                candidate_key,
+                job.output_path,
+            )
+        else:
+            logger.info(
+                "reference batch starting for surgery_id={} (5.15/main.py, work_dir={})",
+                surgery_id,
+                job.work_dir,
+            )
+            run_batch_main(
+                bundle_dir=bundle_dir,
+                config_path=job.config_path.resolve(),
+                work_dir=job.work_dir.resolve(),
+                output_path=job.output_path.resolve(),
+            )
+            if not is_reference_result_complete(job.output_path):
+                raise RuntimeError(
+                    f"reference bundle finished but result.tsv is incomplete: {job.output_path}"
+                )
+            logger.info(
+                "reference batch complete for surgery_id={} ({})",
+                surgery_id,
+                job.output_path,
+            )
+
+        doctor = parse_reference_doctor_info(job.output_path)
+        details = parse_reference_tsv(job.output_path, doctor=doctor)
+        return BatchRunResult(
+            video_sha256=digest,
+            candidate_cache_key=candidate_key,
+            input_path=surgery_input,
+            work_dir=job.work_dir,
+            output_path=job.output_path,
+            details=details,
+            reused_cache=reused_cache,
+            doctor=doctor,
+            visualization_path=None,
+        )
--- a/backend/app/algo_host/bundle.py
+++ b/backend/app/algo_host/bundle.py
@@ -0,0 +1,59 @@
+"""Resolve reference algorithm bundle paths and default YAML (read-only; no vendor patches)."""
+
+from __future__ import annotations
+
+import copy
+from pathlib import Path
+from typing import Any
+
+import yaml
+
+REPO_ROOT = Path(__file__).resolve().parents[2]
+DEFAULT_REFERENCE_BUNDLE_RELATIVE = "algorithm_subprocesses/5.15"
+
+
+def configured_reference_bundle_relative() -> str:
+    from app.config import Settings
+
+    raw = (Settings().reference_bundle_relative or "").strip()
+    return raw or DEFAULT_REFERENCE_BUNDLE_RELATIVE
+
+
+def default_reference_bundle_dir() -> Path:
+    raw = configured_reference_bundle_relative()
+    path = Path(raw).expanduser()
+    if path.is_absolute():
+        return path.resolve()
+    return (REPO_ROOT / path).resolve()
+
+
+def resolve_reference_bundle_dir(bundle_dir: Path | None = None) -> Path:
+    if bundle_dir is None:
+        label = configured_reference_bundle_relative()
+        root = default_reference_bundle_dir()
+    else:
+        label = str(bundle_dir)
+        root = Path(bundle_dir).expanduser().resolve()
+    if not (root / "main.py").is_file():
+        raise FileNotFoundError(f"reference bundle main.py not found: {label} -> {root}")
+    if not (root / "code" / "repo_root.py").is_file():
+        raise FileNotFoundError(f"reference bundle vendor code not found: {label} -> {root / 'code'}")
+    return root
+
+
+def load_reference_default_config(bundle_dir: Path | None = None) -> dict[str, Any]:
+    root = resolve_reference_bundle_dir(bundle_dir)
+    path = root / "configs" / "default_config.yaml"
+    data = yaml.safe_load(path.read_text(encoding="utf-8"))
+    if not isinstance(data, dict):
+        raise ValueError(f"invalid reference bundle default config: {path}")
+    return copy.deepcopy(data)
+
+
+def resolve_bundle_relative_path(bundle_dir: Path, raw: str) -> Path:
+    p = Path((raw or "").strip())
+    if not str(p):
+        raise ValueError("empty bundle-relative path")
+    if p.is_absolute():
+        return p.resolve()
+    return (bundle_dir / p).resolve()
--- a/backend/app/algo_host/job_workspace.py
+++ b/backend/app/algo_host/job_workspace.py
@@ -0,0 +1,120 @@
+"""Prepare input artifacts expected by algorithm_subprocesses/5.15 main.py."""
+
+from __future__ import annotations
+
+import copy
+import json
+from dataclasses import dataclass
+from pathlib import Path
+
+import yaml
+
+from app.algo_host.bundle import load_reference_default_config, resolve_reference_bundle_dir
+from app.algo_host.transcode import ensure_batch_pipeline_input_video
+from app.consumable_catalog import build_name_mapping
+
+
+@dataclass(frozen=True)
+class BatchJobFiles:
+    config_path: Path
+    excel_path: Path
+    whitelist_path: Path
+    output_path: Path
+    work_dir: Path
+    input_video_path: Path
+
+
+def write_reference_catalog_excel(
+    path: Path,
+    *,
+    candidate_consumables: list[str],
+) -> None:
+    import pandas as pd
+
+    name_to_code = build_name_mapping(candidate_consumables)
+    rows = [
+        {
+            "序号": idx,
+            "产品编码": name_to_code.get(name, name),
+            "商品名称": name,
+        }
+        for idx, name in enumerate(candidate_consumables, start=1)
+    ]
+    path.parent.mkdir(parents=True, exist_ok=True)
+    pd.DataFrame(rows, columns=["序号", "产品编码", "商品名称"]).to_excel(path, index=False)
+
+
+def write_reference_whitelist_json(path: Path, *, candidate_consumables: list[str]) -> None:
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(
+        json.dumps({"allowed_names": candidate_consumables}, ensure_ascii=False, indent=2),
+        encoding="utf-8",
+    )
+
+
+def build_job_config(
+    *,
+    bundle_dir: Path,
+    video_path: Path,
+    output_path: Path,
+    work_dir: Path,
+    excel_path: Path,
+    whitelist_path: Path,
+) -> dict:
+    cfg = copy.deepcopy(load_reference_default_config(bundle_dir))
+    cfg["io"]["video"] = str(video_path.resolve())
+    cfg["io"]["excel"] = str(excel_path.resolve())
+    cfg["io"]["out"] = str(output_path.resolve())
+    cfg["io"]["whitelist_json"] = str(whitelist_path.resolve())
+    cfg["runtime"]["work_dir"] = str(work_dir.resolve())
+    cfg["runtime"]["keep_work_dir"] = False
+    return cfg
+
+
+def prepare_batch_job(
+    *,
+    bundle_dir: Path | None,
+    cache_dir: Path,
+    uploaded_video_path: Path,
+    candidate_consumables: list[str],
+) -> BatchJobFiles:
+    root = resolve_reference_bundle_dir(bundle_dir)
+    cache_input_dir = cache_dir / "input"
+    cache_output_dir = cache_dir / "output"
+    cache_work_dir = cache_dir / "work"
+    cache_config_dir = cache_dir / "config"
+    for d in (cache_input_dir, cache_output_dir, cache_work_dir, cache_config_dir):
+        d.mkdir(parents=True, exist_ok=True)
+
+    cache_input = cache_input_dir / "input.mp4"
+    ensure_batch_pipeline_input_video(
+        source_path=uploaded_video_path,
+        dest_path=cache_input,
+    )
+    output_path = cache_output_dir / "result.tsv"
+    excel_path = cache_config_dir / "商品信息表.xlsx"
+    whitelist_path = cache_config_dir / "whitelist.json"
+    config_path = cache_config_dir / "config.yaml"
+
+    write_reference_catalog_excel(excel_path, candidate_consumables=candidate_consumables)
+    write_reference_whitelist_json(whitelist_path, candidate_consumables=candidate_consumables)
+    config = build_job_config(
+        bundle_dir=root,
+        video_path=cache_input.resolve(),
+        output_path=output_path.resolve(),
+        work_dir=cache_work_dir.resolve(),
+        excel_path=excel_path.resolve(),
+        whitelist_path=whitelist_path.resolve(),
+    )
+    config_path.write_text(
+        yaml.safe_dump(config, allow_unicode=True, sort_keys=False),
+        encoding="utf-8",
+    )
+    return BatchJobFiles(
+        config_path=config_path,
+        excel_path=excel_path,
+        whitelist_path=whitelist_path,
+        output_path=output_path,
+        work_dir=cache_work_dir,
+        input_video_path=cache_input,
+    )
--- a/backend/app/algo_host/result_adapter.py
+++ b/backend/app/algo_host/result_adapter.py
@@ -0,0 +1,178 @@
+"""Map algorithm_subprocesses/5.15 TSV output to domain objects (orchestration adapter only)."""
+
+from __future__ import annotations
+
+import csv
+import hashlib
+import re
+from dataclasses import dataclass
+from datetime import datetime, timedelta, timezone
+from pathlib import Path
+
+from app.baked import pipeline as bp
+from app.consumable_catalog import (
+    effective_candidate_consumables,
+    normalize_candidate_consumables_raw,
+)
+from app.domain.consumption import SurgeryConsumptionStored
+
+
+@dataclass(frozen=True)
+class ReferenceDoctorInfo:
+    doctor_id: str
+    doctor_name: str | None
+    display: str
+    raw_line: str
+
+
+_DOCTOR_NAME_ID_RE = re.compile(
+    r"^(?P<name>.+?)\s*\(id=(?P<id>[^,\s)]+)(?:,\s*conf=[\d.]+)?\)\s*(?:\[低置信度\])?\s*$"
+)
+_DOCTOR_ID_ONLY_RE = re.compile(
+    r"^doctor_id=(?P<id>[^\s(]+)(?:\s*\(conf=[\d.]+\))?\s*(?:\[低置信度\])?\s*$"
+)
+
+
+def sha256_file(path: Path) -> str:
+    h = hashlib.sha256()
+    with path.open("rb") as f:
+        for chunk in iter(lambda: f.read(1024 * 1024), b""):
+            h.update(chunk)
+    return h.hexdigest()
+
+
+def candidate_cache_key(candidate_consumables: list[str]) -> str:
+    raw = "\n".join(candidate_consumables).encode("utf-8")
+    return hashlib.sha256(raw).hexdigest()[:12]
+
+
+def resolve_reference_candidates(candidate_consumables: list[str] | None) -> list[str]:
+    requested = normalize_candidate_consumables_raw(list(candidate_consumables or []))
+    return effective_candidate_consumables(requested)
+
+
+def parse_reference_doctor_info(path: Path) -> ReferenceDoctorInfo | None:
+    if not path.is_file():
+        return None
+    raw_line = ""
+    for line in path.read_text(encoding="utf-8").splitlines():
+        stripped = line.strip()
+        if stripped.startswith("医生信息：") or stripped.startswith("医生信息:"):
+            raw_line = stripped
+            break
+    if not raw_line:
+        return None
+
+    body = raw_line.split("：", 1)[-1].split(":", 1)[-1].strip()
+    if not body or body == "未启用":
+        return ReferenceDoctorInfo(
+            doctor_id=bp.VIDEO_RESULT_DOCTOR_ID,
+            doctor_name=None,
+            display=body or "未启用",
+            raw_line=raw_line,
+        )
+    if body.startswith("识别失败"):
+        return ReferenceDoctorInfo(
+            doctor_id=bp.VIDEO_RESULT_DOCTOR_ID,
+            doctor_name=None,
+            display=body,
+            raw_line=raw_line,
+        )
+
+    match = _DOCTOR_NAME_ID_RE.match(body)
+    if match:
+        name = match.group("name").strip()
+        did = match.group("id").strip()
+        return ReferenceDoctorInfo(
+            doctor_id=did,
+            doctor_name=name,
+            display=f"{name} ({did})",
+            raw_line=raw_line,
+        )
+
+    match = _DOCTOR_ID_ONLY_RE.match(body)
+    if match:
+        did = match.group("id").strip()
+        return ReferenceDoctorInfo(
+            doctor_id=did,
+            doctor_name=None,
+            display=did,
+            raw_line=raw_line,
+        )
+
+    return ReferenceDoctorInfo(
+        doctor_id=bp.VIDEO_RESULT_DOCTOR_ID,
+        doctor_name=None,
+        display=body,
+        raw_line=raw_line,
+    )
+
+
+def is_reference_result_complete(path: Path) -> bool:
+    if not path.is_file() or path.stat().st_size <= 0:
+        return False
+    lines = [line.strip() for line in path.read_text(encoding="utf-8").splitlines() if line.strip()]
+    if not any(line.lower().startswith("rank\t") for line in lines):
+        return False
+    has_doctor_footer = any(
+        line.startswith("医生信息：") or line.startswith("医生信息:") for line in lines
+    )
+    has_segment_row = False
+    for line in lines:
+        if line.lower().startswith("rank\t"):
+            continue
+        if line.startswith("医生信息"):
+            continue
+        parts = line.split("\t")
+        if len(parts) >= 5 and parts[0].strip().isdigit():
+            has_segment_row = True
+            break
+    return has_doctor_footer and has_segment_row
+
+
+def doctor_id_for_consumption_rows(doctor: ReferenceDoctorInfo | None) -> str:
+    if doctor is None:
+        return bp.VIDEO_RESULT_DOCTOR_ID
+    if doctor.doctor_name:
+        return f"{doctor.doctor_name} ({doctor.doctor_id})"
+    if doctor.doctor_id and doctor.doctor_id != bp.VIDEO_RESULT_DOCTOR_ID:
+        return doctor.doctor_id
+    return bp.VIDEO_RESULT_DOCTOR_ID
+
+
+def parse_reference_tsv(
+    path: Path,
+    *,
+    base_timestamp: datetime | None = None,
+    doctor: ReferenceDoctorInfo | None = None,
+) -> list[SurgeryConsumptionStored]:
+    if base_timestamp is None:
+        base_timestamp = datetime.now(timezone.utc)
+    if doctor is None:
+        doctor = parse_reference_doctor_info(path)
+    row_doctor_id = doctor_id_for_consumption_rows(doctor)
+    out: list[SurgeryConsumptionStored] = []
+    with path.open("r", encoding="utf-8", newline="") as f:
+        reader = csv.DictReader(f, delimiter="\t")
+        for row in reader:
+            name = (row.get("top1_name") or "").strip()
+            if not name or name.startswith("（"):
+                continue
+            if name.startswith("医生信息"):
+                continue
+            item_id = (row.get("product_id_top1") or "").strip() or name
+            try:
+                start_sec = float((row.get("start_sec") or "0").strip() or 0.0)
+            except ValueError:
+                start_sec = 0.0
+            out.append(
+                SurgeryConsumptionStored(
+                    item_id=item_id,
+                    item_name=name,
+                    qty=1,
+                    doctor_id=row_doctor_id,
+                    timestamp=base_timestamp + timedelta(seconds=max(0.0, start_sec)),
+                    source="video_batch",
+                )
+            )
+    return out
--- a/backend/app/algo_host/subprocess_runner.py
+++ b/backend/app/algo_host/subprocess_runner.py
@@ -0,0 +1,180 @@
+"""Spawn reference bundle child processes (main.py, visualize_result_video.py)."""
+
+from __future__ import annotations
+
+import os
+import signal
+import subprocess
+import sys
+from pathlib import Path
+
+from loguru import logger
+
+from app.algo_host.bundle import load_reference_default_config, resolve_bundle_relative_path
+from app.algo_host.transcode import VISUALIZATION_MAX_WIDTH
+
+
+def build_reference_env() -> dict[str, str]:
+    env = os.environ.copy()
+    env["PYTHONFAULTHANDLER"] = "1"
+    env["PYTHONUNBUFFERED"] = "1"
+    return env
+
+
+def build_batch_main_command(*, bundle_dir: Path, config_path: Path) -> list[str]:
+    return [
+        "uv",
+        "run",
+        "python",
+        "-X",
+        "faulthandler",
+        str(bundle_dir / "main.py"),
+        "--config",
+        str(config_path),
+    ]
+
+
+def build_visualization_command(
+    *,
+    bundle_dir: Path,
+    video_path: Path,
+    result_path: Path,
+    output_video_path: Path,
+) -> list[str]:
+    cfg = load_reference_default_config(bundle_dir)
+    weights = cfg.get("weights") if isinstance(cfg.get("weights"), dict) else {}
+    device_cfg = cfg.get("device") if isinstance(cfg.get("device"), dict) else {}
+    hand_raw = str((weights or {}).get("hand") or "weights/hand_detect.pt").strip()
+    hand_model = resolve_bundle_relative_path(bundle_dir, hand_raw)
+    return [
+        sys.executable,
+        "-X",
+        "faulthandler",
+        str((bundle_dir / "visualize_result_video.py").resolve()),
+        "--video",
+        str(video_path.resolve()),
+        "--result-txt",
+        str(result_path.resolve()),
+        "--hand-model",
+        str(hand_model),
+        "--out-video",
+        str(output_video_path.resolve()),
+        "--device",
+        str(device_cfg.get("type") or "cuda"),
+        "--max-width",
+        str(VISUALIZATION_MAX_WIDTH),
+    ]
+
+
+def _signal_name(signum: int) -> str:
+    try:
+        return signal.Signals(signum).name
+    except ValueError:
+        return f"signal {signum}"
+
+
+def describe_batch_returncode(returncode: int) -> str:
+    if returncode < 0:
+        signum = -returncode
+        return f"terminated by {_signal_name(signum)} ({signum})"
+    if returncode > 128:
+        wrapped = returncode - 256
+        if wrapped < 0:
+            signum = -wrapped
+            return f"exit={returncode} (possibly propagated {wrapped}/{_signal_name(signum)})"
+    return f"exit={returncode}"
+
+
+def format_batch_failure(
+    returncode: int,
+    *,
+    stdout: str,
+    stderr: str,
+    work_dir: Path,
+    output_path: Path,
+) -> str:
+    chunks: list[str] = [
+        describe_batch_returncode(returncode),
+        f"work_dir={work_dir}",
+        f"output={output_path}",
+    ]
+    stdout = stdout.strip()
+    stderr = stderr.strip()
+    if stdout:
+        chunks.append(f"stdout:\n{stdout[-3000:]}")
+    if stderr:
+        chunks.append(f"stderr:\n{stderr[-3000:]}")
+    return "\n".join(chunks)
+
+
+def _log_subprocess_output(prefix: str, stdout: str, stderr: str, *, max_lines: int = 40) -> None:
+    for label, text in (("stdout", stdout), ("stderr", stderr)):
+        lines = [ln for ln in (text or "").splitlines() if ln.strip()]
+        if not lines:
+            continue
+        tail = lines[-max_lines:] if len(lines) > max_lines else lines
+        for line in tail:
+            logger.info("{} {}", prefix, line)
+
+
+def run_subprocess(
+    cmd: list[str],
+    *,
+    cwd: Path,
+    work_dir: Path,
+    output_path: Path,
+    log_label: str,
+) -> None:
+    proc = subprocess.run(
+        cmd,
+        cwd=str(cwd),
+        check=False,
+        text=True,
+        capture_output=True,
+        env=build_reference_env(),
+    )
+    if proc.returncode != 0:
+        msg = format_batch_failure(
+            proc.returncode,
+            stdout=proc.stdout or "",
+            stderr=proc.stderr or "",
+            work_dir=work_dir,
+            output_path=output_path,
+        )
+        raise RuntimeError(f"{log_label} failed {msg}")
+    _log_subprocess_output(log_label, proc.stdout or "", proc.stderr or "")
+
+
+def run_batch_main(*, bundle_dir: Path, config_path: Path, work_dir: Path, output_path: Path) -> None:
+    cmd = build_batch_main_command(bundle_dir=bundle_dir, config_path=config_path)
+    logger.info("reference batch starting: {}", " ".join(cmd))
+    run_subprocess(
+        cmd,
+        cwd=bundle_dir,
+        work_dir=work_dir,
+        output_path=output_path,
+        log_label="reference batch",
+    )
+
+
+def run_visualization_script(
+    *,
+    bundle_dir: Path,
+    video_path: Path,
+    result_path: Path,
+    raw_output_video_path: Path,
+) -> None:
+    cmd = build_visualization_command(
+        bundle_dir=bundle_dir,
+        video_path=video_path,
+        result_path=result_path,
+        output_video_path=raw_output_video_path,
+    )
+    logger.info("reference visualization starting: {}", " ".join(cmd))
+    run_subprocess(
+        cmd,
+        cwd=bundle_dir,
+        work_dir=raw_output_video_path.parent,
+        output_path=raw_output_video_path,
+        log_label="visualize",
+    )
--- a/backend/app/algo_host/transcode.py
+++ b/backend/app/algo_host/transcode.py
@@ -0,0 +1,276 @@
+"""FFmpeg/ffprobe helpers for batch uploads and browser-ready MP4 (infrastructure only)."""
+
+from __future__ import annotations
+
+import shutil
+import subprocess
+from pathlib import Path
+
+from loguru import logger
+
+VISUALIZATION_MAX_WIDTH = 1920
+
+
+def visualization_ffmpeg_scale_filter() -> str:
+    return f"scale='min({VISUALIZATION_MAX_WIDTH},iw)':-2"
+
+
+def browser_transcode_tmp_path(output_path: Path) -> Path:
+    return output_path.with_name(f"{output_path.stem}.part{output_path.suffix}")
+
+
+def is_readable_mp4(path: Path) -> bool:
+    ffprobe = shutil.which("ffprobe")
+    if ffprobe is None or not path.is_file() or path.stat().st_size < 4096:
+        return False
+    proc = subprocess.run(
+        [
+            ffprobe,
+            "-v",
+            "error",
+            "-select_streams",
+            "v:0",
+            "-show_entries",
+            "stream=codec_name",
+            "-of",
+            "csv=p=0",
+            str(path),
+        ],
+        check=False,
+        text=True,
+        capture_output=True,
+    )
+    return proc.returncode == 0 and bool((proc.stdout or "").strip())
+
+
+def ffprobe_fields(path: Path, entries: str) -> dict[str, str]:
+    ffprobe = shutil.which("ffprobe")
+    if ffprobe is None or not path.is_file():
+        return {}
+    proc = subprocess.run(
+        [
+            ffprobe,
+            "-v",
+            "error",
+            "-select_streams",
+            "v:0",
+            "-show_entries",
+            entries,
+            "-of",
+            "default=noprint_wrappers=1",
+            str(path),
+        ],
+        check=False,
+        text=True,
+        capture_output=True,
+    )
+    if proc.returncode != 0:
+        return {}
+    fields: dict[str, str] = {}
+    for line in proc.stdout.splitlines():
+        if "=" not in line:
+            continue
+        key, value = line.split("=", 1)
+        fields[key.strip().lower()] = value.strip().lower()
+    return fields
+
+
+def ffprobe_container_format(path: Path) -> str:
+    ffprobe = shutil.which("ffprobe")
+    if ffprobe is None or not path.is_file():
+        return ""
+    proc = subprocess.run(
+        [
+            ffprobe,
+            "-v",
+            "error",
+            "-show_entries",
+            "format=format_name",
+            "-of",
+            "default=noprint_wrappers=1:nokey=1",
+            str(path),
+        ],
+        check=False,
+        text=True,
+        capture_output=True,
+    )
+    if proc.returncode != 0:
+        return ""
+    return (proc.stdout or "").strip().lower()
+
+
+def is_browser_compatible_mp4(path: Path) -> bool:
+    fields = ffprobe_fields(path, "stream=codec_name,pix_fmt")
+    return fields.get("codec_name") == "h264" and fields.get("pix_fmt") in {"yuv420p", "yuvj420p"}
+
+
+def batch_input_needs_normalize(path: Path) -> bool:
+    if not is_readable_mp4(path):
+        return True
+    if not is_browser_compatible_mp4(path):
+        return True
+    container = ffprobe_container_format(path)
+    if container and "mpeg" in container:
+        return True
+    fields = ffprobe_fields(path, "stream=codec_name,width,height")
+    try:
+        width = int(fields.get("width") or "0")
+    except ValueError:
+        width = 0
+    return width > 1920
+
+
+def normalize_batch_input_video(source_path: Path, output_path: Path) -> bool:
+    ffmpeg = shutil.which("ffmpeg")
+    if ffmpeg is None or not source_path.is_file():
+        return False
+    if not is_readable_mp4(source_path):
+        logger.warning("skip batch input normalize: unreadable source {}", source_path)
+        return False
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    tmp_path = browser_transcode_tmp_path(output_path)
+    if tmp_path.exists():
+        tmp_path.unlink()
+    logger.info("ffmpeg batch input normalize starting: {} -> {}", source_path, output_path)
+    proc = subprocess.run(
+        [
+            ffmpeg,
+            "-y",
+            "-hide_banner",
+            "-loglevel",
+            "error",
+            "-fflags",
+            "+genpts",
+            "-i",
+            str(source_path),
+            "-map",
+            "0:v:0",
+            "-an",
+            "-f",
+            "mp4",
+            "-c:v",
+            "libx264",
+            "-pix_fmt",
+            "yuv420p",
+            "-preset",
+            "veryfast",
+            "-crf",
+            "23",
+            "-vf",
+            "scale='min(1920,iw)':-2",
+            "-movflags",
+            "+faststart",
+            str(tmp_path),
+        ],
+        check=False,
+        text=True,
+        capture_output=True,
+    )
+    if proc.returncode != 0:
+        stderr = (proc.stderr or "").strip()
+        logger.warning("ffmpeg batch input normalize failed: {}", stderr[-3000:])
+        if tmp_path.exists():
+            tmp_path.unlink()
+        return False
+    if not tmp_path.is_file() or tmp_path.stat().st_size <= 0:
+        logger.warning("ffmpeg batch input normalize produced empty file: {}", tmp_path)
+        if tmp_path.exists():
+            tmp_path.unlink()
+        return False
+    tmp_path.replace(output_path)
+    if not is_browser_compatible_mp4(output_path):
+        logger.warning("ffmpeg batch input normalize output not h264/yuv420p: {}", output_path)
+        output_path.unlink(missing_ok=True)
+        return False
+    logger.info(
+        "ffmpeg batch input normalize complete: {} ({} bytes)",
+        output_path,
+        output_path.stat().st_size,
+    )
+    return True
+
+
+def ensure_batch_pipeline_input_video(*, source_path: Path, dest_path: Path) -> None:
+    import shutil as sh
+
+    dest_path.parent.mkdir(parents=True, exist_ok=True)
+    if dest_path.is_file() and dest_path.stat().st_size > 0 and not batch_input_needs_normalize(dest_path):
+        return
+    if batch_input_needs_normalize(source_path):
+        if normalize_batch_input_video(source_path, dest_path):
+            return
+        logger.warning(
+            "batch input normalize failed, falling back to raw copy: {} -> {}",
+            source_path,
+            dest_path,
+        )
+    if not dest_path.is_file():
+        sh.copy2(source_path, dest_path)
+
+
+def transcode_visualization_for_browser(source_path: Path, output_path: Path) -> bool:
+    ffmpeg = shutil.which("ffmpeg")
+    if ffmpeg is None or not source_path.is_file():
+        return False
+    if not is_readable_mp4(source_path):
+        logger.warning("skip visualization transcode: unreadable source {}", source_path)
+        return False
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    tmp_path = browser_transcode_tmp_path(output_path)
+    if tmp_path.exists():
+        tmp_path.unlink()
+    logger.info("ffmpeg visualization transcode starting: {} -> {}", source_path, output_path)
+    proc = subprocess.run(
+        [
+            ffmpeg,
+            "-y",
+            "-hide_banner",
+            "-loglevel",
+            "error",
+            "-i",
+            str(source_path),
+            "-map",
+            "0:v:0",
+            "-an",
+            "-f",
+            "mp4",
+            "-c:v",
+            "libx264",
+            "-pix_fmt",
+            "yuv420p",
+            "-preset",
+            "ultrafast",
+            "-crf",
+            "23",
+            "-vf",
+            visualization_ffmpeg_scale_filter(),
+            "-movflags",
+            "+faststart",
+            str(tmp_path),
+        ],
+        check=False,
+        text=True,
+        capture_output=True,
+    )
+    if proc.returncode != 0:
+        stderr = (proc.stderr or "").strip()
+        logger.warning("ffmpeg visualization transcode failed: {}", stderr[-3000:])
+        if tmp_path.exists():
+            tmp_path.unlink()
+        return False
+    if not tmp_path.is_file() or tmp_path.stat().st_size <= 0:
+        logger.warning("ffmpeg visualization transcode produced empty file: {}", tmp_path)
+        if tmp_path.exists():
+            tmp_path.unlink()
+        return False
+    tmp_path.replace(output_path)
+    if not is_browser_compatible_mp4(output_path):
+        logger.warning("ffmpeg output is not browser-compatible h264/yuv420p: {}", output_path)
+        output_path.unlink(missing_ok=True)
+        return False
+    logger.info(
+        "ffmpeg visualization transcode complete: {} ({} bytes)",
+        output_path,
+        output_path.stat().st_size,
+    )
+    return True