fix docker build
This commit is contained in:
@@ -24,3 +24,6 @@ algorithm_subprocesses/**/*.xlsx
|
||||
algorithm_subprocesses/**/*.xls
|
||||
*.md
|
||||
.dockerignore
|
||||
# Offline bake inputs: keep *.pth / *.task; ignore partials and accidental duplicates.
|
||||
weights/**/*.part
|
||||
weights/**/*.pth.*
|
||||
|
||||
@@ -4,11 +4,11 @@
|
||||
#
|
||||
# 5-6 ActionFormer 实时算法(默认开启):
|
||||
# - app/resources/actionformer_epoch_045.pth.tar 必须存在(离线下发,~110MB,未入 git)。
|
||||
# - VideoSwin Swin3D-T 权重在 Docker 构建时预下载到 /app/.cache/torch(见 scripts/bake_torch_hub_checkpoint.py);
|
||||
# 运行时不再访问 pytorch.org。「首次运行」指 torch 缓存为空时才会联网下载;现已改为构建时烘焙进镜像。
|
||||
# 国内 PyPI 镜像(南大/清华/阿里)不同步 /models/*.pth,构建默认先试 uv.agentsmirror.com 再回退官方源。
|
||||
# 离线/弱网:先 wget 权重到 backend/weights/swin3d_t-7615ae03.pth,再 docker compose build api。
|
||||
# 或:docker compose build --build-arg PYTORCH_MODELS_URL=https://your-mirror/.../swin3d_t-7615ae03.pth api
|
||||
# - torchvision 预训练 hub 权重在 Docker 构建时烘焙到 /app/.cache/torch(scripts/bake_pretrained_weights.py):
|
||||
# swin3d_t-7615ae03.pth(VideoSwin)、resnet50-0676ba61.pth(医生 ReID);运行时不再访问 pytorch.org。
|
||||
# 国内 PyPI 镜像不同步 /models/*.pth;构建默认 uv.agentsmirror.com 再回退官方源。
|
||||
# 离线/弱网:将上述文件放入 backend/weights/ 后 docker compose build api(本地有则跳过下载)。
|
||||
# 或:docker compose build --build-arg PYTORCH_MODELS_MIRROR=https://your-mirror/download.pytorch.org api
|
||||
# - Linux GPU 机:镜像内 torch / torchvision / torchaudio 为 cu130 wheel;
|
||||
# 宿主机需 NVIDIA 驱动 + NVIDIA Container Toolkit;`api` 服务已配置 `gpus: all`。
|
||||
# 启动后可验证:docker compose exec api python -c "import torch; print(torch.cuda.is_available())"
|
||||
|
||||
@@ -56,11 +56,20 @@ COPY algorithm_subprocesses ./algorithm_subprocesses/
|
||||
# Bake runtime patches/assets so non-root api never writes the read-only bundle tree.
|
||||
COPY app/algorithm_runner/actionformer_release/libs/utils/nms.py \
|
||||
algorithm_subprocesses/5.15/code/actionformer_release/libs/utils/nms.py
|
||||
RUN mkdir -p algorithm_subprocesses/5.15/doctor_identity_package/.mediapipe_models && \
|
||||
curl -fsSL --retry 3 \
|
||||
-o algorithm_subprocesses/5.15/doctor_identity_package/.mediapipe_models/pose_landmarker_lite.task \
|
||||
"https://storage.googleapis.com/mediapipe-models/pose_landmarker/pose_landmarker_lite/float16/1/pose_landmarker_lite.task" && \
|
||||
test -s algorithm_subprocesses/5.15/doctor_identity_package/.mediapipe_models/pose_landmarker_lite.task
|
||||
# Optional offline assets (torch hub + MediaPipe); see backend/weights/.gitkeep
|
||||
COPY weights ./weights/
|
||||
RUN set -eux; \
|
||||
MP_DEST=algorithm_subprocesses/5.15/doctor_identity_package/.mediapipe_models/pose_landmarker_lite.task; \
|
||||
mkdir -p "$(dirname "$MP_DEST")"; \
|
||||
if [ -s weights/pose_landmarker_lite.task ]; then \
|
||||
cp weights/pose_landmarker_lite.task "$MP_DEST"; \
|
||||
echo "mediapipe pose model from local weights/"; \
|
||||
else \
|
||||
curl -fsSL --retry 3 -o "$MP_DEST" \
|
||||
"https://storage.googleapis.com/mediapipe-models/pose_landmarker/pose_landmarker_lite/float16/1/pose_landmarker_lite.task"; \
|
||||
echo "mediapipe pose model downloaded"; \
|
||||
fi; \
|
||||
test -s "$MP_DEST"
|
||||
|
||||
# uv.lock pins uv.agentsmirror.com artifact URLs. Rewrite to mainland mirrors (same /packages/... paths).
|
||||
# PyPI: Tsinghua | PyTorch wheel index: 南大 (syncs download.pytorch.org / download-r2)
|
||||
@@ -73,29 +82,28 @@ RUN sed -i \
|
||||
|
||||
ENV UV_DEFAULT_INDEX=https://pypi.tuna.tsinghua.edu.cn/simple
|
||||
|
||||
# VideoSwin (Swin3D-T) hub weights: bake at build time so RTSP batch jobs never hit pytorch.org at runtime.
|
||||
# Domestic PyPI mirrors (NJU/Tsinghua/Aliyun) only sync pip wheels, not /models/*.pth; default tries
|
||||
# uv.agentsmirror.com (same ecosystem as uv.lock) then download.pytorch.org. Optional offline bake:
|
||||
# backend/weights/swin3d_t-7615ae03.pth (see weights/.gitkeep)
|
||||
# Override: --build-arg PYTORCH_MODELS_URL=... or PYTORCH_MODELS_MIRROR=...
|
||||
# Torchvision hub weights (Swin3D-T + ResNet50): bake at build so runtime never hits pytorch.org.
|
||||
# Domestic PyPI mirrors do not sync /models/*.pth. Optional offline: backend/weights/*.pth (see .gitkeep).
|
||||
# Override: --build-arg PYTORCH_MODELS_MIRROR=... or PYTORCH_MODELS_URL=... (swin only).
|
||||
ARG PYTORCH_MODELS_MIRROR=
|
||||
ARG PYTORCH_MODELS_URL=
|
||||
ENV PYTORCH_MODELS_MIRROR=${PYTORCH_MODELS_MIRROR} \
|
||||
PYTORCH_MODELS_URL=${PYTORCH_MODELS_URL}
|
||||
# Optional offline weight (only .gitkeep by default; add swin3d_t-7615ae03.pth before build if needed).
|
||||
COPY weights ./weights/
|
||||
|
||||
# Do not cache-mount /app/.cache/torch: BuildKit cache mounts are not exported into the image.
|
||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
uv sync --frozen --no-dev --no-compile --refresh-package numpy --refresh-package mediapipe && \
|
||||
.venv/bin/python -c "import alembic" && \
|
||||
.venv/bin/python -c "import numpy; import numpy.lib._index_tricks_impl" && \
|
||||
.venv/bin/python -c "import mediapipe as mp; print('mediapipe', mp.__version__)" && \
|
||||
mkdir -p /app/.cache/ultralytics && \
|
||||
PYTORCH_MODELS_LOCAL_PATH=/app/weights/swin3d_t-7615ae03.pth \
|
||||
.venv/bin/python scripts/bake_torch_hub_checkpoint.py && \
|
||||
mkdir -p /app/.cache/ultralytics /app/.cache/torch/hub/checkpoints && \
|
||||
PYTORCH_MODELS_LOCAL_DIR=/app/weights \
|
||||
.venv/bin/python scripts/bake_pretrained_weights.py && \
|
||||
test -s /app/.cache/torch/hub/checkpoints/swin3d_t-7615ae03.pth && \
|
||||
test -s /app/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth && \
|
||||
rm -rf /app/weights && \
|
||||
TORCH_HOME=/app/.cache/torch .venv/bin/python -c "from torchvision.models.video import Swin3D_T_Weights, swin3d_t; swin3d_t(weights=Swin3D_T_Weights.KINETICS400_V1); print('swin3d_t cached ok')" && \
|
||||
chmod -R a+rX /app/.venv /app/algorithm_subprocesses /app/.cache/torch /app/.cache/ultralytics
|
||||
chmod -R a+rX /app/.venv /app/algorithm_subprocesses && \
|
||||
chmod -R a+rwX /app/.cache/torch /app/.cache/ultralytics
|
||||
|
||||
ENV PATH="/app/.venv/bin:$PATH"
|
||||
|
||||
|
||||
212
backend/scripts/bake_pretrained_weights.py
Normal file
212
backend/scripts/bake_pretrained_weights.py
Normal file
@@ -0,0 +1,212 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Bake torchvision / torch.hub checkpoints into TORCH_HOME during Docker image build.
|
||||
|
||||
Runtime batch jobs and doctor identity must not download from pytorch.org when the
|
||||
api container runs as a non-root compose user (read-only /app/.cache/torch).
|
||||
|
||||
Place optional offline copies under backend/weights/ before build:
|
||||
- swin3d_t-7615ae03.pth (VideoSwin / Swin3D-T, ~110MB)
|
||||
- resnet50-0676ba61.pth (doctor ReID backbone, ~98MB)
|
||||
|
||||
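Override mirrors: PYTORCH_MODELS_MIRROR (base URL; "/models/<file>" is appended for every file), PYTORCH_MODELS_URL (full URL, applied to swin3d_t only; no per-file URL for other checkpoints).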
|
||||
Legacy single-file: PYTORCH_MODELS_LOCAL_PATH (applies only to swin3d_t).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
OFFICIAL_PREFIX = "https://download.pytorch.org/models"
|
||||
AGENTSMIRROR_PREFIX = "https://uv.agentsmirror.com/download.pytorch.org/models"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class HubCheckpoint:
|
||||
filename: str
|
||||
min_bytes: int
|
||||
label: str
|
||||
|
||||
|
||||
# All torchvision hub files downloaded at runtime by production paths (as of 5.15 bundle).
|
||||
HUB_CHECKPOINTS: tuple[HubCheckpoint, ...] = (
|
||||
HubCheckpoint(
|
||||
filename="swin3d_t-7615ae03.pth",
|
||||
min_bytes=100_000_000,
|
||||
label="VideoSwin Swin3D-T (Kinetics-400)",
|
||||
),
|
||||
HubCheckpoint(
|
||||
filename="resnet50-0676ba61.pth",
|
||||
min_bytes=90_000_000,
|
||||
label="doctor ReID ResNet50 (ImageNet-1K)",
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def _candidate_urls(filename: str) -> list[str]:
    """Return the ordered, de-duplicated list of URLs to try for ``filename``.

    Priority: the ``PYTORCH_MODELS_MIRROR`` base (if set), then the agentsmirror
    default, then the official host. ``PYTORCH_MODELS_URL`` replaces the whole
    list, but only for the VideoSwin checkpoint (legacy single-file override).

    Args:
        filename: Checkpoint file name, e.g. ``"resnet50-0676ba61.pth"``.

    Returns:
        Candidate URLs in the order they should be attempted.
    """
    explicit = (os.environ.get("PYTORCH_MODELS_URL") or "").strip()
    # Legacy single-URL override applied only to VideoSwin (other files use mirror list).
    if explicit and filename == "swin3d_t-7615ae03.pth":
        return [explicit]

    urls: list[str] = []

    # PYTORCH_MODELS_MIRROR is a host-level base (e.g. https://mirror/download.pytorch.org),
    # so the "/models/" path segment is added here.
    mirror = (os.environ.get("PYTORCH_MODELS_MIRROR") or "").strip().rstrip("/")
    if mirror:
        urls.append(f"{mirror}/models/{filename}")

    # AGENTSMIRROR_PREFIX and OFFICIAL_PREFIX already end in "/models"; appending
    # "/models/" a second time produced a doubled ".../models/models/<file>" path.
    urls.append(f"{AGENTSMIRROR_PREFIX}/{filename}")
    urls.append(f"{OFFICIAL_PREFIX}/{filename}")

    # dict keeps insertion order, so duplicates are dropped without losing priority.
    return list(dict.fromkeys(urls))
|
||||
|
||||
|
||||
def _local_source(filename: str) -> Path | None:
|
||||
legacy = (os.environ.get("PYTORCH_MODELS_LOCAL_PATH") or "").strip()
|
||||
if legacy and filename == "swin3d_t-7615ae03.pth":
|
||||
path = Path(legacy)
|
||||
if path.is_file():
|
||||
return path
|
||||
|
||||
local_dir = (os.environ.get("PYTORCH_MODELS_LOCAL_DIR") or "").strip()
|
||||
if local_dir:
|
||||
path = Path(local_dir) / filename
|
||||
if path.is_file():
|
||||
return path
|
||||
return None
|
||||
|
||||
|
||||
def _copy_local(src: Path, dest: Path, *, min_bytes: int) -> None:
|
||||
size = src.stat().st_size
|
||||
if size < min_bytes:
|
||||
raise OSError(f"local file too small ({size} bytes): {src}")
|
||||
dest.parent.mkdir(parents=True, exist_ok=True)
|
||||
shutil.copy2(src, dest)
|
||||
|
||||
|
||||
def _download_with_curl(url: str, dest: Path, *, min_bytes: int) -> None:
    """Download ``url`` to ``dest`` with the ``curl`` binary.

    The payload is written to a sibling ``<dest>.part`` file and only renamed onto
    ``dest`` after curl succeeds and the size check passes, so a failed or
    truncated transfer never leaves a file at ``dest``.

    Args:
        url: Source URL.
        dest: Final path of the checkpoint.
        min_bytes: Minimum acceptable size of the downloaded file.

    Raises:
        RuntimeError: ``curl`` is not on PATH, or curl exited non-zero (the message
            carries curl's stderr/stdout, or ``exit=<code>`` when both are empty).
        OSError: The download completed but is smaller than ``min_bytes``.
    """
    curl = shutil.which("curl")
    if curl is None:
        raise RuntimeError("curl not found")

    # Match _copy_local / _download_with_torch: make sure the checkpoint directory
    # exists, otherwise curl cannot create the output file on a fresh cache dir.
    dest.parent.mkdir(parents=True, exist_ok=True)

    tmp = dest.with_suffix(dest.suffix + ".part")
    tmp.unlink(missing_ok=True)
    proc = subprocess.run(
        [
            curl,
            # -sS: no progress meter in the captured stderr, but still report errors,
            # so the RuntimeError detail below is the actual failure message.
            "-fsSL",
            "--retry",
            "5",
            "--retry-all-errors",
            "--retry-delay",
            "3",
            "--connect-timeout",
            "30",
            "--max-time",
            "1800",
            "-o",
            str(tmp),
            url,
        ],
        check=False,
        capture_output=True,
        text=True,
    )
    if proc.returncode != 0:
        tmp.unlink(missing_ok=True)
        detail = (proc.stderr or proc.stdout or "").strip() or f"exit={proc.returncode}"
        raise RuntimeError(detail)

    size = tmp.stat().st_size
    if size < min_bytes:
        # Typically an error page or a truncated transfer; discard it.
        tmp.unlink(missing_ok=True)
        raise OSError(f"download too small ({size} bytes)")
    tmp.replace(dest)
|
||||
|
||||
|
||||
def _download_with_torch(url: str, dest: Path, *, min_bytes: int) -> None:
|
||||
import torch
|
||||
|
||||
dest.parent.mkdir(parents=True, exist_ok=True)
|
||||
torch.hub.download_url_to_file(url, str(dest), progress=True)
|
||||
size = dest.stat().st_size
|
||||
if size < min_bytes:
|
||||
dest.unlink(missing_ok=True)
|
||||
raise OSError(f"download too small ({size} bytes)")
|
||||
|
||||
|
||||
def bake_hub_checkpoint(
|
||||
spec: HubCheckpoint,
|
||||
*,
|
||||
torch_home: Path,
|
||||
) -> None:
|
||||
dest = torch_home / "hub" / "checkpoints" / spec.filename
|
||||
if dest.is_file() and dest.stat().st_size >= spec.min_bytes:
|
||||
print(f"already baked [{spec.label}]: {dest} ({dest.stat().st_size} bytes)")
|
||||
return
|
||||
|
||||
local = _local_source(spec.filename)
|
||||
if local is not None:
|
||||
try:
|
||||
_copy_local(local, dest, min_bytes=spec.min_bytes)
|
||||
print(f"baked [{spec.label}] {dest} ({dest.stat().st_size} bytes) from local {local}")
|
||||
return
|
||||
except OSError as exc:
|
||||
print(f"local copy failed for {spec.filename}: {exc}", file=sys.stderr)
|
||||
|
||||
errors: list[str] = []
|
||||
for url in _candidate_urls(spec.filename):
|
||||
try:
|
||||
print(f"downloading [{spec.label}] {url}")
|
||||
_download_with_curl(url, dest, min_bytes=spec.min_bytes)
|
||||
print(f"baked [{spec.label}] {dest} ({dest.stat().st_size} bytes) from {url}")
|
||||
return
|
||||
except (OSError, RuntimeError) as exc:
|
||||
errors.append(f"curl {url}: {exc}")
|
||||
dest.unlink(missing_ok=True)
|
||||
|
||||
official = f"{OFFICIAL_PREFIX}/{spec.filename}"
|
||||
try:
|
||||
print(f"torch.hub fallback [{spec.label}]: {official}")
|
||||
_download_with_torch(official, dest, min_bytes=spec.min_bytes)
|
||||
print(f"baked [{spec.label}] {dest} ({dest.stat().st_size} bytes) from {official}")
|
||||
return
|
||||
except (OSError, RuntimeError) as exc:
|
||||
errors.append(f"torch {official}: {exc}")
|
||||
dest.unlink(missing_ok=True)
|
||||
|
||||
print(f"failed to bake {spec.label} ({spec.filename}):", file=sys.stderr)
|
||||
for line in errors:
|
||||
print(f" - {line}", file=sys.stderr)
|
||||
raise SystemExit(1)
|
||||
|
||||
|
||||
def warm_torchvision_hub_models() -> None:
|
||||
"""Load models so torchvision verifies hub checkpoints (no network if baked)."""
|
||||
from torchvision.models import ResNet50_Weights, resnet50
|
||||
from torchvision.models.video import Swin3D_T_Weights, swin3d_t
|
||||
|
||||
resnet50(weights=ResNet50_Weights.IMAGENET1K_V1)
|
||||
print("resnet50 IMAGENET1K_V1 ok")
|
||||
swin3d_t(weights=Swin3D_T_Weights.KINETICS400_V1)
|
||||
print("swin3d_t KINETICS400_V1 ok")
|
||||
|
||||
|
||||
def main() -> int:
    """Bake every checkpoint in ``HUB_CHECKPOINTS`` and then load the models once.

    Returns:
        0 on success. A checkpoint that cannot be baked makes
        ``bake_hub_checkpoint`` raise ``SystemExit(1)`` before this returns.
    """
    torch_home = Path(os.environ.get("TORCH_HOME", "/app/.cache/torch"))
    # When TORCH_HOME is unset the files are baked under the fallback path above,
    # but torch would resolve its own default cache dir during the warm-up and
    # could download again elsewhere. Export the directory we baked into so both
    # steps agree. An existing TORCH_HOME is left untouched.
    os.environ.setdefault("TORCH_HOME", str(torch_home))

    for spec in HUB_CHECKPOINTS:
        bake_hub_checkpoint(spec, torch_home=torch_home)
    warm_torchvision_hub_models()
    print("all pretrained hub weights baked")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
@@ -1,150 +1,11 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Bake torchvision hub checkpoints into TORCH_HOME during Docker image build."""
|
||||
"""Backward-compatible entrypoint; bakes all hub weights (see bake_pretrained_weights.py)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import runpy
|
||||
from pathlib import Path
|
||||
|
||||
CHECKPOINT = "swin3d_t-7615ae03.pth"
|
||||
OFFICIAL_URL = f"https://download.pytorch.org/models/{CHECKPOINT}"
|
||||
AGENTSMIRROR_URL = f"https://uv.agentsmirror.com/download.pytorch.org/models/{CHECKPOINT}"
|
||||
MIN_BYTES = 100_000_000
|
||||
|
||||
|
||||
def _candidate_urls() -> list[str]:
|
||||
explicit = (os.environ.get("PYTORCH_MODELS_URL") or "").strip()
|
||||
if explicit:
|
||||
return [explicit]
|
||||
|
||||
raw = (os.environ.get("PYTORCH_MODELS_MIRROR") or "").strip().rstrip("/")
|
||||
prefixes = [p for p in (raw, "https://uv.agentsmirror.com/download.pytorch.org") if p]
|
||||
urls: list[str] = []
|
||||
for prefix in prefixes:
|
||||
urls.append(f"{prefix}/models/{CHECKPOINT}")
|
||||
urls.extend([AGENTSMIRROR_URL, OFFICIAL_URL])
|
||||
|
||||
seen: set[str] = set()
|
||||
ordered: list[str] = []
|
||||
for url in urls:
|
||||
if url in seen:
|
||||
continue
|
||||
seen.add(url)
|
||||
ordered.append(url)
|
||||
return ordered
|
||||
|
||||
|
||||
def _copy_local(src: Path, dest: Path) -> None:
|
||||
size = src.stat().st_size
|
||||
if size < MIN_BYTES:
|
||||
raise OSError(f"local file too small ({size} bytes): {src}")
|
||||
dest.parent.mkdir(parents=True, exist_ok=True)
|
||||
shutil.copy2(src, dest)
|
||||
|
||||
|
||||
def _download_with_curl(url: str, dest: Path) -> None:
|
||||
curl = shutil.which("curl")
|
||||
if curl is None:
|
||||
raise RuntimeError("curl not found")
|
||||
tmp = dest.with_suffix(dest.suffix + ".part")
|
||||
tmp.unlink(missing_ok=True)
|
||||
proc = subprocess.run(
|
||||
[
|
||||
curl,
|
||||
"-fL",
|
||||
"--retry",
|
||||
"5",
|
||||
"--retry-all-errors",
|
||||
"--retry-delay",
|
||||
"3",
|
||||
"--connect-timeout",
|
||||
"30",
|
||||
"--max-time",
|
||||
"1800",
|
||||
"-o",
|
||||
str(tmp),
|
||||
url,
|
||||
],
|
||||
check=False,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
if proc.returncode != 0:
|
||||
tmp.unlink(missing_ok=True)
|
||||
detail = (proc.stderr or proc.stdout or "").strip() or f"exit={proc.returncode}"
|
||||
raise RuntimeError(detail)
|
||||
size = tmp.stat().st_size
|
||||
if size < MIN_BYTES:
|
||||
tmp.unlink(missing_ok=True)
|
||||
raise OSError(f"download too small ({size} bytes)")
|
||||
tmp.replace(dest)
|
||||
|
||||
|
||||
def _download_with_torch(url: str, dest: Path) -> None:
|
||||
import torch
|
||||
|
||||
dest.parent.mkdir(parents=True, exist_ok=True)
|
||||
torch.hub.download_url_to_file(url, str(dest), progress=True)
|
||||
size = dest.stat().st_size
|
||||
if size < MIN_BYTES:
|
||||
dest.unlink(missing_ok=True)
|
||||
raise OSError(f"download too small ({size} bytes)")
|
||||
|
||||
|
||||
def main() -> int:
|
||||
torch_home = Path(os.environ.get("TORCH_HOME", "/app/.cache/torch"))
|
||||
dest = torch_home / "hub" / "checkpoints" / CHECKPOINT
|
||||
if dest.is_file() and dest.stat().st_size >= MIN_BYTES:
|
||||
print(f"already baked: {dest} ({dest.stat().st_size} bytes)")
|
||||
return 0
|
||||
|
||||
local_raw = (os.environ.get("PYTORCH_MODELS_LOCAL_PATH") or "").strip()
|
||||
if local_raw:
|
||||
local = Path(local_raw)
|
||||
if local.is_file():
|
||||
try:
|
||||
_copy_local(local, dest)
|
||||
print(f"baked {dest} ({dest.stat().st_size} bytes) from local {local}")
|
||||
return 0
|
||||
except OSError as exc:
|
||||
print(f"local copy failed: {exc}", file=sys.stderr)
|
||||
|
||||
errors: list[str] = []
|
||||
for url in _candidate_urls():
|
||||
try:
|
||||
print(f"downloading {url}")
|
||||
_download_with_curl(url, dest)
|
||||
print(f"baked {dest} ({dest.stat().st_size} bytes) from {url}")
|
||||
return 0
|
||||
except (OSError, RuntimeError) as exc:
|
||||
errors.append(f"curl {url}: {exc}")
|
||||
dest.unlink(missing_ok=True)
|
||||
|
||||
for url in (OFFICIAL_URL,):
|
||||
try:
|
||||
print(f"torch.hub fallback: {url}")
|
||||
_download_with_torch(url, dest)
|
||||
print(f"baked {dest} ({dest.stat().st_size} bytes) from {url}")
|
||||
return 0
|
||||
except (OSError, RuntimeError) as exc:
|
||||
errors.append(f"torch {url}: {exc}")
|
||||
dest.unlink(missing_ok=True)
|
||||
|
||||
print("failed to bake VideoSwin checkpoint:", file=sys.stderr)
|
||||
for line in errors:
|
||||
print(f" - {line}", file=sys.stderr)
|
||||
print(
|
||||
"hint: domestic PyPI mirrors (NJU/Tsinghua/Aliyun) do not sync /models/*.pth; "
|
||||
"pre-download once and set PYTORCH_MODELS_LOCAL_PATH, or pass "
|
||||
"PYTORCH_MODELS_URL / --build-arg PYTORCH_MODELS_MIRROR to a mirror that hosts "
|
||||
f"/models/{CHECKPOINT}",
|
||||
file=sys.stderr,
|
||||
)
|
||||
return 1
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
target = Path(__file__).resolve().parent / "bake_pretrained_weights.py"
|
||||
runpy.run_path(str(target), run_name="__main__")
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
#!/usr/bin/env bash
|
||||
# Clear BuildKit cache and rebuild the API image (fixes corrupted layer / unpigz errors).
|
||||
# Clear BuildKit cache and rebuild the API image.
|
||||
# Fixes export errors such as:
|
||||
# archive/tar: invalid tar header
|
||||
# unpigz: corrupted -- invalid deflate data
|
||||
set -euo pipefail
|
||||
cd "$(dirname "$0")/.."
|
||||
|
||||
@@ -8,12 +11,20 @@ docker builder prune -af
|
||||
docker buildx prune -af 2>/dev/null || true
|
||||
docker rmi -f backend-api:latest 2>/dev/null || true
|
||||
|
||||
if [[ "${AGGRESSIVE_PRUNE:-0}" == "1" ]]; then
|
||||
echo "Aggressive prune (dangling images + build cache)..."
|
||||
docker system prune -af 2>/dev/null || true
|
||||
fi
|
||||
|
||||
if [[ "${RESTART_DOCKER:-0}" == "1" ]]; then
|
||||
echo "Restarting Docker..."
|
||||
sudo systemctl restart docker
|
||||
sleep 2
|
||||
fi
|
||||
|
||||
echo "Building api image (--no-cache)..."
|
||||
docker compose build api --no-cache
|
||||
echo "Building api image (--no-cache, no attestations)..."
|
||||
export DOCKER_BUILDKIT=1
|
||||
export COMPOSE_BAKE=false
|
||||
docker compose build api --no-cache --provenance=false --sbom=false
|
||||
|
||||
echo "Done. Recreate container: docker compose up -d --force-recreate api"
|
||||
|
||||
67
backend/tests/test_bake_pretrained_weights.py
Normal file
67
backend/tests/test_bake_pretrained_weights.py
Normal file
@@ -0,0 +1,67 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib.util
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def _load_module():
|
||||
import sys
|
||||
|
||||
path = Path(__file__).resolve().parents[1] / "scripts" / "bake_pretrained_weights.py"
|
||||
spec = importlib.util.spec_from_file_location("bake_pretrained_weights", path)
|
||||
mod = importlib.util.module_from_spec(spec)
|
||||
assert spec.loader is not None
|
||||
sys.modules[spec.name] = mod
|
||||
spec.loader.exec_module(mod)
|
||||
return mod
|
||||
|
||||
|
||||
def test_candidate_urls_default(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
mod = _load_module()
|
||||
monkeypatch.delenv("PYTORCH_MODELS_URL", raising=False)
|
||||
monkeypatch.delenv("PYTORCH_MODELS_MIRROR", raising=False)
|
||||
urls = mod._candidate_urls("resnet50-0676ba61.pth")
|
||||
assert urls[0].endswith("/models/resnet50-0676ba61.pth")
|
||||
assert urls[-1] == f"{mod.OFFICIAL_PREFIX}/resnet50-0676ba61.pth"
|
||||
|
||||
|
||||
def test_candidate_urls_explicit_override_applies_to_swin_only(
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
) -> None:
|
||||
mod = _load_module()
|
||||
monkeypatch.setenv("PYTORCH_MODELS_URL", "https://example.com/swin3d_t-7615ae03.pth")
|
||||
assert mod._candidate_urls("swin3d_t-7615ae03.pth") == [
|
||||
"https://example.com/swin3d_t-7615ae03.pth"
|
||||
]
|
||||
assert "example.com" not in mod._candidate_urls("resnet50-0676ba61.pth")[0]
|
||||
|
||||
|
||||
def test_local_source_prefers_legacy_swin_path(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
    """PYTORCH_MODELS_LOCAL_PATH is honoured for swin3d_t and ignored for other files."""
    mod = _load_module()
    swin = tmp_path / "swin3d_t-7615ae03.pth"
    # _local_source only checks that the file exists (size is enforced later by
    # _copy_local), so a one-byte placeholder avoids writing ~100 MB per test run.
    swin.write_bytes(b"x")
    monkeypatch.setenv("PYTORCH_MODELS_LOCAL_PATH", str(swin))
    # An ambient PYTORCH_MODELS_LOCAL_DIR could otherwise satisfy the resnet lookup.
    monkeypatch.delenv("PYTORCH_MODELS_LOCAL_DIR", raising=False)
    assert mod._local_source("swin3d_t-7615ae03.pth") == swin
    assert mod._local_source("resnet50-0676ba61.pth") is None
|
||||
|
||||
|
||||
def test_local_source_uses_local_dir(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
|
||||
mod = _load_module()
|
||||
weights = tmp_path / "weights"
|
||||
weights.mkdir()
|
||||
resnet = weights / "resnet50-0676ba61.pth"
|
||||
resnet.write_bytes(b"x")
|
||||
monkeypatch.setenv("PYTORCH_MODELS_LOCAL_DIR", str(weights))
|
||||
assert mod._local_source("resnet50-0676ba61.pth") == resnet
|
||||
|
||||
|
||||
def test_bake_skips_when_dest_already_valid(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
    """A destination file that already meets min_bytes is kept; nothing is copied or fetched."""
    mod = _load_module()
    torch_home = tmp_path / "torch"
    monkeypatch.setenv("TORCH_HOME", str(torch_home))
    spec = mod.HUB_CHECKPOINTS[1]
    dest = torch_home / "hub" / "checkpoints" / spec.filename
    dest.parent.mkdir(parents=True)
    # Extend the file to the required size without building a ~90 MB bytes object.
    with dest.open("wb") as fh:
        fh.truncate(spec.min_bytes)

    def _unexpected(*args, **kwargs):
        raise AssertionError("bake must not copy or download when dest is already valid")

    # AssertionError is not caught by bake_hub_checkpoint, so any of these being
    # reached fails the test instead of silently falling through to the network.
    monkeypatch.setattr(mod, "_local_source", _unexpected)
    monkeypatch.setattr(mod, "_download_with_curl", _unexpected)
    monkeypatch.setattr(mod, "_download_with_torch", _unexpected)

    mod.bake_hub_checkpoint(spec, torch_home=torch_home)

    assert dest.stat().st_size == spec.min_bytes
|
||||
@@ -1,28 +1,13 @@
|
||||
"""Legacy entrypoint must delegate to bake_pretrained_weights."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib.util
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def _load_module():
|
||||
path = Path(__file__).resolve().parents[1] / "scripts" / "bake_torch_hub_checkpoint.py"
|
||||
spec = importlib.util.spec_from_file_location("bake_torch_hub_checkpoint", path)
|
||||
mod = importlib.util.module_from_spec(spec)
|
||||
assert spec.loader is not None
|
||||
spec.loader.exec_module(mod)
|
||||
return mod
|
||||
|
||||
|
||||
def test_candidate_urls_default(monkeypatch):
|
||||
mod = _load_module()
|
||||
monkeypatch.delenv("PYTORCH_MODELS_URL", raising=False)
|
||||
monkeypatch.delenv("PYTORCH_MODELS_MIRROR", raising=False)
|
||||
urls = mod._candidate_urls()
|
||||
assert urls[0].startswith("https://uv.agentsmirror.com/download.pytorch.org/models/")
|
||||
assert urls[-1] == mod.OFFICIAL_URL
|
||||
|
||||
|
||||
def test_candidate_urls_explicit_override(monkeypatch):
|
||||
mod = _load_module()
|
||||
monkeypatch.setenv("PYTORCH_MODELS_URL", "https://example.com/swin3d_t-7615ae03.pth")
|
||||
assert mod._candidate_urls() == ["https://example.com/swin3d_t-7615ae03.pth"]
|
||||
def test_bake_torch_hub_checkpoint_delegates() -> None:
|
||||
text = (
|
||||
Path(__file__).resolve().parents[1] / "scripts" / "bake_torch_hub_checkpoint.py"
|
||||
).read_text(encoding="utf-8")
|
||||
assert "bake_pretrained_weights.py" in text
|
||||
assert "runpy.run_path" in text
|
||||
|
||||
@@ -0,0 +1,18 @@
|
||||
# Optional offline assets for Docker build (see scripts/bake_pretrained_weights.py).
|
||||
# If a file exists here, image build copies it instead of downloading.
|
||||
#
|
||||
# Torchvision hub (→ /app/.cache/torch/hub/checkpoints/):
|
||||
# swin3d_t-7615ae03.pth (~110MB) VideoSwin / batch Phase1
|
||||
# resnet50-0676ba61.pth (~98MB) doctor ReID backbone (ImageNet)
|
||||
#
|
||||
# MediaPipe (→ algorithm_subprocesses/5.15/doctor_identity_package/.mediapipe_models/):
|
||||
# pose_landmarker_lite.task (~6MB)
|
||||
#
|
||||
# wget https://download.pytorch.org/models/swin3d_t-7615ae03.pth
|
||||
# wget https://download.pytorch.org/models/resnet50-0676ba61.pth
|
||||
# curl -fsSL -o pose_landmarker_lite.task \
|
||||
# "https://storage.googleapis.com/mediapipe-models/pose_landmarker/pose_landmarker_lite/float16/1/pose_landmarker_lite.task"
|
||||
#
|
||||
# Bundled in image via COPY (not downloaded at build):
|
||||
# algorithm_subprocesses/5.15/weights/*.pt, actionformer_epoch_045.pth.tar
|
||||
# doctor_identity_package/doctor_info.pth
|
||||
|
||||
BIN
backend/weights/pose_landmarker_lite.task
Normal file
BIN
backend/weights/pose_landmarker_lite.task
Normal file
Binary file not shown.
BIN
backend/weights/resnet50-0676ba61.pth
Normal file
BIN
backend/weights/resnet50-0676ba61.pth
Normal file
Binary file not shown.
BIN
backend/weights/swin3d_t-7615ae03.pth.1
Normal file
BIN
backend/weights/swin3d_t-7615ae03.pth.1
Normal file
Binary file not shown.
@@ -28,7 +28,12 @@ operation-room-monitor/
|
||||
- 算法子进程包:`backend/algorithm_subprocesses/5.15/`(含 `main.py` 与 `weights/`;镜像构建时会 `COPY` 进容器,勿在 `.dockerignore` 中整目录排除)
|
||||
- 标注视频中文字体:镜像内已安装 `fonts-noto-cjk`、`fonts-wqy-microhei`(供 `visualize_result_video.py` 绘制耗材标签)
|
||||
- 医生识别(MediaPipe Pose):镜像内已安装 `libgles2`、`libegl1`、`libegl-mesa0`、`libglx-mesa0`、`libgl1-mesa-dri` 等 Mesa/GLVND 库;构建阶段会 `import mediapipe` 校验 `libGLESv2.so.2` 可用。子进程强制 CPU delegate。若仍见该错误,请 **`docker compose build --no-cache api`** 后重启(勿沿用旧 tarball 镜像)
|
||||
- 可选备用权重:`backend/app/resources/actionformer_epoch_045.pth.tar`
|
||||
- **构建时预下载的预训练资源**(`scripts/bake_pretrained_weights.py`,本地有则跳过,见 `backend/weights/.gitkeep`):
|
||||
- `swin3d_t-7615ae03.pth`(VideoSwin / batch Phase1)
|
||||
- `resnet50-0676ba61.pth`(医生 ReID 骨干)
|
||||
- `pose_landmarker_lite.task`(MediaPipe Pose)
|
||||
- 业务权重随 `algorithm_subprocesses/5.15/weights/` 打进镜像(YOLO / ActionFormer 等,非 hub 下载)
|
||||
- 可选备用:`backend/app/resources/actionformer_epoch_045.pth.tar`
|
||||
|
||||
---
|
||||
|
||||
@@ -58,31 +63,6 @@ docker compose down
|
||||
docker compose down -v # 删除 PostgreSQL / MinIO 卷
|
||||
```
|
||||
|
||||
### 构建 API 镜像失败:`invalid tar header` / `unpigz: corrupted`
|
||||
|
||||
`uv sync` 已成功,但在 **exporting / unpacking** 阶段报错时,通常是 **Docker 本地层缓存或存储损坏**,与 Dockerfile 无关。
|
||||
|
||||
按顺序处理:
|
||||
|
||||
```bash
|
||||
cd backend
|
||||
chmod +x scripts/rebuild-api-image.sh
|
||||
|
||||
# 清缓存并重建(推荐)
|
||||
./scripts/rebuild-api-image.sh
|
||||
|
||||
# 仍失败时:重启 Docker 后再跑
|
||||
RESTART_DOCKER=1 ./scripts/rebuild-api-image.sh
|
||||
|
||||
# 再失败:改用旧版构建器(无 BuildKit)
|
||||
COMPOSE_DOCKER_CLI_BUILD=0 DOCKER_BUILDKIT=0 docker compose build api --no-cache
|
||||
docker compose up -d --force-recreate api
|
||||
```
|
||||
|
||||
手动等价步骤:`docker builder prune -af` → `docker rmi -f backend-api:latest` → `docker compose build api --no-cache`。
|
||||
|
||||
确认根分区剩余空间充足(建议 ≥ 20GB);空间不足时大层导出也容易损坏。
|
||||
|
||||
### RTSP 切片在宿主机无法用 VLC 打开
|
||||
|
||||
默认情况下 API 容器以 **root** 写入 `./logs`,切片属主为 `root:root`。普通用户虽可用 `cat` 读取,但 **Snap 版 VLC** 等沙箱应用常会报 Permission denied。
|
||||
|
||||
Reference in New Issue
Block a user