diff --git a/backend/Dockerfile b/backend/Dockerfile index c2a7205..fd1147c 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -82,16 +82,18 @@ ARG PYTORCH_MODELS_MIRROR= ARG PYTORCH_MODELS_URL= ENV PYTORCH_MODELS_MIRROR=${PYTORCH_MODELS_MIRROR} \ PYTORCH_MODELS_URL=${PYTORCH_MODELS_URL} +# Optional offline weight (only .gitkeep by default; add swin3d_t-7615ae03.pth before build if needed). +COPY weights ./weights/ RUN --mount=type=cache,target=/root/.cache/uv \ - --mount=type=bind,source=weights/swin3d_t-7615ae03.pth,target=/tmp/prebaked/swin3d_t-7615ae03.pth,readonly,required=false \ uv sync --frozen --no-dev --no-compile --refresh-package numpy --refresh-package mediapipe && \ .venv/bin/python -c "import alembic" && \ .venv/bin/python -c "import numpy; import numpy.lib._index_tricks_impl" && \ .venv/bin/python -c "import mediapipe as mp; print('mediapipe', mp.__version__)" && \ mkdir -p /app/.cache/ultralytics && \ - PYTORCH_MODELS_LOCAL_PATH=/tmp/prebaked/swin3d_t-7615ae03.pth \ + PYTORCH_MODELS_LOCAL_PATH=/app/weights/swin3d_t-7615ae03.pth \ .venv/bin/python scripts/bake_torch_hub_checkpoint.py && \ + rm -rf /app/weights && \ TORCH_HOME=/app/.cache/torch .venv/bin/python -c "from torchvision.models.video import Swin3D_T_Weights, swin3d_t; swin3d_t(weights=Swin3D_T_Weights.KINETICS400_V1); print('swin3d_t cached ok')" && \ chmod -R a+rX /app/.venv /app/algorithm_subprocesses /app/.cache/torch /app/.cache/ultralytics diff --git a/backend/algorithm_subprocesses/5.15/code/video_clip_cls/scripts/run_haocai_actionformer_consumables_e2e.py b/backend/algorithm_subprocesses/5.15/code/video_clip_cls/scripts/run_haocai_actionformer_consumables_e2e.py index aa3c68a..33ba636 100755 --- a/backend/algorithm_subprocesses/5.15/code/video_clip_cls/scripts/run_haocai_actionformer_consumables_e2e.py +++ b/backend/algorithm_subprocesses/5.15/code/video_clip_cls/scripts/run_haocai_actionformer_consumables_e2e.py @@ -296,6 +296,12 @@ def run_actionformer_eval( src_pkl = ckpt_path.parent / "eval_results.pkl" if not src_pkl.is_file(): raise FileNotFoundError(f"未找到输出: {src_pkl}") + try: + if src_pkl.samefile(copy_pkl_to): + log(f"eval_results.pkl 已在目标路径: {copy_pkl_to}") + return + except OSError: + pass shutil.copy2(src_pkl, copy_pkl_to) log(f"已复制 eval_results.pkl -> {copy_pkl_to}") diff --git a/backend/algorithm_subprocesses/5.15/src/actionformer_utils.py b/backend/algorithm_subprocesses/5.15/src/actionformer_utils.py index 04425f4..b4b2ee0 100755 --- a/backend/algorithm_subprocesses/5.15/src/actionformer_utils.py +++ b/backend/algorithm_subprocesses/5.15/src/actionformer_utils.py @@ -70,10 +70,12 @@ class ActionSegmenter: e2e.write_infer_yaml(yaml_path, json_path.resolve(), feat_dir.resolve()) pkl_dest = work / "eval_results.pkl" + # Do not resolve(): staged ckpt may symlink into bundle weights; eval.py + # writes eval_results.pkl beside ckpt and needs a writable parent dir. e2e.run_actionformer_eval( python_exe=python_exe, yaml_path=yaml_path.resolve(), - ckpt_path=actionformer_ckpt.resolve(), + ckpt_path=actionformer_ckpt, copy_pkl_to=pkl_dest, ) diff --git a/backend/tests/test_algo_host_batch.py b/backend/tests/test_algo_host_batch.py index bec7d21..1085d94 100644 --- a/backend/tests/test_algo_host_batch.py +++ b/backend/tests/test_algo_host_batch.py @@ -104,6 +104,25 @@ def test_stage_actionformer_checkpoint_survives_bundle_config_resolve(tmp_path: assert loaded.name == source.name +def test_staged_actionformer_ckpt_must_not_resolve_before_eval(tmp_path: Path) -> None: + """eval.py writes eval_results.pkl next to ckpt; resolve() breaks staged symlinks.""" + bundle = tmp_path / "bundle" + write_minimal_reference_bundle(bundle) + work_dir = tmp_path / "work" + work_dir.mkdir() + source = bundle / "weights" / "actionformer_epoch_045.pth.tar" + staged = stage_actionformer_checkpoint(bundle_dir=bundle, work_dir=work_dir) + assert staged.is_symlink() or staged.parent == work_dir + + def eval_pkl_parent(ckpt: Path) -> Path: + return ckpt.parent # mirrors eval.py: dirname(ckpt) / eval_results.pkl + + assert eval_pkl_parent(staged.resolve()) == source.parent + assert eval_pkl_parent(staged) == work_dir + # actionformer_utils uses the same path for copy_pkl_to; run_actionformer_eval must skip copy2. + assert eval_pkl_parent(staged) / "eval_results.pkl" == work_dir / "eval_results.pkl" + + def test_build_reference_env_sets_container_safe_defaults(monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.delenv("TORCHINDUCTOR_CACHE_DIR", raising=False) monkeypatch.delenv("USER", raising=False) diff --git a/backend/weights/swin3d_t-7615ae03.pth b/backend/weights/swin3d_t-7615ae03.pth index 5482b51..ea9ba20 100644 Binary files a/backend/weights/swin3d_t-7615ae03.pth and b/backend/weights/swin3d_t-7615ae03.pth differ