Files
operating-room-monitor-server/backend/app/services/video_batch_cleanup.py
Kevin 09885b4184 实现 video batch 自动清理与按需标注视频,并补充子进程调用测试。
batch 完成后仅保留数据库文本结果,勾选时才生成临时标注视频(24h TTL);新增 FastAPI 到 reference bundle 与 algorithm_runner 的单元测试。

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-21 16:30:48 +08:00

168 lines
5.1 KiB
Python

"""Purge temporary video-batch artifacts; labeled previews live under ``vis/`` with TTL."""
from __future__ import annotations
import shutil
import time
from pathlib import Path
from loguru import logger
# Final labeled preview video; stored as ``vis/{surgery_id}/result_vis.mp4`` and
# subject to the TTL sweep in ``purge_expired_visualizations``.
VISUALIZATION_FILENAME = "result_vis.mp4"
# Intermediate encode file next to the final preview; deleted by
# ``purge_visualization_artifacts``.
RAW_VISUALIZATION_FILENAME = "result_vis_source.mp4"
# Staged copy of the pipeline input video under ``vis_pending/{surgery_id}/``.
PENDING_INPUT_FILENAME = "input.mp4"
# Staged copy of the result TSV under ``vis_pending/{surgery_id}/``.
PENDING_RESULT_FILENAME = "result.tsv"
def visualization_output_path(root_dir: Path, surgery_id: str) -> Path:
    """Return the location of the labeled preview video for *surgery_id*.

    The path is ``{root_dir}/vis/{surgery_id}/<VISUALIZATION_FILENAME>``. This is a
    pure path computation; nothing is created or checked on disk.
    """
    surgery_vis_dir = root_dir.joinpath("vis", surgery_id)
    return surgery_vis_dir / VISUALIZATION_FILENAME
def visualization_pending_dir(root_dir: Path, surgery_id: str) -> Path:
    """Return the staging directory ``{root_dir}/vis_pending/{surgery_id}``.

    This is a pure path computation; the directory is not created here.
    """
    return root_dir.joinpath("vis_pending", surgery_id)
def visualization_pending_input_path(root_dir: Path, surgery_id: str) -> Path:
    """Return the path of the staged input video for *surgery_id*.

    The file lives inside the directory given by ``visualization_pending_dir`` and is
    named ``PENDING_INPUT_FILENAME``.
    """
    staging_dir = visualization_pending_dir(root_dir, surgery_id)
    return staging_dir / PENDING_INPUT_FILENAME
def visualization_pending_result_path(root_dir: Path, surgery_id: str) -> Path:
    """Return the path of the staged result TSV for *surgery_id*.

    The file lives inside the directory given by ``visualization_pending_dir`` and is
    named ``PENDING_RESULT_FILENAME``.
    """
    staging_dir = visualization_pending_dir(root_dir, surgery_id)
    return staging_dir / PENDING_RESULT_FILENAME
def stage_visualization_pending(
    root_dir: Path,
    surgery_id: str,
    *,
    source_mp4: Path,
    result_tsv: Path,
) -> tuple[Path, Path]:
    """Copy the pipeline input video and result TSV into the staging directory.

    Staging lets the batch cache be purged before the background visualization
    job runs.

    Args:
        root_dir: Storage root that contains ``vis_pending/``.
        surgery_id: Identifier used as the staging sub-directory name.
        source_mp4: Video file to copy.
        result_tsv: TSV file to copy.

    Returns:
        ``(staged_video_path, staged_tsv_path)``.
    """
    visualization_pending_dir(root_dir, surgery_id).mkdir(parents=True, exist_ok=True)

    staged_video = visualization_pending_input_path(root_dir, surgery_id)
    staged_tsv = visualization_pending_result_path(root_dir, surgery_id)

    # Video first, then TSV; copy2 also carries over file metadata.
    for origin, destination in ((source_mp4, staged_video), (result_tsv, staged_tsv)):
        shutil.copy2(origin, destination)

    logger.info(
        "staged visualization inputs surgery_id={} mp4={} tsv={}",
        surgery_id,
        staged_video,
        staged_tsv,
    )
    return staged_video, staged_tsv
def _safe_rmtree(path: Path) -> None:
    """Recursively delete *path* on a best-effort basis.

    A path that does not exist is ignored. An ``OSError`` raised during removal is
    logged as a warning rather than propagated.
    """
    if path.exists():
        try:
            shutil.rmtree(path)
        except OSError as err:
            logger.warning("failed to remove {}: {}", path, err)
def _prune_empty_parents(path: Path, *, stop_at: Path) -> None:
    """Remove *path* and then its ancestors for as long as each one is an empty directory.

    Pruning only ever touches directories strictly below *stop_at*; *stop_at*
    itself is never removed. Both paths are resolved before comparison so that a
    relative or symlinked *path* still meets the boundary. If *path* does not lie
    under *stop_at*, nothing is removed.

    Args:
        path: Deepest directory to start pruning from.
        stop_at: Boundary directory that must be kept.
    """
    boundary = stop_at.resolve()
    current = path.resolve()
    # ``boundary in current.parents`` is true only while ``current`` is strictly
    # inside the boundary, so the walk cannot climb past (or delete) ``stop_at``.
    while boundary in current.parents and current.is_dir():
        try:
            is_empty = next(current.iterdir(), None) is None
        except OSError:
            break
        if not is_empty:
            break
        try:
            # rmdir (not rmtree) refuses to delete a directory that received
            # content after the emptiness check above.
            current.rmdir()
        except OSError as exc:
            logger.warning("failed to remove {}: {}", current, exc)
            break
        current = current.parent
def purge_batch_artifacts(
    root_dir: Path,
    surgery_id: str,
    *,
    digest: str,
    candidate_key: str,
) -> None:
    """Delete one cache entry together with the surgery's upload and input copies.

    Removes ``cache/{surgery_id}/{digest}/{candidate_key}``, asks
    ``_prune_empty_parents`` to clean up emptied parents with ``cache/`` as the
    boundary, and then removes ``{surgery_id}/upload`` and ``{surgery_id}/input``.

    Args:
        root_dir: Storage root.
        surgery_id: Surgery whose artifacts are purged.
        digest: Digest component of the cache path (only its first 12 characters
            are logged).
        candidate_key: Final component of the cache entry path.
    """
    cache_root = root_dir / "cache"
    entry_dir = cache_root / surgery_id / digest / candidate_key
    _safe_rmtree(entry_dir)
    _prune_empty_parents(entry_dir.parent, stop_at=cache_root)

    surgery_dir = root_dir / surgery_id
    _safe_rmtree(surgery_dir / "upload")
    _safe_rmtree(surgery_dir / "input")

    logger.info(
        "purged video batch artifacts surgery_id={} digest={} candidate_key={}",
        surgery_id,
        digest[:12],
        candidate_key,
    )
def purge_surgery_batch_tree(root_dir: Path, surgery_id: str) -> None:
    """Remove leftover ``{surgery_id}/`` tree (upload + input).

    The target must resolve to a location strictly inside *root_dir*. An empty
    *surgery_id*, ``"."``, ``".."``, an absolute path, or a traversal such as
    ``"../x"`` would otherwise make ``root_dir / surgery_id`` point at the storage
    root itself or outside it; such requests are logged and ignored.

    Args:
        root_dir: Storage root that contains the per-surgery directory.
        surgery_id: Name of the per-surgery directory to delete.
    """
    target = root_dir / surgery_id
    # ``Path("/r") / ""`` is ``Path("/r")``: without this guard an empty id would
    # recursively delete the whole storage root.
    if root_dir.resolve() not in target.resolve().parents:
        logger.warning(
            "refusing to purge surgery tree outside root_dir: surgery_id={!r}",
            surgery_id,
        )
        return
    _safe_rmtree(target)
def purge_visualization_pending(root_dir: Path, surgery_id: str) -> None:
    """Delete the staged visualization inputs for *surgery_id*.

    Removes the directory returned by ``visualization_pending_dir`` and then asks
    ``_prune_empty_parents`` to clean up its parent, with *root_dir* as the boundary.
    """
    staging_dir = visualization_pending_dir(root_dir, surgery_id)
    _safe_rmtree(staging_dir)
    _prune_empty_parents(staging_dir.parent, stop_at=root_dir)
def purge_visualization_artifacts(output_dir: Path) -> None:
    """Delete intermediate encode files in *output_dir*, keeping the final preview.

    Two files are targeted: ``RAW_VISUALIZATION_FILENAME`` and the ``.part``
    variant of ``VISUALIZATION_FILENAME`` (``<stem>.part<suffix>``). Entries that
    are not regular files are skipped.
    """
    final_name = Path(VISUALIZATION_FILENAME)
    partial_name = f"{final_name.stem}.part{final_name.suffix}"
    for leftover in (RAW_VISUALIZATION_FILENAME, partial_name):
        candidate = output_dir / leftover
        if not candidate.is_file():
            continue
        candidate.unlink(missing_ok=True)
def _unlink_if_expired(video: Path, cutoff: float) -> bool:
    """Delete *video* if it is a regular file last modified before *cutoff*.

    Returns ``True`` only when the file was removed. A failing ``stat`` is treated
    as "not expired"; a failing ``unlink`` is logged and reported as ``False`` so
    that one stubborn file cannot abort a whole sweep.
    """
    if not video.is_file():
        return False
    try:
        if video.stat().st_mtime >= cutoff:
            return False
    except OSError:
        return False
    try:
        video.unlink(missing_ok=True)
    except OSError as exc:
        logger.warning("failed to remove {}: {}", video, exc)
        return False
    return True


def purge_expired_visualizations(root_dir: Path, *, ttl_hours: float = 24.0) -> int:
    """Delete ``vis/{surgery_id}/result_vis.mp4`` older than *ttl_hours*.

    Also sweeps the legacy ``cache/**/output/result_vis.mp4`` layout. The two
    sweeps are independent: a missing ``vis/`` directory no longer skips the
    legacy sweep.

    Args:
        root_dir: Storage root containing ``vis/`` and/or ``cache/``.
        ttl_hours: Maximum age, in hours, based on the file's modification time.

    Returns:
        Number of preview files actually removed.
    """
    cutoff = time.time() - float(ttl_hours) * 3600.0
    removed = 0

    vis_root = root_dir / "vis"
    if vis_root.is_dir():
        # Snapshot the matches first: directories are pruned during the sweep and
        # must not be removed from under a lazily-evaluated rglob.
        for mp4 in list(vis_root.rglob(VISUALIZATION_FILENAME)):
            if not _unlink_if_expired(mp4, cutoff):
                continue
            try:
                purge_visualization_artifacts(mp4.parent)
            except OSError as exc:
                # Leftover intermediates are not worth aborting the sweep for.
                logger.warning(
                    "failed to purge visualization leftovers in {}: {}",
                    mp4.parent,
                    exc,
                )
            _prune_empty_parents(mp4.parent, stop_at=root_dir)
            removed += 1
            logger.info("purged expired visualization {}", mp4)

    # Legacy layout from pre-cleanup deployments.
    cache_root = root_dir / "cache"
    if cache_root.is_dir():
        for mp4 in list(cache_root.rglob(f"output/{VISUALIZATION_FILENAME}")):
            if _unlink_if_expired(mp4, cutoff):
                removed += 1
                logger.info("purged expired legacy visualization {}", mp4)

    if removed:
        logger.info("video batch visualization TTL sweep removed {} file(s)", removed)
    return removed