实现 video batch 自动清理与按需标注视频,并补充子进程调用测试。

batch 完成后仅保留数据库文本结果,勾选时才生成临时标注视频(24h TTL);新增 FastAPI 到 reference bundle 与 algorithm_runner 的单元测试。

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Kevin
2026-05-21 16:30:48 +08:00
parent e6434d0bb6
commit 09885b4184
10 changed files with 2047 additions and 1354 deletions

View File

@@ -40,3 +40,9 @@ VOICE_FILE_LOG_PATH: str = "logs/voice_{surgery_id}.txt"
VOICE_UPLOAD_MAX_BYTES: int = 10 * 1024 * 1024
VOICE_CONFIRM_MAX_FAILED_PARSE_ROUNDS: int = 2
# --- 非实时 batch 标注视频临时保留(小时)---
VIDEO_BATCH_VIS_TTL_HOURS: int = 24
# --- 非实时 batch 标注视频临时保留 ---
VIDEO_BATCH_VIS_TTL_HOURS: int = 24

View File

@@ -21,7 +21,14 @@ from app.schemas import SurgeryApiResponse, SurgeryStartRequest
from app.or_site_config import merge_video_rtsp_urls_into_file
from app.services.synthetic_rtsp import StreamSpec, SyntheticRtspManager
from app.services.surgery_pipeline import SurgeryPipeline
from app.services.video_batch_runner import VideoBatchRunner, VideoBatchRunResult
from app.baked import pipeline as bp
from app.services.video_batch_cleanup import (
purge_batch_artifacts,
purge_expired_visualizations,
purge_surgery_batch_tree,
stage_visualization_pending,
)
from app.services.video_batch_runner import VideoBatchRunner
from app.services.voice_terminal_hub import (
VoiceTerminalHub,
assign_voice_terminal_after_recording_started,
@@ -31,25 +38,29 @@ from app.surgery_errors import SurgeryPipelineError
router = APIRouter(prefix="/internal/demo", tags=["demo"])
# Bumped when video-batch flow changes; grep this string in logs after restart to confirm new code.
VIDEO_BATCH_FLOW_MARKER = "early-save+background-vis-v3"
VIDEO_BATCH_FLOW_MARKER = "purge-all+opt-in-vis-v4"
def _background_finalize_visualization(
runner: VideoBatchRunner,
result: VideoBatchRunResult,
surgery_id: str,
) -> None:
try:
runner.finalize_visualization(result, surgery_id=surgery_id)
runner.finalize_visualization(surgery_id=surgery_id)
except Exception:
logger.exception("video batch background visualization failed surgery_id={}", surgery_id)
finally:
purge_expired_visualizations(
runner.root_dir,
ttl_hours=float(bp.VIDEO_BATCH_VIS_TTL_HOURS),
)
class VideoBatchSurgeryResponse(BaseModel):
surgery_id: str
status: str
message: str
visualization_url: str
visualization_url: str | None = None
doctor_name: str | None = None
doctor_id: str | None = None
doctor_display: str | None = None
@@ -72,7 +83,7 @@ def _orchestrate_write_rtsp_host() -> str:
summary="非实时精确模式:上传单路 MP4 并跑配置引用包 batch",
description=(
"仅当 DEMO_ORCHESTRATOR_ENABLED=true。保存上传视频调用配置算法子进程包 main.py默认 algorithm_subprocesses/5.15"
"解析 TSV 后写入最终结果,并调用 visualize_result_video.py 生成带标签视频。"
"解析 TSV 后写入最终结果;可选 include_visualization 生成临时标注视频。"
),
)
async def video_batch_surgery(
@@ -80,6 +91,7 @@ async def video_batch_surgery(
surgery_id: Annotated[str, Form()],
video1: Annotated[UploadFile, File(description="单路完整 MP4")],
candidate_consumables_json: Annotated[str, Form()] = "[]",
include_visualization: Annotated[bool, Form()] = False,
pipeline: SurgeryPipeline = Depends(get_surgery_pipeline),
) -> SurgeryApiResponse:
if len(surgery_id) != 6 or not surgery_id.isdigit():
@@ -113,9 +125,10 @@ async def video_batch_surgery(
detail="video1 is empty",
)
logger.info(
"video batch request surgery_id={} flow={}",
"video batch request surgery_id={} flow={} include_visualization={}",
surgery_id,
VIDEO_BATCH_FLOW_MARKER,
include_visualization,
)
runner = VideoBatchRunner()
suffix = Path(video1.filename or "video.mp4").suffix or ".mp4"
@@ -155,13 +168,34 @@ async def video_batch_surgery(
surgery_id,
)
background_tasks.add_task(_background_finalize_visualization, runner, result, surgery_id)
visualization_url = f"/internal/demo/video-batch-surgery/{surgery_id}/visualization"
cache_input = result.output_path.parent.parent / "input" / "input.mp4"
if include_visualization:
stage_visualization_pending(
runner.root_dir,
surgery_id,
source_mp4=cache_input if cache_input.is_file() else result.input_path,
result_tsv=result.output_path,
)
background_tasks.add_task(_background_finalize_visualization, runner, surgery_id)
purge_batch_artifacts(
runner.root_dir,
surgery_id,
digest=result.video_sha256,
candidate_key=result.candidate_cache_key,
)
purge_surgery_batch_tree(runner.root_dir, surgery_id)
visualization_url: str | None = None
if include_visualization:
visualization_url = f"/internal/demo/video-batch-surgery/{surgery_id}/visualization"
doctor = result.doctor
doctor_suffix = ""
if doctor is not None and doctor.display:
doctor_suffix = f";医生={doctor.display}"
vis_suffix = ";标注视频后台生成中(完成后刷新 visualization URL"
vis_suffix = ""
if include_visualization:
vis_suffix = ";标注视频后台生成中(完成后刷新 visualization URL24 小时内有效)"
return VideoBatchSurgeryResponse(
surgery_id=surgery_id,
status="accepted",

View File

@@ -0,0 +1,167 @@
"""Purge temporary video-batch artifacts; labeled previews live under ``vis/`` with TTL."""
from __future__ import annotations
import shutil
import time
from pathlib import Path
from loguru import logger
VISUALIZATION_FILENAME = "result_vis.mp4"
RAW_VISUALIZATION_FILENAME = "result_vis_source.mp4"
PENDING_INPUT_FILENAME = "input.mp4"
PENDING_RESULT_FILENAME = "result.tsv"
def visualization_output_path(root_dir: Path, surgery_id: str) -> Path:
return root_dir / "vis" / surgery_id / VISUALIZATION_FILENAME
def visualization_pending_dir(root_dir: Path, surgery_id: str) -> Path:
return root_dir / "vis_pending" / surgery_id
def visualization_pending_input_path(root_dir: Path, surgery_id: str) -> Path:
return visualization_pending_dir(root_dir, surgery_id) / PENDING_INPUT_FILENAME
def visualization_pending_result_path(root_dir: Path, surgery_id: str) -> Path:
return visualization_pending_dir(root_dir, surgery_id) / PENDING_RESULT_FILENAME
def stage_visualization_pending(
root_dir: Path,
surgery_id: str,
*,
source_mp4: Path,
result_tsv: Path,
) -> tuple[Path, Path]:
"""Copy pipeline input + TSV so batch cache can be purged before background vis runs."""
pending = visualization_pending_dir(root_dir, surgery_id)
pending.mkdir(parents=True, exist_ok=True)
input_path = visualization_pending_input_path(root_dir, surgery_id)
result_path = visualization_pending_result_path(root_dir, surgery_id)
shutil.copy2(source_mp4, input_path)
shutil.copy2(result_tsv, result_path)
logger.info(
"staged visualization inputs surgery_id={} mp4={} tsv={}",
surgery_id,
input_path,
result_path,
)
return input_path, result_path
def _safe_rmtree(path: Path) -> None:
if not path.exists():
return
try:
shutil.rmtree(path)
except OSError as exc:
logger.warning("failed to remove {}: {}", path, exc)
def _prune_empty_parents(path: Path, *, stop_at: Path) -> None:
current = path
stop_at = stop_at.resolve()
while current != stop_at and current.is_dir():
try:
next(current.iterdir())
except StopIteration:
parent = current.parent
_safe_rmtree(current)
current = parent
continue
except OSError:
break
break
def purge_batch_artifacts(
root_dir: Path,
surgery_id: str,
*,
digest: str,
candidate_key: str,
) -> None:
"""Remove one cache entry plus surgery upload/input copies."""
cache_entry = root_dir / "cache" / surgery_id / digest / candidate_key
_safe_rmtree(cache_entry)
_prune_empty_parents(cache_entry.parent, stop_at=root_dir / "cache")
for rel in (Path(surgery_id) / "upload", Path(surgery_id) / "input"):
_safe_rmtree(root_dir / rel)
logger.info(
"purged video batch artifacts surgery_id={} digest={} candidate_key={}",
surgery_id,
digest[:12],
candidate_key,
)
def purge_surgery_batch_tree(root_dir: Path, surgery_id: str) -> None:
"""Remove leftover ``{surgery_id}/`` tree (upload + input)."""
_safe_rmtree(root_dir / surgery_id)
def purge_visualization_pending(root_dir: Path, surgery_id: str) -> None:
pending = visualization_pending_dir(root_dir, surgery_id)
_safe_rmtree(pending)
_prune_empty_parents(pending.parent, stop_at=root_dir)
def purge_visualization_artifacts(output_dir: Path) -> None:
"""Drop intermediate encode files; keep browser ``result_vis.mp4``."""
for name in (RAW_VISUALIZATION_FILENAME, f"{Path(VISUALIZATION_FILENAME).stem}.part{Path(VISUALIZATION_FILENAME).suffix}"):
path = output_dir / name
if path.is_file():
path.unlink(missing_ok=True)
def purge_expired_visualizations(root_dir: Path, *, ttl_hours: float = 24.0) -> int:
"""Delete ``vis/{surgery_id}/result_vis.mp4`` older than *ttl_hours*."""
vis_root = root_dir / "vis"
if not vis_root.is_dir():
return 0
cutoff = time.time() - float(ttl_hours) * 3600.0
removed = 0
for mp4 in vis_root.rglob(VISUALIZATION_FILENAME):
if not mp4.is_file():
continue
try:
if mp4.stat().st_mtime >= cutoff:
continue
except OSError:
continue
mp4.unlink(missing_ok=True)
purge_visualization_artifacts(mp4.parent)
_prune_empty_parents(mp4.parent, stop_at=root_dir)
removed += 1
logger.info("purged expired visualization {}", mp4)
# Legacy layout from pre-cleanup deployments.
cache_root = root_dir / "cache"
if cache_root.is_dir():
for mp4 in cache_root.rglob(f"output/{VISUALIZATION_FILENAME}"):
if not mp4.is_file():
continue
try:
if mp4.stat().st_mtime >= cutoff:
continue
except OSError:
continue
mp4.unlink(missing_ok=True)
removed += 1
logger.info("purged expired legacy visualization {}", mp4)
if removed:
logger.info("video batch visualization TTL sweep removed {} file(s)", removed)
return removed

View File

@@ -34,6 +34,15 @@ from app.algorithm_runner.reference_bundle_runtime import (
from app.baked import pipeline as bp
from app.consumable_catalog import build_name_mapping, effective_candidate_consumables, normalize_candidate_consumables_raw
from app.domain.consumption import SurgeryConsumptionStored
from app.services.video_batch_cleanup import (
RAW_VISUALIZATION_FILENAME,
VISUALIZATION_FILENAME,
purge_visualization_artifacts,
purge_visualization_pending,
visualization_output_path,
visualization_pending_input_path,
visualization_pending_result_path,
)
@dataclass(frozen=True)
@@ -66,8 +75,6 @@ class ReferenceRunFiles:
whitelist_path: Path
VISUALIZATION_FILENAME = "result_vis.mp4"
RAW_VISUALIZATION_FILENAME = "result_vis_source.mp4"
# 标注视频最长边上限(宽 1920 ≈ 1080p绘制与转码共用避免 4K 逐帧 YOLO。
VISUALIZATION_MAX_WIDTH = 1920
@@ -658,7 +665,7 @@ def build_reference_config(
cfg["io"]["out"] = str(output_path.resolve())
cfg["io"]["whitelist_json"] = str(whitelist_path.resolve())
cfg["runtime"]["work_dir"] = str(work_dir.resolve())
cfg["runtime"]["keep_work_dir"] = True
cfg["runtime"]["keep_work_dir"] = False
return cfg
@@ -795,32 +802,44 @@ class VideoBatchRunner:
logger.error("reference visualization produced unreadable mp4: {}", raw_video_path)
return None
if _transcode_visualization_for_browser(raw_video_path, output_video_path):
purge_visualization_artifacts(output_video_path.parent)
return output_video_path
logger.error("reference visualization transcode to browser mp4 failed: {}", output_video_path)
return None
def finalize_visualization(self, result: VideoBatchRunResult, *, surgery_id: str) -> Path | None:
"""Run hand-overlay visualization after batch text result is already persisted."""
def finalize_visualization(
self,
*,
surgery_id: str,
video_path: Path | None = None,
result_path: Path | None = None,
) -> Path | None:
"""Run hand-overlay visualization using staged ``vis_pending/`` inputs."""
logger.info(
"video batch visualization starting for surgery_id={} (visualize_result_video.py)",
surgery_id,
)
if not is_reference_result_complete(result.output_path):
logger.warning("skip visualization: incomplete result {}", result.output_path)
video_path = (video_path or visualization_pending_input_path(self._root_dir, surgery_id)).resolve()
result_path = (result_path or visualization_pending_result_path(self._root_dir, surgery_id)).resolve()
output_video_path = visualization_output_path(self._root_dir, surgery_id)
if not is_reference_result_complete(result_path):
logger.warning("skip visualization: incomplete result {}", result_path)
purge_visualization_pending(self._root_dir, surgery_id)
return None
if not video_path.is_file():
logger.warning("skip visualization: missing staged video {}", video_path)
purge_visualization_pending(self._root_dir, surgery_id)
return None
bundle_dir = resolve_reference_bundle_dir(self._bundle_dir_override)
cache_input = result.output_path.parent.parent / "input"
video_path = next((p for p in sorted(cache_input.glob("*")) if p.is_file()), None)
if video_path is None:
logger.warning("skip visualization: missing cache input video under {}", cache_input)
return None
output_video_path.parent.mkdir(parents=True, exist_ok=True)
vis_path = self._generate_visualization(
bundle_dir=bundle_dir,
video_path=video_path,
result_path=result.output_path,
output_video_path=result.output_path.with_name(VISUALIZATION_FILENAME),
result_path=result_path,
output_video_path=output_video_path,
)
purge_visualization_pending(self._root_dir, surgery_id)
if vis_path is not None:
logger.info(
"video batch visualization complete for surgery_id={} ({})",
@@ -834,19 +853,10 @@ class VideoBatchRunner:
def latest_visualization_path(self, surgery_id: str) -> Path | None:
"""Return an already-generated browser mp4; does not run visualization."""
surgery_cache_dir = self._root_dir / "cache" / surgery_id
if not surgery_cache_dir.is_dir():
return None
candidates = [
p
for p in surgery_cache_dir.rglob(f"output/{VISUALIZATION_FILENAME}")
if p.is_file() and p.stat().st_size > 0 and _is_browser_compatible_mp4(p)
]
if not candidates:
return None
candidates.sort(key=lambda p: p.stat().st_mtime, reverse=True)
return candidates[0]
path = visualization_output_path(self._root_dir, surgery_id)
if path.is_file() and path.stat().st_size > 0 and _is_browser_compatible_mp4(path):
return path
return None
def run(
self,
@@ -955,23 +965,4 @@ class VideoBatchRunner:
doctor=doctor,
visualization_path=None,
)
if not include_visualization:
return batch_result
vis_path = self.finalize_visualization(batch_result, surgery_id=surgery_id)
if vis_path is None:
logger.warning(
"video batch visualization missing for surgery_id={} after complete result",
surgery_id,
)
return batch_result
return VideoBatchRunResult(
video_sha256=batch_result.video_sha256,
candidate_cache_key=batch_result.candidate_cache_key,
input_path=batch_result.input_path,
work_dir=batch_result.work_dir,
output_path=batch_result.output_path,
details=batch_result.details,
reused_cache=batch_result.reused_cache,
doctor=batch_result.doctor,
visualization_path=vis_path,
)
return batch_result

View File

@@ -49,6 +49,15 @@ async def lifespan(app: FastAPI):
logger.info(
"Database connection verified; ensure schema is applied with `alembic upgrade head` before serving traffic"
)
from app.baked import pipeline as bp
from app.services.video_batch_cleanup import purge_expired_visualizations
from app.services.video_batch_runner import VideoBatchRunner
repo_root = Path(__file__).resolve().parent
purge_expired_visualizations(
VideoBatchRunner(root_dir=repo_root / "logs" / "video_batch").root_dir,
ttl_hours=float(bp.VIDEO_BATCH_VIS_TTL_HOURS),
)
container = build_container(settings)
app.state.container = container
await container.start()

View File

@@ -0,0 +1,240 @@
"""FastAPI → 算法子进程调用链单元测试。
覆盖两条生产路径:
1. ``POST /internal/demo/video-batch-surgery`` → ``VideoBatchRunner`` → ``subprocess.run``reference bundle ``main.py``
2. ``POST /client/surgeries/start`` → ``CameraSessionManager`` → ``asyncio.create_subprocess_exec````python -m app.algorithm_runner``
"""
from __future__ import annotations
import json
import sys
from pathlib import Path
from typing import Any
from unittest.mock import MagicMock
import pytest
import yaml
from fastapi import FastAPI
from fastapi.testclient import TestClient
from httpx import ASGITransport, AsyncClient
from app.api import router as api_router
from app.config import Settings
from app.dependencies import build_container, get_surgery_pipeline, get_voice_terminal_hub
from app.routers import demo_orch
from app.services.video.session_manager import CameraSessionManager
from app.services.video_batch_runner import VideoBatchRunner, build_reference_command
from tests.test_video_batch_runner import _complete_result_tsv_body, _write_minimal_reference_bundle
def _fake_reference_subprocess_run(captured: list[dict[str, Any]]):
class _Proc:
returncode = 0
stdout = ""
stderr = ""
def _run(cmd: list[str], **kwargs: Any) -> _Proc:
config_path = Path(cmd[cmd.index("--config") + 1])
config = yaml.safe_load(config_path.read_text(encoding="utf-8"))
captured.append(
{
"cmd": list(cmd),
"kwargs": dict(kwargs),
"config": config,
}
)
output = Path(config["io"]["out"])
output.parent.mkdir(parents=True, exist_ok=True)
output.write_text(_complete_result_tsv_body(), encoding="utf-8")
return _Proc()
return _run
@pytest.fixture
def reference_bundle(tmp_path: Path) -> Path:
bundle = tmp_path / "algorithm_subprocesses" / "5.15"
_write_minimal_reference_bundle(bundle)
return bundle
def test_video_batch_endpoint_invokes_reference_bundle_subprocess(
tmp_path: Path,
monkeypatch: pytest.MonkeyPatch,
reference_bundle: Path,
sqlite_session_factory,
) -> None:
monkeypatch.setattr(demo_orch.settings, "demo_orchestrator_enabled", True)
monkeypatch.setattr(
"app.services.video_batch_runner.resolve_reference_bundle_dir",
lambda _override=None: reference_bundle.resolve(),
)
monkeypatch.setattr(
"app.services.video_batch_runner.ensure_reference_actionformer_nms_patch",
lambda _bundle: True,
)
monkeypatch.setattr(
demo_orch,
"VideoBatchRunner",
lambda: VideoBatchRunner(
bundle_dir=reference_bundle,
root_dir=tmp_path / "video_batch",
),
)
captured: list[dict[str, Any]] = []
monkeypatch.setattr(
"app.services.video_batch_runner.subprocess.run",
_fake_reference_subprocess_run(captured),
)
container = build_container(demo_orch.settings, session_factory=sqlite_session_factory)
app = FastAPI()
app.include_router(demo_orch.router)
app.dependency_overrides[get_surgery_pipeline] = lambda: container.surgery_pipeline
client = TestClient(app)
res = client.post(
"/internal/demo/video-batch-surgery",
data={
"surgery_id": "100001",
"candidate_consumables_json": '["耗材1"]',
"include_visualization": "false",
},
files={"video1": ("case.mp4", b"fake-mp4-bytes", "video/mp4")},
)
assert res.status_code == 200, res.text
assert len(captured) == 1, "expected exactly one reference bundle subprocess invocation"
call = captured[0]
cmd: list[str] = call["cmd"]
kwargs: dict[str, Any] = call["kwargs"]
expected = build_reference_command(
bundle_dir=reference_bundle,
config_path=Path(cmd[cmd.index("--config") + 1]),
)
assert cmd == expected
assert kwargs.get("cwd") == str(reference_bundle.resolve())
assert kwargs.get("env", {}).get("PYTHONFAULTHANDLER") == "1"
config = call["config"]
assert Path(config["io"]["video"]).name == "input.mp4"
assert str(config["io"]["excel"]).endswith("商品信息表.xlsx")
assert str(config["io"]["whitelist_json"]).endswith("whitelist.json")
assert config["runtime"]["keep_work_dir"] is False
@pytest.mark.asyncio
async def test_start_surgery_endpoint_spawns_algorithm_runner_subprocess(
monkeypatch: pytest.MonkeyPatch,
sqlite_session_factory,
tmp_path: Path,
) -> None:
async def _check_db_ok() -> None:
return None
monkeypatch.setattr("app.api.check_database", _check_db_ok)
monkeypatch.setattr(
"app.services.video.session_manager.LOGS_DIR",
tmp_path / "logs",
)
settings = Settings(
video_rtsp_url_template="rtsp://lab/{camera_id}/live",
video_open_timeout_sec=5.0,
)
container = build_container(settings, session_factory=sqlite_session_factory)
async def _fake_resolve_rtsp(
self,
*,
camera_id: str,
kind: Any,
) -> tuple[str, int | None, bool]:
return f"rtsp://unittest/{camera_id}/live", None, False
monkeypatch.setattr(CameraSessionManager, "_resolve_rtsp_url", _fake_resolve_rtsp)
captured_cmds: list[list[str]] = []
async def fake_create_subprocess_exec(*cmd: str, **kwargs: Any):
captured_cmds.append(list(cmd))
events_idx = list(cmd).index("--events-jsonl") + 1
events_path = Path(cmd[events_idx])
events_path.parent.mkdir(parents=True, exist_ok=True)
events_path.write_text(
json.dumps({"type": "ready", "camera_id": "cam1"}) + "\n",
encoding="utf-8",
)
proc = MagicMock()
proc.returncode = 0
proc.terminate = MagicMock()
proc.kill = MagicMock()
async def _wait() -> int:
return 0
proc.wait = _wait
return proc
monkeypatch.setattr(
"app.services.video.session_manager.asyncio.create_subprocess_exec",
fake_create_subprocess_exec,
)
async def _noop_tail(self, *args: Any, **kwargs: Any) -> None:
return None
monkeypatch.setattr(CameraSessionManager, "_tail_algo_events", _noop_tail)
async def _instant_sleep(_delay: float) -> None:
return None
monkeypatch.setattr("app.services.video.session_manager.asyncio.sleep", _instant_sleep)
monkeypatch.setattr("app.api.asyncio.sleep", _instant_sleep)
async def _noop_voice_assign(*args: Any, **kwargs: Any) -> None:
return None
monkeypatch.setattr("app.api.assign_voice_terminal_after_recording_started", _noop_voice_assign)
app = FastAPI()
app.include_router(api_router)
app.dependency_overrides[get_surgery_pipeline] = lambda: container.surgery_pipeline
app.dependency_overrides[get_voice_terminal_hub] = lambda: container.voice_terminal_hub
surgery_id = "123456"
async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client:
res = await client.post(
"/client/surgeries/start",
json={
"surgery_id": surgery_id,
"camera_ids": ["cam1"],
"candidate_consumables": ["纱布"],
},
)
assert res.status_code == 200, res.text
assert res.json()["status"] == "accepted"
end = await client.post("/client/surgeries/end", json={"surgery_id": surgery_id})
assert end.status_code == 200, end.text
assert len(captured_cmds) == 1, "expected one algorithm_runner subprocess spawn"
cmd = captured_cmds[0]
assert cmd[0] == sys.executable
assert cmd[1:3] == ["-m", "app.algorithm_runner"]
assert cmd[cmd.index("--source") + 1] == "rtsp://unittest/or-cam-03/live"
assert cmd[cmd.index("--source-mode") + 1] == "realtime"
assert cmd[cmd.index("--surgery-id") + 1] == surgery_id
assert cmd[cmd.index("--camera-id") + 1] == "or-cam-03"
whitelist_path = Path(cmd[cmd.index("--whitelist-json") + 1])
assert whitelist_path.is_file()
whitelist = json.loads(whitelist_path.read_text(encoding="utf-8"))
assert whitelist["candidate_consumables"] == ["纱布"]
events_path = Path(cmd[cmd.index("--events-jsonl") + 1])
assert events_path.is_file()
assert '"type": "ready"' in events_path.read_text(encoding="utf-8")

View File

@@ -0,0 +1,101 @@
from __future__ import annotations
import time
from pathlib import Path
from app.services.video_batch_cleanup import (
VISUALIZATION_FILENAME,
purge_batch_artifacts,
purge_expired_visualizations,
purge_surgery_batch_tree,
purge_visualization_artifacts,
stage_visualization_pending,
visualization_output_path,
visualization_pending_input_path,
visualization_pending_result_path,
)
def test_stage_visualization_pending_copies_mp4_and_tsv(tmp_path: Path) -> None:
root = tmp_path / "video_batch"
source = tmp_path / "source.mp4"
tsv = tmp_path / "result.tsv"
source.write_bytes(b"mp4-bytes")
tsv.write_text("rank\tx\n1\ta\n", encoding="utf-8")
stage_visualization_pending(root, "100001", source_mp4=source, result_tsv=tsv)
assert visualization_pending_input_path(root, "100001").read_bytes() == b"mp4-bytes"
assert visualization_pending_result_path(root, "100001").read_text(encoding="utf-8") == tsv.read_text(
encoding="utf-8"
)
def test_purge_batch_artifacts_removes_cache_and_uploads(tmp_path: Path) -> None:
root = tmp_path / "video_batch"
surgery_id = "100001"
digest = "d" * 64
candidate_key = "c1"
cache_entry = root / "cache" / surgery_id / digest / candidate_key
(cache_entry / "input").mkdir(parents=True)
(cache_entry / "input" / "input.mp4").write_bytes(b"x" * 100)
(cache_entry / "output").mkdir(parents=True)
(cache_entry / "output" / "result.tsv").write_text("ok\n", encoding="utf-8")
(cache_entry / "work").mkdir(parents=True)
(cache_entry / "work" / "features").mkdir(parents=True)
(cache_entry / "work" / "features" / "input.npy").write_bytes(b"npy")
(root / surgery_id / "upload").mkdir(parents=True)
(root / surgery_id / "upload" / "upload.mp4").write_bytes(b"upload")
(root / surgery_id / "input").mkdir(parents=True)
(root / surgery_id / "input" / "abc.mp4").write_bytes(b"input")
purge_batch_artifacts(root, surgery_id, digest=digest, candidate_key=candidate_key)
purge_surgery_batch_tree(root, surgery_id)
assert not cache_entry.exists()
assert not (root / surgery_id).exists()
def test_purge_visualization_artifacts_keeps_browser_mp4(tmp_path: Path) -> None:
out_dir = tmp_path / "vis" / "100001"
out_dir.mkdir(parents=True)
browser = out_dir / VISUALIZATION_FILENAME
raw = out_dir / "result_vis_source.mp4"
part = out_dir / "result_vis.part.mp4"
browser.write_bytes(b"browser")
raw.write_bytes(b"raw")
part.write_bytes(b"part")
purge_visualization_artifacts(out_dir)
assert browser.is_file()
assert not raw.exists()
assert not part.exists()
def test_purge_expired_visualizations_removes_old_vis_mp4(tmp_path: Path) -> None:
root = tmp_path / "video_batch"
vis_path = visualization_output_path(root, "100001")
vis_path.parent.mkdir(parents=True)
vis_path.write_bytes(b"vis")
old = time.time() - (25 * 3600)
import os
os.utime(vis_path, (old, old))
removed = purge_expired_visualizations(root, ttl_hours=24.0)
assert removed == 1
assert not vis_path.exists()
def test_purge_expired_visualizations_keeps_recent_vis_mp4(tmp_path: Path) -> None:
root = tmp_path / "video_batch"
vis_path = visualization_output_path(root, "100002")
vis_path.parent.mkdir(parents=True)
vis_path.write_bytes(b"vis")
removed = purge_expired_visualizations(root, ttl_hours=24.0)
assert removed == 0
assert vis_path.is_file()

View File

@@ -27,6 +27,7 @@ from app.services.video_batch_runner import (
browser_transcode_tmp_path,
build_reference_command,
VISUALIZATION_MAX_WIDTH,
build_reference_config,
build_reference_visualization_command,
describe_batch_returncode,
doctor_id_for_consumption_rows,
@@ -37,6 +38,7 @@ from app.services.video_batch_runner import (
parse_reference_doctor_info,
parse_reference_tsv,
)
from app.services.video_batch_cleanup import VISUALIZATION_FILENAME, visualization_output_path
def _complete_result_tsv_body() -> str:
@@ -73,6 +75,31 @@ def _write_minimal_reference_bundle(bundle: Path) -> None:
)
def test_build_reference_config_does_not_keep_work_dir(tmp_path: Path) -> None:
bundle = tmp_path / "bundle"
_write_minimal_reference_bundle(bundle)
cfg = build_reference_config(
bundle_dir=bundle,
video_path=tmp_path / "input.mp4",
output_path=tmp_path / "out.tsv",
work_dir=tmp_path / "work",
excel_path=tmp_path / "catalog.xlsx",
whitelist_path=tmp_path / "whitelist.json",
)
assert cfg["runtime"]["keep_work_dir"] is False
def test_latest_visualization_path_uses_vis_directory(tmp_path: Path) -> None:
root = tmp_path / "batch"
runner = VideoBatchRunner(root_dir=root)
assert runner.latest_visualization_path("100001") is None
vis_path = visualization_output_path(root, "100001")
vis_path.parent.mkdir(parents=True)
vis_path.write_bytes(b"not-really-mp4")
assert runner.latest_visualization_path("100001") is None
def test_is_reference_result_complete_requires_footer_and_rows(tmp_path: Path) -> None:
complete = tmp_path / "complete.tsv"
complete.write_text(_complete_result_tsv_body(), encoding="utf-8")
@@ -541,29 +568,37 @@ def test_demo_video_batch_endpoint_writes_queryable_result(
timestamp=datetime(2026, 5, 8, tzinfo=timezone.utc),
source="video_batch",
)
root_dir = tmp_path / "video_batch"
vis_calls: list[str] = []
class _FakeRunner:
root_dir = tmp_path / "video_batch"
def __init__(self) -> None:
self.root_dir = root_dir
def run(self, **kwargs: Any) -> VideoBatchRunResult:
assert kwargs["surgery_id"] == "100001"
assert kwargs["uploaded_video_path"].is_file()
assert kwargs["candidate_consumables"] == ["耗材1"]
assert kwargs.get("include_visualization") is False
cache_dir = root_dir / "cache" / "100001" / ("a" * 64) / "c1"
cache_input = cache_dir / "input" / "input.mp4"
cache_input.parent.mkdir(parents=True)
cache_input.write_bytes(b"pipeline-input")
output_path = cache_dir / "output" / "result.tsv"
output_path.parent.mkdir(parents=True)
output_path.write_text(_complete_result_tsv_body(), encoding="utf-8")
return VideoBatchRunResult(
video_sha256="abc",
video_sha256="a" * 64,
candidate_cache_key="c1",
input_path=tmp_path / "saved.mp4",
work_dir=tmp_path / "work",
output_path=tmp_path / "result.tsv",
input_path=root_dir / "100001" / "input" / "saved.mp4",
work_dir=cache_dir / "work",
output_path=output_path,
details=[detail],
reused_cache=False,
)
def finalize_visualization(self, result: VideoBatchRunResult, *, surgery_id: str) -> None:
assert surgery_id == "100001"
assert result.details[0].item_name == "耗材1"
return None
def finalize_visualization(self, *, surgery_id: str) -> None:
vis_calls.append(surgery_id)
class _FakePipeline:
def __init__(self) -> None:
@@ -609,10 +644,91 @@ def test_demo_video_batch_endpoint_writes_queryable_result(
files={"video1": ("case.mp4", b"video-bytes", "video/mp4")},
)
assert res.status_code == 200, res.text
assert res.json()["status"] == "accepted"
body = res.json()
assert body["status"] == "accepted"
assert body["visualization_url"] is None
assert vis_calls == []
assert not (root_dir / "cache" / "100001").exists()
assert not (root_dir / "100001").exists()
got = client.get("/client/surgeries/100001/result")
assert got.status_code == 200, got.text
body = got.json()
assert body["details"][0]["item_id"] == "P1"
assert body["summary"][0]["total_quantity"] == 1
result_body = got.json()
assert result_body["details"][0]["item_id"] == "P1"
assert result_body["summary"][0]["total_quantity"] == 1
def test_demo_video_batch_endpoint_stages_vis_and_purges_cache_when_requested(
tmp_path: Path,
monkeypatch,
) -> None:
monkeypatch.setattr(demo_orch.settings, "demo_orchestrator_enabled", True)
detail = SurgeryConsumptionStored(
item_id="P1",
item_name="耗材1",
qty=1,
doctor_id="vision",
timestamp=datetime(2026, 5, 8, tzinfo=timezone.utc),
source="video_batch",
)
root_dir = tmp_path / "video_batch"
vis_calls: list[str] = []
class _FakeRunner:
def __init__(self) -> None:
self.root_dir = root_dir
def run(self, **kwargs: Any) -> VideoBatchRunResult:
cache_dir = root_dir / "cache" / "100001" / ("b" * 64) / "c1"
cache_input = cache_dir / "input" / "input.mp4"
cache_input.parent.mkdir(parents=True)
cache_input.write_bytes(b"pipeline-input")
output_path = cache_dir / "output" / "result.tsv"
output_path.parent.mkdir(parents=True)
output_path.write_text(_complete_result_tsv_body(), encoding="utf-8")
return VideoBatchRunResult(
video_sha256="b" * 64,
candidate_cache_key="c1",
input_path=root_dir / "100001" / "input" / "saved.mp4",
work_dir=cache_dir / "work",
output_path=output_path,
details=[detail],
reused_cache=False,
)
def finalize_visualization(self, *, surgery_id: str) -> None:
vis_calls.append(surgery_id)
class _FakePipeline:
async def save_video_batch_result(
self,
surgery_id: str,
details: list[SurgeryConsumptionStored],
) -> None:
return None
monkeypatch.setattr(demo_orch, "VideoBatchRunner", _FakeRunner)
app = FastAPI()
app.include_router(demo_orch.router)
app.dependency_overrides[get_surgery_pipeline] = lambda: _FakePipeline()
client = TestClient(app)
res = client.post(
"/internal/demo/video-batch-surgery",
data={
"surgery_id": "100001",
"candidate_consumables_json": '["耗材1"]',
"include_visualization": "true",
},
files={"video1": ("case.mp4", b"video-bytes", "video/mp4")},
)
assert res.status_code == 200, res.text
body = res.json()
assert body["visualization_url"] == "/internal/demo/video-batch-surgery/100001/visualization"
assert vis_calls == ["100001"]
assert not (root_dir / "cache" / "100001").exists()
pending_input = root_dir / "vis_pending" / "100001" / "input.mp4"
pending_tsv = root_dir / "vis_pending" / "100001" / "result.tsv"
assert pending_input.read_bytes() == b"pipeline-input"
assert "医生信息" in pending_tsv.read_text(encoding="utf-8")

2562
backend/uv.lock generated

File diff suppressed because it is too large Load Diff