实现 video batch 自动清理与按需标注视频,并补充子进程调用测试。

batch 完成后仅保留数据库文本结果,勾选时才生成临时标注视频(24h TTL);新增 FastAPI 到 reference bundle 与 algorithm_runner 的单元测试。

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Kevin
2026-05-21 16:30:48 +08:00
parent e6434d0bb6
commit 09885b4184
10 changed files with 2047 additions and 1354 deletions

View File

@@ -0,0 +1,240 @@
"""FastAPI → 算法子进程调用链单元测试。
覆盖两条生产路径:
1. ``POST /internal/demo/video-batch-surgery`` → ``VideoBatchRunner`` → ``subprocess.run``reference bundle ``main.py``
2. ``POST /client/surgeries/start`` → ``CameraSessionManager`` → ``asyncio.create_subprocess_exec````python -m app.algorithm_runner``
"""
from __future__ import annotations
import json
import sys
from pathlib import Path
from typing import Any
from unittest.mock import MagicMock
import pytest
import yaml
from fastapi import FastAPI
from fastapi.testclient import TestClient
from httpx import ASGITransport, AsyncClient
from app.api import router as api_router
from app.config import Settings
from app.dependencies import build_container, get_surgery_pipeline, get_voice_terminal_hub
from app.routers import demo_orch
from app.services.video.session_manager import CameraSessionManager
from app.services.video_batch_runner import VideoBatchRunner, build_reference_command
from tests.test_video_batch_runner import _complete_result_tsv_body, _write_minimal_reference_bundle
def _fake_reference_subprocess_run(captured: list[dict[str, Any]]):
class _Proc:
returncode = 0
stdout = ""
stderr = ""
def _run(cmd: list[str], **kwargs: Any) -> _Proc:
config_path = Path(cmd[cmd.index("--config") + 1])
config = yaml.safe_load(config_path.read_text(encoding="utf-8"))
captured.append(
{
"cmd": list(cmd),
"kwargs": dict(kwargs),
"config": config,
}
)
output = Path(config["io"]["out"])
output.parent.mkdir(parents=True, exist_ok=True)
output.write_text(_complete_result_tsv_body(), encoding="utf-8")
return _Proc()
return _run
@pytest.fixture
def reference_bundle(tmp_path: Path) -> Path:
bundle = tmp_path / "algorithm_subprocesses" / "5.15"
_write_minimal_reference_bundle(bundle)
return bundle
def test_video_batch_endpoint_invokes_reference_bundle_subprocess(
tmp_path: Path,
monkeypatch: pytest.MonkeyPatch,
reference_bundle: Path,
sqlite_session_factory,
) -> None:
monkeypatch.setattr(demo_orch.settings, "demo_orchestrator_enabled", True)
monkeypatch.setattr(
"app.services.video_batch_runner.resolve_reference_bundle_dir",
lambda _override=None: reference_bundle.resolve(),
)
monkeypatch.setattr(
"app.services.video_batch_runner.ensure_reference_actionformer_nms_patch",
lambda _bundle: True,
)
monkeypatch.setattr(
demo_orch,
"VideoBatchRunner",
lambda: VideoBatchRunner(
bundle_dir=reference_bundle,
root_dir=tmp_path / "video_batch",
),
)
captured: list[dict[str, Any]] = []
monkeypatch.setattr(
"app.services.video_batch_runner.subprocess.run",
_fake_reference_subprocess_run(captured),
)
container = build_container(demo_orch.settings, session_factory=sqlite_session_factory)
app = FastAPI()
app.include_router(demo_orch.router)
app.dependency_overrides[get_surgery_pipeline] = lambda: container.surgery_pipeline
client = TestClient(app)
res = client.post(
"/internal/demo/video-batch-surgery",
data={
"surgery_id": "100001",
"candidate_consumables_json": '["耗材1"]',
"include_visualization": "false",
},
files={"video1": ("case.mp4", b"fake-mp4-bytes", "video/mp4")},
)
assert res.status_code == 200, res.text
assert len(captured) == 1, "expected exactly one reference bundle subprocess invocation"
call = captured[0]
cmd: list[str] = call["cmd"]
kwargs: dict[str, Any] = call["kwargs"]
expected = build_reference_command(
bundle_dir=reference_bundle,
config_path=Path(cmd[cmd.index("--config") + 1]),
)
assert cmd == expected
assert kwargs.get("cwd") == str(reference_bundle.resolve())
assert kwargs.get("env", {}).get("PYTHONFAULTHANDLER") == "1"
config = call["config"]
assert Path(config["io"]["video"]).name == "input.mp4"
assert str(config["io"]["excel"]).endswith("商品信息表.xlsx")
assert str(config["io"]["whitelist_json"]).endswith("whitelist.json")
assert config["runtime"]["keep_work_dir"] is False
@pytest.mark.asyncio
async def test_start_surgery_endpoint_spawns_algorithm_runner_subprocess(
monkeypatch: pytest.MonkeyPatch,
sqlite_session_factory,
tmp_path: Path,
) -> None:
async def _check_db_ok() -> None:
return None
monkeypatch.setattr("app.api.check_database", _check_db_ok)
monkeypatch.setattr(
"app.services.video.session_manager.LOGS_DIR",
tmp_path / "logs",
)
settings = Settings(
video_rtsp_url_template="rtsp://lab/{camera_id}/live",
video_open_timeout_sec=5.0,
)
container = build_container(settings, session_factory=sqlite_session_factory)
async def _fake_resolve_rtsp(
self,
*,
camera_id: str,
kind: Any,
) -> tuple[str, int | None, bool]:
return f"rtsp://unittest/{camera_id}/live", None, False
monkeypatch.setattr(CameraSessionManager, "_resolve_rtsp_url", _fake_resolve_rtsp)
captured_cmds: list[list[str]] = []
async def fake_create_subprocess_exec(*cmd: str, **kwargs: Any):
captured_cmds.append(list(cmd))
events_idx = list(cmd).index("--events-jsonl") + 1
events_path = Path(cmd[events_idx])
events_path.parent.mkdir(parents=True, exist_ok=True)
events_path.write_text(
json.dumps({"type": "ready", "camera_id": "cam1"}) + "\n",
encoding="utf-8",
)
proc = MagicMock()
proc.returncode = 0
proc.terminate = MagicMock()
proc.kill = MagicMock()
async def _wait() -> int:
return 0
proc.wait = _wait
return proc
monkeypatch.setattr(
"app.services.video.session_manager.asyncio.create_subprocess_exec",
fake_create_subprocess_exec,
)
async def _noop_tail(self, *args: Any, **kwargs: Any) -> None:
return None
monkeypatch.setattr(CameraSessionManager, "_tail_algo_events", _noop_tail)
async def _instant_sleep(_delay: float) -> None:
return None
monkeypatch.setattr("app.services.video.session_manager.asyncio.sleep", _instant_sleep)
monkeypatch.setattr("app.api.asyncio.sleep", _instant_sleep)
async def _noop_voice_assign(*args: Any, **kwargs: Any) -> None:
return None
monkeypatch.setattr("app.api.assign_voice_terminal_after_recording_started", _noop_voice_assign)
app = FastAPI()
app.include_router(api_router)
app.dependency_overrides[get_surgery_pipeline] = lambda: container.surgery_pipeline
app.dependency_overrides[get_voice_terminal_hub] = lambda: container.voice_terminal_hub
surgery_id = "123456"
async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client:
res = await client.post(
"/client/surgeries/start",
json={
"surgery_id": surgery_id,
"camera_ids": ["cam1"],
"candidate_consumables": ["纱布"],
},
)
assert res.status_code == 200, res.text
assert res.json()["status"] == "accepted"
end = await client.post("/client/surgeries/end", json={"surgery_id": surgery_id})
assert end.status_code == 200, end.text
assert len(captured_cmds) == 1, "expected one algorithm_runner subprocess spawn"
cmd = captured_cmds[0]
assert cmd[0] == sys.executable
assert cmd[1:3] == ["-m", "app.algorithm_runner"]
assert cmd[cmd.index("--source") + 1] == "rtsp://unittest/or-cam-03/live"
assert cmd[cmd.index("--source-mode") + 1] == "realtime"
assert cmd[cmd.index("--surgery-id") + 1] == surgery_id
assert cmd[cmd.index("--camera-id") + 1] == "or-cam-03"
whitelist_path = Path(cmd[cmd.index("--whitelist-json") + 1])
assert whitelist_path.is_file()
whitelist = json.loads(whitelist_path.read_text(encoding="utf-8"))
assert whitelist["candidate_consumables"] == ["纱布"]
events_path = Path(cmd[cmd.index("--events-jsonl") + 1])
assert events_path.is_file()
assert '"type": "ready"' in events_path.read_text(encoding="utf-8")

View File

@@ -0,0 +1,101 @@
from __future__ import annotations
import time
from pathlib import Path
from app.services.video_batch_cleanup import (
VISUALIZATION_FILENAME,
purge_batch_artifacts,
purge_expired_visualizations,
purge_surgery_batch_tree,
purge_visualization_artifacts,
stage_visualization_pending,
visualization_output_path,
visualization_pending_input_path,
visualization_pending_result_path,
)
def test_stage_visualization_pending_copies_mp4_and_tsv(tmp_path: Path) -> None:
root = tmp_path / "video_batch"
source = tmp_path / "source.mp4"
tsv = tmp_path / "result.tsv"
source.write_bytes(b"mp4-bytes")
tsv.write_text("rank\tx\n1\ta\n", encoding="utf-8")
stage_visualization_pending(root, "100001", source_mp4=source, result_tsv=tsv)
assert visualization_pending_input_path(root, "100001").read_bytes() == b"mp4-bytes"
assert visualization_pending_result_path(root, "100001").read_text(encoding="utf-8") == tsv.read_text(
encoding="utf-8"
)
def test_purge_batch_artifacts_removes_cache_and_uploads(tmp_path: Path) -> None:
root = tmp_path / "video_batch"
surgery_id = "100001"
digest = "d" * 64
candidate_key = "c1"
cache_entry = root / "cache" / surgery_id / digest / candidate_key
(cache_entry / "input").mkdir(parents=True)
(cache_entry / "input" / "input.mp4").write_bytes(b"x" * 100)
(cache_entry / "output").mkdir(parents=True)
(cache_entry / "output" / "result.tsv").write_text("ok\n", encoding="utf-8")
(cache_entry / "work").mkdir(parents=True)
(cache_entry / "work" / "features").mkdir(parents=True)
(cache_entry / "work" / "features" / "input.npy").write_bytes(b"npy")
(root / surgery_id / "upload").mkdir(parents=True)
(root / surgery_id / "upload" / "upload.mp4").write_bytes(b"upload")
(root / surgery_id / "input").mkdir(parents=True)
(root / surgery_id / "input" / "abc.mp4").write_bytes(b"input")
purge_batch_artifacts(root, surgery_id, digest=digest, candidate_key=candidate_key)
purge_surgery_batch_tree(root, surgery_id)
assert not cache_entry.exists()
assert not (root / surgery_id).exists()
def test_purge_visualization_artifacts_keeps_browser_mp4(tmp_path: Path) -> None:
out_dir = tmp_path / "vis" / "100001"
out_dir.mkdir(parents=True)
browser = out_dir / VISUALIZATION_FILENAME
raw = out_dir / "result_vis_source.mp4"
part = out_dir / "result_vis.part.mp4"
browser.write_bytes(b"browser")
raw.write_bytes(b"raw")
part.write_bytes(b"part")
purge_visualization_artifacts(out_dir)
assert browser.is_file()
assert not raw.exists()
assert not part.exists()
def test_purge_expired_visualizations_removes_old_vis_mp4(tmp_path: Path) -> None:
root = tmp_path / "video_batch"
vis_path = visualization_output_path(root, "100001")
vis_path.parent.mkdir(parents=True)
vis_path.write_bytes(b"vis")
old = time.time() - (25 * 3600)
import os
os.utime(vis_path, (old, old))
removed = purge_expired_visualizations(root, ttl_hours=24.0)
assert removed == 1
assert not vis_path.exists()
def test_purge_expired_visualizations_keeps_recent_vis_mp4(tmp_path: Path) -> None:
root = tmp_path / "video_batch"
vis_path = visualization_output_path(root, "100002")
vis_path.parent.mkdir(parents=True)
vis_path.write_bytes(b"vis")
removed = purge_expired_visualizations(root, ttl_hours=24.0)
assert removed == 0
assert vis_path.is_file()

View File

@@ -27,6 +27,7 @@ from app.services.video_batch_runner import (
browser_transcode_tmp_path,
build_reference_command,
VISUALIZATION_MAX_WIDTH,
build_reference_config,
build_reference_visualization_command,
describe_batch_returncode,
doctor_id_for_consumption_rows,
@@ -37,6 +38,7 @@ from app.services.video_batch_runner import (
parse_reference_doctor_info,
parse_reference_tsv,
)
from app.services.video_batch_cleanup import VISUALIZATION_FILENAME, visualization_output_path
def _complete_result_tsv_body() -> str:
@@ -73,6 +75,31 @@ def _write_minimal_reference_bundle(bundle: Path) -> None:
)
def test_build_reference_config_does_not_keep_work_dir(tmp_path: Path) -> None:
bundle = tmp_path / "bundle"
_write_minimal_reference_bundle(bundle)
cfg = build_reference_config(
bundle_dir=bundle,
video_path=tmp_path / "input.mp4",
output_path=tmp_path / "out.tsv",
work_dir=tmp_path / "work",
excel_path=tmp_path / "catalog.xlsx",
whitelist_path=tmp_path / "whitelist.json",
)
assert cfg["runtime"]["keep_work_dir"] is False
def test_latest_visualization_path_uses_vis_directory(tmp_path: Path) -> None:
root = tmp_path / "batch"
runner = VideoBatchRunner(root_dir=root)
assert runner.latest_visualization_path("100001") is None
vis_path = visualization_output_path(root, "100001")
vis_path.parent.mkdir(parents=True)
vis_path.write_bytes(b"not-really-mp4")
assert runner.latest_visualization_path("100001") is None
def test_is_reference_result_complete_requires_footer_and_rows(tmp_path: Path) -> None:
complete = tmp_path / "complete.tsv"
complete.write_text(_complete_result_tsv_body(), encoding="utf-8")
@@ -541,29 +568,37 @@ def test_demo_video_batch_endpoint_writes_queryable_result(
timestamp=datetime(2026, 5, 8, tzinfo=timezone.utc),
source="video_batch",
)
root_dir = tmp_path / "video_batch"
vis_calls: list[str] = []
class _FakeRunner:
root_dir = tmp_path / "video_batch"
def __init__(self) -> None:
self.root_dir = root_dir
def run(self, **kwargs: Any) -> VideoBatchRunResult:
assert kwargs["surgery_id"] == "100001"
assert kwargs["uploaded_video_path"].is_file()
assert kwargs["candidate_consumables"] == ["耗材1"]
assert kwargs.get("include_visualization") is False
cache_dir = root_dir / "cache" / "100001" / ("a" * 64) / "c1"
cache_input = cache_dir / "input" / "input.mp4"
cache_input.parent.mkdir(parents=True)
cache_input.write_bytes(b"pipeline-input")
output_path = cache_dir / "output" / "result.tsv"
output_path.parent.mkdir(parents=True)
output_path.write_text(_complete_result_tsv_body(), encoding="utf-8")
return VideoBatchRunResult(
video_sha256="abc",
video_sha256="a" * 64,
candidate_cache_key="c1",
input_path=tmp_path / "saved.mp4",
work_dir=tmp_path / "work",
output_path=tmp_path / "result.tsv",
input_path=root_dir / "100001" / "input" / "saved.mp4",
work_dir=cache_dir / "work",
output_path=output_path,
details=[detail],
reused_cache=False,
)
def finalize_visualization(self, result: VideoBatchRunResult, *, surgery_id: str) -> None:
assert surgery_id == "100001"
assert result.details[0].item_name == "耗材1"
return None
def finalize_visualization(self, *, surgery_id: str) -> None:
vis_calls.append(surgery_id)
class _FakePipeline:
def __init__(self) -> None:
@@ -609,10 +644,91 @@ def test_demo_video_batch_endpoint_writes_queryable_result(
files={"video1": ("case.mp4", b"video-bytes", "video/mp4")},
)
assert res.status_code == 200, res.text
assert res.json()["status"] == "accepted"
body = res.json()
assert body["status"] == "accepted"
assert body["visualization_url"] is None
assert vis_calls == []
assert not (root_dir / "cache" / "100001").exists()
assert not (root_dir / "100001").exists()
got = client.get("/client/surgeries/100001/result")
assert got.status_code == 200, got.text
body = got.json()
assert body["details"][0]["item_id"] == "P1"
assert body["summary"][0]["total_quantity"] == 1
result_body = got.json()
assert result_body["details"][0]["item_id"] == "P1"
assert result_body["summary"][0]["total_quantity"] == 1
def test_demo_video_batch_endpoint_stages_vis_and_purges_cache_when_requested(
tmp_path: Path,
monkeypatch,
) -> None:
monkeypatch.setattr(demo_orch.settings, "demo_orchestrator_enabled", True)
detail = SurgeryConsumptionStored(
item_id="P1",
item_name="耗材1",
qty=1,
doctor_id="vision",
timestamp=datetime(2026, 5, 8, tzinfo=timezone.utc),
source="video_batch",
)
root_dir = tmp_path / "video_batch"
vis_calls: list[str] = []
class _FakeRunner:
def __init__(self) -> None:
self.root_dir = root_dir
def run(self, **kwargs: Any) -> VideoBatchRunResult:
cache_dir = root_dir / "cache" / "100001" / ("b" * 64) / "c1"
cache_input = cache_dir / "input" / "input.mp4"
cache_input.parent.mkdir(parents=True)
cache_input.write_bytes(b"pipeline-input")
output_path = cache_dir / "output" / "result.tsv"
output_path.parent.mkdir(parents=True)
output_path.write_text(_complete_result_tsv_body(), encoding="utf-8")
return VideoBatchRunResult(
video_sha256="b" * 64,
candidate_cache_key="c1",
input_path=root_dir / "100001" / "input" / "saved.mp4",
work_dir=cache_dir / "work",
output_path=output_path,
details=[detail],
reused_cache=False,
)
def finalize_visualization(self, *, surgery_id: str) -> None:
vis_calls.append(surgery_id)
class _FakePipeline:
async def save_video_batch_result(
self,
surgery_id: str,
details: list[SurgeryConsumptionStored],
) -> None:
return None
monkeypatch.setattr(demo_orch, "VideoBatchRunner", _FakeRunner)
app = FastAPI()
app.include_router(demo_orch.router)
app.dependency_overrides[get_surgery_pipeline] = lambda: _FakePipeline()
client = TestClient(app)
res = client.post(
"/internal/demo/video-batch-surgery",
data={
"surgery_id": "100001",
"candidate_consumables_json": '["耗材1"]',
"include_visualization": "true",
},
files={"video1": ("case.mp4", b"video-bytes", "video/mp4")},
)
assert res.status_code == 200, res.text
body = res.json()
assert body["visualization_url"] == "/internal/demo/video-batch-surgery/100001/visualization"
assert vis_calls == ["100001"]
assert not (root_dir / "cache" / "100001").exists()
pending_input = root_dir / "vis_pending" / "100001" / "input.mp4"
pending_tsv = root_dir / "vis_pending" / "100001" / "result.tsv"
assert pending_input.read_bytes() == b"pipeline-input"
assert "医生信息" in pending_tsv.read_text(encoding="utf-8")