Files
operating-room-monitor-server/backend/app/algo_host/batch_service.py

264 lines
9.7 KiB
Python
Raw Normal View History

2026-05-22 09:35:41 +08:00
"""Offline batch orchestration: spawn 5.15 main.py and optional visualization subprocess."""
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
from loguru import logger
from app.algo_host.bundle import default_reference_bundle_dir, resolve_reference_bundle_dir
from app.algo_host.job_workspace import prepare_batch_job
from app.algo_host.result_adapter import (
ReferenceDoctorInfo,
candidate_cache_key,
is_reference_result_complete,
parse_reference_doctor_info,
parse_reference_tsv,
resolve_reference_candidates,
sha256_file,
)
from app.algo_host.subprocess_runner import run_batch_main, run_visualization_script
from app.algo_host.transcode import (
is_browser_compatible_mp4,
is_readable_mp4,
publish_labeled_video_for_browser,
2026-05-22 11:15:22 +08:00
stage_batch_pipeline_input,
2026-05-22 09:35:41 +08:00
)
from app.domain.consumption import SurgeryConsumptionStored
from app.services.video_batch_cleanup import (
RAW_VISUALIZATION_FILENAME,
purge_visualization_artifacts,
purge_visualization_pending,
visualization_output_path,
visualization_pending_input_path,
visualization_pending_result_path,
)
@dataclass(frozen=True)
class BatchRunResult:
    """Immutable summary of one ``BatchAlgorithmService.run`` invocation."""

    # SHA-256 digest of the uploaded video, as returned by ``sha256_file``.
    video_sha256: str
    # Key derived from the resolved candidate consumables; names the cache sub-directory.
    candidate_cache_key: str
    # Staged pipeline input video that the batch job was prepared with.
    input_path: Path
    # Working directory of the prepared batch job.
    work_dir: Path
    # Result file produced (or reused) by the reference batch run.
    output_path: Path
    # Consumption records parsed from ``output_path``.
    details: list[SurgeryConsumptionStored]
    # True when a complete result already existed and the batch was not re-run.
    reused_cache: bool
    # Doctor information parsed from ``output_path``; optional.
    doctor: ReferenceDoctorInfo | None = None
    # Path of a labeled visualization video; optional.
    visualization_path: Path | None = None
def default_batch_root_dir() -> Path:
    """Return the default batch root directory.

    The location is ``logs/video_batch`` under the third ancestor directory
    of this module's resolved file path.
    """
    # parents[0] is this module's directory; parents[2] is two levels above it.
    base_dir = Path(__file__).resolve().parents[2]
    return base_dir.joinpath("logs", "video_batch")
class BatchAlgorithmService:
    """Run the offline reference batch and the optional labeled-video visualization.

    Results are cached on disk under ``<root_dir>/cache/<video sha256>/<candidate key>``.
    """

    def __init__(
        self,
        *,
        bundle_dir: Path | None = None,
        root_dir: Path | None = None,
    ) -> None:
        """Store the optional bundle override and the batch root directory.

        Args:
            bundle_dir: Explicit reference bundle directory; ``None`` means
                "use whatever the bundle helpers resolve by default".
            root_dir: Root directory for batch data; falls back to
                ``default_batch_root_dir()`` when not supplied.
        """
        self._bundle_dir_override = bundle_dir
        self._root_dir = root_dir if root_dir else default_batch_root_dir()

    @property
    def bundle_dir(self) -> Path:
        """Bundle directory: the expanded, resolved override, else the default."""
        override = self._bundle_dir_override
        if override is None:
            return default_reference_bundle_dir()
        return Path(override).expanduser().resolve()

    @property
    def root_dir(self) -> Path:
        """Root directory under which batch data is kept."""
        return self._root_dir

    def _generate_visualization(
        self,
        *,
        bundle_dir: Path,
        video_path: Path,
        result_path: Path,
        output_video_path: Path,
    ) -> Path | None:
        """Produce, or reuse, the browser-playable labeled video.

        Order of operations:

        1. Give up when the bundle's ``visualize_result_video.py``, the input
           video or the result file is missing, or the result is incomplete.
        2. Return the existing output when it is already browser compatible.
        3. Delete an unreadable raw labeled video and an incompatible output.
        4. If a readable raw labeled video remains, try to publish it; on
           failure delete it and fall through to regeneration.
        5. Run the visualization script, then publish its raw output. Only on
           this path are visualization artifacts purged from the output
           directory after a successful publish.

        Returns:
            ``output_video_path`` on success, ``None`` on any failure.
        """
        labeled_raw = output_video_path.with_name(RAW_VISUALIZATION_FILENAME)
        vis_script = bundle_dir / "visualize_result_video.py"

        # --- preconditions -------------------------------------------------
        if not vis_script.is_file():
            logger.warning("reference visualization script not found: {}", vis_script)
            return None
        if not (video_path.is_file() and result_path.is_file()):
            return None
        if not is_reference_result_complete(result_path):
            logger.warning("skip visualization: incomplete result {}", result_path)
            return None

        # --- reuse a finished output ---------------------------------------
        if output_video_path.is_file() and is_browser_compatible_mp4(output_video_path):
            return output_video_path

        # --- discard stale leftovers ---------------------------------------
        if labeled_raw.is_file() and not is_readable_mp4(labeled_raw):
            labeled_raw.unlink(missing_ok=True)
        if output_video_path.is_file() and not is_browser_compatible_mp4(output_video_path):
            output_video_path.unlink(missing_ok=True)

        # --- reuse a previously rendered raw labeled video -----------------
        if labeled_raw.is_file() and is_readable_mp4(labeled_raw):
            logger.info(
                "reusing existing labeled visualization for browser publish: {}",
                labeled_raw,
            )
            republished = publish_labeled_video_for_browser(
                labeled_source=labeled_raw,
                browser_output=output_video_path,
            )
            if republished:
                return output_video_path
            logger.warning(
                "browser publish from existing labeled source failed; regenerating visualization: {}",
                labeled_raw,
            )
            labeled_raw.unlink(missing_ok=True)

        # --- render from scratch -------------------------------------------
        try:
            run_visualization_script(
                bundle_dir=bundle_dir,
                video_path=video_path,
                result_path=result_path,
                raw_output_video_path=labeled_raw,
            )
        except RuntimeError as exc:
            logger.error("reference visualization failed: {}", exc)
            return None

        if not is_readable_mp4(labeled_raw):
            logger.error("reference visualization produced unreadable mp4: {}", labeled_raw)
            return None

        published = publish_labeled_video_for_browser(
            labeled_source=labeled_raw,
            browser_output=output_video_path,
        )
        if not published:
            logger.error("labeled visualization browser publish failed: {}", output_video_path)
            return None

        purge_visualization_artifacts(output_video_path.parent)
        return output_video_path

    def finalize_visualization(
        self,
        *,
        surgery_id: str,
        video_path: Path | None = None,
        result_path: Path | None = None,
    ) -> Path | None:
        """Generate the labeled visualization for ``surgery_id`` and clean up.

        Args:
            surgery_id: Surgery whose visualization should be produced.
            video_path: Input video; defaults to the pending input path for
                this surgery under the batch root.
            result_path: Result file; defaults to the pending result path for
                this surgery under the batch root.

        Returns:
            The published visualization path, or ``None`` when the result is
            incomplete, the video is missing, or generation failed. In every
            case ``purge_visualization_pending`` is called before returning.
        """
        batch_root = self._root_dir
        logger.info(
            "video batch visualization starting for surgery_id={} (visualize_result_video.py)",
            surgery_id,
        )

        staged_video = video_path or visualization_pending_input_path(batch_root, surgery_id)
        video_path = staged_video.resolve()
        staged_result = result_path or visualization_pending_result_path(batch_root, surgery_id)
        result_path = staged_result.resolve()
        output_video_path = visualization_output_path(batch_root, surgery_id)

        if not is_reference_result_complete(result_path):
            logger.warning("skip visualization: incomplete result {}", result_path)
            purge_visualization_pending(batch_root, surgery_id)
            return None
        if not video_path.is_file():
            logger.warning("skip visualization: missing staged video {}", video_path)
            purge_visualization_pending(batch_root, surgery_id)
            return None

        resolved_bundle = resolve_reference_bundle_dir(self._bundle_dir_override)
        output_video_path.parent.mkdir(parents=True, exist_ok=True)
        vis_path = self._generate_visualization(
            bundle_dir=resolved_bundle,
            video_path=video_path,
            result_path=result_path,
            output_video_path=output_video_path,
        )
        # Pending inputs are purged whether or not generation succeeded.
        purge_visualization_pending(batch_root, surgery_id)

        if vis_path is None:
            logger.warning("video batch visualization failed for surgery_id={}", surgery_id)
        else:
            logger.info(
                "video batch visualization complete for surgery_id={} ({})",
                surgery_id,
                vis_path,
            )
        return vis_path

    def latest_visualization_path(self, surgery_id: str) -> Path | None:
        """Return the surgery's visualization if it is a non-empty, browser-compatible file."""
        candidate = visualization_output_path(self._root_dir, surgery_id)
        usable = (
            candidate.is_file()
            and candidate.stat().st_size > 0
            and is_browser_compatible_mp4(candidate)
        )
        return candidate if usable else None

    def run(
        self,
        *,
        surgery_id: str,
        uploaded_video_path: Path,
        original_filename: str = "video.mp4",
        candidate_consumables: list[str] | None = None,
        include_visualization: bool = False,
    ) -> BatchRunResult:
        """Run the reference batch for an uploaded video, reusing cached results.

        Args:
            surgery_id: Surgery identifier; used for log messages only.
            uploaded_video_path: Video to process.
            original_filename: Accepted but unused.
            candidate_consumables: Optional candidate names, normalised by
                ``resolve_reference_candidates``.
            include_visualization: Accepted but unused; the returned
                ``visualization_path`` is always ``None``.

        Returns:
            A ``BatchRunResult`` describing the (possibly cached) run.

        Raises:
            RuntimeError: When the batch ran but its result file is incomplete.
        """
        # Both parameters are part of the signature but deliberately ignored here.
        del original_filename, include_visualization

        resolved_bundle = resolve_reference_bundle_dir(self._bundle_dir_override)
        uploaded_video_path = uploaded_video_path.resolve()
        digest = sha256_file(uploaded_video_path)
        candidates = resolve_reference_candidates(candidate_consumables)
        candidate_key = candidate_cache_key(candidates)

        # Everything for this video lives under cache/<digest>/.
        digest_dir = self._root_dir / "cache" / digest
        suffix = uploaded_video_path.suffix or ".mp4"
        pipeline_video = digest_dir / "input" / f"pipeline{suffix}"
        stage_batch_pipeline_input(
            source_path=uploaded_video_path,
            dest_path=pipeline_video,
        )

        cache_dir = digest_dir / candidate_key
        job = prepare_batch_job(
            bundle_dir=self._bundle_dir_override,
            cache_dir=cache_dir,
            pipeline_video_path=pipeline_video,
            candidate_consumables=candidates,
        )
        result_file = job.output_path

        reused_cache = result_file.is_file() and is_reference_result_complete(result_file)
        if not reused_cache:
            logger.info(
                "reference batch starting for surgery_id={} (5.15/main.py, work_dir={})",
                surgery_id,
                job.work_dir,
            )
            run_batch_main(
                bundle_dir=resolved_bundle,
                config_path=job.config_path.resolve(),
                work_dir=job.work_dir.resolve(),
                output_path=result_file.resolve(),
            )
            if not is_reference_result_complete(result_file):
                raise RuntimeError(
                    f"reference bundle finished but result.tsv is incomplete: {result_file}"
                )
            logger.info(
                "reference batch complete for surgery_id={} ({})",
                surgery_id,
                result_file,
            )
        else:
            logger.info(
                "reference batch cache hit digest={} candidate_key={} ({})",
                digest[:12],
                candidate_key,
                result_file,
            )

        doctor = parse_reference_doctor_info(result_file)
        details = parse_reference_tsv(result_file, doctor=doctor)
        return BatchRunResult(
            video_sha256=digest,
            candidate_cache_key=candidate_key,
            input_path=pipeline_video,
            work_dir=job.work_dir,
            output_path=result_file,
            details=details,
            reused_cache=reused_cache,
            doctor=doctor,
            visualization_path=None,
        )