"""Offline batch orchestration: spawn 5.15 main.py and optional visualization subprocess.""" from __future__ import annotations from dataclasses import dataclass from pathlib import Path from loguru import logger from app.algo_host.bundle import default_reference_bundle_dir, resolve_reference_bundle_dir from app.algo_host.job_workspace import prepare_batch_job from app.algo_host.result_adapter import ( ReferenceDoctorInfo, candidate_cache_key, is_reference_result_complete, parse_reference_doctor_info, parse_reference_tsv, resolve_reference_candidates, sha256_file, ) from app.algo_host.subprocess_runner import run_batch_main, run_visualization_script from app.algo_host.transcode import ( is_browser_compatible_mp4, is_readable_mp4, stage_batch_pipeline_input, transcode_visualization_for_browser, ) from app.domain.consumption import SurgeryConsumptionStored from app.services.video_batch_cleanup import ( RAW_VISUALIZATION_FILENAME, purge_visualization_artifacts, purge_visualization_pending, visualization_output_path, visualization_pending_input_path, visualization_pending_result_path, ) @dataclass(frozen=True) class BatchRunResult: video_sha256: str candidate_cache_key: str input_path: Path work_dir: Path output_path: Path details: list[SurgeryConsumptionStored] reused_cache: bool doctor: ReferenceDoctorInfo | None = None visualization_path: Path | None = None def default_batch_root_dir() -> Path: repo_root = Path(__file__).resolve().parents[2] return repo_root / "logs" / "video_batch" class BatchAlgorithmService: def __init__( self, *, bundle_dir: Path | None = None, root_dir: Path | None = None, ) -> None: self._bundle_dir_override = bundle_dir self._root_dir = root_dir or default_batch_root_dir() @property def bundle_dir(self) -> Path: if self._bundle_dir_override is not None: return Path(self._bundle_dir_override).expanduser().resolve() return default_reference_bundle_dir() @property def root_dir(self) -> Path: return self._root_dir def _generate_visualization( self, *, bundle_dir: Path, video_path: Path, result_path: Path, output_video_path: Path, ) -> Path | None: raw_video_path = output_video_path.with_name(RAW_VISUALIZATION_FILENAME) script_path = bundle_dir / "visualize_result_video.py" if not script_path.is_file(): logger.warning("reference visualization script not found: {}", script_path) return None if not video_path.is_file() or not result_path.is_file(): return None if output_video_path.is_file() and is_browser_compatible_mp4(output_video_path): return output_video_path if raw_video_path.is_file() and not is_readable_mp4(raw_video_path): raw_video_path.unlink(missing_ok=True) if output_video_path.is_file() and not is_browser_compatible_mp4(output_video_path): output_video_path.unlink(missing_ok=True) if raw_video_path.is_file() and is_readable_mp4(raw_video_path): logger.info( "reusing existing visualization source for transcode: {}", raw_video_path, ) if transcode_visualization_for_browser(raw_video_path, output_video_path): return output_video_path logger.warning( "transcode from existing source failed; regenerating visualization: {}", raw_video_path, ) raw_video_path.unlink(missing_ok=True) try: run_visualization_script( bundle_dir=bundle_dir, video_path=video_path, result_path=result_path, raw_output_video_path=raw_video_path, ) except RuntimeError as exc: logger.error("reference visualization failed: {}", exc) return None if not is_readable_mp4(raw_video_path): logger.error("reference visualization produced unreadable mp4: {}", raw_video_path) return None if transcode_visualization_for_browser(raw_video_path, output_video_path): purge_visualization_artifacts(output_video_path.parent) return output_video_path logger.error("reference visualization transcode to browser mp4 failed: {}", output_video_path) return None def finalize_visualization( self, *, surgery_id: str, video_path: Path | None = None, result_path: Path | None = None, ) -> Path | None: logger.info( "video batch visualization starting for surgery_id={} (visualize_result_video.py)", surgery_id, ) video_path = (video_path or visualization_pending_input_path(self._root_dir, surgery_id)).resolve() result_path = (result_path or visualization_pending_result_path(self._root_dir, surgery_id)).resolve() output_video_path = visualization_output_path(self._root_dir, surgery_id) if not is_reference_result_complete(result_path): logger.warning("skip visualization: incomplete result {}", result_path) purge_visualization_pending(self._root_dir, surgery_id) return None if not video_path.is_file(): logger.warning("skip visualization: missing staged video {}", video_path) purge_visualization_pending(self._root_dir, surgery_id) return None bundle_dir = resolve_reference_bundle_dir(self._bundle_dir_override) output_video_path.parent.mkdir(parents=True, exist_ok=True) vis_path = self._generate_visualization( bundle_dir=bundle_dir, video_path=video_path, result_path=result_path, output_video_path=output_video_path, ) purge_visualization_pending(self._root_dir, surgery_id) if vis_path is not None: logger.info( "video batch visualization complete for surgery_id={} ({})", surgery_id, vis_path, ) else: logger.warning("video batch visualization failed for surgery_id={}", surgery_id) return vis_path def latest_visualization_path(self, surgery_id: str) -> Path | None: path = visualization_output_path(self._root_dir, surgery_id) if path.is_file() and path.stat().st_size > 0 and is_browser_compatible_mp4(path): return path return None def run( self, *, surgery_id: str, uploaded_video_path: Path, original_filename: str = "video.mp4", candidate_consumables: list[str] | None = None, include_visualization: bool = False, ) -> BatchRunResult: del original_filename, include_visualization bundle_dir = resolve_reference_bundle_dir(self._bundle_dir_override) uploaded_video_path = uploaded_video_path.resolve() digest = sha256_file(uploaded_video_path) candidates = resolve_reference_candidates(candidate_consumables) candidate_key = candidate_cache_key(candidates) pipeline_video = ( self._root_dir / "cache" / digest / "input" / f"pipeline{uploaded_video_path.suffix or '.mp4'}" ) stage_batch_pipeline_input( source_path=uploaded_video_path, dest_path=pipeline_video, ) cache_dir = self._root_dir / "cache" / digest / candidate_key job = prepare_batch_job( bundle_dir=self._bundle_dir_override, cache_dir=cache_dir, pipeline_video_path=pipeline_video, candidate_consumables=candidates, ) reused_cache = job.output_path.is_file() and is_reference_result_complete(job.output_path) if reused_cache: logger.info( "reference batch cache hit digest={} candidate_key={} ({})", digest[:12], candidate_key, job.output_path, ) else: logger.info( "reference batch starting for surgery_id={} (5.15/main.py, work_dir={})", surgery_id, job.work_dir, ) run_batch_main( bundle_dir=bundle_dir, config_path=job.config_path.resolve(), work_dir=job.work_dir.resolve(), output_path=job.output_path.resolve(), ) if not is_reference_result_complete(job.output_path): raise RuntimeError( f"reference bundle finished but result.tsv is incomplete: {job.output_path}" ) logger.info( "reference batch complete for surgery_id={} ({})", surgery_id, job.output_path, ) doctor = parse_reference_doctor_info(job.output_path) details = parse_reference_tsv(job.output_path, doctor=doctor) return BatchRunResult( video_sha256=digest, candidate_cache_key=candidate_key, input_path=pipeline_video, work_dir=job.work_dir, output_path=job.output_path, details=details, reused_cache=reused_cache, doctor=doctor, visualization_path=None, )