update minio port
This commit is contained in:
250
backend/app/algo_host/batch_service.py
Normal file
250
backend/app/algo_host/batch_service.py
Normal file
@@ -0,0 +1,250 @@
|
||||
"""Offline batch orchestration: spawn 5.15 main.py and optional visualization subprocess."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from app.algo_host.bundle import default_reference_bundle_dir, resolve_reference_bundle_dir
|
||||
from app.algo_host.job_workspace import prepare_batch_job
|
||||
from app.algo_host.result_adapter import (
|
||||
ReferenceDoctorInfo,
|
||||
candidate_cache_key,
|
||||
is_reference_result_complete,
|
||||
parse_reference_doctor_info,
|
||||
parse_reference_tsv,
|
||||
resolve_reference_candidates,
|
||||
sha256_file,
|
||||
)
|
||||
from app.algo_host.subprocess_runner import run_batch_main, run_visualization_script
|
||||
from app.algo_host.transcode import (
|
||||
ensure_batch_pipeline_input_video,
|
||||
is_browser_compatible_mp4,
|
||||
is_readable_mp4,
|
||||
transcode_visualization_for_browser,
|
||||
)
|
||||
from app.domain.consumption import SurgeryConsumptionStored
|
||||
from app.services.video_batch_cleanup import (
|
||||
RAW_VISUALIZATION_FILENAME,
|
||||
purge_visualization_artifacts,
|
||||
purge_visualization_pending,
|
||||
visualization_output_path,
|
||||
visualization_pending_input_path,
|
||||
visualization_pending_result_path,
|
||||
)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class BatchRunResult:
|
||||
video_sha256: str
|
||||
candidate_cache_key: str
|
||||
input_path: Path
|
||||
work_dir: Path
|
||||
output_path: Path
|
||||
details: list[SurgeryConsumptionStored]
|
||||
reused_cache: bool
|
||||
doctor: ReferenceDoctorInfo | None = None
|
||||
visualization_path: Path | None = None
|
||||
|
||||
|
||||
def default_batch_root_dir() -> Path:
|
||||
repo_root = Path(__file__).resolve().parents[2]
|
||||
return repo_root / "logs" / "video_batch"
|
||||
|
||||
|
||||
class BatchAlgorithmService:
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
bundle_dir: Path | None = None,
|
||||
root_dir: Path | None = None,
|
||||
) -> None:
|
||||
self._bundle_dir_override = bundle_dir
|
||||
self._root_dir = root_dir or default_batch_root_dir()
|
||||
|
||||
@property
|
||||
def bundle_dir(self) -> Path:
|
||||
if self._bundle_dir_override is not None:
|
||||
return Path(self._bundle_dir_override).expanduser().resolve()
|
||||
return default_reference_bundle_dir()
|
||||
|
||||
@property
|
||||
def root_dir(self) -> Path:
|
||||
return self._root_dir
|
||||
|
||||
def _generate_visualization(
|
||||
self,
|
||||
*,
|
||||
bundle_dir: Path,
|
||||
video_path: Path,
|
||||
result_path: Path,
|
||||
output_video_path: Path,
|
||||
) -> Path | None:
|
||||
raw_video_path = output_video_path.with_name(RAW_VISUALIZATION_FILENAME)
|
||||
script_path = bundle_dir / "visualize_result_video.py"
|
||||
if not script_path.is_file():
|
||||
logger.warning("reference visualization script not found: {}", script_path)
|
||||
return None
|
||||
if not video_path.is_file() or not result_path.is_file():
|
||||
return None
|
||||
if output_video_path.is_file() and is_browser_compatible_mp4(output_video_path):
|
||||
return output_video_path
|
||||
if raw_video_path.is_file() and not is_readable_mp4(raw_video_path):
|
||||
raw_video_path.unlink(missing_ok=True)
|
||||
if output_video_path.is_file() and not is_browser_compatible_mp4(output_video_path):
|
||||
output_video_path.unlink(missing_ok=True)
|
||||
if raw_video_path.is_file() and is_readable_mp4(raw_video_path):
|
||||
logger.info(
|
||||
"reusing existing visualization source for transcode: {}",
|
||||
raw_video_path,
|
||||
)
|
||||
if transcode_visualization_for_browser(raw_video_path, output_video_path):
|
||||
return output_video_path
|
||||
logger.warning(
|
||||
"transcode from existing source failed; regenerating visualization: {}",
|
||||
raw_video_path,
|
||||
)
|
||||
raw_video_path.unlink(missing_ok=True)
|
||||
try:
|
||||
run_visualization_script(
|
||||
bundle_dir=bundle_dir,
|
||||
video_path=video_path,
|
||||
result_path=result_path,
|
||||
raw_output_video_path=raw_video_path,
|
||||
)
|
||||
except RuntimeError as exc:
|
||||
logger.error("reference visualization failed: {}", exc)
|
||||
return None
|
||||
if not is_readable_mp4(raw_video_path):
|
||||
logger.error("reference visualization produced unreadable mp4: {}", raw_video_path)
|
||||
return None
|
||||
if transcode_visualization_for_browser(raw_video_path, output_video_path):
|
||||
purge_visualization_artifacts(output_video_path.parent)
|
||||
return output_video_path
|
||||
logger.error("reference visualization transcode to browser mp4 failed: {}", output_video_path)
|
||||
return None
|
||||
|
||||
def finalize_visualization(
|
||||
self,
|
||||
*,
|
||||
surgery_id: str,
|
||||
video_path: Path | None = None,
|
||||
result_path: Path | None = None,
|
||||
) -> Path | None:
|
||||
logger.info(
|
||||
"video batch visualization starting for surgery_id={} (visualize_result_video.py)",
|
||||
surgery_id,
|
||||
)
|
||||
video_path = (video_path or visualization_pending_input_path(self._root_dir, surgery_id)).resolve()
|
||||
result_path = (result_path or visualization_pending_result_path(self._root_dir, surgery_id)).resolve()
|
||||
output_video_path = visualization_output_path(self._root_dir, surgery_id)
|
||||
if not is_reference_result_complete(result_path):
|
||||
logger.warning("skip visualization: incomplete result {}", result_path)
|
||||
purge_visualization_pending(self._root_dir, surgery_id)
|
||||
return None
|
||||
if not video_path.is_file():
|
||||
logger.warning("skip visualization: missing staged video {}", video_path)
|
||||
purge_visualization_pending(self._root_dir, surgery_id)
|
||||
return None
|
||||
bundle_dir = resolve_reference_bundle_dir(self._bundle_dir_override)
|
||||
output_video_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
vis_path = self._generate_visualization(
|
||||
bundle_dir=bundle_dir,
|
||||
video_path=video_path,
|
||||
result_path=result_path,
|
||||
output_video_path=output_video_path,
|
||||
)
|
||||
purge_visualization_pending(self._root_dir, surgery_id)
|
||||
if vis_path is not None:
|
||||
logger.info(
|
||||
"video batch visualization complete for surgery_id={} ({})",
|
||||
surgery_id,
|
||||
vis_path,
|
||||
)
|
||||
else:
|
||||
logger.warning("video batch visualization failed for surgery_id={}", surgery_id)
|
||||
return vis_path
|
||||
|
||||
def latest_visualization_path(self, surgery_id: str) -> Path | None:
|
||||
path = visualization_output_path(self._root_dir, surgery_id)
|
||||
if path.is_file() and path.stat().st_size > 0 and is_browser_compatible_mp4(path):
|
||||
return path
|
||||
return None
|
||||
|
||||
def run(
|
||||
self,
|
||||
*,
|
||||
surgery_id: str,
|
||||
uploaded_video_path: Path,
|
||||
original_filename: str = "video.mp4",
|
||||
candidate_consumables: list[str] | None = None,
|
||||
include_visualization: bool = False,
|
||||
) -> BatchRunResult:
|
||||
del original_filename, include_visualization
|
||||
bundle_dir = resolve_reference_bundle_dir(self._bundle_dir_override)
|
||||
uploaded_video_path = uploaded_video_path.resolve()
|
||||
digest = sha256_file(uploaded_video_path)
|
||||
candidates = resolve_reference_candidates(candidate_consumables)
|
||||
candidate_key = candidate_cache_key(candidates)
|
||||
|
||||
surgery_input_dir = self._root_dir / surgery_id / "input"
|
||||
surgery_input_dir.mkdir(parents=True, exist_ok=True)
|
||||
surgery_input = surgery_input_dir / f"{digest[:12]}.mp4"
|
||||
ensure_batch_pipeline_input_video(
|
||||
source_path=uploaded_video_path,
|
||||
dest_path=surgery_input,
|
||||
)
|
||||
|
||||
cache_dir = self._root_dir / "cache" / digest / candidate_key
|
||||
job = prepare_batch_job(
|
||||
bundle_dir=self._bundle_dir_override,
|
||||
cache_dir=cache_dir,
|
||||
uploaded_video_path=uploaded_video_path,
|
||||
candidate_consumables=candidates,
|
||||
)
|
||||
|
||||
reused_cache = job.output_path.is_file() and is_reference_result_complete(job.output_path)
|
||||
if reused_cache:
|
||||
logger.info(
|
||||
"reference batch cache hit digest={} candidate_key={} ({})",
|
||||
digest[:12],
|
||||
candidate_key,
|
||||
job.output_path,
|
||||
)
|
||||
else:
|
||||
logger.info(
|
||||
"reference batch starting for surgery_id={} (5.15/main.py, work_dir={})",
|
||||
surgery_id,
|
||||
job.work_dir,
|
||||
)
|
||||
run_batch_main(
|
||||
bundle_dir=bundle_dir,
|
||||
config_path=job.config_path.resolve(),
|
||||
work_dir=job.work_dir.resolve(),
|
||||
output_path=job.output_path.resolve(),
|
||||
)
|
||||
if not is_reference_result_complete(job.output_path):
|
||||
raise RuntimeError(
|
||||
f"reference bundle finished but result.tsv is incomplete: {job.output_path}"
|
||||
)
|
||||
logger.info(
|
||||
"reference batch complete for surgery_id={} ({})",
|
||||
surgery_id,
|
||||
job.output_path,
|
||||
)
|
||||
|
||||
doctor = parse_reference_doctor_info(job.output_path)
|
||||
details = parse_reference_tsv(job.output_path, doctor=doctor)
|
||||
return BatchRunResult(
|
||||
video_sha256=digest,
|
||||
candidate_cache_key=candidate_key,
|
||||
input_path=surgery_input,
|
||||
work_dir=job.work_dir,
|
||||
output_path=job.output_path,
|
||||
details=details,
|
||||
reused_cache=reused_cache,
|
||||
doctor=doctor,
|
||||
visualization_path=None,
|
||||
)
|
||||
Reference in New Issue
Block a user