"""Offline batch orchestration: spawn 5.15 main.py and optional visualization subprocess."""

from __future__ import annotations

from dataclasses import dataclass
from pathlib import Path

from loguru import logger

from app.algo_host.bundle import default_reference_bundle_dir, resolve_reference_bundle_dir
from app.algo_host.job_workspace import prepare_batch_job
from app.algo_host.result_adapter import (
    ReferenceDoctorInfo,
    candidate_cache_key,
    is_reference_result_complete,
    parse_reference_doctor_info,
    parse_reference_tsv,
    resolve_reference_candidates,
    sha256_file,
)
from app.algo_host.subprocess_runner import run_batch_main, run_visualization_script
from app.algo_host.transcode import (
    is_browser_compatible_mp4,
    is_readable_mp4,
    publish_labeled_video_for_browser,
    stage_batch_pipeline_input,
)
from app.domain.consumption import SurgeryConsumptionStored
from app.services.video_batch_cleanup import (
    RAW_VISUALIZATION_FILENAME,
    purge_visualization_artifacts,
    purge_visualization_pending,
    visualization_output_path,
    visualization_pending_input_path,
    visualization_pending_result_path,
)


@dataclass(frozen=True)
class BatchRunResult:
    """Immutable outcome of one ``BatchAlgorithmService.run`` invocation."""

    # SHA-256 digest of the uploaded video, as returned by ``sha256_file``.
    video_sha256: str
    # Key derived from the resolved candidate list via ``candidate_cache_key``.
    candidate_cache_key: str
    # Staged copy of the uploaded video that the batch pipeline reads.
    input_path: Path
    # Working directory of the prepared batch job.
    work_dir: Path
    # Result file of the batch job (the file the details are parsed from).
    output_path: Path
    # Consumption rows parsed from ``output_path`` by ``parse_reference_tsv``.
    details: list[SurgeryConsumptionStored]
    # True when a complete result already existed and the batch was not re-run.
    reused_cache: bool
    # Doctor info parsed from ``output_path``; None when not provided.
    doctor: ReferenceDoctorInfo | None = None
    # Path of a rendered visualization; ``run`` always leaves this as None.
    visualization_path: Path | None = None
def default_batch_root_dir() -> Path:
    """Return the default batch root: ``<repo_root>/logs/video_batch``.

    ``repo_root`` is taken as the third ancestor directory of this file's
    resolved location (``parents[2]``). The directory is not created here.
    """
    repo_root = Path(__file__).resolve().parents[2]
    return repo_root / "logs" / "video_batch"
class BatchAlgorithmService:
    """Run the reference batch bundle on uploaded videos and render visualizations.

    Batch results are stored under ``<root_dir>/cache/<video sha256>/<candidate key>``
    so a complete result for the same video and candidate set is reused
    instead of re-running the batch.
    """

    def __init__(
        self,
        *,
        bundle_dir: Path | None = None,
        root_dir: Path | None = None,
    ) -> None:
        """Store the optional bundle override and the batch root directory.

        Args:
            bundle_dir: Explicit reference bundle directory; when None the
                bundle helpers choose the location.
            root_dir: Root directory for batch data; falls back to
                ``default_batch_root_dir()`` when not given.
        """
        self._bundle_dir_override = bundle_dir
        self._root_dir = root_dir or default_batch_root_dir()

    @property
    def bundle_dir(self) -> Path:
        """Resolved override directory, or the default reference bundle directory."""
        if self._bundle_dir_override is not None:
            return Path(self._bundle_dir_override).expanduser().resolve()
        return default_reference_bundle_dir()

    @property
    def root_dir(self) -> Path:
        """Root directory under which batch data is stored."""
        return self._root_dir

    def _generate_visualization(
        self,
        *,
        bundle_dir: Path,
        video_path: Path,
        result_path: Path,
        output_video_path: Path,
    ) -> Path | None:
        """Produce the browser-playable visualization for one result.

        An existing browser-compatible output is returned as is. Otherwise a
        readable raw render left next to the output is re-published, and only
        if that is not possible is ``visualize_result_video.py`` run again.

        Args:
            bundle_dir: Directory containing ``visualize_result_video.py``.
            video_path: Input video handed to the visualization script.
            result_path: Batch result file handed to the visualization script.
            output_video_path: Destination of the browser-compatible video.

        Returns:
            ``output_video_path`` on success, otherwise None (missing script
            or inputs, incomplete result, script failure, unreadable render,
            or failed browser publish).
        """
        raw_video_path = output_video_path.with_name(RAW_VISUALIZATION_FILENAME)
        script_path = bundle_dir / "visualize_result_video.py"
        if not script_path.is_file():
            logger.warning("reference visualization script not found: {}", script_path)
            return None
        if not video_path.is_file() or not result_path.is_file():
            return None
        if not is_reference_result_complete(result_path):
            logger.warning("skip visualization: incomplete result {}", result_path)
            return None

        # Probe the existing output once: keep it when playable, drop it otherwise.
        if output_video_path.is_file():
            if is_browser_compatible_mp4(output_video_path):
                return output_video_path
            output_video_path.unlink(missing_ok=True)

        # Probe an existing raw render once: reuse it when readable, drop it otherwise.
        if raw_video_path.is_file():
            if is_readable_mp4(raw_video_path):
                logger.info(
                    "reusing existing labeled visualization for browser publish: {}",
                    raw_video_path,
                )
                # NOTE(review): unlike the fresh-render path below, this path does
                # not call purge_visualization_artifacts after a successful
                # publish - confirm that is intended.
                if publish_labeled_video_for_browser(
                    labeled_source=raw_video_path,
                    browser_output=output_video_path,
                ):
                    return output_video_path
                logger.warning(
                    "browser publish from existing labeled source failed; regenerating visualization: {}",
                    raw_video_path,
                )
            raw_video_path.unlink(missing_ok=True)

        try:
            run_visualization_script(
                bundle_dir=bundle_dir,
                video_path=video_path,
                result_path=result_path,
                raw_output_video_path=raw_video_path,
            )
        except RuntimeError as exc:
            logger.error("reference visualization failed: {}", exc)
            return None

        if not is_readable_mp4(raw_video_path):
            logger.error("reference visualization produced unreadable mp4: {}", raw_video_path)
            return None

        if publish_labeled_video_for_browser(
            labeled_source=raw_video_path,
            browser_output=output_video_path,
        ):
            purge_visualization_artifacts(output_video_path.parent)
            return output_video_path

        logger.error("labeled visualization browser publish failed: {}", output_video_path)
        return None

    def finalize_visualization(
        self,
        *,
        surgery_id: str,
        video_path: Path | None = None,
        result_path: Path | None = None,
    ) -> Path | None:
        """Render the visualization for ``surgery_id`` and clear its pending inputs.

        Args:
            surgery_id: Surgery whose visualization is produced.
            video_path: Input video; defaults to the pending input path for
                ``surgery_id`` under the batch root.
            result_path: Batch result file; defaults to the pending result
                path for ``surgery_id`` under the batch root.

        Returns:
            Path of the browser-compatible visualization, or None when it was
            skipped (incomplete result, missing video) or failed.
        """
        logger.info(
            "video batch visualization starting for surgery_id={} (visualize_result_video.py)",
            surgery_id,
        )
        video_path = (video_path or visualization_pending_input_path(self._root_dir, surgery_id)).resolve()
        result_path = (result_path or visualization_pending_result_path(self._root_dir, surgery_id)).resolve()
        output_video_path = visualization_output_path(self._root_dir, surgery_id)

        if not is_reference_result_complete(result_path):
            logger.warning("skip visualization: incomplete result {}", result_path)
            purge_visualization_pending(self._root_dir, surgery_id)
            return None
        if not video_path.is_file():
            logger.warning("skip visualization: missing staged video {}", video_path)
            purge_visualization_pending(self._root_dir, surgery_id)
            return None

        bundle_dir = resolve_reference_bundle_dir(self._bundle_dir_override)
        output_video_path.parent.mkdir(parents=True, exist_ok=True)
        vis_path = self._generate_visualization(
            bundle_dir=bundle_dir,
            video_path=video_path,
            result_path=result_path,
            output_video_path=output_video_path,
        )
        # Pending inputs are purged whether or not rendering succeeded.
        purge_visualization_pending(self._root_dir, surgery_id)

        if vis_path is not None:
            logger.info(
                "video batch visualization complete for surgery_id={} ({})",
                surgery_id,
                vis_path,
            )
        else:
            logger.warning("video batch visualization failed for surgery_id={}", surgery_id)
        return vis_path

    def latest_visualization_path(self, surgery_id: str) -> Path | None:
        """Return the visualization for ``surgery_id`` if it is usable, else None.

        Usable means the file exists, is non-empty, and passes
        ``is_browser_compatible_mp4``.
        """
        path = visualization_output_path(self._root_dir, surgery_id)
        if path.is_file() and path.stat().st_size > 0 and is_browser_compatible_mp4(path):
            return path
        return None

    def run(
        self,
        *,
        surgery_id: str,
        uploaded_video_path: Path,
        original_filename: str = "video.mp4",
        candidate_consumables: list[str] | None = None,
        include_visualization: bool = False,
    ) -> BatchRunResult:
        """Run (or reuse) the reference batch for an uploaded video.

        Args:
            surgery_id: Identifier used only in log messages here.
            uploaded_video_path: Video to analyze; hashed to form the cache key.
            original_filename: Accepted for call compatibility; ignored.
            candidate_consumables: Optional candidate names, normalized by
                ``resolve_reference_candidates``.
            include_visualization: Accepted for call compatibility; ignored.
                ``run`` never renders a visualization.

        Returns:
            A ``BatchRunResult`` whose ``visualization_path`` is always None.

        Raises:
            RuntimeError: The batch finished but its result file is incomplete.
        """
        del original_filename, include_visualization
        bundle_dir = resolve_reference_bundle_dir(self._bundle_dir_override)
        uploaded_video_path = uploaded_video_path.resolve()
        digest = sha256_file(uploaded_video_path)
        candidates = resolve_reference_candidates(candidate_consumables)
        candidate_key = candidate_cache_key(candidates)

        # Staged input is shared by every candidate set of the same video.
        pipeline_video = (
            self._root_dir
            / "cache"
            / digest
            / "input"
            / f"pipeline{uploaded_video_path.suffix or '.mp4'}"
        )
        stage_batch_pipeline_input(
            source_path=uploaded_video_path,
            dest_path=pipeline_video,
        )

        cache_dir = self._root_dir / "cache" / digest / candidate_key
        # NOTE(review): receives the raw override rather than the resolved
        # ``bundle_dir`` above - presumably prepare_batch_job resolves it itself.
        job = prepare_batch_job(
            bundle_dir=self._bundle_dir_override,
            cache_dir=cache_dir,
            pipeline_video_path=pipeline_video,
            candidate_consumables=candidates,
        )

        reused_cache = job.output_path.is_file() and is_reference_result_complete(job.output_path)
        if reused_cache:
            logger.info(
                "reference batch cache hit digest={} candidate_key={} ({})",
                digest[:12],
                candidate_key,
                job.output_path,
            )
        else:
            logger.info(
                "reference batch starting for surgery_id={} (5.15/main.py, work_dir={})",
                surgery_id,
                job.work_dir,
            )
            run_batch_main(
                bundle_dir=bundle_dir,
                config_path=job.config_path.resolve(),
                work_dir=job.work_dir.resolve(),
                output_path=job.output_path.resolve(),
            )
            if not is_reference_result_complete(job.output_path):
                raise RuntimeError(
                    f"reference bundle finished but result.tsv is incomplete: {job.output_path}"
                )
            logger.info(
                "reference batch complete for surgery_id={} ({})",
                surgery_id,
                job.output_path,
            )

        doctor = parse_reference_doctor_info(job.output_path)
        details = parse_reference_tsv(job.output_path, doctor=doctor)
        return BatchRunResult(
            video_sha256=digest,
            candidate_cache_key=candidate_key,
            input_path=pipeline_video,
            work_dir=job.work_dir,
            output_path=job.output_path,
            details=details,
            reused_cache=reused_cache,
            doctor=doctor,
            visualization_path=None,
        )