update minio port
This commit is contained in:
24
backend/app/algo_host/__init__.py
Normal file
24
backend/app/algo_host/__init__.py
Normal file
@@ -0,0 +1,24 @@
|
||||
"""Thin orchestration around algorithm_subprocesses reference bundles (no in-process inference)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from app.algo_host.result_adapter import ReferenceDoctorInfo
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from app.algo_host.batch_service import BatchAlgorithmService, BatchRunResult
|
||||
|
||||
__all__ = ["BatchAlgorithmService", "BatchRunResult", "ReferenceDoctorInfo"]
|
||||
|
||||
|
||||
def __getattr__(name: str):
|
||||
if name == "BatchAlgorithmService":
|
||||
from app.algo_host.batch_service import BatchAlgorithmService
|
||||
|
||||
return BatchAlgorithmService
|
||||
if name == "BatchRunResult":
|
||||
from app.algo_host.batch_service import BatchRunResult
|
||||
|
||||
return BatchRunResult
|
||||
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
||||
250
backend/app/algo_host/batch_service.py
Normal file
250
backend/app/algo_host/batch_service.py
Normal file
@@ -0,0 +1,250 @@
|
||||
"""Offline batch orchestration: spawn 5.15 main.py and optional visualization subprocess."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from app.algo_host.bundle import default_reference_bundle_dir, resolve_reference_bundle_dir
|
||||
from app.algo_host.job_workspace import prepare_batch_job
|
||||
from app.algo_host.result_adapter import (
|
||||
ReferenceDoctorInfo,
|
||||
candidate_cache_key,
|
||||
is_reference_result_complete,
|
||||
parse_reference_doctor_info,
|
||||
parse_reference_tsv,
|
||||
resolve_reference_candidates,
|
||||
sha256_file,
|
||||
)
|
||||
from app.algo_host.subprocess_runner import run_batch_main, run_visualization_script
|
||||
from app.algo_host.transcode import (
|
||||
ensure_batch_pipeline_input_video,
|
||||
is_browser_compatible_mp4,
|
||||
is_readable_mp4,
|
||||
transcode_visualization_for_browser,
|
||||
)
|
||||
from app.domain.consumption import SurgeryConsumptionStored
|
||||
from app.services.video_batch_cleanup import (
|
||||
RAW_VISUALIZATION_FILENAME,
|
||||
purge_visualization_artifacts,
|
||||
purge_visualization_pending,
|
||||
visualization_output_path,
|
||||
visualization_pending_input_path,
|
||||
visualization_pending_result_path,
|
||||
)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class BatchRunResult:
|
||||
video_sha256: str
|
||||
candidate_cache_key: str
|
||||
input_path: Path
|
||||
work_dir: Path
|
||||
output_path: Path
|
||||
details: list[SurgeryConsumptionStored]
|
||||
reused_cache: bool
|
||||
doctor: ReferenceDoctorInfo | None = None
|
||||
visualization_path: Path | None = None
|
||||
|
||||
|
||||
def default_batch_root_dir() -> Path:
|
||||
repo_root = Path(__file__).resolve().parents[2]
|
||||
return repo_root / "logs" / "video_batch"
|
||||
|
||||
|
||||
class BatchAlgorithmService:
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
bundle_dir: Path | None = None,
|
||||
root_dir: Path | None = None,
|
||||
) -> None:
|
||||
self._bundle_dir_override = bundle_dir
|
||||
self._root_dir = root_dir or default_batch_root_dir()
|
||||
|
||||
@property
|
||||
def bundle_dir(self) -> Path:
|
||||
if self._bundle_dir_override is not None:
|
||||
return Path(self._bundle_dir_override).expanduser().resolve()
|
||||
return default_reference_bundle_dir()
|
||||
|
||||
@property
|
||||
def root_dir(self) -> Path:
|
||||
return self._root_dir
|
||||
|
||||
def _generate_visualization(
|
||||
self,
|
||||
*,
|
||||
bundle_dir: Path,
|
||||
video_path: Path,
|
||||
result_path: Path,
|
||||
output_video_path: Path,
|
||||
) -> Path | None:
|
||||
raw_video_path = output_video_path.with_name(RAW_VISUALIZATION_FILENAME)
|
||||
script_path = bundle_dir / "visualize_result_video.py"
|
||||
if not script_path.is_file():
|
||||
logger.warning("reference visualization script not found: {}", script_path)
|
||||
return None
|
||||
if not video_path.is_file() or not result_path.is_file():
|
||||
return None
|
||||
if output_video_path.is_file() and is_browser_compatible_mp4(output_video_path):
|
||||
return output_video_path
|
||||
if raw_video_path.is_file() and not is_readable_mp4(raw_video_path):
|
||||
raw_video_path.unlink(missing_ok=True)
|
||||
if output_video_path.is_file() and not is_browser_compatible_mp4(output_video_path):
|
||||
output_video_path.unlink(missing_ok=True)
|
||||
if raw_video_path.is_file() and is_readable_mp4(raw_video_path):
|
||||
logger.info(
|
||||
"reusing existing visualization source for transcode: {}",
|
||||
raw_video_path,
|
||||
)
|
||||
if transcode_visualization_for_browser(raw_video_path, output_video_path):
|
||||
return output_video_path
|
||||
logger.warning(
|
||||
"transcode from existing source failed; regenerating visualization: {}",
|
||||
raw_video_path,
|
||||
)
|
||||
raw_video_path.unlink(missing_ok=True)
|
||||
try:
|
||||
run_visualization_script(
|
||||
bundle_dir=bundle_dir,
|
||||
video_path=video_path,
|
||||
result_path=result_path,
|
||||
raw_output_video_path=raw_video_path,
|
||||
)
|
||||
except RuntimeError as exc:
|
||||
logger.error("reference visualization failed: {}", exc)
|
||||
return None
|
||||
if not is_readable_mp4(raw_video_path):
|
||||
logger.error("reference visualization produced unreadable mp4: {}", raw_video_path)
|
||||
return None
|
||||
if transcode_visualization_for_browser(raw_video_path, output_video_path):
|
||||
purge_visualization_artifacts(output_video_path.parent)
|
||||
return output_video_path
|
||||
logger.error("reference visualization transcode to browser mp4 failed: {}", output_video_path)
|
||||
return None
|
||||
|
||||
def finalize_visualization(
|
||||
self,
|
||||
*,
|
||||
surgery_id: str,
|
||||
video_path: Path | None = None,
|
||||
result_path: Path | None = None,
|
||||
) -> Path | None:
|
||||
logger.info(
|
||||
"video batch visualization starting for surgery_id={} (visualize_result_video.py)",
|
||||
surgery_id,
|
||||
)
|
||||
video_path = (video_path or visualization_pending_input_path(self._root_dir, surgery_id)).resolve()
|
||||
result_path = (result_path or visualization_pending_result_path(self._root_dir, surgery_id)).resolve()
|
||||
output_video_path = visualization_output_path(self._root_dir, surgery_id)
|
||||
if not is_reference_result_complete(result_path):
|
||||
logger.warning("skip visualization: incomplete result {}", result_path)
|
||||
purge_visualization_pending(self._root_dir, surgery_id)
|
||||
return None
|
||||
if not video_path.is_file():
|
||||
logger.warning("skip visualization: missing staged video {}", video_path)
|
||||
purge_visualization_pending(self._root_dir, surgery_id)
|
||||
return None
|
||||
bundle_dir = resolve_reference_bundle_dir(self._bundle_dir_override)
|
||||
output_video_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
vis_path = self._generate_visualization(
|
||||
bundle_dir=bundle_dir,
|
||||
video_path=video_path,
|
||||
result_path=result_path,
|
||||
output_video_path=output_video_path,
|
||||
)
|
||||
purge_visualization_pending(self._root_dir, surgery_id)
|
||||
if vis_path is not None:
|
||||
logger.info(
|
||||
"video batch visualization complete for surgery_id={} ({})",
|
||||
surgery_id,
|
||||
vis_path,
|
||||
)
|
||||
else:
|
||||
logger.warning("video batch visualization failed for surgery_id={}", surgery_id)
|
||||
return vis_path
|
||||
|
||||
def latest_visualization_path(self, surgery_id: str) -> Path | None:
|
||||
path = visualization_output_path(self._root_dir, surgery_id)
|
||||
if path.is_file() and path.stat().st_size > 0 and is_browser_compatible_mp4(path):
|
||||
return path
|
||||
return None
|
||||
|
||||
def run(
|
||||
self,
|
||||
*,
|
||||
surgery_id: str,
|
||||
uploaded_video_path: Path,
|
||||
original_filename: str = "video.mp4",
|
||||
candidate_consumables: list[str] | None = None,
|
||||
include_visualization: bool = False,
|
||||
) -> BatchRunResult:
|
||||
del original_filename, include_visualization
|
||||
bundle_dir = resolve_reference_bundle_dir(self._bundle_dir_override)
|
||||
uploaded_video_path = uploaded_video_path.resolve()
|
||||
digest = sha256_file(uploaded_video_path)
|
||||
candidates = resolve_reference_candidates(candidate_consumables)
|
||||
candidate_key = candidate_cache_key(candidates)
|
||||
|
||||
surgery_input_dir = self._root_dir / surgery_id / "input"
|
||||
surgery_input_dir.mkdir(parents=True, exist_ok=True)
|
||||
surgery_input = surgery_input_dir / f"{digest[:12]}.mp4"
|
||||
ensure_batch_pipeline_input_video(
|
||||
source_path=uploaded_video_path,
|
||||
dest_path=surgery_input,
|
||||
)
|
||||
|
||||
cache_dir = self._root_dir / "cache" / digest / candidate_key
|
||||
job = prepare_batch_job(
|
||||
bundle_dir=self._bundle_dir_override,
|
||||
cache_dir=cache_dir,
|
||||
uploaded_video_path=uploaded_video_path,
|
||||
candidate_consumables=candidates,
|
||||
)
|
||||
|
||||
reused_cache = job.output_path.is_file() and is_reference_result_complete(job.output_path)
|
||||
if reused_cache:
|
||||
logger.info(
|
||||
"reference batch cache hit digest={} candidate_key={} ({})",
|
||||
digest[:12],
|
||||
candidate_key,
|
||||
job.output_path,
|
||||
)
|
||||
else:
|
||||
logger.info(
|
||||
"reference batch starting for surgery_id={} (5.15/main.py, work_dir={})",
|
||||
surgery_id,
|
||||
job.work_dir,
|
||||
)
|
||||
run_batch_main(
|
||||
bundle_dir=bundle_dir,
|
||||
config_path=job.config_path.resolve(),
|
||||
work_dir=job.work_dir.resolve(),
|
||||
output_path=job.output_path.resolve(),
|
||||
)
|
||||
if not is_reference_result_complete(job.output_path):
|
||||
raise RuntimeError(
|
||||
f"reference bundle finished but result.tsv is incomplete: {job.output_path}"
|
||||
)
|
||||
logger.info(
|
||||
"reference batch complete for surgery_id={} ({})",
|
||||
surgery_id,
|
||||
job.output_path,
|
||||
)
|
||||
|
||||
doctor = parse_reference_doctor_info(job.output_path)
|
||||
details = parse_reference_tsv(job.output_path, doctor=doctor)
|
||||
return BatchRunResult(
|
||||
video_sha256=digest,
|
||||
candidate_cache_key=candidate_key,
|
||||
input_path=surgery_input,
|
||||
work_dir=job.work_dir,
|
||||
output_path=job.output_path,
|
||||
details=details,
|
||||
reused_cache=reused_cache,
|
||||
doctor=doctor,
|
||||
visualization_path=None,
|
||||
)
|
||||
59
backend/app/algo_host/bundle.py
Normal file
59
backend/app/algo_host/bundle.py
Normal file
@@ -0,0 +1,59 @@
|
||||
"""Resolve reference algorithm bundle paths and default YAML (read-only; no vendor patches)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import copy
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import yaml
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[2]
|
||||
DEFAULT_REFERENCE_BUNDLE_RELATIVE = "algorithm_subprocesses/5.15"
|
||||
|
||||
|
||||
def configured_reference_bundle_relative() -> str:
|
||||
from app.config import Settings
|
||||
|
||||
raw = (Settings().reference_bundle_relative or "").strip()
|
||||
return raw or DEFAULT_REFERENCE_BUNDLE_RELATIVE
|
||||
|
||||
|
||||
def default_reference_bundle_dir() -> Path:
|
||||
raw = configured_reference_bundle_relative()
|
||||
path = Path(raw).expanduser()
|
||||
if path.is_absolute():
|
||||
return path.resolve()
|
||||
return (REPO_ROOT / path).resolve()
|
||||
|
||||
|
||||
def resolve_reference_bundle_dir(bundle_dir: Path | None = None) -> Path:
|
||||
if bundle_dir is None:
|
||||
label = configured_reference_bundle_relative()
|
||||
root = default_reference_bundle_dir()
|
||||
else:
|
||||
label = str(bundle_dir)
|
||||
root = Path(bundle_dir).expanduser().resolve()
|
||||
if not (root / "main.py").is_file():
|
||||
raise FileNotFoundError(f"reference bundle main.py not found: {label} -> {root}")
|
||||
if not (root / "code" / "repo_root.py").is_file():
|
||||
raise FileNotFoundError(f"reference bundle vendor code not found: {label} -> {root / 'code'}")
|
||||
return root
|
||||
|
||||
|
||||
def load_reference_default_config(bundle_dir: Path | None = None) -> dict[str, Any]:
|
||||
root = resolve_reference_bundle_dir(bundle_dir)
|
||||
path = root / "configs" / "default_config.yaml"
|
||||
data = yaml.safe_load(path.read_text(encoding="utf-8"))
|
||||
if not isinstance(data, dict):
|
||||
raise ValueError(f"invalid reference bundle default config: {path}")
|
||||
return copy.deepcopy(data)
|
||||
|
||||
|
||||
def resolve_bundle_relative_path(bundle_dir: Path, raw: str) -> Path:
|
||||
p = Path((raw or "").strip())
|
||||
if not str(p):
|
||||
raise ValueError("empty bundle-relative path")
|
||||
if p.is_absolute():
|
||||
return p.resolve()
|
||||
return (bundle_dir / p).resolve()
|
||||
120
backend/app/algo_host/job_workspace.py
Normal file
120
backend/app/algo_host/job_workspace.py
Normal file
@@ -0,0 +1,120 @@
|
||||
"""Prepare input artifacts expected by algorithm_subprocesses/5.15 main.py."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import copy
|
||||
import json
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
|
||||
from app.algo_host.bundle import load_reference_default_config, resolve_reference_bundle_dir
|
||||
from app.algo_host.transcode import ensure_batch_pipeline_input_video
|
||||
from app.consumable_catalog import build_name_mapping
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class BatchJobFiles:
|
||||
config_path: Path
|
||||
excel_path: Path
|
||||
whitelist_path: Path
|
||||
output_path: Path
|
||||
work_dir: Path
|
||||
input_video_path: Path
|
||||
|
||||
|
||||
def write_reference_catalog_excel(
|
||||
path: Path,
|
||||
*,
|
||||
candidate_consumables: list[str],
|
||||
) -> None:
|
||||
import pandas as pd
|
||||
|
||||
name_to_code = build_name_mapping(candidate_consumables)
|
||||
rows = [
|
||||
{
|
||||
"序号": idx,
|
||||
"产品编码": name_to_code.get(name, name),
|
||||
"商品名称": name,
|
||||
}
|
||||
for idx, name in enumerate(candidate_consumables, start=1)
|
||||
]
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
pd.DataFrame(rows, columns=["序号", "产品编码", "商品名称"]).to_excel(path, index=False)
|
||||
|
||||
|
||||
def write_reference_whitelist_json(path: Path, *, candidate_consumables: list[str]) -> None:
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
path.write_text(
|
||||
json.dumps({"allowed_names": candidate_consumables}, ensure_ascii=False, indent=2),
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
|
||||
def build_job_config(
|
||||
*,
|
||||
bundle_dir: Path,
|
||||
video_path: Path,
|
||||
output_path: Path,
|
||||
work_dir: Path,
|
||||
excel_path: Path,
|
||||
whitelist_path: Path,
|
||||
) -> dict:
|
||||
cfg = copy.deepcopy(load_reference_default_config(bundle_dir))
|
||||
cfg["io"]["video"] = str(video_path.resolve())
|
||||
cfg["io"]["excel"] = str(excel_path.resolve())
|
||||
cfg["io"]["out"] = str(output_path.resolve())
|
||||
cfg["io"]["whitelist_json"] = str(whitelist_path.resolve())
|
||||
cfg["runtime"]["work_dir"] = str(work_dir.resolve())
|
||||
cfg["runtime"]["keep_work_dir"] = False
|
||||
return cfg
|
||||
|
||||
|
||||
def prepare_batch_job(
|
||||
*,
|
||||
bundle_dir: Path | None,
|
||||
cache_dir: Path,
|
||||
uploaded_video_path: Path,
|
||||
candidate_consumables: list[str],
|
||||
) -> BatchJobFiles:
|
||||
root = resolve_reference_bundle_dir(bundle_dir)
|
||||
cache_input_dir = cache_dir / "input"
|
||||
cache_output_dir = cache_dir / "output"
|
||||
cache_work_dir = cache_dir / "work"
|
||||
cache_config_dir = cache_dir / "config"
|
||||
for d in (cache_input_dir, cache_output_dir, cache_work_dir, cache_config_dir):
|
||||
d.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
cache_input = cache_input_dir / "input.mp4"
|
||||
ensure_batch_pipeline_input_video(
|
||||
source_path=uploaded_video_path,
|
||||
dest_path=cache_input,
|
||||
)
|
||||
output_path = cache_output_dir / "result.tsv"
|
||||
excel_path = cache_config_dir / "商品信息表.xlsx"
|
||||
whitelist_path = cache_config_dir / "whitelist.json"
|
||||
config_path = cache_config_dir / "config.yaml"
|
||||
|
||||
write_reference_catalog_excel(excel_path, candidate_consumables=candidate_consumables)
|
||||
write_reference_whitelist_json(whitelist_path, candidate_consumables=candidate_consumables)
|
||||
config = build_job_config(
|
||||
bundle_dir=root,
|
||||
video_path=cache_input.resolve(),
|
||||
output_path=output_path.resolve(),
|
||||
work_dir=cache_work_dir.resolve(),
|
||||
excel_path=excel_path.resolve(),
|
||||
whitelist_path=whitelist_path.resolve(),
|
||||
)
|
||||
config_path.write_text(
|
||||
yaml.safe_dump(config, allow_unicode=True, sort_keys=False),
|
||||
encoding="utf-8",
|
||||
)
|
||||
return BatchJobFiles(
|
||||
config_path=config_path,
|
||||
excel_path=excel_path,
|
||||
whitelist_path=whitelist_path,
|
||||
output_path=output_path,
|
||||
work_dir=cache_work_dir,
|
||||
input_video_path=cache_input,
|
||||
)
|
||||
178
backend/app/algo_host/result_adapter.py
Normal file
178
backend/app/algo_host/result_adapter.py
Normal file
@@ -0,0 +1,178 @@
|
||||
"""Map algorithm_subprocesses/5.15 TSV output to domain objects (orchestration adapter only)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import hashlib
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from app.baked import pipeline as bp
|
||||
from app.consumable_catalog import (
|
||||
effective_candidate_consumables,
|
||||
normalize_candidate_consumables_raw,
|
||||
)
|
||||
from app.domain.consumption import SurgeryConsumptionStored
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ReferenceDoctorInfo:
|
||||
doctor_id: str
|
||||
doctor_name: str | None
|
||||
display: str
|
||||
raw_line: str
|
||||
|
||||
|
||||
_DOCTOR_NAME_ID_RE = re.compile(
|
||||
r"^(?P<name>.+?)\s*\(id=(?P<id>[^,\s)]+)(?:,\s*conf=[\d.]+)?\)\s*(?:\[低置信度\])?\s*$"
|
||||
)
|
||||
_DOCTOR_ID_ONLY_RE = re.compile(
|
||||
r"^doctor_id=(?P<id>[^\s(]+)(?:\s*\(conf=[\d.]+\))?\s*(?:\[低置信度\])?\s*$"
|
||||
)
|
||||
|
||||
|
||||
def sha256_file(path: Path) -> str:
|
||||
h = hashlib.sha256()
|
||||
with path.open("rb") as f:
|
||||
for chunk in iter(lambda: f.read(1024 * 1024), b""):
|
||||
h.update(chunk)
|
||||
return h.hexdigest()
|
||||
|
||||
|
||||
def candidate_cache_key(candidate_consumables: list[str]) -> str:
|
||||
raw = "\n".join(candidate_consumables).encode("utf-8")
|
||||
return hashlib.sha256(raw).hexdigest()[:12]
|
||||
|
||||
|
||||
def resolve_reference_candidates(candidate_consumables: list[str] | None) -> list[str]:
|
||||
requested = normalize_candidate_consumables_raw(list(candidate_consumables or []))
|
||||
return effective_candidate_consumables(requested)
|
||||
|
||||
|
||||
def parse_reference_doctor_info(path: Path) -> ReferenceDoctorInfo | None:
|
||||
if not path.is_file():
|
||||
return None
|
||||
raw_line = ""
|
||||
for line in path.read_text(encoding="utf-8").splitlines():
|
||||
stripped = line.strip()
|
||||
if stripped.startswith("医生信息:") or stripped.startswith("医生信息:"):
|
||||
raw_line = stripped
|
||||
break
|
||||
if not raw_line:
|
||||
return None
|
||||
|
||||
body = raw_line.split(":", 1)[-1].split(":", 1)[-1].strip()
|
||||
if not body or body == "未启用":
|
||||
return ReferenceDoctorInfo(
|
||||
doctor_id=bp.VIDEO_RESULT_DOCTOR_ID,
|
||||
doctor_name=None,
|
||||
display=body or "未启用",
|
||||
raw_line=raw_line,
|
||||
)
|
||||
if body.startswith("识别失败"):
|
||||
return ReferenceDoctorInfo(
|
||||
doctor_id=bp.VIDEO_RESULT_DOCTOR_ID,
|
||||
doctor_name=None,
|
||||
display=body,
|
||||
raw_line=raw_line,
|
||||
)
|
||||
|
||||
match = _DOCTOR_NAME_ID_RE.match(body)
|
||||
if match:
|
||||
name = match.group("name").strip()
|
||||
did = match.group("id").strip()
|
||||
return ReferenceDoctorInfo(
|
||||
doctor_id=did,
|
||||
doctor_name=name,
|
||||
display=f"{name} ({did})",
|
||||
raw_line=raw_line,
|
||||
)
|
||||
|
||||
match = _DOCTOR_ID_ONLY_RE.match(body)
|
||||
if match:
|
||||
did = match.group("id").strip()
|
||||
return ReferenceDoctorInfo(
|
||||
doctor_id=did,
|
||||
doctor_name=None,
|
||||
display=did,
|
||||
raw_line=raw_line,
|
||||
)
|
||||
|
||||
return ReferenceDoctorInfo(
|
||||
doctor_id=bp.VIDEO_RESULT_DOCTOR_ID,
|
||||
doctor_name=None,
|
||||
display=body,
|
||||
raw_line=raw_line,
|
||||
)
|
||||
|
||||
|
||||
def is_reference_result_complete(path: Path) -> bool:
|
||||
if not path.is_file() or path.stat().st_size <= 0:
|
||||
return False
|
||||
lines = [line.strip() for line in path.read_text(encoding="utf-8").splitlines() if line.strip()]
|
||||
if not any(line.lower().startswith("rank\t") for line in lines):
|
||||
return False
|
||||
has_doctor_footer = any(
|
||||
line.startswith("医生信息:") or line.startswith("医生信息:") for line in lines
|
||||
)
|
||||
has_segment_row = False
|
||||
for line in lines:
|
||||
if line.lower().startswith("rank\t"):
|
||||
continue
|
||||
if line.startswith("医生信息"):
|
||||
continue
|
||||
parts = line.split("\t")
|
||||
if len(parts) >= 5 and parts[0].strip().isdigit():
|
||||
has_segment_row = True
|
||||
break
|
||||
return has_doctor_footer and has_segment_row
|
||||
|
||||
|
||||
def doctor_id_for_consumption_rows(doctor: ReferenceDoctorInfo | None) -> str:
|
||||
if doctor is None:
|
||||
return bp.VIDEO_RESULT_DOCTOR_ID
|
||||
if doctor.doctor_name:
|
||||
return f"{doctor.doctor_name} ({doctor.doctor_id})"
|
||||
if doctor.doctor_id and doctor.doctor_id != bp.VIDEO_RESULT_DOCTOR_ID:
|
||||
return doctor.doctor_id
|
||||
return bp.VIDEO_RESULT_DOCTOR_ID
|
||||
|
||||
|
||||
def parse_reference_tsv(
|
||||
path: Path,
|
||||
*,
|
||||
base_timestamp: datetime | None = None,
|
||||
doctor: ReferenceDoctorInfo | None = None,
|
||||
) -> list[SurgeryConsumptionStored]:
|
||||
if base_timestamp is None:
|
||||
base_timestamp = datetime.now(timezone.utc)
|
||||
if doctor is None:
|
||||
doctor = parse_reference_doctor_info(path)
|
||||
row_doctor_id = doctor_id_for_consumption_rows(doctor)
|
||||
out: list[SurgeryConsumptionStored] = []
|
||||
with path.open("r", encoding="utf-8", newline="") as f:
|
||||
reader = csv.DictReader(f, delimiter="\t")
|
||||
for row in reader:
|
||||
name = (row.get("top1_name") or "").strip()
|
||||
if not name or name.startswith("("):
|
||||
continue
|
||||
if name.startswith("医生信息"):
|
||||
continue
|
||||
item_id = (row.get("product_id_top1") or "").strip() or name
|
||||
try:
|
||||
start_sec = float((row.get("start_sec") or "0").strip() or 0.0)
|
||||
except ValueError:
|
||||
start_sec = 0.0
|
||||
out.append(
|
||||
SurgeryConsumptionStored(
|
||||
item_id=item_id,
|
||||
item_name=name,
|
||||
qty=1,
|
||||
doctor_id=row_doctor_id,
|
||||
timestamp=base_timestamp + timedelta(seconds=max(0.0, start_sec)),
|
||||
source="video_batch",
|
||||
)
|
||||
)
|
||||
return out
|
||||
180
backend/app/algo_host/subprocess_runner.py
Normal file
180
backend/app/algo_host/subprocess_runner.py
Normal file
@@ -0,0 +1,180 @@
|
||||
"""Spawn reference bundle child processes (main.py, visualize_result_video.py)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from app.algo_host.bundle import load_reference_default_config, resolve_bundle_relative_path
|
||||
from app.algo_host.transcode import VISUALIZATION_MAX_WIDTH
|
||||
|
||||
|
||||
def build_reference_env() -> dict[str, str]:
|
||||
env = os.environ.copy()
|
||||
env["PYTHONFAULTHANDLER"] = "1"
|
||||
env["PYTHONUNBUFFERED"] = "1"
|
||||
return env
|
||||
|
||||
|
||||
def build_batch_main_command(*, bundle_dir: Path, config_path: Path) -> list[str]:
|
||||
return [
|
||||
"uv",
|
||||
"run",
|
||||
"python",
|
||||
"-X",
|
||||
"faulthandler",
|
||||
str(bundle_dir / "main.py"),
|
||||
"--config",
|
||||
str(config_path),
|
||||
]
|
||||
|
||||
|
||||
def build_visualization_command(
|
||||
*,
|
||||
bundle_dir: Path,
|
||||
video_path: Path,
|
||||
result_path: Path,
|
||||
output_video_path: Path,
|
||||
) -> list[str]:
|
||||
cfg = load_reference_default_config(bundle_dir)
|
||||
weights = cfg.get("weights") if isinstance(cfg.get("weights"), dict) else {}
|
||||
device_cfg = cfg.get("device") if isinstance(cfg.get("device"), dict) else {}
|
||||
hand_raw = str((weights or {}).get("hand") or "weights/hand_detect.pt").strip()
|
||||
hand_model = resolve_bundle_relative_path(bundle_dir, hand_raw)
|
||||
return [
|
||||
sys.executable,
|
||||
"-X",
|
||||
"faulthandler",
|
||||
str((bundle_dir / "visualize_result_video.py").resolve()),
|
||||
"--video",
|
||||
str(video_path.resolve()),
|
||||
"--result-txt",
|
||||
str(result_path.resolve()),
|
||||
"--hand-model",
|
||||
str(hand_model),
|
||||
"--out-video",
|
||||
str(output_video_path.resolve()),
|
||||
"--device",
|
||||
str(device_cfg.get("type") or "cuda"),
|
||||
"--max-width",
|
||||
str(VISUALIZATION_MAX_WIDTH),
|
||||
]
|
||||
|
||||
|
||||
def _signal_name(signum: int) -> str:
|
||||
try:
|
||||
return signal.Signals(signum).name
|
||||
except ValueError:
|
||||
return f"signal {signum}"
|
||||
|
||||
|
||||
def describe_batch_returncode(returncode: int) -> str:
|
||||
if returncode < 0:
|
||||
signum = -returncode
|
||||
return f"terminated by {_signal_name(signum)} ({signum})"
|
||||
if returncode > 128:
|
||||
wrapped = returncode - 256
|
||||
if wrapped < 0:
|
||||
signum = -wrapped
|
||||
return f"exit={returncode} (possibly propagated {wrapped}/{_signal_name(signum)})"
|
||||
return f"exit={returncode}"
|
||||
|
||||
|
||||
def format_batch_failure(
|
||||
returncode: int,
|
||||
*,
|
||||
stdout: str,
|
||||
stderr: str,
|
||||
work_dir: Path,
|
||||
output_path: Path,
|
||||
) -> str:
|
||||
chunks: list[str] = [
|
||||
describe_batch_returncode(returncode),
|
||||
f"work_dir={work_dir}",
|
||||
f"output={output_path}",
|
||||
]
|
||||
stdout = stdout.strip()
|
||||
stderr = stderr.strip()
|
||||
if stdout:
|
||||
chunks.append(f"stdout:\n{stdout[-3000:]}")
|
||||
if stderr:
|
||||
chunks.append(f"stderr:\n{stderr[-3000:]}")
|
||||
return "\n".join(chunks)
|
||||
|
||||
|
||||
def _log_subprocess_output(prefix: str, stdout: str, stderr: str, *, max_lines: int = 40) -> None:
|
||||
for label, text in (("stdout", stdout), ("stderr", stderr)):
|
||||
lines = [ln for ln in (text or "").splitlines() if ln.strip()]
|
||||
if not lines:
|
||||
continue
|
||||
tail = lines[-max_lines:] if len(lines) > max_lines else lines
|
||||
for line in tail:
|
||||
logger.info("{} {}", prefix, line)
|
||||
|
||||
|
||||
def run_subprocess(
|
||||
cmd: list[str],
|
||||
*,
|
||||
cwd: Path,
|
||||
work_dir: Path,
|
||||
output_path: Path,
|
||||
log_label: str,
|
||||
) -> None:
|
||||
proc = subprocess.run(
|
||||
cmd,
|
||||
cwd=str(cwd),
|
||||
check=False,
|
||||
text=True,
|
||||
capture_output=True,
|
||||
env=build_reference_env(),
|
||||
)
|
||||
if proc.returncode != 0:
|
||||
msg = format_batch_failure(
|
||||
proc.returncode,
|
||||
stdout=proc.stdout or "",
|
||||
stderr=proc.stderr or "",
|
||||
work_dir=work_dir,
|
||||
output_path=output_path,
|
||||
)
|
||||
raise RuntimeError(f"{log_label} failed {msg}")
|
||||
_log_subprocess_output(log_label, proc.stdout or "", proc.stderr or "")
|
||||
|
||||
|
||||
def run_batch_main(*, bundle_dir: Path, config_path: Path, work_dir: Path, output_path: Path) -> None:
|
||||
cmd = build_batch_main_command(bundle_dir=bundle_dir, config_path=config_path)
|
||||
logger.info("reference batch starting: {}", " ".join(cmd))
|
||||
run_subprocess(
|
||||
cmd,
|
||||
cwd=bundle_dir,
|
||||
work_dir=work_dir,
|
||||
output_path=output_path,
|
||||
log_label="reference batch",
|
||||
)
|
||||
|
||||
|
||||
def run_visualization_script(
|
||||
*,
|
||||
bundle_dir: Path,
|
||||
video_path: Path,
|
||||
result_path: Path,
|
||||
raw_output_video_path: Path,
|
||||
) -> None:
|
||||
cmd = build_visualization_command(
|
||||
bundle_dir=bundle_dir,
|
||||
video_path=video_path,
|
||||
result_path=result_path,
|
||||
output_video_path=raw_output_video_path,
|
||||
)
|
||||
logger.info("reference visualization starting: {}", " ".join(cmd))
|
||||
run_subprocess(
|
||||
cmd,
|
||||
cwd=bundle_dir,
|
||||
work_dir=raw_output_video_path.parent,
|
||||
output_path=raw_output_video_path,
|
||||
log_label="visualize",
|
||||
)
|
||||
276
backend/app/algo_host/transcode.py
Normal file
276
backend/app/algo_host/transcode.py
Normal file
@@ -0,0 +1,276 @@
|
||||
"""FFmpeg/ffprobe helpers for batch uploads and browser-ready MP4 (infrastructure only)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import shutil
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
from loguru import logger
|
||||
|
||||
VISUALIZATION_MAX_WIDTH = 1920
|
||||
|
||||
|
||||
def visualization_ffmpeg_scale_filter() -> str:
|
||||
return f"scale='min({VISUALIZATION_MAX_WIDTH},iw)':-2"
|
||||
|
||||
|
||||
def browser_transcode_tmp_path(output_path: Path) -> Path:
|
||||
return output_path.with_name(f"{output_path.stem}.part{output_path.suffix}")
|
||||
|
||||
|
||||
def is_readable_mp4(path: Path) -> bool:
|
||||
ffprobe = shutil.which("ffprobe")
|
||||
if ffprobe is None or not path.is_file() or path.stat().st_size < 4096:
|
||||
return False
|
||||
proc = subprocess.run(
|
||||
[
|
||||
ffprobe,
|
||||
"-v",
|
||||
"error",
|
||||
"-select_streams",
|
||||
"v:0",
|
||||
"-show_entries",
|
||||
"stream=codec_name",
|
||||
"-of",
|
||||
"csv=p=0",
|
||||
str(path),
|
||||
],
|
||||
check=False,
|
||||
text=True,
|
||||
capture_output=True,
|
||||
)
|
||||
return proc.returncode == 0 and bool((proc.stdout or "").strip())
|
||||
|
||||
|
||||
def ffprobe_fields(path: Path, entries: str) -> dict[str, str]:
|
||||
ffprobe = shutil.which("ffprobe")
|
||||
if ffprobe is None or not path.is_file():
|
||||
return {}
|
||||
proc = subprocess.run(
|
||||
[
|
||||
ffprobe,
|
||||
"-v",
|
||||
"error",
|
||||
"-select_streams",
|
||||
"v:0",
|
||||
"-show_entries",
|
||||
entries,
|
||||
"-of",
|
||||
"default=noprint_wrappers=1",
|
||||
str(path),
|
||||
],
|
||||
check=False,
|
||||
text=True,
|
||||
capture_output=True,
|
||||
)
|
||||
if proc.returncode != 0:
|
||||
return {}
|
||||
fields: dict[str, str] = {}
|
||||
for line in proc.stdout.splitlines():
|
||||
if "=" not in line:
|
||||
continue
|
||||
key, value = line.split("=", 1)
|
||||
fields[key.strip().lower()] = value.strip().lower()
|
||||
return fields
|
||||
|
||||
|
||||
def ffprobe_container_format(path: Path) -> str:
|
||||
ffprobe = shutil.which("ffprobe")
|
||||
if ffprobe is None or not path.is_file():
|
||||
return ""
|
||||
proc = subprocess.run(
|
||||
[
|
||||
ffprobe,
|
||||
"-v",
|
||||
"error",
|
||||
"-show_entries",
|
||||
"format=format_name",
|
||||
"-of",
|
||||
"default=noprint_wrappers=1:nokey=1",
|
||||
str(path),
|
||||
],
|
||||
check=False,
|
||||
text=True,
|
||||
capture_output=True,
|
||||
)
|
||||
if proc.returncode != 0:
|
||||
return ""
|
||||
return (proc.stdout or "").strip().lower()
|
||||
|
||||
|
||||
def is_browser_compatible_mp4(path: Path) -> bool:
|
||||
fields = ffprobe_fields(path, "stream=codec_name,pix_fmt")
|
||||
return fields.get("codec_name") == "h264" and fields.get("pix_fmt") in {"yuv420p", "yuvj420p"}
|
||||
|
||||
|
||||
def batch_input_needs_normalize(path: Path) -> bool:
|
||||
if not is_readable_mp4(path):
|
||||
return True
|
||||
if not is_browser_compatible_mp4(path):
|
||||
return True
|
||||
container = ffprobe_container_format(path)
|
||||
if container and "mpeg" in container:
|
||||
return True
|
||||
fields = ffprobe_fields(path, "stream=codec_name,width,height")
|
||||
try:
|
||||
width = int(fields.get("width") or "0")
|
||||
except ValueError:
|
||||
width = 0
|
||||
return width > 1920
|
||||
|
||||
|
||||
def normalize_batch_input_video(source_path: Path, output_path: Path) -> bool:
|
||||
ffmpeg = shutil.which("ffmpeg")
|
||||
if ffmpeg is None or not source_path.is_file():
|
||||
return False
|
||||
if not is_readable_mp4(source_path):
|
||||
logger.warning("skip batch input normalize: unreadable source {}", source_path)
|
||||
return False
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
tmp_path = browser_transcode_tmp_path(output_path)
|
||||
if tmp_path.exists():
|
||||
tmp_path.unlink()
|
||||
logger.info("ffmpeg batch input normalize starting: {} -> {}", source_path, output_path)
|
||||
proc = subprocess.run(
|
||||
[
|
||||
ffmpeg,
|
||||
"-y",
|
||||
"-hide_banner",
|
||||
"-loglevel",
|
||||
"error",
|
||||
"-fflags",
|
||||
"+genpts",
|
||||
"-i",
|
||||
str(source_path),
|
||||
"-map",
|
||||
"0:v:0",
|
||||
"-an",
|
||||
"-f",
|
||||
"mp4",
|
||||
"-c:v",
|
||||
"libx264",
|
||||
"-pix_fmt",
|
||||
"yuv420p",
|
||||
"-preset",
|
||||
"veryfast",
|
||||
"-crf",
|
||||
"23",
|
||||
"-vf",
|
||||
"scale='min(1920,iw)':-2",
|
||||
"-movflags",
|
||||
"+faststart",
|
||||
str(tmp_path),
|
||||
],
|
||||
check=False,
|
||||
text=True,
|
||||
capture_output=True,
|
||||
)
|
||||
if proc.returncode != 0:
|
||||
stderr = (proc.stderr or "").strip()
|
||||
logger.warning("ffmpeg batch input normalize failed: {}", stderr[-3000:])
|
||||
if tmp_path.exists():
|
||||
tmp_path.unlink()
|
||||
return False
|
||||
if not tmp_path.is_file() or tmp_path.stat().st_size <= 0:
|
||||
logger.warning("ffmpeg batch input normalize produced empty file: {}", tmp_path)
|
||||
if tmp_path.exists():
|
||||
tmp_path.unlink()
|
||||
return False
|
||||
tmp_path.replace(output_path)
|
||||
if not is_browser_compatible_mp4(output_path):
|
||||
logger.warning("ffmpeg batch input normalize output not h264/yuv420p: {}", output_path)
|
||||
output_path.unlink(missing_ok=True)
|
||||
return False
|
||||
logger.info(
|
||||
"ffmpeg batch input normalize complete: {} ({} bytes)",
|
||||
output_path,
|
||||
output_path.stat().st_size,
|
||||
)
|
||||
return True
|
||||
|
||||
|
||||
def ensure_batch_pipeline_input_video(*, source_path: Path, dest_path: Path) -> None:
|
||||
import shutil as sh
|
||||
|
||||
dest_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
if dest_path.is_file() and dest_path.stat().st_size > 0 and not batch_input_needs_normalize(dest_path):
|
||||
return
|
||||
if batch_input_needs_normalize(source_path):
|
||||
if normalize_batch_input_video(source_path, dest_path):
|
||||
return
|
||||
logger.warning(
|
||||
"batch input normalize failed, falling back to raw copy: {} -> {}",
|
||||
source_path,
|
||||
dest_path,
|
||||
)
|
||||
if not dest_path.is_file():
|
||||
sh.copy2(source_path, dest_path)
|
||||
|
||||
|
||||
def transcode_visualization_for_browser(source_path: Path, output_path: Path) -> bool:
|
||||
ffmpeg = shutil.which("ffmpeg")
|
||||
if ffmpeg is None or not source_path.is_file():
|
||||
return False
|
||||
if not is_readable_mp4(source_path):
|
||||
logger.warning("skip visualization transcode: unreadable source {}", source_path)
|
||||
return False
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
tmp_path = browser_transcode_tmp_path(output_path)
|
||||
if tmp_path.exists():
|
||||
tmp_path.unlink()
|
||||
logger.info("ffmpeg visualization transcode starting: {} -> {}", source_path, output_path)
|
||||
proc = subprocess.run(
|
||||
[
|
||||
ffmpeg,
|
||||
"-y",
|
||||
"-hide_banner",
|
||||
"-loglevel",
|
||||
"error",
|
||||
"-i",
|
||||
str(source_path),
|
||||
"-map",
|
||||
"0:v:0",
|
||||
"-an",
|
||||
"-f",
|
||||
"mp4",
|
||||
"-c:v",
|
||||
"libx264",
|
||||
"-pix_fmt",
|
||||
"yuv420p",
|
||||
"-preset",
|
||||
"ultrafast",
|
||||
"-crf",
|
||||
"23",
|
||||
"-vf",
|
||||
visualization_ffmpeg_scale_filter(),
|
||||
"-movflags",
|
||||
"+faststart",
|
||||
str(tmp_path),
|
||||
],
|
||||
check=False,
|
||||
text=True,
|
||||
capture_output=True,
|
||||
)
|
||||
if proc.returncode != 0:
|
||||
stderr = (proc.stderr or "").strip()
|
||||
logger.warning("ffmpeg visualization transcode failed: {}", stderr[-3000:])
|
||||
if tmp_path.exists():
|
||||
tmp_path.unlink()
|
||||
return False
|
||||
if not tmp_path.is_file() or tmp_path.stat().st_size <= 0:
|
||||
logger.warning("ffmpeg visualization transcode produced empty file: {}", tmp_path)
|
||||
if tmp_path.exists():
|
||||
tmp_path.unlink()
|
||||
return False
|
||||
tmp_path.replace(output_path)
|
||||
if not is_browser_compatible_mp4(output_path):
|
||||
logger.warning("ffmpeg output is not browser-compatible h264/yuv420p: {}", output_path)
|
||||
output_path.unlink(missing_ok=True)
|
||||
return False
|
||||
logger.info(
|
||||
"ffmpeg visualization transcode complete: {} ({} bytes)",
|
||||
output_path,
|
||||
output_path.stat().st_size,
|
||||
)
|
||||
return True
|
||||
Reference in New Issue
Block a user