update minio port
This commit is contained in:
178
backend/app/algo_host/result_adapter.py
Normal file
178
backend/app/algo_host/result_adapter.py
Normal file
@@ -0,0 +1,178 @@
|
||||
"""Map algorithm_subprocesses/5.15 TSV output to domain objects (orchestration adapter only)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import hashlib
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from app.baked import pipeline as bp
|
||||
from app.consumable_catalog import (
|
||||
effective_candidate_consumables,
|
||||
normalize_candidate_consumables_raw,
|
||||
)
|
||||
from app.domain.consumption import SurgeryConsumptionStored
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ReferenceDoctorInfo:
    """Immutable view of the doctor footer found in a reference result file.

    Attributes:
        doctor_id: Identifier taken from the footer, or the
            ``bp.VIDEO_RESULT_DOCTOR_ID`` placeholder when
            ``parse_reference_doctor_info`` could not extract one.
        doctor_name: Doctor name when the footer carries one, otherwise ``None``.
        display: Human-readable text derived from the footer body.
        raw_line: The stripped footer line exactly as it was read from the file.
    """

    doctor_id: str
    doctor_name: str | None
    display: str
    raw_line: str
|
||||
|
||||
|
||||
# Footer body that carries a doctor name followed by "(id=<id>)"; an optional
# ", conf=<number>" inside the parentheses and an optional trailing
# "[低置信度]" marker are tolerated. The adjacent raw-string fragments are
# concatenated by the parser into one pattern.
_DOCTOR_NAME_ID_RE = re.compile(
    r"^(?P<name>.+?)\s*"  # name, matched lazily up to the "(id=" group
    r"\(id=(?P<id>[^,\s)]+)"  # id: everything up to a comma, whitespace or ")"
    r"(?:,\s*conf=[\d.]+)?\)"  # optional confidence value, then the closing ")"
    r"\s*(?:\[低置信度\])?\s*$"  # optional marker, nothing else allowed after it
)

# Footer body of the form "doctor_id=<id>" with an optional "(conf=<number>)"
# and an optional trailing "[低置信度]" marker.
_DOCTOR_ID_ONLY_RE = re.compile(
    r"^doctor_id=(?P<id>[^\s(]+)"  # id: everything up to whitespace or "("
    r"(?:\s*\(conf=[\d.]+\))?"  # optional confidence value
    r"\s*(?:\[低置信度\])?\s*$"  # optional marker, nothing else allowed after it
)
|
||||
|
||||
|
||||
def sha256_file(path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at *path*.

    The file is consumed in 1 MiB pieces so it is never loaded into memory
    as a whole.
    """
    piece_size = 1024 * 1024
    digest = hashlib.sha256()
    with path.open("rb") as stream:
        while True:
            piece = stream.read(piece_size)
            if not piece:
                # An empty read marks end of file.
                break
            digest.update(piece)
    return digest.hexdigest()
|
||||
|
||||
|
||||
def candidate_cache_key(candidate_consumables: list[str]) -> str:
    """Return a 12-character cache key for a list of candidate consumables.

    The names are joined with newlines, UTF-8 encoded and hashed with
    SHA-256; the key is the first 12 hex characters of that digest. The key
    depends on the order of the list.
    """
    digest = hashlib.sha256()
    digest.update("\n".join(candidate_consumables).encode("utf-8"))
    return digest.hexdigest()[:12]
|
||||
|
||||
|
||||
def resolve_reference_candidates(candidate_consumables: list[str] | None) -> list[str]:
    """Normalize the requested candidates and return the effective list.

    ``None`` and an empty list are both treated as "nothing requested". A
    copy of the input is handed to ``normalize_candidate_consumables_raw``
    and its result is passed through ``effective_candidate_consumables``.
    """
    raw_request = list(candidate_consumables) if candidate_consumables else []
    normalized = normalize_candidate_consumables_raw(raw_request)
    return effective_candidate_consumables(normalized)
|
||||
|
||||
|
||||
def parse_reference_doctor_info(path: Path) -> ReferenceDoctorInfo | None:
    """Parse the doctor footer line ("医生信息: ...") of a reference result file.

    The first line that starts with the footer label followed by a colon
    (ASCII ``:`` or full-width ``\\uff1a``) is used; later footer lines are
    ignored.

    Args:
        path: Result file to inspect; read as UTF-8.

    Returns:
        ``None`` when *path* is not a file or contains no footer line.
        Otherwise a ``ReferenceDoctorInfo`` whose ``raw_line`` is the stripped
        footer line and whose other fields depend on the footer body:

        * empty or ``未启用``: placeholder id ``bp.VIDEO_RESULT_DOCTOR_ID``,
          no name, display ``未启用`` (or the body itself);
        * starts with ``识别失败``: placeholder id, display is the body;
        * ``<name> (id=<id>...)``: parsed name and id, display
          ``"<name> (<id>)"``;
        * ``doctor_id=<id>...``: parsed id, no name, display is the id;
        * anything else: placeholder id, display is the body.
    """
    if not path.is_file():
        return None

    label = "医生信息"
    # The footer label may be followed by an ASCII or a full-width colon.
    footer_prefixes = (label + ":", label + "\uff1a")

    raw_line = ""
    for line in path.read_text(encoding="utf-8").splitlines():
        stripped = line.strip()
        if stripped.startswith(footer_prefixes):
            raw_line = stripped
            break
    if not raw_line:
        return None

    # Remove only the label and its single separator character. Splitting on
    # every colon would also swallow the start of bodies that contain one
    # themselves (e.g. "识别失败: <reason>").
    body = raw_line[len(label) + 1 :].strip()

    def _unresolved(display: str) -> ReferenceDoctorInfo:
        # Footer present, but no usable doctor id: fall back to the placeholder.
        return ReferenceDoctorInfo(
            doctor_id=bp.VIDEO_RESULT_DOCTOR_ID,
            doctor_name=None,
            display=display,
            raw_line=raw_line,
        )

    if not body or body == "未启用":
        return _unresolved(body or "未启用")
    # Checked before the regexes so a failure message is never parsed as a name.
    if body.startswith("识别失败"):
        return _unresolved(body)

    match = _DOCTOR_NAME_ID_RE.match(body)
    if match:
        name = match.group("name").strip()
        did = match.group("id").strip()
        return ReferenceDoctorInfo(
            doctor_id=did,
            doctor_name=name,
            display=f"{name} ({did})",
            raw_line=raw_line,
        )

    match = _DOCTOR_ID_ONLY_RE.match(body)
    if match:
        did = match.group("id").strip()
        return ReferenceDoctorInfo(
            doctor_id=did,
            doctor_name=None,
            display=did,
            raw_line=raw_line,
        )

    return _unresolved(body)
|
||||
|
||||
|
||||
def is_reference_result_complete(path: Path) -> bool:
    """Report whether a reference result file looks fully written.

    A file counts as complete when it is non-empty and its non-blank lines
    (compared after stripping surrounding whitespace) include all of:

    * a header line starting with ``rank<TAB>`` (case-insensitive);
    * a doctor footer line starting with ``医生信息`` followed by a colon
      (ASCII ``:`` or full-width ``\\uff1a``);
    * at least one data row with five or more tab-separated fields whose
      first field consists of digits.

    Args:
        path: Result file to inspect; read as UTF-8.

    Returns:
        ``True`` if all three markers are present, ``False`` otherwise
        (including when *path* is missing or empty).
    """
    if not path.is_file() or path.stat().st_size <= 0:
        return False

    label = "医生信息"
    # The footer label may be followed by an ASCII or a full-width colon.
    footer_prefixes = (label + ":", label + "\uff1a")

    has_header = False
    has_doctor_footer = False
    has_segment_row = False
    for raw in path.read_text(encoding="utf-8").splitlines():
        line = raw.strip()
        if not line:
            continue
        if line.lower().startswith("rank\t"):
            has_header = True
        elif line.startswith(label):
            # Lines starting with the label are never data rows; they only
            # count as the footer when the separator colon is present.
            if line.startswith(footer_prefixes):
                has_doctor_footer = True
        elif not has_segment_row:
            parts = line.split("\t")
            has_segment_row = len(parts) >= 5 and parts[0].strip().isdigit()

    return has_header and has_doctor_footer and has_segment_row
|
||||
|
||||
|
||||
def doctor_id_for_consumption_rows(doctor: ReferenceDoctorInfo | None) -> str:
    """Choose the doctor identifier written onto each consumption row.

    Returns ``"<name> (<id>)"`` when the doctor has a name, the bare id when
    only a non-placeholder id is known, and ``bp.VIDEO_RESULT_DOCTOR_ID``
    in every other case (including ``doctor is None``).
    """
    if doctor is not None:
        if doctor.doctor_name:
            return f"{doctor.doctor_name} ({doctor.doctor_id})"
        known_id = doctor.doctor_id
        if known_id and known_id != bp.VIDEO_RESULT_DOCTOR_ID:
            return known_id
    return bp.VIDEO_RESULT_DOCTOR_ID
|
||||
|
||||
|
||||
def parse_reference_tsv(
    path: Path,
    *,
    base_timestamp: datetime | None = None,
    doctor: ReferenceDoctorInfo | None = None,
) -> list[SurgeryConsumptionStored]:
    """Convert the rows of a reference TSV into ``SurgeryConsumptionStored`` items.

    Args:
        path: Tab-separated result file with a header row; read as UTF-8.
        base_timestamp: Instant that ``start_sec`` offsets are added to.
            Defaults to the current UTC time.
        doctor: Doctor info to attribute the rows to. When omitted it is
            parsed from *path* with ``parse_reference_doctor_info``.

    Returns:
        One record per usable row, in file order. Rows whose ``top1_name`` is
        empty or starts with ``(`` or ``医生信息`` are skipped. ``item_id`` is
        ``product_id_top1`` when non-empty, otherwise the name. Every record
        has ``qty=1`` and ``source="video_batch"``.
    """
    if base_timestamp is None:
        base_timestamp = datetime.now(timezone.utc)
    resolved_doctor = parse_reference_doctor_info(path) if doctor is None else doctor
    row_doctor_id = doctor_id_for_consumption_rows(resolved_doctor)

    skipped_name_prefixes = ("(", "医生信息")
    records: list[SurgeryConsumptionStored] = []
    with path.open("r", encoding="utf-8", newline="") as handle:
        for record in csv.DictReader(handle, delimiter="\t"):
            label = (record.get("top1_name") or "").strip()
            if not label or label.startswith(skipped_name_prefixes):
                continue

            product_id = (record.get("product_id_top1") or "").strip()
            raw_start = (record.get("start_sec") or "0").strip()
            try:
                offset_sec = float(raw_start) if raw_start else 0.0
            except ValueError:
                # An unparseable offset is treated as the start of the recording.
                offset_sec = 0.0

            records.append(
                SurgeryConsumptionStored(
                    item_id=product_id or label,
                    item_name=label,
                    qty=1,
                    doctor_id=row_doctor_id,
                    # Negative offsets are clamped to the base timestamp.
                    timestamp=base_timestamp + timedelta(seconds=max(0.0, offset_sec)),
                    source="video_batch",
                )
            )
    return records
|
||||
Reference in New Issue
Block a user