update minio port

This commit is contained in:
Kevin
2026-05-22 09:35:41 +08:00
parent 153c91f8ff
commit 62b14d7386
22 changed files with 1256 additions and 1170 deletions

View File

@@ -0,0 +1,178 @@
"""Map algorithm_subprocesses/5.15 TSV output to domain objects (orchestration adapter only)."""
from __future__ import annotations
import csv
import hashlib
import re
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
from pathlib import Path
from app.baked import pipeline as bp
from app.consumable_catalog import (
effective_candidate_consumables,
normalize_candidate_consumables_raw,
)
from app.domain.consumption import SurgeryConsumptionStored
@dataclass(frozen=True)
class ReferenceDoctorInfo:
    """Immutable record of the doctor footer parsed from a reference result file."""

    # Identifier to attribute rows to; the parser in this module uses
    # bp.VIDEO_RESULT_DOCTOR_ID when no doctor could be resolved.
    doctor_id: str
    # Doctor name when the footer carried one, otherwise None.
    doctor_name: str | None
    # Human-readable rendering of the footer content.
    display: str
    # The stripped footer line as it was read from the file.
    raw_line: str
# Footer body of the form "<name> (id=<id>[, conf=<number>])", optionally
# followed by the "[低置信度]" (low-confidence) tag.
_DOCTOR_NAME_ID_RE = re.compile(
    r"^(?P<name>.+?)\s*\(id=(?P<id>[^,\s)]+)(?:,\s*conf=[\d.]+)?\)\s*(?:\[低置信度\])?\s*$"
)
# Footer body of the form "doctor_id=<id>[ (conf=<number>)]", optionally
# followed by the same low-confidence tag.
_DOCTOR_ID_ONLY_RE = re.compile(
    r"^doctor_id=(?P<id>[^\s(]+)(?:\s*\(conf=[\d.]+\))?\s*(?:\[低置信度\])?\s*$"
)
def sha256_file(path: Path) -> str:
    """Return the hex SHA-256 digest of the file at *path*.

    The file is read in 1 MiB pieces, so it is never held in memory whole.
    """
    chunk_size = 1024 * 1024
    digest = hashlib.sha256()
    with path.open("rb") as stream:
        while True:
            block = stream.read(chunk_size)
            if not block:
                # An empty read marks end of file.
                break
            digest.update(block)
    return digest.hexdigest()
def candidate_cache_key(candidate_consumables: list[str]) -> str:
    """Return a 12-hex-character key derived from the candidate list.

    The names are joined with newlines, UTF-8 encoded and hashed with
    SHA-256; the key is the first 12 characters of the hex digest, so it
    depends on both the names and their order.
    """
    joined = "\n".join(candidate_consumables)
    full_digest = hashlib.sha256(joined.encode("utf-8")).hexdigest()
    return full_digest[:12]
def resolve_reference_candidates(candidate_consumables: list[str] | None) -> list[str]:
    """Normalize the requested candidates and resolve the effective list.

    ``None`` or an empty list is treated as an empty request. The input is
    copied, passed through ``normalize_candidate_consumables_raw``, and the
    result of ``effective_candidate_consumables`` on that is returned.
    """
    requested_raw = list(candidate_consumables) if candidate_consumables else []
    normalized = normalize_candidate_consumables_raw(requested_raw)
    return effective_candidate_consumables(normalized)
def parse_reference_doctor_info(path: Path) -> ReferenceDoctorInfo | None:
    """Parse the "医生信息" (doctor info) footer line of a reference result file.

    Only the first line whose stripped text starts with "医生信息" followed by
    an ASCII or full-width colon is considered.

    Returns:
        ``None`` when *path* is not a file or contains no footer line.
        Otherwise a ``ReferenceDoctorInfo`` built from the text after the
        colon:

        * empty, "未启用" (not enabled), "识别失败..." (recognition failed) or
          any unrecognized text: ``doctor_id`` is
          ``bp.VIDEO_RESULT_DOCTOR_ID``, ``doctor_name`` is ``None`` and
          ``display`` is that text ("未启用" when it is empty);
        * "<name> (id=<id>...)": both name and id are set and ``display`` is
          "<name> (<id>)";
        * "doctor_id=<id>...": only the id is set and ``display`` is the id.
    """
    if not path.is_file():
        return None

    label = "医生信息"
    # The footer may use an ASCII colon or a full-width one (U+FF1A).
    footer_prefixes = (label + ":", label + "\uff1a")
    raw_line = ""
    for line in path.read_text(encoding="utf-8").splitlines():
        stripped = line.strip()
        if stripped.startswith(footer_prefixes):
            raw_line = stripped
            break
    if not raw_line:
        return None

    # Drop the label and exactly one colon. Splitting on an empty separator
    # raises ValueError, and splitting on every colon would truncate a body
    # that itself contains one (e.g. a failure reason).
    body = raw_line[len(label) + 1:].strip()

    def unresolved(display: str) -> ReferenceDoctorInfo:
        # No usable doctor identity: attribute to the placeholder doctor id.
        return ReferenceDoctorInfo(
            doctor_id=bp.VIDEO_RESULT_DOCTOR_ID,
            doctor_name=None,
            display=display,
            raw_line=raw_line,
        )

    # These states must be checked before the regexes so that a failure
    # message is never mistaken for a doctor name.
    if not body or body == "未启用" or body.startswith("识别失败"):
        return unresolved(body or "未启用")

    match = _DOCTOR_NAME_ID_RE.match(body)
    if match:
        name = match.group("name").strip()
        did = match.group("id").strip()
        return ReferenceDoctorInfo(
            doctor_id=did,
            doctor_name=name,
            display=f"{name} ({did})",
            raw_line=raw_line,
        )

    match = _DOCTOR_ID_ONLY_RE.match(body)
    if match:
        did = match.group("id").strip()
        return ReferenceDoctorInfo(
            doctor_id=did,
            doctor_name=None,
            display=did,
            raw_line=raw_line,
        )

    return unresolved(body)
def is_reference_result_complete(path: Path) -> bool:
    """Report whether a reference result file looks fully written.

    The file counts as complete when it exists, is non-empty, and its
    non-blank (stripped) lines contain all of:

    * a header line starting with "rank" and a tab (case-insensitive);
    * a doctor footer line starting with "医生信息" followed by an ASCII or
      full-width colon;
    * at least one data row with five or more tab-separated fields whose
      first field is all digits.
    """
    if not path.is_file() or path.stat().st_size <= 0:
        return False

    # The footer may use an ASCII colon or a full-width one (U+FF1A).
    footer_prefixes = ("医生信息:", "医生信息\uff1a")
    has_header = False
    has_doctor_footer = False
    has_segment_row = False
    for raw in path.read_text(encoding="utf-8").splitlines():
        line = raw.strip()
        if not line:
            continue
        if line.lower().startswith("rank\t"):
            has_header = True
            continue
        if line.startswith("医生信息"):
            # Any "医生信息..." line is excluded from the data rows, but only
            # one with a colon right after the label counts as the footer.
            if line.startswith(footer_prefixes):
                has_doctor_footer = True
            continue
        if not has_segment_row:
            parts = line.split("\t")
            has_segment_row = len(parts) >= 5 and parts[0].strip().isdigit()
    return has_header and has_doctor_footer and has_segment_row
def doctor_id_for_consumption_rows(doctor: ReferenceDoctorInfo | None) -> str:
    """Return the doctor identifier string to store on consumption rows.

    A named doctor is rendered as "<name> (<id>)". An unnamed doctor with a
    non-empty id other than ``bp.VIDEO_RESULT_DOCTOR_ID`` yields that id.
    In every other case, including ``doctor is None``, the result is
    ``bp.VIDEO_RESULT_DOCTOR_ID``.
    """
    if doctor is not None:
        if doctor.doctor_name:
            return f"{doctor.doctor_name} ({doctor.doctor_id})"
        known_id = doctor.doctor_id
        if known_id and known_id != bp.VIDEO_RESULT_DOCTOR_ID:
            return known_id
    return bp.VIDEO_RESULT_DOCTOR_ID
def parse_reference_tsv(
    path: Path,
    *,
    base_timestamp: datetime | None = None,
    doctor: ReferenceDoctorInfo | None = None,
) -> list[SurgeryConsumptionStored]:
    """Convert the rows of a reference TSV into consumption records.

    Args:
        path: Tab-separated result file whose first line is the column header.
        base_timestamp: Instant that each row's ``start_sec`` offset is added
            to; defaults to the current UTC time.
        doctor: Doctor to attribute the rows to; when omitted it is parsed
            from the footer of *path*.

    Returns:
        One ``SurgeryConsumptionStored`` (``qty=1``, ``source="video_batch"``)
        per row with a usable ``top1_name``, in file order.
    """
    if base_timestamp is None:
        base_timestamp = datetime.now(timezone.utc)
    if doctor is None:
        doctor = parse_reference_doctor_info(path)
    row_doctor_id = doctor_id_for_consumption_rows(doctor)

    out: list[SurgeryConsumptionStored] = []
    with path.open("r", encoding="utf-8", newline="") as f:
        for row in csv.DictReader(f, delimiter="\t"):
            name = (row.get("top1_name") or "").strip()
            # Skip rows without a top-1 name and the doctor footer only.
            # A prefix test against "" is true for every string and would
            # discard all rows, so no such test is applied here.
            # NOTE(review): if rows carrying a placeholder marker in
            # top1_name must also be skipped, add that marker here -
            # TODO confirm against the algorithm's output format.
            if not name or name.startswith("医生信息"):
                continue

            # Fall back to the name when the row has no product id.
            item_id = (row.get("product_id_top1") or "").strip() or name

            try:
                start_sec = float((row.get("start_sec") or "0").strip() or 0.0)
            except ValueError:
                # A non-numeric offset is treated as the start of the video.
                start_sec = 0.0

            out.append(
                SurgeryConsumptionStored(
                    item_id=item_id,
                    item_name=name,
                    qty=1,
                    doctor_id=row_doctor_id,
                    # Negative offsets are clamped to the base timestamp.
                    timestamp=base_timestamp + timedelta(seconds=max(0.0, start_sec)),
                    source="video_batch",
                )
            )
    return out