update minio port

2026-05-22 09:35:41 +08:00
parent 153c91f8ff
commit 62b14d7386
22 changed files with 1256 additions and 1170 deletions
--- a/backend/app/algo_host/result_adapter.py
+++ b/backend/app/algo_host/result_adapter.py
@@ -0,0 +1,178 @@
+"""Map algorithm_subprocesses/5.15 TSV output to domain objects (orchestration adapter only)."""
+
+from __future__ import annotations
+
+import csv
+import hashlib
+import re
+from dataclasses import dataclass
+from datetime import datetime, timedelta, timezone
+from pathlib import Path
+
+from app.baked import pipeline as bp
+from app.consumable_catalog import (
+    effective_candidate_consumables,
+    normalize_candidate_consumables_raw,
+)
+from app.domain.consumption import SurgeryConsumptionStored
+
+
+@dataclass(frozen=True)
+class ReferenceDoctorInfo:
+    """Immutable record of the doctor footer found in a reference result file.
+
+    Instances are produced by ``parse_reference_doctor_info`` from the
+    ``医生信息`` line of the algorithm output.
+    """
+
+    # Parsed doctor identifier, or ``bp.VIDEO_RESULT_DOCTOR_ID`` when the
+    # footer did not yield one.
+    doctor_id: str
+    # Parsed doctor name; ``None`` when the footer carried no name.
+    doctor_name: str | None
+    # Text suitable for display: "name (id)", the bare id, or the footer body.
+    display: str
+    # The stripped footer line exactly as it was found in the file.
+    raw_line: str
+
+
+# Footer body carrying both a name and an id, e.g.
+#   "张三 (id=D001, conf=0.93) [低置信度]"
+# The ", conf=..." part and the trailing "[低置信度]" marker are both optional.
+_DOCTOR_NAME_ID_RE = re.compile(
+    r"^(?P<name>.+?)\s*\(id=(?P<id>[^,\s)]+)(?:,\s*conf=[\d.]+)?\)\s*(?:\[低置信度\])?\s*$"
+)
+# Footer body carrying only an id, e.g.
+#   "doctor_id=D001 (conf=0.93) [低置信度]"
+# The "(conf=...)" part and the trailing "[低置信度]" marker are both optional.
+_DOCTOR_ID_ONLY_RE = re.compile(
+    r"^doctor_id=(?P<id>[^\s(]+)(?:\s*\(conf=[\d.]+\))?\s*(?:\[低置信度\])?\s*$"
+)
+
+
+def sha256_file(path: Path) -> str:
+    """Return the hexadecimal SHA-256 digest of the file at *path*.
+
+    The file is read in 1 MiB blocks so that large files are never held in
+    memory in full.
+    """
+    block_size = 1024 * 1024
+    digest = hashlib.sha256()
+    with path.open("rb") as stream:
+        while True:
+            block = stream.read(block_size)
+            if not block:
+                # An empty read marks end of file.
+                break
+            digest.update(block)
+    return digest.hexdigest()
+
+
+def candidate_cache_key(candidate_consumables: list[str]) -> str:
+    """Return a 12-character cache key for a list of candidate consumables.
+
+    The names are joined with newlines, UTF-8 encoded and hashed with SHA-256;
+    the key is the first 12 hex characters of that digest. The key depends on
+    the order of the names.
+    """
+    joined = "\n".join(candidate_consumables)
+    digest = hashlib.sha256(joined.encode("utf-8"))
+    return digest.hexdigest()[:12]
+
+
+def resolve_reference_candidates(candidate_consumables: list[str] | None) -> list[str]:
+    """Normalize the requested candidates and return the effective list.
+
+    ``None`` and an empty list are both treated as "nothing requested". The
+    input is copied before it is handed to
+    ``normalize_candidate_consumables_raw``, whose result is then passed
+    through ``effective_candidate_consumables``.
+    """
+    names = list(candidate_consumables) if candidate_consumables else []
+    return effective_candidate_consumables(normalize_candidate_consumables_raw(names))
+
+
+def parse_reference_doctor_info(path: Path) -> ReferenceDoctorInfo | None:
+    """Parse the ``医生信息`` footer line of a reference result file.
+
+    The first line that starts with ``医生信息：`` (full-width colon) or
+    ``医生信息:`` (ASCII colon) is used; everything after that prefix is the
+    footer body.
+
+    Args:
+        path: Result file to read as UTF-8 text.
+
+    Returns:
+        ``None`` when *path* is not a file or has no footer line. Otherwise a
+        ``ReferenceDoctorInfo``:
+
+        * body matches "name (id=...)": the parsed name and id;
+        * body matches "doctor_id=...": the parsed id, no name;
+        * anything else (empty, "未启用", "识别失败...", unrecognized text):
+          ``bp.VIDEO_RESULT_DOCTOR_ID`` as the id and the body as display
+          text ("未启用" when the body is empty).
+    """
+    if not path.is_file():
+        return None
+
+    prefixes = ("医生信息：", "医生信息:")
+    raw_line = ""
+    for line in path.read_text(encoding="utf-8").splitlines():
+        stripped = line.strip()
+        if stripped.startswith(prefixes):
+            raw_line = stripped
+            break
+    if not raw_line:
+        return None
+
+    # Remove only the prefix that actually matched. Splitting on both colon
+    # variants would also cut at a colon inside the body, e.g.
+    # "医生信息：识别失败: timeout" would be reduced to "timeout".
+    matched_prefix = next(p for p in prefixes if raw_line.startswith(p))
+    body = raw_line[len(matched_prefix):].strip()
+
+    def unresolved(display: str) -> ReferenceDoctorInfo:
+        # No usable doctor identity: fall back to the placeholder id.
+        return ReferenceDoctorInfo(
+            doctor_id=bp.VIDEO_RESULT_DOCTOR_ID,
+            doctor_name=None,
+            display=display,
+            raw_line=raw_line,
+        )
+
+    if not body or body == "未启用":
+        return unresolved(body or "未启用")
+    if body.startswith("识别失败"):
+        return unresolved(body)
+
+    match = _DOCTOR_NAME_ID_RE.match(body)
+    if match:
+        name = match.group("name").strip()
+        did = match.group("id").strip()
+        return ReferenceDoctorInfo(
+            doctor_id=did,
+            doctor_name=name,
+            display=f"{name} ({did})",
+            raw_line=raw_line,
+        )
+
+    match = _DOCTOR_ID_ONLY_RE.match(body)
+    if match:
+        did = match.group("id").strip()
+        return ReferenceDoctorInfo(
+            doctor_id=did,
+            doctor_name=None,
+            display=did,
+            raw_line=raw_line,
+        )
+
+    # Footer present but in a shape neither pattern recognizes.
+    return unresolved(body)
+
+
+def is_reference_result_complete(path: Path) -> bool:
+    """Report whether a reference result file looks fully written.
+
+    A file counts as complete when it exists, is non-empty, and its non-blank
+    lines contain all three of:
+
+    * a header line starting with ``rank<TAB>`` (case-insensitive);
+    * a doctor footer line starting with ``医生信息：`` or ``医生信息:``;
+    * at least one data row with five or more tab-separated fields whose
+      first field is all digits.
+    """
+    if not path.is_file():
+        return False
+    if path.stat().st_size <= 0:
+        return False
+
+    seen_header = False
+    seen_footer = False
+    seen_row = False
+    for raw in path.read_text(encoding="utf-8").splitlines():
+        line = raw.strip()
+        if not line:
+            continue
+        if line.lower().startswith("rank\t"):
+            seen_header = True
+        elif line.startswith("医生信息"):
+            # Only a footer with a colon after the label counts as the footer;
+            # any line starting with the label is excluded from data rows.
+            if line.startswith(("医生信息：", "医生信息:")):
+                seen_footer = True
+        elif not seen_row:
+            fields = line.split("\t")
+            seen_row = len(fields) >= 5 and fields[0].strip().isdigit()
+    return seen_header and seen_footer and seen_row
+
+
+def doctor_id_for_consumption_rows(doctor: ReferenceDoctorInfo | None) -> str:
+    """Choose the doctor identifier string stored on each consumption row.
+
+    Preference order: "name (id)" when a name is known, then the bare id when
+    it is non-empty and differs from ``bp.VIDEO_RESULT_DOCTOR_ID``, otherwise
+    ``bp.VIDEO_RESULT_DOCTOR_ID`` (also used when *doctor* is ``None``).
+    """
+    if doctor is not None:
+        if doctor.doctor_name:
+            return f"{doctor.doctor_name} ({doctor.doctor_id})"
+        parsed_id = doctor.doctor_id
+        if parsed_id and parsed_id != bp.VIDEO_RESULT_DOCTOR_ID:
+            return parsed_id
+    return bp.VIDEO_RESULT_DOCTOR_ID
+
+
+def parse_reference_tsv(
+    path: Path,
+    *,
+    base_timestamp: datetime | None = None,
+    doctor: ReferenceDoctorInfo | None = None,
+) -> list[SurgeryConsumptionStored]:
+    """Convert the rows of a reference TSV file into consumption records.
+
+    Args:
+        path: Tab-separated result file with a header row; the columns read
+            are ``top1_name``, ``product_id_top1`` and ``start_sec``.
+        base_timestamp: Instant that ``start_sec`` offsets are added to.
+            Defaults to the current UTC time.
+        doctor: Doctor info to attribute rows to. When omitted it is parsed
+            from *path* with ``parse_reference_doctor_info``.
+
+    Returns:
+        One ``SurgeryConsumptionStored`` per usable row, each with ``qty=1``
+        and ``source="video_batch"``. Rows are skipped when ``top1_name`` is
+        empty or starts with ``（`` or ``医生信息``.
+    """
+    if base_timestamp is None:
+        base_timestamp = datetime.now(timezone.utc)
+    if doctor is None:
+        doctor = parse_reference_doctor_info(path)
+    row_doctor_id = doctor_id_for_consumption_rows(doctor)
+
+    out: list[SurgeryConsumptionStored] = []
+    with path.open("r", encoding="utf-8", newline="") as f:
+        for row in csv.DictReader(f, delimiter="\t"):
+            name = (row.get("top1_name") or "").strip()
+            if not name or name.startswith(("（", "医生信息")):
+                continue
+            # Fall back to the name when the product id column is blank.
+            item_id = (row.get("product_id_top1") or "").strip() or name
+            try:
+                start_sec = float((row.get("start_sec") or "0").strip() or 0.0)
+            except ValueError:
+                start_sec = 0.0
+            try:
+                timestamp = base_timestamp + timedelta(seconds=max(0.0, start_sec))
+            except OverflowError:
+                # "inf" or an absurdly large offset parses as a float but
+                # cannot be represented as a timedelta/datetime. Treat it like
+                # an unparsable offset instead of aborting the whole file.
+                timestamp = base_timestamp
+            out.append(
+                SurgeryConsumptionStored(
+                    item_id=item_id,
+                    item_name=name,
+                    qty=1,
+                    doctor_id=row_doctor_id,
+                    timestamp=timestamp,
+                    source="video_batch",
+                )
+            )
+    return out