feat: 配置写死与 baked 模块,Alembic 建表,百度仅 BAIDU_*
- 新增 app/baked/algorithm|pipeline,非部署参数不再走 env;Settings 保留 DB/HTTP/RTSP/海康/百度/MinIO/Demo - 移除 init_db_schema 与 reload 配置;main 仅 check_database;start*.sh 在 uvicorn 前执行 alembic upgrade head - 依赖 psycopg[binary] 供 Alembic 同步 URL;alembic/env 注释与预发清单更新 - 撕段门控消费管线、各视频/语音/归档调用改为 baked - 百度环境变量仅 BAIDU_APP_ID、BAIDU_API_KEY、BAIDU_SECRET_KEY 与 BAIDU_* 超时/ASR;人脸脚本与 baidu_speech 文案同步 - 全量单测与 .env.example 更新;.gitignore 忽略 refs/(本地权重/视频不入库) Made-with: Cursor
This commit is contained in:
13
app/services/tear_gated_segment_consumption/__init__.py
Normal file
13
app/services/tear_gated_segment_consumption/__init__.py
Normal file
@@ -0,0 +1,13 @@
|
||||
"""撕段门控 + 41 类耗材:与 `refs/haocai_consumption_demo_pack` 同构,输入为 RTSP。"""
|
||||
|
||||
from app.services.tear_gated_segment_consumption.runner import (
|
||||
TearGatedSegmentModelBundle,
|
||||
TearGatedSegmentRecord,
|
||||
TearGatedSegmentRunner,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"TearGatedSegmentModelBundle",
|
||||
"TearGatedSegmentRecord",
|
||||
"TearGatedSegmentRunner",
|
||||
]
|
||||
92
app/services/tear_gated_segment_consumption/geometry.py
Normal file
92
app/services/tear_gated_segment_consumption/geometry.py
Normal file
@@ -0,0 +1,92 @@
|
||||
"""手部框、撕动作几何与概率(从离线 tear 脚本抽离,不依赖 OpenCV 绘制)。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from itertools import combinations
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
from ultralytics import YOLO
|
||||
|
||||
|
||||
def union_boxes(boxes: list[list[float]]) -> list[float]:
    """Return the smallest axis-aligned box enclosing every box in *boxes*.

    Each box is ``[x1, y1, x2, y2]``. Raises ``ValueError`` on an empty list
    (callers guard with ``len(hand_xyxys) >= 1``).
    """
    xs1, ys1, xs2, ys2 = zip(*((b[0], b[1], b[2], b[3]) for b in boxes))
    return [min(xs1), min(ys1), max(xs2), max(ys2)]
|
||||
|
||||
|
||||
def pad_box(
    xyxy: list[float],
    img_w: int,
    img_h: int,
    pad_ratio: float = 0.30,
) -> tuple[int, int, int, int]:
    """Expand *xyxy* by *pad_ratio* of its width/height and clamp to the image.

    Returns integer pixel coordinates ``(x1, y1, x2, y2)`` suitable for slicing
    a ``(h, w, c)`` frame.
    """
    left, top, right, bottom = xyxy
    pad_x = (right - left) * pad_ratio
    pad_y = (bottom - top) * pad_ratio
    x1 = max(0, int(left - pad_x))
    y1 = max(0, int(top - pad_y))
    x2 = min(img_w, int(right + pad_x))
    y2 = min(img_h, int(bottom + pad_y))
    return (x1, y1, x2, y2)
|
||||
|
||||
|
||||
def collect_hand_boxes(det_model: YOLO, boxes) -> list[list[float]]:
    """Extract xyxy coordinates of every detection whose class name is ``hand``.

    *boxes* is an ultralytics ``Boxes``-like iterable; class ids missing from
    ``det_model.names`` are treated as non-hand and skipped.
    """
    class_names = det_model.names
    return [
        box.xyxy[0].tolist()
        for box in boxes
        if class_names.get(int(box.cls[0]), "") == "hand"
    ]
|
||||
|
||||
|
||||
def box_edge_distance(a: list[float], b: list[float]) -> float:
    """Shortest edge-to-edge distance between two xyxy boxes (0.0 when they overlap)."""
    gap_x = max(a[0], b[0]) - min(a[2], b[2])
    gap_y = max(a[1], b[1]) - min(a[3], b[3])
    if gap_x < 0:
        gap_x = 0
    if gap_y < 0:
        gap_y = 0
    return float((gap_x * gap_x + gap_y * gap_y) ** 0.5)


def box_avg_width(boxes: list[list[float]]) -> float:
    """Mean width (x2 - x1) over *boxes*; 0.0 for an empty list."""
    widths = [b[2] - b[0] for b in boxes]
    return sum(widths) / len(widths) if widths else 0.0


def find_tearing_pair(
    hand_boxes: list[list[float]],
    gap_ratio: float = 1.5,
) -> tuple[list[float], list[float]] | None:
    """Pick the closest pair of hand boxes if their edge gap is small enough.

    The gap threshold is ``gap_ratio`` times the average box width. Returns
    ``None`` when fewer than two boxes are given or no pair is close enough.
    """
    if len(hand_boxes) < 2:
        return None
    closest = min(
        combinations(hand_boxes, 2),
        key=lambda pair: box_edge_distance(pair[0], pair[1]),
    )
    limit = box_avg_width(hand_boxes) * gap_ratio
    if box_edge_distance(closest[0], closest[1]) <= limit:
        return closest
    return None
|
||||
|
||||
|
||||
def prob_tearing(tprobs, tear_names: dict[Any, str]) -> float:
    """Probability assigned to the ``tearing`` class, or 0.0 when unavailable.

    *tprobs* is an ultralytics ``Probs``-like object whose ``data`` tensor is
    flattened and indexed by the id mapped to ``"tearing"`` in *tear_names*.
    Entries with out-of-range ids are skipped.
    """
    if tprobs is None:
        return 0.0
    tensor = tprobs.data
    if tensor is None:
        return 0.0
    flat = tensor.detach().float().cpu().numpy().ravel()
    for cls_id, cls_name in tear_names.items():
        if cls_name == "tearing":
            pos = int(cls_id)
            if 0 <= pos < len(flat):
                return float(flat[pos])
    return 0.0
|
||||
27
app/services/tear_gated_segment_consumption/product_map.py
Normal file
27
app/services/tear_gated_segment_consumption/product_map.py
Normal file
@@ -0,0 +1,27 @@
|
||||
"""撕段段级结果中「类名 -> 业务物品 id」:与现网一致使用 ``consumable_classifier_labels.yaml``。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from app.baked import algorithm as ba
|
||||
from app.services.consumable_vision_algorithm import load_name_to_label_id_from_yaml
|
||||
|
||||
|
||||
def resolve_tear_segment_labels_yaml_path() -> Path:
    """Resolve the labels YAML path for the tear-segment name mapping.

    A non-empty ``TEAR_SEGMENT_LABELS_YAML_PATH`` override wins; otherwise the
    default labels file shared with the consumable pipeline is used.
    """
    override = (ba.TEAR_SEGMENT_LABELS_YAML_PATH or "").strip()
    selected = override if override else ba.CONSUMABLE_CLASSIFIER_LABELS_YAML_PATH
    return Path(selected).expanduser().resolve()
|
||||
|
||||
|
||||
def load_tear_segment_name_to_id() -> dict[str, str]:
    """Load the class-name -> ``label_id`` map from the resolved labels YAML.

    Uses the same loader and ``_norm_product_name`` keying as
    ``ConsumableVisionAlgorithmService``; logs a warning when the map is empty.
    """
    yaml_path = resolve_tear_segment_labels_yaml_path()
    mapping = load_name_to_label_id_from_yaml(yaml_path)
    if not mapping:
        logger.warning("撕段 name→id 映射为空,请检查 YAML: {}", yaml_path)
    return mapping
|
||||
47
app/services/tear_gated_segment_consumption/report.py
Normal file
47
app/services/tear_gated_segment_consumption/report.py
Normal file
@@ -0,0 +1,47 @@
|
||||
"""与离线 demo main.py 同结构的段级文本报告(可选落盘)。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from app.services.tear_gated_segment_consumption.runner import TearGatedSegmentRecord
|
||||
|
||||
|
||||
def write_tear_segment_txt(
    *,
    path: Path,
    surgery_id: str,
    camera_id: str,
    labels_source: str,
    records: list[TearGatedSegmentRecord],
) -> None:
    """Write the segment-level consumable report as a UTF-8 text file.

    Mirrors the offline demo ``main.py`` layout: a four-line header, a blank
    line, one tab-separated line per segment record, and a trailing blank
    line. Parent directories are created on demand.
    """

    def _record_line(rec: TearGatedSegmentRecord) -> str:
        # One tab-separated report line per segment record.
        fields = [
            f"段{rec.segment_index}",
            f"时间戳(秒)={rec.mid_stream_sec:.3f};范围={rec.start_sec:.2f}~{rec.end_sec:.2f}",
            f"物品id={rec.item_id}",
            f"物品名称(模型top1)={rec.item_name}",
            f"top1置信度(段内平均)={rec.top1_conf:.4f}",
            f"top2={rec.top2_name}",
            f"top2_conf={rec.top2_conf:.4f}",
            f"top3={rec.top3_name}",
            f"top3_conf={rec.top3_conf:.4f}",
            f"段内众数(参考)={rec.majority_ref}",
            "消耗数量=1",
            "医生id=暂无",
        ]
        return "\t".join(fields)

    header = [
        "耗材消耗推断(撕段模式 / FastAPI)",
        f"手术: {surgery_id} camera: {camera_id}",
        f"类名→label_id YAML: {labels_source}",
        "说明: 人手检测 -> 撕二分类 -> 好帧门控(坏帧不判耗材) -> 41 类; "
        "段内为概率向量平均; 众数作展示对照",
        "",
    ]
    body = [_record_line(rec) for rec in records]
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text("\n".join(header + body + [""]) + "\n", encoding="utf-8")
|
||||
334
app/services/tear_gated_segment_consumption/runner.py
Normal file
334
app/services/tear_gated_segment_consumption/runner.py
Normal file
@@ -0,0 +1,334 @@
|
||||
"""有状态逐帧处理 + 停录时段级汇总(与 haocai_consumption demo main.run 同构)。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
from collections import Counter
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from threading import Lock
|
||||
import numpy as np
|
||||
from loguru import logger
|
||||
from ultralytics import YOLO
|
||||
|
||||
from app.baked import algorithm as ba
|
||||
from app.services.consumable_vision_algorithm import (
|
||||
_norm_product_name,
|
||||
resolve_inference_device,
|
||||
)
|
||||
from app.services.tear_gated_segment_consumption.geometry import (
|
||||
collect_hand_boxes,
|
||||
find_tearing_pair,
|
||||
pad_box,
|
||||
prob_tearing,
|
||||
union_boxes,
|
||||
)
|
||||
from app.services.tear_gated_segment_consumption.segments import merge_tear_segments
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class TearGatedSegmentRecord:
    """Per-segment output; one record matches one line of the offline txt report."""

    segment_index: int  # 1-based segment number within the stream
    start_sec: float  # segment start, seconds from stream start
    end_sec: float  # segment end, seconds from stream start
    mid_stream_sec: float  # segment midpoint; used as the record timestamp
    item_id: str  # business item id mapped from the class name (or a placeholder)
    item_name: str  # model top-1 class name (or a placeholder string)
    top1_conf: float  # segment-averaged top-1 probability
    top2_name: str  # second-best class name, for display
    top2_conf: float  # segment-averaged top-2 probability
    top3_name: str  # third-best class name, for display
    top3_conf: float  # segment-averaged top-3 probability
    majority_ref: str  # per-frame majority label, shown for reference only
|
||||
|
||||
|
||||
def is_good_frame(
    gb_model: YOLO, crop: np.ndarray, gb_names: dict, imgsz: int, device: str | None
) -> bool:
    """Run the good/bad classifier on *crop* and report whether top-1 is ``good``.

    Empty crops and predictions without class probabilities both count as bad.
    """
    if crop.size == 0:
        return False
    result = gb_model.predict(crop, imgsz=imgsz, verbose=False, device=device)[0]
    probs = result.probs
    if probs is None:
        return False
    return str(gb_names.get(int(probs.top1), "")) == "good"
|
||||
|
||||
|
||||
def haocai_mean_topk(
    probs_list: list[np.ndarray],
    names: dict,
) -> tuple[str, float, str, float, str, float]:
    """Average per-frame probability vectors and return the top-3 classes.

    Parameters:
        probs_list: per-frame probability vectors (all the same length).
        names: class-id -> class-name mapping; ids missing from it fall back
            to ``str(id)``.

    Returns:
        ``(top1_name, top1_conf, top2_name, top2_conf, top3_name, top3_conf)``.
        With no valid frames the placeholder ``"(无有效帧)"`` is returned.

    Fix vs. original: the original indexed ``order[1]`` / ``order[2]``
    unconditionally and raised ``IndexError`` when the vector had fewer than
    three classes; missing ranks are now padded with ``("", 0.0)``, matching
    the empty-input placeholder shape. Behavior for >= 3 classes is unchanged.
    """
    if not probs_list:
        return "(无有效帧)", 0.0, "", 0.0, "", 0.0
    mean_p = np.mean(np.stack(probs_list, axis=0), axis=0)
    order = np.argsort(-mean_p)  # descending by averaged probability
    out: list = []
    for rank in range(3):
        if rank < order.size:
            idx = int(order[rank])
            out.extend([str(names.get(idx, str(idx))), float(mean_p[idx])])
        else:
            out.extend(["", 0.0])  # fewer than 3 classes: pad like the empty case
    return tuple(out)
|
||||
|
||||
|
||||
class TearGatedSegmentRunner:
    """From first frame to finalize: accumulate a per-frame timeline, then on
    recording stop merge tear segments and produce records."""

    def __init__(
        self,
        *,
        det_m: YOLO,
        tear_m: YOLO,
        gb_m: YOLO,
        haoc_m: YOLO,
        name_to_id: dict[str, str],
    ) -> None:
        # The four models are injected (shared via TearGatedSegmentModelBundle):
        # hand detector, tear binary classifier, good/bad gate, 41-class consumable.
        self._det_m = det_m
        self._tear_m = tear_m
        self._gb_m = gb_m
        self._haoc_m = haoc_m
        self._tear_names = tear_m.names
        self._gb_names = gb_m.names
        self._haoc_names = haoc_m.names
        # Expected class count for the consumable head; falls back to 41 when
        # `names` is not a dict (assumed ultralytics default — TODO confirm).
        self._n_h = len(self._haoc_names) if isinstance(self._haoc_names, dict) else 41
        self._name_to_id = name_to_id
        # Lock guards all mutable per-stream state below (frames may arrive
        # from a different thread than finalize()).
        self._lock = Lock()
        self._frame_idx = 0
        # Sliding window of recent per-frame tearing probabilities (smoothing).
        self._tear_buf: list[float] = []
        # One entry per processed frame: (frame_idx, abs_time_sec, is_tear, label).
        self._timeline: list[tuple[int, float, bool, str]] = []
        # Normalized consumable probability vector per frame, or None.
        self._frame_probs: list[np.ndarray | None] = []
        # Wall-clock time of the first processed frame; anchors wall_time_for_record.
        self._wall_t0: float | None = None
        self._start_seconds = 0.0

    def _effective_fps(self) -> float:
        # Assumed constant FPS from baked config; non-positive values fall back to 25.
        raw = float(ba.TEAR_SEGMENT_ASSUMED_FPS)
        return raw if raw > 0 else 25.0

    def process_frame_bgr(self, frame: np.ndarray) -> None:
        """Process a single BGR frame (matches the demo ``run()`` main-loop body)."""
        with self._lock:
            if self._wall_t0 is None:
                self._wall_t0 = time.time()
            # Thresholds/sizes come from the baked (non-env) algorithm config.
            det_conf = ba.TEAR_SEGMENT_DET_CONF
            pad_ratio = ba.TEAR_SEGMENT_PAD_RATIO
            tear_conf = ba.TEAR_SEGMENT_TEAR_CONF
            tear_smooth = ba.TEAR_SEGMENT_TEAR_SMOOTH
            gap_ratio = ba.TEAR_SEGMENT_GAP_RATIO
            fps = self._effective_fps()
            w = int(frame.shape[1])
            h = int(frame.shape[0])
            start_seconds = self._start_seconds
            fidx = self._frame_idx
            # Stream time derived from frame index under the assumed-FPS model.
            t_abs = start_seconds + fidx / fps

            # Stage 1: hand detection on the full frame.
            r = self._det_m.predict(
                frame,
                conf=det_conf,
                imgsz=ba.TEAR_SEGMENT_DET_IMGSZ,
                device=resolve_inference_device(ba.HAND_DETECTION_DEVICE),
                verbose=False,
            )
            hand_xyxys = collect_hand_boxes(self._det_m, r[0].boxes)
            # Geometric hint: two hands close together lowers the tear threshold.
            geom = (
                len(hand_xyxys) >= 2
                and find_tearing_pair(hand_xyxys, gap_ratio=gap_ratio) is not None
            )
            f_probs: np.ndarray | None = None
            rec_label = ""
            is_tear = False
            max_p = 0.0
            if len(hand_xyxys) >= 1:
                # Union of all hand boxes (padded) is the consumable crop region.
                merged = union_boxes(hand_xyxys)
                cx1, cy1, cx2, cy2 = pad_box(merged, w, h, pad_ratio)
                gb_dev = resolve_inference_device(ba.TEAR_SEGMENT_GOODBAD_DEVICE)
                haoc_dev = resolve_inference_device(ba.TEAR_SEGMENT_HAOCAI_DEVICE)
                # Stage 2: tear classifier on each padded hand crop; keep the max.
                for hbox in hand_xyxys:
                    hx1, hy1, hx2, hy2 = pad_box(hbox, w, h, pad_ratio)
                    hc = frame[hy1:hy2, hx1:hx2]
                    if hc.size > 0:
                        tr = self._tear_m.predict(
                            hc,
                            imgsz=ba.TEAR_SEGMENT_TEAR_IMGSZ,
                            verbose=False,
                            device=resolve_inference_device(
                                ba.TEAR_SEGMENT_TEAR_DEVICE
                            ),
                        )
                        max_p = max(
                            max_p, prob_tearing(tr[0].probs, self._tear_names)
                        )
                # Moving-average smoothing over the last `tear_smooth` frames.
                if tear_smooth > 0:
                    self._tear_buf.append(max_p)
                    if len(self._tear_buf) > tear_smooth:
                        self._tear_buf.pop(0)
                    p_eff = sum(self._tear_buf) / len(self._tear_buf)
                else:
                    p_eff = max_p
                # Geometry hint relaxes the threshold (0.55 factor from the demo).
                eff = tear_conf * 0.55 if geom else tear_conf
                is_tear = p_eff >= eff

                if is_tear:
                    # Stage 3: good/bad gate; bad frames never get a consumable label.
                    cls_c = frame[cy1:cy2, cx1:cx2]
                    if cls_c.size > 0 and is_good_frame(
                        self._gb_m,
                        cls_c,
                        self._gb_names,
                        ba.TEAR_SEGMENT_GOODBAD_IMGSZ,
                        gb_dev,
                    ):
                        # Stage 4: 41-class consumable classification.
                        h_r = self._haoc_m.predict(
                            cls_c,
                            imgsz=ba.TEAR_SEGMENT_HAOCAI_IMGSZ,
                            verbose=False,
                            device=haoc_dev,
                        )[0]
                        pr = h_r.probs
                        if pr is not None and pr.data is not None:
                            v = pr.data.detach().float().cpu().numpy().ravel()
                            n_exp = self._n_h
                            # Coerce the vector to the expected class count.
                            if v.size < n_exp:
                                v = np.resize(v, n_exp)
                            v = v[:n_exp]
                            s = v.sum()
                            # Renormalize so per-segment averaging stays a distribution.
                            f_probs = (v / s) if s > 0 else v
                            tid = int(np.argmax(f_probs))
                            rec_label = str(self._haoc_names.get(tid, str(tid)))
            else:
                # No hands: reset the smoothing buffer so stale scores don't leak.
                self._tear_buf.clear()

            self._timeline.append((fidx, t_abs, is_tear, rec_label))
            self._frame_probs.append(f_probs)
            self._frame_idx += 1
            if self._frame_idx % 200 == 0:
                logger.info(
                    "tear_segment: processed {} frames (surgery stream)",
                    self._frame_idx,
                )

    def finalize(self) -> list[TearGatedSegmentRecord]:
        """Merge segments, compute per-segment top-K and map class names to
        label_ids via YAML; RTSP has no end-of-clip, so recording stop is the
        boundary."""
        # Snapshot references under the lock; processing happens outside it.
        with self._lock:
            timeline = self._timeline
            frame_probs = self._frame_probs
            haoc_names = self._haoc_names
            name_to_id = self._name_to_id

        if not timeline:
            return []

        segs = merge_tear_segments(
            timeline,
            min_tear_sec=ba.TEAR_SEGMENT_MIN_TEAR_SEC,
            min_gap_sec=ba.TEAR_SEGMENT_MIN_GAP_SEC,
        )
        out: list[TearGatedSegmentRecord] = []
        for s in segs:
            f0, f1 = s["start_frame"], s["end_frame"]
            probs_ok: list[np.ndarray] = []
            lbs: list[str] = []
            # Collect per-frame probability vectors inside the segment.
            for fi in range(f0, f1 + 1):
                if 0 <= fi < len(frame_probs) and frame_probs[fi] is not None:
                    probs_ok.append(frame_probs[fi])
            # Collect per-frame labels (tear frames with a consumable label only).
            for fi in range(f0, f1 + 1):
                if 0 <= fi < len(timeline):
                    _, __, it, lab = timeline[fi]
                    if it and lab:
                        lbs.append(lab)
            if lbs:
                majority = Counter(lbs).most_common(1)[0][0]
            else:
                majority = "(本段无好帧+耗材)"

            # Segment-level top-3 from averaged probability vectors.
            t1, c1, t2, c2, t3, c3 = haocai_mean_topk(probs_ok, haoc_names)
            use_name = t1
            if use_name in ("", "(无有效帧)"):
                use_name = majority
            # Placeholder names (full-width paren prefix) never map to an item id.
            if use_name.startswith("(") or use_name == "(本段无好帧+耗材)":
                item_id = "(无)"
            else:
                key = _norm_product_name(use_name)
                item_id = name_to_id.get(key, "(无匹配编码)")
            t_mid = 0.5 * (s["start_sec"] + s["end_sec"])
            out.append(
                TearGatedSegmentRecord(
                    segment_index=s["index"],
                    start_sec=s["start_sec"],
                    end_sec=s["end_sec"],
                    mid_stream_sec=t_mid,
                    item_id=item_id,
                    item_name=use_name,
                    top1_conf=c1,
                    top2_name=t2,
                    top2_conf=c2,
                    top3_name=t3,
                    top3_conf=c3,
                    majority_ref=majority,
                )
            )
        return out

    def wall_time_for_record(self, rec: TearGatedSegmentRecord) -> float:
        """Unix time (seconds) of the segment midpoint; used as the DB timestamp."""
        with self._lock:
            t0w = self._wall_t0
        # No frame was ever processed: fall back to "now".
        if t0w is None:
            return time.time()
        return t0w + rec.mid_stream_sec
|
||||
|
||||
|
||||
class TearGatedSegmentModelBundle:
    """Load the four models only once; reused by multiple Runner instances."""

    def __init__(self) -> None:
        # Lock serializes lazy loading; all four models are set together.
        self._lock = Lock()
        self._det: YOLO | None = None
        self._tear: YOLO | None = None
        self._gb: YOLO | None = None
        self._haoc: YOLO | None = None

    def _p(self, key: str) -> Path:
        # Normalize a baked weight-path string into an absolute Path.
        return Path((key or "").strip()).expanduser().resolve()

    def _load(self) -> None:
        # Idempotent lazy load: the detector doubles as the "already loaded" flag.
        with self._lock:
            if self._det is not None:
                return
            dp = self._p(ba.TEAR_SEGMENT_HAND_DET_WEIGHTS)
            tp = self._p(ba.TEAR_SEGMENT_TEAR_WEIGHTS)
            gp = self._p(ba.TEAR_SEGMENT_GOODBAD_WEIGHTS)
            hp = self._p(ba.TEAR_SEGMENT_HAOCAI_WEIGHTS)
            # Validate all four weight files before loading any of them.
            for p, label in (
                (dp, "hand det"),
                (tp, "tear"),
                (gp, "good/bad"),
                (hp, "haocai 41"),
            ):
                if not p.is_file():
                    raise FileNotFoundError(f"tear_segment {label} 权重不存在: {p}")
            logger.info("加载撕段四模型: {} {} {} {}", dp, tp, gp, hp)
            self._det = YOLO(str(dp))
            self._tear = YOLO(str(tp))
            self._gb = YOLO(str(gp))
            self._haoc = YOLO(str(hp))

    def ensure_loaded(self) -> None:
        # Public wrapper so callers can eagerly trigger loading (e.g. at startup).
        self._load()

    def create_runner(self, name_to_id: dict[str, str]) -> TearGatedSegmentRunner:
        # Build a fresh stateful runner sharing the already-loaded models.
        self.ensure_loaded()
        assert self._det is not None
        assert self._tear is not None
        assert self._gb is not None
        assert self._haoc is not None
        return TearGatedSegmentRunner(
            det_m=self._det,
            tear_m=self._tear,
            gb_m=self._gb,
            haoc_m=self._haoc,
            name_to_id=name_to_id,
        )
|
||||
78
app/services/tear_gated_segment_consumption/segments.py
Normal file
78
app/services/tear_gated_segment_consumption/segments.py
Normal file
@@ -0,0 +1,78 @@
|
||||
"""撕段时间线合并(与离线 demo 一致)。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import Counter
|
||||
|
||||
|
||||
def merge_tear_segments(
    rows: list[tuple[int, float, bool, str]],
    min_tear_sec: float = 3.0,
    min_gap_sec: float = 1.5,
) -> list[dict]:
    """Merge a per-frame tear timeline into segments (same rules as the offline demo).

    rows: (frame_idx_in_clip, abs_time_sec, is_tear, consumable_label_or_empty)

    Tear frames within ``min_gap_sec`` of the running segment extend it; a
    larger gap (tear or non-tear) closes it. Segments shorter than
    ``min_tear_sec`` are dropped. Each output dict carries index, start/end
    time, duration, the majority consumable label, and start/end frames.
    """

    def _open(t: float, frame_idx: int) -> dict:
        # A fresh segment seeded at the current tear frame.
        return {"t0": t, "t1": t, "f0": frame_idx, "f1": frame_idx, "labels": []}

    candidates: list[dict] = []
    active: dict | None = None

    for frame_idx, t, is_tear, label in rows:
        if not is_tear:
            # Close the active segment only once the silence exceeds the gap.
            if active is not None and t - active["t1"] > min_gap_sec:
                candidates.append(active)
                active = None
            continue
        if active is None:
            active = _open(t, frame_idx)
        elif t - active["t1"] > min_gap_sec:
            candidates.append(active)
            active = _open(t, frame_idx)
        active["t1"] = t
        active["f1"] = frame_idx
        if label:
            active["labels"].append(label)

    if active is not None:
        candidates.append(active)

    result: list[dict] = []
    index = 0
    for seg in candidates:
        duration = seg["t1"] - seg["t0"]
        if duration < min_tear_sec - 1e-9:
            continue  # too short to count as a real tear segment
        index += 1
        labels = seg["labels"]
        consumable = (
            Counter(labels).most_common(1)[0][0] if labels else "(未识别耗材)"
        )
        result.append(
            {
                "index": index,
                "start_sec": seg["t0"],
                "end_sec": seg["t1"],
                "duration_sec": duration,
                "consumable": consumable,
                "start_frame": seg["f0"],
                "end_frame": seg["f1"],
            }
        )
    return result
|
||||
Reference in New Issue
Block a user