refactor: 统一耗材视觉算法并扩展语音确认至全量候选清单

- 以 ConsumableVisionAlgorithmService 替代 consumable_classifier 与 tear_action;
  可选手部检测权重,未配置时全帧分类;时间窗众数与 Excel 白名单配置。
- 语音待确认:ASR 先匹配 pending topk,再匹配本台 candidate_consumables;
  记账 item_id 与 vision 一致使用 name_to_code。
- 更新 config、Compose、.env.example、依赖(pandas/openpyxl)与测试。

Made-with: Cursor
This commit is contained in:
Kevin
2026-04-22 16:31:12 +08:00
parent 4c4550d58b
commit 132702aea9
18 changed files with 791 additions and 476 deletions

View File

@@ -1,197 +0,0 @@
from __future__ import annotations
from dataclasses import dataclass
from io import BytesIO
import os
import sys
from pathlib import Path
from threading import Lock
import numpy as np
from fastapi.concurrency import run_in_threadpool
from loguru import logger
from PIL import Image, UnidentifiedImageError
os.environ["YOLO_CONFIG_DIR"] = "/tmp"
from ultralytics import YOLO
from app.config import settings
def resolve_classifier_inference_device(explicit: str) -> str | None:
"""Ultralytics `device` string. If unset: macOS prefers MPS; Linux/Windows prefer CUDA when available."""
configured = (explicit or "").strip()
if configured:
return configured
try:
import torch
except Exception:
return None
if sys.platform == "darwin":
if torch.backends.mps.is_available():
return "mps"
return None
if torch.cuda.is_available():
return "cuda:0"
return None
@dataclass(frozen=True)
class PredictionCandidate:
    """One ranked classification candidate: class label and its confidence."""

    label: str
    confidence: float
@dataclass(frozen=True)
class PredictionResult:
    """Top-1 prediction plus the ranked top-k candidate list."""

    label: str
    confidence: float
    topk: list[PredictionCandidate]
class ModelNotConfiguredError(RuntimeError):
    """Raised when the model weights path is not configured or the weights file is missing on disk."""
class InvalidImageError(ValueError):
    """Raised when the uploaded payload cannot be decoded as an image (empty or corrupt bytes)."""
class PredictionError(RuntimeError):
    """Raised when the model cannot produce a prediction result."""
class ConsumableClassifierService:
    """Consumable recognition and classification (YOLO-cls).

    Classifies which consumable category is visible in a frame; tear-action
    recognition is handled by the separate ``TearActionService`` model.
    Called from the internal pipeline only — not exposed over HTTP.
    """

    def __init__(self) -> None:
        # Model loads lazily on first prediction; the lock guards the
        # one-time load against concurrent threadpool callers.
        self._model: YOLO | None = None
        self._model_lock = Lock()

    @property
    def weights_path(self) -> Path | None:
        """Configured weights path (user-expanded), or None when unset."""
        if not settings.consumable_classifier_weights:
            return None
        return Path(settings.consumable_classifier_weights).expanduser()

    @property
    def configured(self) -> bool:
        """True when a weights path is configured (file may still be missing)."""
        return self.weights_path is not None

    @property
    def weights_found(self) -> bool:
        """True when the configured weights path exists and is a regular file."""
        path = self.weights_path
        return path is not None and path.is_file()

    @property
    def model_loaded(self) -> bool:
        """True once the YOLO model has been lazily loaded."""
        return self._model is not None

    async def predict_image_bytes(
        self,
        payload: bytes,
        *,
        topk: int | None = None,
    ) -> PredictionResult:
        """Classify an encoded image.

        Runs the blocking decode + inference in a threadpool. Raises
        ModelNotConfiguredError, InvalidImageError, or PredictionError.
        """
        return await run_in_threadpool(self._predict_image_bytes, payload, topk)

    def _predict_image_bytes(
        self,
        payload: bytes,
        topk: int | None,
    ) -> PredictionResult:
        # Blocking implementation: lazy-load model, decode, infer, rank.
        model = self._get_model()
        image = self._decode_image(payload)
        try:
            result = model.predict(
                image,
                imgsz=settings.consumable_classifier_imgsz,
                device=resolve_classifier_inference_device(
                    settings.consumable_classifier_device
                ),
                verbose=False,
            )[0]
        except Exception as exc:  # pragma: no cover - ultralytics runtime errors vary.
            raise PredictionError(
                f"Failed to run consumable classifier inference: {exc}"
            ) from exc
        return self._build_prediction_result(result, model, topk=topk)

    def _get_model(self) -> YOLO:
        """Return the loaded model, loading it on first use (thread-safe)."""
        path = self.weights_path
        if path is None:
            raise ModelNotConfiguredError(
                "Consumable classifier weights are not configured. "
                "Set CONSUMABLE_CLASSIFIER_WEIGHTS."
            )
        path = path.resolve()
        if not path.is_file():
            raise ModelNotConfiguredError(
                f"Consumable classifier weights not found: {path}"
            )
        if self._model is None:
            # Double-checked locking: cheap unlocked read, then re-check under
            # the lock so the weights load exactly once.
            with self._model_lock:
                if self._model is None:
                    logger.info("Loading consumable classifier weights from {}", path)
                    self._model = YOLO(str(path))
        return self._model

    def _decode_image(self, payload: bytes) -> np.ndarray:
        """Decode uploaded bytes into an RGB ndarray; raises InvalidImageError."""
        if not payload:
            raise InvalidImageError("Uploaded image is empty.")
        try:
            with Image.open(BytesIO(payload)) as image:
                return np.asarray(image.convert("RGB"))
        except (UnidentifiedImageError, OSError) as exc:
            raise InvalidImageError("Uploaded file is not a valid image.") from exc

    def _build_prediction_result(
        self,
        result: object,
        model: YOLO,
        *,
        topk: int | None,
    ) -> PredictionResult:
        """Rank class probabilities into a PredictionResult (top-1 + top-k)."""
        probs = getattr(result, "probs", None)
        data = getattr(probs, "data", None)
        if probs is None or data is None:
            raise PredictionError("Model did not return classification probabilities.")
        scores = data.tolist()
        if not isinstance(scores, list):
            # Single-class model: tolist() returns a bare scalar.
            scores = [float(scores)]
        names = self._names(model)
        # topk=None/0 falls back to the configured default; never below 1.
        limit = max(1, topk or settings.consumable_classifier_topk)
        ranked = sorted(
            ((index, float(score)) for index, score in enumerate(scores)),
            key=lambda item: item[1],
            reverse=True,
        )[:limit]
        if not ranked:
            raise PredictionError("Model returned an empty prediction result.")
        candidates = [
            PredictionCandidate(
                label=names.get(index, str(index)),
                confidence=confidence,
            )
            for index, confidence in ranked
        ]
        return PredictionResult(
            label=candidates[0].label,
            confidence=candidates[0].confidence,
            topk=candidates,
        )

    def _names(self, model: YOLO) -> dict[int, str]:
        """Class-index -> class-name mapping from the underlying model."""
        raw = getattr(model.model, "names", None) or {}
        return {int(key): str(value) for key, value in raw.items()}

View File

@@ -0,0 +1,392 @@
"""手术室耗材视觉算法:可选手部检测 ROI + YOLO-cls原离线双机位流水线核心逻辑
作为 FastAPI 内唯一的视频推理入口;撕扯动作分类已移除,由手部检测 + 耗材分类替代。
"""
from __future__ import annotations
import os
import sys
from collections import Counter
from dataclasses import dataclass
from pathlib import Path
from threading import Lock
import numpy as np
import pandas as pd
from loguru import logger
from ultralytics import YOLO
from app.config import Settings, settings
os.environ["YOLO_CONFIG_DIR"] = "/tmp"
def resolve_inference_device(explicit: str) -> str | None:
"""Ultralytics `device`;空则 macOS 优先 MPSLinux/Windows 优先 CUDA。"""
configured = (explicit or "").strip()
if configured:
return configured
try:
import torch
except Exception:
return None
if sys.platform == "darwin":
if torch.backends.mps.is_available():
return "mps"
return None
if torch.cuda.is_available():
return "cuda:0"
return None
@dataclass(frozen=True)
class PredictionCandidate:
    """One ranked classification candidate: class label and its confidence."""

    label: str
    confidence: float
@dataclass(frozen=True)
class PredictionResult:
    """Top-1 prediction plus the ranked top-k candidate list."""

    label: str
    confidence: float
    topk: list[PredictionCandidate]
class ModelNotConfiguredError(RuntimeError):
    """Raised when model weights are not configured or the weights file does not exist."""
class PredictionError(RuntimeError):
    """Raised when model inference fails."""
@dataclass
class ClsTop3:
    """Top-3 classification snapshot for one frame.

    ``tN_name``/``tN_conf`` hold the rank-N class name and confidence
    (empty string / 0.0 when the model returns fewer than three classes);
    ``tN_pid`` is the mapped product code (empty string when unmapped).
    """

    t1_name: str
    t1_conf: float
    t2_name: str
    t2_conf: float
    t3_name: str
    t3_conf: float
    t1_pid: str
    t2_pid: str
    t3_pid: str
def _find_col(df: pd.DataFrame, want: str) -> str | None:
want = want.strip()
for c in df.columns:
if str(c).strip() == want:
return str(c)
return None
def _norm_product_name(name: str) -> str:
s = (name or "").strip()
if s == "一次性医用垫单":
return "一次性使用手术单(一次性医用垫单)"
return s
def load_name_to_product_code(xlsx: Path) -> dict[str, str]:
    """Load the product catalog: product name -> product code.

    Keys are normalized via ``_norm_product_name`` (these keys form the
    whitelist used by the vision pipeline). Rows with a blank/NaN name or
    code are skipped. When one name maps to several distinct codes, the
    first mapping wins and the conflict is logged.

    Raises ValueError when the expected columns are missing.
    """
    df = pd.read_excel(xlsx, sheet_name=0)
    c_code = _find_col(df, "产品编码")
    c_name = _find_col(df, "商品名称")
    if c_code is None or c_name is None:
        raise ValueError("Excel 缺少「产品编码」或「商品名称」列")
    m: dict[str, str] = {}
    dups: set[str] = set()
    for _, row in df.iterrows():
        raw = row.get(c_name)
        if raw is None or (isinstance(raw, float) and pd.isna(raw)):
            continue
        n = _norm_product_name(str(raw).strip())
        if not n:
            continue
        code = row.get(c_code)
        if code is None or (isinstance(code, float) and pd.isna(code)):
            continue
        sc = str(code).strip()
        if n in m and m[n] != sc:
            # A different code for an already-mapped name: keep the first
            # mapping, remember the conflict for the log.
            dups.add(n)
            continue
        if n not in m:
            m[n] = sc
    if dups:
        # Fix: names were joined with an empty separator (unreadable blob) and
        # the truncation branch appended "" either way; join with ", " and
        # mark truncation with an ellipsis when more than 12 names conflict.
        logger.warning(
            "Excel 中以下商品名称对应多组产品编码,已保留首次映射: {}",
            ", ".join(sorted(dups)[:12]) + ("…" if len(dups) > 12 else ""),
        )
    return m
def collect_hand_boxes(model: YOLO, boxes) -> list[tuple[float, float, float, float]]:
    """Collect hand bounding boxes (x1, y1, x2, y2) from a detection result.

    Boxes whose class name contains "hand" (case-insensitive) or matches a
    known hand label are kept. If detections exist but none match, every box
    is returned — a single-class detector may not use a "hand" label at all.
    Returns [] when *boxes* is None or empty.
    """
    if boxes is None or len(boxes) == 0:
        return []
    coords = boxes.xyxy.cpu().numpy()
    class_ids = boxes.cls.cpu().numpy().astype(int)
    label_map = model.names
    selected: list[tuple[float, float, float, float]] = []
    for idx, cid in enumerate(class_ids):
        lbl = str(label_map.get(int(cid), "")).strip().lower()
        if "hand" in lbl or lbl in {"", "手部"}:
            selected.append(tuple(float(v) for v in coords[idx]))
    if selected:
        return selected
    # Single-class detector with no "hand"-like label: keep every box.
    return [tuple(float(v) for v in row) for row in coords]
def union_boxes(
boxes: list[tuple[float, float, float, float]],
) -> tuple[float, float, float, float]:
xs1, ys1, xs2, ys2 = zip(*boxes, strict=True)
return min(xs1), min(ys1), max(xs2), max(ys2)
def pad_box(
    box: tuple[float, float, float, float],
    w: int,
    h: int,
    pad_ratio: float,
) -> tuple[int, int, int, int]:
    """Expand *box* by ``pad_ratio`` of its size on each side, clamped to the
    frame [0, w] x [0, h], and truncate coordinates to ints."""
    left, top, right, bottom = box
    dx = (right - left) * pad_ratio
    dy = (bottom - top) * pad_ratio
    return (
        int(max(0, left - dx)),
        int(max(0, top - dy)),
        int(min(w, right + dx)),
        int(min(h, bottom + dy)),
    )
def cls_top3_from_result(
    cls: YOLO, r, name_to_code: dict[str, str]
) -> ClsTop3 | None:
    """Extract a top-3 snapshot from an Ultralytics classification result.

    Returns None when the result carries no usable probabilities. Product
    ids are resolved through *name_to_code* (empty string when unmapped).
    *r* is the list returned by ``model.predict`` — assumes ``r[0].probs``
    follows the ultralytics Probs interface (top1/top5/top1conf/top5conf).
    """
    pr = r[0].probs
    if pr is None or not hasattr(pr, "top5") or not pr.top5:
        return None
    t5i = list(pr.top5)
    tc = pr.top5conf
    if tc is None:
        return None

    def _ci(i: int) -> float:
        # Defensive read of top5conf[i]; tolerates tensors (via .item()),
        # short sequences, and non-numeric entries by returning 0.0.
        if i < 0 or i >= len(tc):
            return 0.0
        try:
            v = tc[i]
            return float(v.item() if hasattr(v, "item") else v)
        except (IndexError, ValueError, TypeError):
            return 0.0

    t1i = int(pr.top1)
    # Prefer top5conf[0] when top5[0] is the top-1 class; otherwise fall back
    # to top1conf directly (the two can disagree on some model outputs).
    c1 = _ci(0) if t5i and int(t5i[0]) == t1i else float(
        pr.top1conf.item() if hasattr(pr.top1conf, "item") else pr.top1conf
    )
    n1 = str(cls.names.get(t1i, "")).strip()
    n2 = n3 = ""
    c2 = c3 = 0.0
    if len(t5i) > 1:
        n2 = str(cls.names.get(int(t5i[1]), "")).strip()
        c2 = _ci(1)
    if len(t5i) > 2:
        n3 = str(cls.names.get(int(t5i[2]), "")).strip()
        c3 = _ci(2)
    return ClsTop3(
        t1_name=n1,
        t1_conf=c1,
        t2_name=n2,
        t2_conf=c2,
        t3_name=n3,
        t3_conf=c3,
        t1_pid=name_to_code.get(n1, ""),
        t2_pid=name_to_code.get(n2, ""),
        t3_pid=name_to_code.get(n3, ""),
    )
def cls_top3_to_prediction_result(snap: ClsTop3) -> PredictionResult:
    """Convert a ClsTop3 snapshot into the pipeline's PredictionResult.

    Only ranks with a non-empty name become candidates; a placeholder
    ("", 0.0) candidate is used when none exist.
    """
    ranked = (
        (snap.t1_name, snap.t1_conf),
        (snap.t2_name, snap.t2_conf),
        (snap.t3_name, snap.t3_conf),
    )
    candidates = [PredictionCandidate(name, conf) for name, conf in ranked if name]
    if not candidates:
        candidates = [PredictionCandidate("", 0.0)]
    return PredictionResult(
        label=snap.t1_name,
        confidence=snap.t1_conf,
        topk=candidates,
    )
def _mode_lex(names: list[str]) -> str | None:
if not names:
return None
c = Counter(names)
best = max(c.values())
pool = [n for n, k in c.items() if k == best]
return min(pool)
def window_bucket_to_best_snap(
    bucket_pts: list[tuple[str, ClsTop3]],
) -> ClsTop3 | None:
    """Within one time window: pick the majority class name, then return the
    snapshot with the highest top-1 confidence among that class (first wins on
    ties). None when the bucket is empty."""
    winner = _mode_lex([name for name, _ in bucket_pts])
    if winner is None:
        return None
    matching = [snap for name, snap in bucket_pts if name == winner]
    return max(matching, key=lambda s: s.t1_conf) if matching else None
class ConsumableVisionAlgorithmService:
    """Hand detection (optional) + consumable classification.

    Called by ``CameraSessionManager`` on the video worker thread. When hand
    detection weights are configured, classification runs on a padded crop
    around the detected hands; otherwise the whole frame is classified.
    """

    def __init__(self, app_settings: Settings | None = None) -> None:
        self._s = app_settings or settings
        # Lazily-loaded YOLO models; one lock per model guards the first load.
        self._det: YOLO | None = None
        self._cls: YOLO | None = None
        self._det_lock = Lock()
        self._cls_lock = Lock()

    def build_name_mapping(
        self, candidate_consumables: list[str]
    ) -> dict[str, str]:
        """Classifier label -> business item id (Excel product code, or the
        normalized name itself when no catalog is configured).

        Note: when the catalog file exists, candidates absent from it are
        dropped — the returned keys also act as the vision whitelist.
        """
        stripped = [_norm_product_name(c.strip()) for c in candidate_consumables if c.strip()]
        candidates_norm = {n: n for n in stripped}
        xlsx_raw = (self._s.consumable_catalog_xlsx_path or "").strip()
        if xlsx_raw:
            path = Path(xlsx_raw).expanduser()
            if path.is_file():
                full = load_name_to_product_code(path)
                out: dict[str, str] = {}
                for norm in candidates_norm:
                    if norm in full:
                        out[norm] = full[norm]
                return out
            logger.warning("耗材目录 Excel 路径已配置但文件不存在: {}", path)
        return {n: n for n in candidates_norm}

    def _det_weights(self) -> Path | None:
        """Hand-detection weights path, or None when unset or missing on disk."""
        raw = (self._s.hand_detection_weights or "").strip()
        if not raw:
            return None
        p = Path(raw).expanduser()
        return p if p.is_file() else None

    def _cls_weights(self) -> Path:
        """Classifier weights path; raises ModelNotConfiguredError when unset or missing."""
        raw = (self._s.consumable_classifier_weights or "").strip()
        if not raw:
            raise ModelNotConfiguredError(
                "未配置耗材分类权重。请设置 CONSUMABLE_CLASSIFIER_WEIGHTS。"
            )
        p = Path(raw).expanduser().resolve()
        if not p.is_file():
            raise ModelNotConfiguredError(f"耗材分类权重不存在: {p}")
        return p

    def _get_det(self) -> YOLO | None:
        """Lazily load the hand detector; None when detection is not configured."""
        path = self._det_weights()
        if path is None:
            return None
        if self._det is None:
            # Double-checked locking so the weights load only once.
            with self._det_lock:
                if self._det is None:
                    logger.info("加载手部检测权重: {}", path)
                    self._det = YOLO(str(path))
        return self._det

    def _get_cls(self) -> YOLO:
        """Lazily load the consumable classifier (required, unlike the detector)."""
        if self._cls is None:
            with self._cls_lock:
                if self._cls is None:
                    path = self._cls_weights()
                    logger.info("加载耗材分类权重: {}", path)
                    self._cls = YOLO(str(path))
        return self._cls

    def hand_crop(
        self,
        frame: np.ndarray,
        det_model: YOLO,
        *,
        det_conf: float,
        pad_ratio: float,
        min_crop_px: int,
        imgsz_det: int,
    ) -> np.ndarray | None:
        """Detect hands and return the padded union crop of *frame*.

        Returns None when no hands are detected or the resulting crop is
        smaller than *min_crop_px* in either dimension.
        """
        h, w = frame.shape[:2]
        device = resolve_inference_device(self._s.hand_detection_device)
        results = det_model.predict(
            frame,
            conf=det_conf,
            imgsz=imgsz_det,
            device=device,
            verbose=False,
        )
        hand_xyxys = collect_hand_boxes(det_model, results[0].boxes)
        if not hand_xyxys:
            return None
        merged = union_boxes(hand_xyxys)
        cx1, cy1, cx2, cy2 = pad_box(merged, w, h, pad_ratio)
        if (cx2 - cx1) < min_crop_px or (cy2 - cy1) < min_crop_px:
            return None
        return frame[cy1:cy2, cx1:cx2]

    def infer_frame_bgr(
        self,
        frame: np.ndarray,
        name_to_code: dict[str, str],
    ) -> ClsTop3 | None:
        """Classify a single BGR frame.

        Returns a ClsTop3 snapshot only when the top-1 class passes the
        configured confidence floor AND is in the whitelist (the keys of
        *name_to_code*); otherwise None. Raises PredictionError when the
        classifier call itself fails.
        """
        whitelist = set(name_to_code.keys())
        det_model = self._get_det()
        cls_model = self._get_cls()
        if det_model is not None:
            crop = self.hand_crop(
                frame,
                det_model,
                det_conf=self._s.hand_detection_conf,
                pad_ratio=self._s.hand_detection_pad_ratio,
                min_crop_px=self._s.hand_detection_min_crop_px,
                imgsz_det=self._s.hand_detection_imgsz,
            )
            if crop is None:
                # No usable hand ROI — skip this frame entirely.
                return None
        else:
            # No detector configured: classify the full frame.
            crop = frame
        device = resolve_inference_device(self._s.consumable_classifier_device)
        try:
            r = cls_model.predict(
                crop,
                imgsz=self._s.consumable_classifier_imgsz,
                device=device,
                verbose=False,
            )
        except Exception as exc:
            raise PredictionError(f"耗材分类推理失败: {exc}") from exc
        snap = cls_top3_from_result(cls_model, r, name_to_code)
        if snap is None:
            return None
        if snap.t1_conf < self._s.consumable_min_cls_confidence:
            return None
        pname = snap.t1_name
        if not pname or pname not in whitelist:
            return None
        return snap

View File

@@ -1,155 +0,0 @@
from __future__ import annotations
from io import BytesIO
import os
from pathlib import Path
from threading import Lock
import numpy as np
from fastapi.concurrency import run_in_threadpool
from loguru import logger
from PIL import Image, UnidentifiedImageError
os.environ["YOLO_CONFIG_DIR"] = "/tmp"
from ultralytics import YOLO
from app.config import settings
from app.services.consumable_classifier import (
InvalidImageError,
ModelNotConfiguredError,
PredictionCandidate,
PredictionError,
PredictionResult,
resolve_classifier_inference_device,
)
class TearActionService:
    """Tear-action recognition (separate weights).

    Detects whether/how a consumable is being torn open; kept separate from
    the consumable-category model ``ConsumableClassifierService``. Called from
    the internal pipeline only — not exposed over HTTP.
    """

    def __init__(self) -> None:
        # Model loads lazily on first prediction; the lock guards the
        # one-time load against concurrent threadpool callers.
        self._model: YOLO | None = None
        self._model_lock = Lock()

    @property
    def weights_path(self) -> Path | None:
        """Configured weights path (user-expanded), or None when unset."""
        if not settings.tear_action_weights:
            return None
        return Path(settings.tear_action_weights).expanduser()

    @property
    def configured(self) -> bool:
        """True when a weights path is configured (file may still be missing)."""
        return self.weights_path is not None

    @property
    def weights_found(self) -> bool:
        """True when the configured weights path exists and is a regular file."""
        path = self.weights_path
        return path is not None and path.is_file()

    @property
    def model_loaded(self) -> bool:
        """True once the YOLO model has been lazily loaded."""
        return self._model is not None

    async def predict_image_bytes(
        self,
        payload: bytes,
        *,
        topk: int | None = None,
    ) -> PredictionResult:
        """Classify an encoded image; runs blocking inference in a threadpool.

        Raises ModelNotConfiguredError, InvalidImageError, or PredictionError.
        """
        return await run_in_threadpool(self._predict_image_bytes, payload, topk)

    def _predict_image_bytes(
        self,
        payload: bytes,
        topk: int | None,
    ) -> PredictionResult:
        # Blocking implementation: lazy-load model, decode, infer, rank.
        model = self._get_model()
        image = self._decode_image(payload)
        try:
            result = model.predict(
                image,
                imgsz=settings.tear_action_imgsz,
                device=resolve_classifier_inference_device(settings.tear_action_device),
                verbose=False,
            )[0]
        except Exception as exc:  # pragma: no cover
            raise PredictionError(
                f"Failed to run tear-action inference: {exc}"
            ) from exc
        return self._build_prediction_result(result, model, topk=topk)

    def _get_model(self) -> YOLO:
        """Return the loaded model, loading it on first use (thread-safe)."""
        path = self.weights_path
        if path is None:
            raise ModelNotConfiguredError(
                "Tear-action weights are not configured. Set TEAR_ACTION_WEIGHTS."
            )
        path = path.resolve()
        if not path.is_file():
            raise ModelNotConfiguredError(f"Tear-action weights not found: {path}")
        if self._model is None:
            # Double-checked locking: cheap unlocked read, then re-check under
            # the lock so the weights load exactly once.
            with self._model_lock:
                if self._model is None:
                    logger.info("Loading tear-action weights from {}", path)
                    self._model = YOLO(str(path))
        return self._model

    def _decode_image(self, payload: bytes) -> np.ndarray:
        """Decode uploaded bytes into an RGB ndarray; raises InvalidImageError."""
        if not payload:
            raise InvalidImageError("Uploaded image is empty.")
        try:
            with Image.open(BytesIO(payload)) as image:
                return np.asarray(image.convert("RGB"))
        except (UnidentifiedImageError, OSError) as exc:
            raise InvalidImageError("Uploaded file is not a valid image.") from exc

    def _build_prediction_result(
        self,
        result: object,
        model: YOLO,
        *,
        topk: int | None,
    ) -> PredictionResult:
        """Rank class probabilities into a PredictionResult (top-1 + top-k)."""
        probs = getattr(result, "probs", None)
        data = getattr(probs, "data", None)
        if probs is None or data is None:
            raise PredictionError("Model did not return classification probabilities.")
        scores = data.tolist()
        if not isinstance(scores, list):
            # Single-class model: tolist() returns a bare scalar.
            scores = [float(scores)]
        names = self._names(model)
        # topk=None/0 falls back to the configured default; never below 1.
        limit = max(1, topk or settings.tear_action_topk)
        ranked = sorted(
            ((index, float(score)) for index, score in enumerate(scores)),
            key=lambda item: item[1],
            reverse=True,
        )[:limit]
        if not ranked:
            raise PredictionError("Model returned an empty prediction result.")
        candidates = [
            PredictionCandidate(
                label=names.get(index, str(index)),
                confidence=confidence,
            )
            for index, confidence in ranked
        ]
        return PredictionResult(
            label=candidates[0].label,
            confidence=candidates[0].confidence,
            topk=candidates,
        )

    def _names(self, model: YOLO) -> dict[int, str]:
        """Class-index -> class-name mapping from the underlying model."""
        raw = getattr(model.model, "names", None) or {}
        return {int(key): str(value) for key, value in raw.items()}

View File

@@ -13,14 +13,16 @@ from app.config import Settings
from app.database import AsyncSessionLocal
from app.repositories.surgery_results import SurgeryResultRepository
from app.schemas import SurgeryConsumptionDetail
from app.services.consumable_classifier import (
ConsumableClassifierService,
from app.services.consumable_vision_algorithm import (
ClsTop3,
ConsumableVisionAlgorithmService,
PredictionCandidate,
PredictionResult,
_norm_product_name,
cls_top3_to_prediction_result,
window_bucket_to_best_snap,
)
from app.services.tear_action import TearActionService
from app.services.video.backend_resolver import BackendResolver
from app.services.video.frame_encode import frame_to_jpeg_bytes
from app.services.video.hikvision_runtime import HikvisionInitRefCount, HikvisionRuntime
from app.services.video.rtsp_capture import RtspCapture
from app.services.video.types import VideoBackendKind
@@ -41,9 +43,21 @@ class PendingConsumableConfirmation:
model_top1_confidence: float
@dataclass
class CameraStreamInferState:
"""单路视频上的时间窗投票(与离线算法一致)。"""
votes: list[tuple[float, str, ClsTop3]] = field(default_factory=list)
stream_t0: float | None = None
next_bucket: int = 0
@dataclass
class SurgerySessionState:
candidate_consumables: list[str]
#: 分类类名(归一化) -> 业务物品 idExcel 产品编码或名称)。
name_to_code: dict[str, str] = field(default_factory=dict)
camera_infer: dict[str, CameraStreamInferState] = field(default_factory=dict)
details: list[SurgeryConsumptionDetail] = field(default_factory=list)
lock: asyncio.Lock = field(default_factory=asyncio.Lock)
ready: asyncio.Event = field(default_factory=asyncio.Event)
@@ -94,14 +108,12 @@ class CameraSessionManager:
self,
*,
settings: Settings,
consumable_classifier: ConsumableClassifierService,
tear_action: TearActionService,
vision_algorithm: ConsumableVisionAlgorithmService,
hikvision_runtime: HikvisionRuntime | None,
result_repository: SurgeryResultRepository | None = None,
) -> None:
self._s = settings
self._classifier = consumable_classifier
self._tear = tear_action
self._vision = vision_algorithm
self._hik = hikvision_runtime
self._repo = result_repository
self._resolver = BackendResolver(settings, hikvision_runtime=hikvision_runtime)
@@ -221,8 +233,10 @@ class CameraSessionManager:
"该手术号存在尚未写入数据库的历史结果,请修复数据库或等待自动重试成功后再开始。",
)
name_to_code = self._vision.build_name_mapping(candidate_consumables)
state = SurgerySessionState(
candidate_consumables=list(candidate_consumables),
name_to_code=name_to_code,
)
stop_event = asyncio.Event()
readies = [asyncio.Event() for _ in camera_ids]
@@ -388,6 +402,12 @@ class CameraSessionManager:
return None
return p
def get_surgery_candidate_consumables(self, surgery_id: str) -> list[str]:
    """Candidate consumable list supplied when this surgery was started.

    Voice confirmation may pick any item from this list — not only the
    model's top-k options. Returns a defensive copy; empty list when the
    surgery id is not currently active.
    """
    if surgery_id not in self._active:
        return []
    return list(self._active[surgery_id].state.candidate_consumables)
def next_pending_confirmation(
self, surgery_id: str
) -> PendingConsumableConfirmation | None:
@@ -436,20 +456,23 @@ class CameraSessionManager:
"CONFIRMATION_INVALID",
"请提供 chosen_label 或设置 rejected=true。",
)
allowed = {lbl.strip() for lbl, _ in pending.options if lbl.strip()}
allowed_pending = {lbl.strip() for lbl, _ in pending.options if lbl.strip()}
allowed_surgery = {c.strip() for c in st.candidate_consumables if c.strip()}
if rejected:
pending.status = "rejected"
else:
label = chosen_label.strip() if chosen_label else ""
if label not in allowed:
if label not in allowed_pending and label not in allowed_surgery:
raise SurgeryPipelineError(
"CONFIRMATION_INVALID",
f"所选耗材不在候选列表中:{chosen_label!r}",
f"所选耗材不在本台手术候选清单或本次追问选项中:{chosen_label!r}",
)
pending.status = "confirmed"
norm = _norm_product_name(label)
item_id = st.name_to_code.get(norm, label)
self._append_confirmed_detail_locked(
state=st,
item_id=label,
item_id=item_id,
item_name=label,
doctor_id=self._s.video_voice_confirm_doctor_id,
source="voice",
@@ -582,13 +605,11 @@ class CameraSessionManager:
continue
last_infer = now
try:
jpeg = await asyncio.to_thread(
frame_to_jpeg_bytes,
snap = await asyncio.to_thread(
self._vision.infer_frame_bgr,
frame,
quality=self._s.video_jpeg_quality,
state.name_to_code,
)
cls_res = await self._classifier.predict_image_bytes(jpeg)
tear_res = await self._tear.predict_image_bytes(jpeg)
except Exception as exc:
logger.debug(
"Inference skip camera={} surgery={}: {}",
@@ -598,11 +619,45 @@ class CameraSessionManager:
)
continue
await self._handle_classification_result(
state=state,
cls_res=cls_res,
tear_label=tear_res.label,
)
if snap is None:
continue
wsec = self._s.consumable_vision_window_sec
pending_preds: list[PredictionResult] = []
async with state.lock:
cis = state.camera_infer.setdefault(
camera_id, CameraStreamInferState()
)
if cis.stream_t0 is None:
cis.stream_t0 = time.monotonic()
t_rel = time.monotonic() - cis.stream_t0
cis.votes.append((t_rel, snap.t1_name, snap))
current_b = int(t_rel // wsec)
while cis.next_bucket < current_b:
b = cis.next_bucket
cis.next_bucket += 1
lo, hi = b * wsec, (b + 1) * wsec
bucket_pts = [
(p, sn) for (t, p, sn) in cis.votes if lo <= t < hi
]
cis.votes = [
(t, p, sn)
for (t, p, sn) in cis.votes
if not (lo <= t < hi)
]
if not bucket_pts:
continue
best = window_bucket_to_best_snap(bucket_pts)
if best is not None:
pending_preds.append(
cls_top3_to_prediction_result(best)
)
for cls_res in pending_preds:
await self._handle_classification_result(
state=state,
cls_res=cls_res,
)
finally:
if cap is not None:
await asyncio.to_thread(cap.release)
@@ -616,11 +671,10 @@ class CameraSessionManager:
*,
state: SurgerySessionState,
cls_res: PredictionResult,
tear_label: str,
) -> None:
_ = tear_label
conf = cls_res.confidence
label = (cls_res.label or "").strip()
item_id = state.name_to_code.get(label, label)
voice_floor = self._s.video_voice_confirm_min_confidence
if conf < voice_floor:
return
@@ -639,7 +693,7 @@ class CameraSessionManager:
if conf >= auto_th and in_allowed(label):
await self._append_confirmed_detail(
state=state,
item_id=label or "unknown",
item_id=item_id or label or "unknown",
item_name=label or "unknown",
doctor_id=self._s.video_result_doctor_id,
source="vision",

View File

@@ -78,6 +78,25 @@ def parse_voice_choice(asr_text: str, options: list[str]) -> str | None:
return None
def match_voice_choice_against_candidates(
    asr_text: str, candidates: list[str]
) -> str | None:
    """Fallback matching when the ASR text did not match the pending top-k
    phrasing: substring-match against the surgery's full candidate list.

    Longer names are tried first so a short name cannot shadow a longer full
    product name that also appears in the transcript.
    """
    text = (asr_text or "").strip()
    if not text:
        return None
    names = [c.strip() for c in candidates if c and str(c).strip()]
    names.sort(key=len, reverse=True)
    return next((name for name in names if name in text), None)
def is_rejection_phrase(asr_text: str) -> bool:
"""医生明确否认全部候选时返回 True须在 parse_voice_choice 之前调用)。"""
raw = (asr_text or "").strip()
@@ -88,7 +107,10 @@ def is_rejection_phrase(asr_text: str) -> bool:
def build_prompt_text(options: list[tuple[str, float]]) -> str:
parts = ["请确认刚才使用的耗材是下面哪一项,可以说序号或名称。"]
parts = [
"请确认刚才使用的耗材是下面哪一项,可以说序号或名称;"
"若是清单内其它耗材,也可以直接说该耗材名称。"
]
for i, (name, _conf) in enumerate(options, start=1):
parts.append(f"{i}个,{name}")
parts.append("若都不是请说不是。")

View File

@@ -15,7 +15,11 @@ from app.services.audio_wav import WavDecodeError, wav_bytes_to_pcm16k_mono_s16l
from app.services.baidu_speech import BaiduSpeechNotConfiguredError, BaiduSpeechService
from app.services.minio_audio_storage import MinioAudioStorageService, StoredAudio
from app.services.video.session_manager import CameraSessionManager
from app.services.voice_confirm import is_rejection_phrase, parse_voice_choice
from app.services.voice_confirm import (
is_rejection_phrase,
match_voice_choice_against_candidates,
parse_voice_choice,
)
from app.surgery_errors import SurgeryPipelineError
@@ -256,9 +260,19 @@ class VoiceConfirmationService:
chosen: str | None = None
if not rejected:
chosen = parse_voice_choice(text, option_labels)
if chosen is None:
surgery_candidates = self._sessions.get_surgery_candidate_consumables(
surgery_id
)
chosen = match_voice_choice_against_candidates(
text, surgery_candidates
)
if not rejected and not chosen:
msg = "无法从语音中匹配候选项,请重试或说「不是」否认全部"
msg = (
"无法从语音中匹配候选项或本台手术候选清单中的耗材名称,"
"请重试或说「不是」否认全部"
)
await self._persist_audit(
surgery_id=surgery_id,
confirmation_id=confirmation_id,