refactor: 统一耗材视觉算法并扩展语音确认至全量候选清单

- 以 ConsumableVisionAlgorithmService 替代 consumable_classifier 与 tear_action;
  可选手部检测权重,未配置时全帧分类;时间窗众数与 Excel 白名单配置。
- 语音待确认:ASR 先匹配 pending topk,再匹配本台 candidate_consumables;
  记账 item_id 与 vision 一致使用 name_to_code。
- 更新 config、Compose、.env.example、依赖(pandas/openpyxl)与测试。

Made-with: Cursor
This commit is contained in:
Kevin
2026-04-22 16:31:12 +08:00
parent 4c4550d58b
commit 132702aea9
18 changed files with 791 additions and 476 deletions

View File

@@ -13,14 +13,16 @@ from app.config import Settings
from app.database import AsyncSessionLocal
from app.repositories.surgery_results import SurgeryResultRepository
from app.schemas import SurgeryConsumptionDetail
from app.services.consumable_classifier import (
ConsumableClassifierService,
from app.services.consumable_vision_algorithm import (
ClsTop3,
ConsumableVisionAlgorithmService,
PredictionCandidate,
PredictionResult,
_norm_product_name,
cls_top3_to_prediction_result,
window_bucket_to_best_snap,
)
from app.services.tear_action import TearActionService
from app.services.video.backend_resolver import BackendResolver
from app.services.video.frame_encode import frame_to_jpeg_bytes
from app.services.video.hikvision_runtime import HikvisionInitRefCount, HikvisionRuntime
from app.services.video.rtsp_capture import RtspCapture
from app.services.video.types import VideoBackendKind
@@ -41,9 +43,21 @@ class PendingConsumableConfirmation:
model_top1_confidence: float
@dataclass
class CameraStreamInferState:
    """Sliding time-window voting state for one video stream (kept consistent with the offline algorithm)."""

    #: Pending votes not yet assigned to a closed window bucket, as
    #: (seconds since stream_t0, top-1 class name, full top-3 snapshot).
    votes: list[tuple[float, str, ClsTop3]] = field(default_factory=list)
    #: Monotonic-clock timestamp of the first inference on this stream;
    #: None until the first frame has been inferred.
    stream_t0: float | None = None
    #: Index of the next window bucket to close and run majority voting on.
    next_bucket: int = 0
@dataclass
class SurgerySessionState:
    """Shared mutable state for one active surgery session."""

    #: Consumable names allowed for this surgery; voice confirmation may
    #: choose any item from this list, not only the model's top-k options.
    candidate_consumables: list[str]
    #: Normalized classifier class name -> business item id (Excel product code or name).
    name_to_code: dict[str, str] = field(default_factory=dict)
    #: Per-camera time-window voting state, keyed by camera id.
    camera_infer: dict[str, CameraStreamInferState] = field(default_factory=dict)
    #: Confirmed consumption details accumulated during the session.
    details: list[SurgeryConsumptionDetail] = field(default_factory=list)
    #: Guards concurrent mutation of this state across camera tasks.
    lock: asyncio.Lock = field(default_factory=asyncio.Lock)
    #: Readiness signal for the session — set elsewhere in the manager (not shown here).
    ready: asyncio.Event = field(default_factory=asyncio.Event)
@@ -94,14 +108,12 @@ class CameraSessionManager:
self,
*,
settings: Settings,
consumable_classifier: ConsumableClassifierService,
tear_action: TearActionService,
vision_algorithm: ConsumableVisionAlgorithmService,
hikvision_runtime: HikvisionRuntime | None,
result_repository: SurgeryResultRepository | None = None,
) -> None:
self._s = settings
self._classifier = consumable_classifier
self._tear = tear_action
self._vision = vision_algorithm
self._hik = hikvision_runtime
self._repo = result_repository
self._resolver = BackendResolver(settings, hikvision_runtime=hikvision_runtime)
@@ -221,8 +233,10 @@ class CameraSessionManager:
"该手术号存在尚未写入数据库的历史结果,请修复数据库或等待自动重试成功后再开始。",
)
name_to_code = self._vision.build_name_mapping(candidate_consumables)
state = SurgerySessionState(
candidate_consumables=list(candidate_consumables),
name_to_code=name_to_code,
)
stop_event = asyncio.Event()
readies = [asyncio.Event() for _ in camera_ids]
@@ -388,6 +402,12 @@ class CameraSessionManager:
return None
return p
def get_surgery_candidate_consumables(self, surgery_id: str) -> list[str]:
    """Return the consumable candidate list supplied when this surgery started.

    Voice confirmation may pick any item from this list, not only the
    model's top-k suggestions. An empty list is returned when the surgery
    id has no active session; the result is always a fresh copy, so
    callers may mutate it freely.
    """
    active_sessions = self._active
    if surgery_id in active_sessions:
        return list(active_sessions[surgery_id].state.candidate_consumables)
    return []
def next_pending_confirmation(
self, surgery_id: str
) -> PendingConsumableConfirmation | None:
@@ -436,20 +456,23 @@ class CameraSessionManager:
"CONFIRMATION_INVALID",
"请提供 chosen_label 或设置 rejected=true。",
)
allowed = {lbl.strip() for lbl, _ in pending.options if lbl.strip()}
allowed_pending = {lbl.strip() for lbl, _ in pending.options if lbl.strip()}
allowed_surgery = {c.strip() for c in st.candidate_consumables if c.strip()}
if rejected:
pending.status = "rejected"
else:
label = chosen_label.strip() if chosen_label else ""
if label not in allowed:
if label not in allowed_pending and label not in allowed_surgery:
raise SurgeryPipelineError(
"CONFIRMATION_INVALID",
f"所选耗材不在候选列表中:{chosen_label!r}",
f"所选耗材不在本台手术候选清单或本次追问选项中:{chosen_label!r}",
)
pending.status = "confirmed"
norm = _norm_product_name(label)
item_id = st.name_to_code.get(norm, label)
self._append_confirmed_detail_locked(
state=st,
item_id=label,
item_id=item_id,
item_name=label,
doctor_id=self._s.video_voice_confirm_doctor_id,
source="voice",
@@ -582,13 +605,11 @@ class CameraSessionManager:
continue
last_infer = now
try:
jpeg = await asyncio.to_thread(
frame_to_jpeg_bytes,
snap = await asyncio.to_thread(
self._vision.infer_frame_bgr,
frame,
quality=self._s.video_jpeg_quality,
state.name_to_code,
)
cls_res = await self._classifier.predict_image_bytes(jpeg)
tear_res = await self._tear.predict_image_bytes(jpeg)
except Exception as exc:
logger.debug(
"Inference skip camera={} surgery={}: {}",
@@ -598,11 +619,45 @@ class CameraSessionManager:
)
continue
await self._handle_classification_result(
state=state,
cls_res=cls_res,
tear_label=tear_res.label,
)
if snap is None:
continue
wsec = self._s.consumable_vision_window_sec
pending_preds: list[PredictionResult] = []
async with state.lock:
cis = state.camera_infer.setdefault(
camera_id, CameraStreamInferState()
)
if cis.stream_t0 is None:
cis.stream_t0 = time.monotonic()
t_rel = time.monotonic() - cis.stream_t0
cis.votes.append((t_rel, snap.t1_name, snap))
current_b = int(t_rel // wsec)
while cis.next_bucket < current_b:
b = cis.next_bucket
cis.next_bucket += 1
lo, hi = b * wsec, (b + 1) * wsec
bucket_pts = [
(p, sn) for (t, p, sn) in cis.votes if lo <= t < hi
]
cis.votes = [
(t, p, sn)
for (t, p, sn) in cis.votes
if not (lo <= t < hi)
]
if not bucket_pts:
continue
best = window_bucket_to_best_snap(bucket_pts)
if best is not None:
pending_preds.append(
cls_top3_to_prediction_result(best)
)
for cls_res in pending_preds:
await self._handle_classification_result(
state=state,
cls_res=cls_res,
)
finally:
if cap is not None:
await asyncio.to_thread(cap.release)
@@ -616,11 +671,10 @@ class CameraSessionManager:
*,
state: SurgerySessionState,
cls_res: PredictionResult,
tear_label: str,
) -> None:
_ = tear_label
conf = cls_res.confidence
label = (cls_res.label or "").strip()
item_id = state.name_to_code.get(label, label)
voice_floor = self._s.video_voice_confirm_min_confidence
if conf < voice_floor:
return
@@ -639,7 +693,7 @@ class CameraSessionManager:
if conf >= auto_th and in_allowed(label):
await self._append_confirmed_detail(
state=state,
item_id=label or "unknown",
item_id=item_id or label or "unknown",
item_name=label or "unknown",
doctor_id=self._s.video_result_doctor_id,
source="vision",