refactor: 统一耗材视觉算法并扩展语音确认至全量候选清单
- 以 ConsumableVisionAlgorithmService 替代 consumable_classifier 与 tear_action;可选手部检测权重,未配置时全帧分类;支持时间窗众数与 Excel 白名单配置。
- 语音待确认:ASR 先匹配 pending topk,再匹配本台手术的 candidate_consumables;记账 item_id 与 vision 保持一致,统一使用 name_to_code。
- 更新 config、Compose、.env.example、依赖(pandas/openpyxl)与测试。

Made-with: Cursor
This commit is contained in:
@@ -13,14 +13,16 @@ from app.config import Settings
|
||||
from app.database import AsyncSessionLocal
|
||||
from app.repositories.surgery_results import SurgeryResultRepository
|
||||
from app.schemas import SurgeryConsumptionDetail
|
||||
from app.services.consumable_classifier import (
|
||||
ConsumableClassifierService,
|
||||
from app.services.consumable_vision_algorithm import (
|
||||
ClsTop3,
|
||||
ConsumableVisionAlgorithmService,
|
||||
PredictionCandidate,
|
||||
PredictionResult,
|
||||
_norm_product_name,
|
||||
cls_top3_to_prediction_result,
|
||||
window_bucket_to_best_snap,
|
||||
)
|
||||
from app.services.tear_action import TearActionService
|
||||
from app.services.video.backend_resolver import BackendResolver
|
||||
from app.services.video.frame_encode import frame_to_jpeg_bytes
|
||||
from app.services.video.hikvision_runtime import HikvisionInitRefCount, HikvisionRuntime
|
||||
from app.services.video.rtsp_capture import RtspCapture
|
||||
from app.services.video.types import VideoBackendKind
|
||||
@@ -41,9 +43,21 @@ class PendingConsumableConfirmation:
|
||||
model_top1_confidence: float
|
||||
|
||||
|
||||
@dataclass
|
||||
class CameraStreamInferState:
|
||||
"""Time-window voting state for a single video stream (consistent with the offline algorithm)."""
|
||||

||||
# Accumulated votes as (seconds elapsed since stream_t0, top-1 class name, ClsTop3 snapshot) tuples.
votes: list[tuple[float, str, ClsTop3]] = field(default_factory=list)
|
||||
# time.monotonic() reading taken when the first vote is recorded; None until then.
stream_t0: float | None = None
|
||||
# Index of the next time-window bucket that has not been tallied yet.
next_bucket: int = 0
|
||||
|
||||
|
||||
@dataclass
|
||||
class SurgerySessionState:
|
||||
candidate_consumables: list[str]
|
||||
#: 分类类名(归一化) -> 业务物品 id(Excel 产品编码或名称)。
|
||||
name_to_code: dict[str, str] = field(default_factory=dict)
|
||||
camera_infer: dict[str, CameraStreamInferState] = field(default_factory=dict)
|
||||
details: list[SurgeryConsumptionDetail] = field(default_factory=list)
|
||||
lock: asyncio.Lock = field(default_factory=asyncio.Lock)
|
||||
ready: asyncio.Event = field(default_factory=asyncio.Event)
|
||||
@@ -94,14 +108,12 @@ class CameraSessionManager:
|
||||
self,
|
||||
*,
|
||||
settings: Settings,
|
||||
consumable_classifier: ConsumableClassifierService,
|
||||
tear_action: TearActionService,
|
||||
vision_algorithm: ConsumableVisionAlgorithmService,
|
||||
hikvision_runtime: HikvisionRuntime | None,
|
||||
result_repository: SurgeryResultRepository | None = None,
|
||||
) -> None:
|
||||
self._s = settings
|
||||
self._classifier = consumable_classifier
|
||||
self._tear = tear_action
|
||||
self._vision = vision_algorithm
|
||||
self._hik = hikvision_runtime
|
||||
self._repo = result_repository
|
||||
self._resolver = BackendResolver(settings, hikvision_runtime=hikvision_runtime)
|
||||
@@ -221,8 +233,10 @@ class CameraSessionManager:
|
||||
"该手术号存在尚未写入数据库的历史结果,请修复数据库或等待自动重试成功后再开始。",
|
||||
)
|
||||
|
||||
name_to_code = self._vision.build_name_mapping(candidate_consumables)
|
||||
state = SurgerySessionState(
|
||||
candidate_consumables=list(candidate_consumables),
|
||||
name_to_code=name_to_code,
|
||||
)
|
||||
stop_event = asyncio.Event()
|
||||
readies = [asyncio.Event() for _ in camera_ids]
|
||||
@@ -388,6 +402,12 @@ class CameraSessionManager:
|
||||
return None
|
||||
return p
|
||||
|
||||
def get_surgery_candidate_consumables(self, surgery_id: str) -> list[str]:
|
||||
"""Return the consumable candidate list passed in when this surgery was started.

Voice confirmation may pick any item from this list, not only the model's top-k.
Returns an empty list when surgery_id is not present in self._active; otherwise
returns a new list copied from the session state's candidate_consumables.
"""
|
||||
if surgery_id not in self._active:
|
||||
return []
|
||||
return list(self._active[surgery_id].state.candidate_consumables)
|
||||
|
||||
def next_pending_confirmation(
|
||||
self, surgery_id: str
|
||||
) -> PendingConsumableConfirmation | None:
|
||||
@@ -436,20 +456,23 @@ class CameraSessionManager:
|
||||
"CONFIRMATION_INVALID",
|
||||
"请提供 chosen_label 或设置 rejected=true。",
|
||||
)
|
||||
allowed = {lbl.strip() for lbl, _ in pending.options if lbl.strip()}
|
||||
allowed_pending = {lbl.strip() for lbl, _ in pending.options if lbl.strip()}
|
||||
allowed_surgery = {c.strip() for c in st.candidate_consumables if c.strip()}
|
||||
if rejected:
|
||||
pending.status = "rejected"
|
||||
else:
|
||||
label = chosen_label.strip() if chosen_label else ""
|
||||
if label not in allowed:
|
||||
if label not in allowed_pending and label not in allowed_surgery:
|
||||
raise SurgeryPipelineError(
|
||||
"CONFIRMATION_INVALID",
|
||||
f"所选耗材不在候选列表中:{chosen_label!r}",
|
||||
f"所选耗材不在本台手术候选清单或本次追问选项中:{chosen_label!r}",
|
||||
)
|
||||
pending.status = "confirmed"
|
||||
norm = _norm_product_name(label)
|
||||
item_id = st.name_to_code.get(norm, label)
|
||||
self._append_confirmed_detail_locked(
|
||||
state=st,
|
||||
item_id=label,
|
||||
item_id=item_id,
|
||||
item_name=label,
|
||||
doctor_id=self._s.video_voice_confirm_doctor_id,
|
||||
source="voice",
|
||||
@@ -582,13 +605,11 @@ class CameraSessionManager:
|
||||
continue
|
||||
last_infer = now
|
||||
try:
|
||||
jpeg = await asyncio.to_thread(
|
||||
frame_to_jpeg_bytes,
|
||||
snap = await asyncio.to_thread(
|
||||
self._vision.infer_frame_bgr,
|
||||
frame,
|
||||
quality=self._s.video_jpeg_quality,
|
||||
state.name_to_code,
|
||||
)
|
||||
cls_res = await self._classifier.predict_image_bytes(jpeg)
|
||||
tear_res = await self._tear.predict_image_bytes(jpeg)
|
||||
except Exception as exc:
|
||||
logger.debug(
|
||||
"Inference skip camera={} surgery={}: {}",
|
||||
@@ -598,11 +619,45 @@ class CameraSessionManager:
|
||||
)
|
||||
continue
|
||||
|
||||
await self._handle_classification_result(
|
||||
state=state,
|
||||
cls_res=cls_res,
|
||||
tear_label=tear_res.label,
|
||||
)
|
||||
if snap is None:
|
||||
continue
|
||||
|
||||
wsec = self._s.consumable_vision_window_sec
|
||||
pending_preds: list[PredictionResult] = []
|
||||
async with state.lock:
|
||||
cis = state.camera_infer.setdefault(
|
||||
camera_id, CameraStreamInferState()
|
||||
)
|
||||
if cis.stream_t0 is None:
|
||||
cis.stream_t0 = time.monotonic()
|
||||
t_rel = time.monotonic() - cis.stream_t0
|
||||
cis.votes.append((t_rel, snap.t1_name, snap))
|
||||
current_b = int(t_rel // wsec)
|
||||
while cis.next_bucket < current_b:
|
||||
b = cis.next_bucket
|
||||
cis.next_bucket += 1
|
||||
lo, hi = b * wsec, (b + 1) * wsec
|
||||
bucket_pts = [
|
||||
(p, sn) for (t, p, sn) in cis.votes if lo <= t < hi
|
||||
]
|
||||
cis.votes = [
|
||||
(t, p, sn)
|
||||
for (t, p, sn) in cis.votes
|
||||
if not (lo <= t < hi)
|
||||
]
|
||||
if not bucket_pts:
|
||||
continue
|
||||
best = window_bucket_to_best_snap(bucket_pts)
|
||||
if best is not None:
|
||||
pending_preds.append(
|
||||
cls_top3_to_prediction_result(best)
|
||||
)
|
||||
|
||||
for cls_res in pending_preds:
|
||||
await self._handle_classification_result(
|
||||
state=state,
|
||||
cls_res=cls_res,
|
||||
)
|
||||
finally:
|
||||
if cap is not None:
|
||||
await asyncio.to_thread(cap.release)
|
||||
@@ -616,11 +671,10 @@ class CameraSessionManager:
|
||||
*,
|
||||
state: SurgerySessionState,
|
||||
cls_res: PredictionResult,
|
||||
tear_label: str,
|
||||
) -> None:
|
||||
_ = tear_label
|
||||
conf = cls_res.confidence
|
||||
label = (cls_res.label or "").strip()
|
||||
item_id = state.name_to_code.get(label, label)
|
||||
voice_floor = self._s.video_voice_confirm_min_confidence
|
||||
if conf < voice_floor:
|
||||
return
|
||||
@@ -639,7 +693,7 @@ class CameraSessionManager:
|
||||
if conf >= auto_th and in_allowed(label):
|
||||
await self._append_confirmed_detail(
|
||||
state=state,
|
||||
item_id=label or "unknown",
|
||||
item_id=item_id or label or "unknown",
|
||||
item_name=label or "unknown",
|
||||
doctor_id=self._s.video_result_doctor_id,
|
||||
source="vision",
|
||||
|
||||
Reference in New Issue
Block a user