# File: operating-room-monitor-server/app/services/video/session_manager.py
# Listing metadata from the repository web view: 763 lines, 28 KiB, Python.

from __future__ import annotations
import asyncio
import time
import uuid
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Literal
from loguru import logger
from app.config import Settings
from app.database import AsyncSessionLocal
from app.repositories.surgery_results import SurgeryResultRepository
from app.schemas import SurgeryConsumptionDetail
from app.services.consumable_classifier import (
ConsumableClassifierService,
PredictionCandidate,
PredictionResult,
)
from app.services.tear_action import TearActionService
from app.services.video.backend_resolver import BackendResolver
from app.services.video.frame_encode import frame_to_jpeg_bytes
from app.services.video.hikvision_runtime import HikvisionInitRefCount, HikvisionRuntime
from app.services.video.rtsp_capture import RtspCapture
from app.services.video.types import VideoBackendKind
from app.services.voice_confirm import build_prompt_text
from app.surgery_errors import SurgeryPipelineError
@dataclass
class PendingConsumableConfirmation:
    """A low-confidence recognition awaiting client confirmation.

    Pending items do not block inference on subsequent frames.
    """

    # Identifier of this confirmation request.
    id: str
    # Lifecycle state of the request.
    status: Literal["pending", "confirmed", "rejected"]
    # Candidate (label, confidence) pairs offered for selection.
    options: list[tuple[str, float]]
    # Prompt text associated with this request.
    prompt_text: str
    # Creation time of the request.
    created_at: datetime
    # Top-1 label reported by the model for the triggering frame.
    model_top1_label: str
    # Confidence reported by the model for that top-1 label.
    model_top1_confidence: float
@dataclass
class SurgerySessionState:
    """Mutable state shared by everything working on one surgery session."""

    # Consumable names that are candidates for this surgery.
    candidate_consumables: list[str]
    # Consumption details accumulated so far.
    details: list[SurgeryConsumptionDetail] = field(default_factory=list)
    # Lock for mutating this state from concurrent coroutines.
    lock: asyncio.Lock = field(default_factory=asyncio.Lock)
    # Event marking the session as ready.
    ready: asyncio.Event = field(default_factory=asyncio.Event)
    # Monotonic-clock timestamps keyed by string, used for cooldown checks.
    last_detail_monotonic: dict[str, float] = field(default_factory=dict)
    #: FIFO of confirmation-task ids; holds only entries with status=pending.
    pending_fifo: list[str] = field(default_factory=list)
    # Pending confirmations indexed by their id.
    pending_by_id: dict[str, PendingConsumableConfirmation] = field(default_factory=dict)
    # Leading snippet of the most recently enqueued prompt.
    last_pending_prompt_snippet: str | None = None
    #: ASR text of the most recent voice confirmation (written on successful recognition).
    last_asr_text: str | None = None
    #: Error description of the most recent voice confirmation (ASR/parse failure, etc.).
    last_voice_error: str | None = None
@dataclass
class RunningSurgery:
    """Handles for one surgery session that is currently active."""

    # Event that signals the session's tasks to stop.
    stop_event: asyncio.Event
    # Shared mutable state of the session.
    state: SurgerySessionState
    # Tasks belonging to the session.
    tasks: list[asyncio.Task[None]]
@dataclass
class ArchivedSurgery:
    """Consumption details of a finished surgery that are held in memory."""

    # The consumption details recorded for the surgery.
    details: list[SurgeryConsumptionDetail]
def _rank_topk_for_candidates(
    topk: list[PredictionCandidate],
    ordered_candidates: list[str],
    *,
    limit: int = 5,
) -> list[PredictionCandidate]:
    """Restrict model top-k predictions to the candidate list, in candidate order.

    Args:
        topk: Predictions to filter; each item's ``label`` is compared after
            stripping whitespace.
        ordered_candidates: Candidate names; blank entries are ignored and the
            remaining names are compared after stripping whitespace.
        limit: Maximum number of predictions returned.

    Returns:
        An empty list when ``topk`` is empty. The first ``limit`` items of
        ``topk`` unchanged when no non-blank candidate exists. Otherwise the
        predictions whose label is a candidate, ordered by the candidate's
        position (ties keep their ``topk`` order), cut to ``limit`` items.
    """
    if not topk:
        return []

    names = [name for name in (raw.strip() for raw in ordered_candidates) if name]
    if not names:
        return topk[:limit]

    # A name listed more than once ends up with the position of its last occurrence.
    position = dict(zip(names, range(len(names))))

    def candidate_rank(pred: PredictionCandidate) -> int:
        return position[pred.label.strip()]

    matching = (pred for pred in topk if pred.label.strip() in position)
    # sorted() is stable, so equally ranked predictions keep their topk order.
    return sorted(matching, key=candidate_rank)[:limit]
class CameraSessionManager:
    """Run per-surgery camera pipelines and collect consumable usage.

    For every surgery the manager starts one worker task per camera. A worker
    resolves an RTSP URL (optionally after a Hikvision SDK login), reads
    frames, runs the consumable classifier and the tear-action model on
    sampled frames, and either records a consumption detail directly or
    enqueues a pending confirmation that the client resolves later. When a
    surgery stops, its details are saved through the result repository;
    results that could not be saved stay in an in-memory archive that a
    background loop retries.
    """

    def __init__(
        self,
        *,
        settings: Settings,
        consumable_classifier: ConsumableClassifierService,
        tear_action: TearActionService,
        hikvision_runtime: HikvisionRuntime | None,
        result_repository: SurgeryResultRepository | None = None,
    ) -> None:
        """Store collaborators and create empty session/archive registries.

        Args:
            settings: Application settings read by all pipeline steps.
            consumable_classifier: Service used to classify frames.
            tear_action: Service used for tear-action prediction on frames.
            hikvision_runtime: Hikvision SDK runtime, or ``None`` when it is
                not available.
            result_repository: Repository used to save final results; when
                ``None`` nothing is written to the database.
        """
        self._s = settings
        self._classifier = consumable_classifier
        self._tear = tear_action
        self._hik = hikvision_runtime
        self._repo = result_repository
        self._resolver = BackendResolver(settings, hikvision_runtime=hikvision_runtime)
        self._active: dict[str, RunningSurgery] = {}
        self._archive: dict[str, ArchivedSurgery] = {}
        self._manager_lock = asyncio.Lock()
        self._retry_task: asyncio.Task[None] | None = None
        self._retry_stop = asyncio.Event()

    async def start_archive_retry_loop(self) -> None:
        """Start the background archive-persist retry task if it is not running."""
        if self._retry_task is not None and not self._retry_task.done():
            return
        self._retry_stop.clear()
        self._retry_task = asyncio.create_task(
            self._archive_persist_retry_loop(),
            name="archive_persist_retry",
        )

    async def shutdown(self) -> None:
        """Stop the retry task, then stop every active surgery (best effort)."""
        self._retry_stop.set()
        if self._retry_task is not None:
            self._retry_task.cancel()
            try:
                await self._retry_task
            except asyncio.CancelledError:
                pass
            except Exception as exc:
                logger.debug("retry task shutdown: {}", exc)
            self._retry_task = None
        async with self._manager_lock:
            ids = list(self._active.keys())
        for sid in ids:
            try:
                await self.stop_surgery(sid, require_active=False)
            except Exception as exc:
                logger.warning("shutdown stop_surgery {}: {}", sid, exc)

    async def _archive_persist_retry_loop(self) -> None:
        """Periodically retry persisting archived results until asked to stop."""
        while not self._retry_stop.is_set():
            try:
                await asyncio.wait_for(
                    self._retry_stop.wait(),
                    timeout=self._s.archive_persist_retry_interval_seconds,
                )
                break
            except asyncio.TimeoutError:
                # asyncio.TimeoutError is what wait_for raises on every Python
                # version; the builtin TimeoutError only matches it on 3.11+.
                pass
            # Snapshot the ids: the archive may change while we await below.
            for sid in list(self._archive):
                if self._retry_stop.is_set():
                    break
                await self._try_persist_archive(sid)

    async def _try_persist_archive(self, surgery_id: str) -> bool:
        """Try to save one archived result and drop it from the archive.

        Returns:
            ``False`` when no repository is configured or the save failed;
            ``True`` when the archive entry no longer exists or was saved.
        """
        if self._repo is None:
            return False
        async with self._manager_lock:
            arch = self._archive.get(surgery_id)
        if arch is None:
            return True
        try:
            async with AsyncSessionLocal() as session:
                async with session.begin():
                    await self._repo.save_final_result(
                        session,
                        surgery_id=surgery_id,
                        details=list(arch.details),
                    )
        except Exception as exc:
            logger.warning(
                "Archive persist retry failed surgery_id={}: {}",
                surgery_id,
                exc,
            )
            return False
        async with self._manager_lock:
            # Only remove the entry that was actually saved; a different
            # archive stored under the same id while we were awaiting the
            # database must stay queued for its own retry.
            if self._archive.get(surgery_id) is arch:
                del self._archive[surgery_id]
        logger.info("Archive persisted after retry surgery_id={}", surgery_id)
        return True

    async def start_surgery(
        self,
        surgery_id: str,
        camera_ids: list[str],
        candidate_consumables: list[str],
    ) -> None:
        """Start camera workers for a surgery and wait for their first frames.

        Args:
            surgery_id: Identifier of the surgery session.
            camera_ids: Cameras to open; one worker task is created per id.
            candidate_consumables: Consumable names allowed for this surgery.

        Raises:
            SurgeryPipelineError: With code ``RECORDING_CANNOT_START`` when
                the surgery is already active, when a leftover archived
                result for the same id cannot be saved, or when not all
                cameras became ready within the open timeout.
        """
        stale_archive: ArchivedSurgery | None = None
        async with self._manager_lock:
            if surgery_id in self._active:
                raise SurgeryPipelineError(
                    "RECORDING_CANNOT_START",
                    "该手术已在录制中,请勿重复开始。",
                )
            if surgery_id in self._archive:
                logger.warning(
                    "surgery_id={} 仍有未落库归档,尝试写入数据库后再开始新会话",
                    surgery_id,
                )
                stale_archive = self._archive.pop(surgery_id)

        if stale_archive is not None:
            if self._repo is None:
                logger.error(
                    "surgery_id={} 有内存归档但未配置数据库仓库,无法持久化;"
                    "开始新会话将丢弃该归档(仅开发/无库模式)",
                    surgery_id,
                )
            else:
                ok = await self._persist_archived_details(
                    surgery_id, list(stale_archive.details)
                )
                if not ok:
                    # Put the archive back so the background retry keeps trying.
                    async with self._manager_lock:
                        self._archive[surgery_id] = stale_archive
                    raise SurgeryPipelineError(
                        "RECORDING_CANNOT_START",
                        "该手术号存在尚未写入数据库的历史结果,请修复数据库或等待自动重试成功后再开始。",
                    )

        state = SurgerySessionState(
            candidate_consumables=list(candidate_consumables),
        )
        stop_event = asyncio.Event()
        readies = [asyncio.Event() for _ in camera_ids]
        open_timeout = self._s.video_open_timeout_sec + 5.0

        async with self._manager_lock:
            # Re-check: persisting a stale archive above awaited, so another
            # start for the same id may have registered itself meanwhile.
            # Registering again would orphan that session's worker tasks.
            if surgery_id in self._active:
                raise SurgeryPipelineError(
                    "RECORDING_CANNOT_START",
                    "该手术已在录制中,请勿重复开始。",
                )
            tasks: list[asyncio.Task[None]] = [
                asyncio.create_task(
                    self._camera_worker(
                        surgery_id=surgery_id,
                        camera_id=cam_id,
                        stream_ready=ready,
                        stop_event=stop_event,
                        state=state,
                    ),
                    name=f"camera:{surgery_id}:{cam_id}",
                )
                for cam_id, ready in zip(camera_ids, readies, strict=True)
            ]
            self._active[surgery_id] = RunningSurgery(
                stop_event=stop_event, state=state, tasks=tasks
            )

        try:
            await asyncio.wait_for(
                asyncio.gather(*(r.wait() for r in readies)),
                timeout=open_timeout,
            )
            state.ready.set()
        except asyncio.TimeoutError as exc:
            logger.error(
                "Surgery {} cameras not all ready within {}s",
                surgery_id,
                open_timeout,
            )
            await self.stop_surgery(surgery_id, require_active=True)
            raise SurgeryPipelineError(
                "RECORDING_CANNOT_START",
                "开录未能确认:部分摄像头在超时内未成功拉到首帧。",
            ) from exc
        except Exception:
            await self.stop_surgery(surgery_id, require_active=True)
            raise

    async def _persist_archived_details(
        self,
        surgery_id: str,
        details: list[SurgeryConsumptionDetail],
    ) -> bool:
        """Save ``details`` as the final result of ``surgery_id``.

        Returns:
            ``True`` when saved or when no repository is configured,
            ``False`` when saving raised (the error is logged).
        """
        if self._repo is None:
            return True
        try:
            async with AsyncSessionLocal() as session:
                async with session.begin():
                    await self._repo.save_final_result(
                        session,
                        surgery_id=surgery_id,
                        details=details,
                    )
        except Exception as exc:
            logger.exception(
                "Persist archived surgery {} failed (will keep archive): {}",
                surgery_id,
                exc,
            )
            return False
        return True

    async def stop_surgery(self, surgery_id: str, *, require_active: bool = True) -> None:
        """Stop a surgery's workers and save its details.

        The session is removed from the active registry, its stop event is
        set and all worker tasks are awaited. The details are then saved via
        the repository; if no repository is configured or saving fails, they
        are kept in the in-memory archive instead.

        Args:
            surgery_id: Identifier of the surgery session.
            require_active: When true, a missing session is an error;
                otherwise the call returns silently.

        Raises:
            SurgeryPipelineError: With code ``RECORDING_NOT_STOPPED`` when
                ``require_active`` is true and the surgery is not active.
        """
        async with self._manager_lock:
            run = self._active.pop(surgery_id, None)
        if run is None:
            if require_active:
                raise SurgeryPipelineError(
                    "RECORDING_NOT_STOPPED",
                    "停录未能完成:当前没有该手术的活跃录制会话。",
                )
            return

        run.stop_event.set()
        results = await asyncio.gather(*run.tasks, return_exceptions=True)
        for res in results:
            if isinstance(res, BaseException):
                logger.warning("surgery task finished with error: {}", res)

        details = list(run.state.details)
        persisted = False
        if self._repo is not None:
            try:
                async with AsyncSessionLocal() as session:
                    async with session.begin():
                        await self._repo.save_final_result(
                            session,
                            surgery_id=surgery_id,
                            details=details,
                        )
                persisted = True
            except Exception as exc:
                logger.exception("Persist surgery {} failed: {}", surgery_id, exc)

        async with self._manager_lock:
            if not persisted:
                self._archive[surgery_id] = ArchivedSurgery(details=details)
                logger.error(
                    "Surgery {} final result kept in memory archive only; "
                    "background retry will attempt persist",
                    surgery_id,
                )

    def live_consumption_if_active(self, surgery_id: str) -> list[SurgeryConsumptionDetail] | None:
        """Return a copy of the live details, or ``None``.

        ``None`` is returned when the surgery is not active, is not ready
        yet, or has no details so far.
        """
        run = self._active.get(surgery_id)
        if run is None or not run.state.ready.is_set():
            return None
        rows = list(run.state.details)
        return rows or None

    def archived_consumption_fallback(self, surgery_id: str) -> list[SurgeryConsumptionDetail] | None:
        """Return a copy of the archived details for ``surgery_id``, or ``None``."""
        arch = self._archive.get(surgery_id)
        if arch is None:
            return None
        return list(arch.details)

    def voice_status(self, surgery_id: str) -> dict[str, object] | None:
        """Return a voice-confirmation status snapshot, or ``None`` if not active."""
        run = self._active.get(surgery_id)
        if run is None:
            return None
        st = run.state
        return {
            "surgery_id": surgery_id,
            "voice_enabled": bool(self._s.voice_confirmation_enabled),
            "pending_queue_approx": len(st.pending_fifo),
            "last_prompt_snippet": st.last_pending_prompt_snippet,
            "last_asr_text": st.last_asr_text,
            "last_error": st.last_voice_error,
        }

    def record_voice_trace(
        self,
        surgery_id: str,
        *,
        asr_text: str | None,
        error: str | None,
    ) -> None:
        """Record the latest ASR text and error on an active surgery (no-op otherwise)."""
        run = self._active.get(surgery_id)
        if run is None:
            return
        run.state.last_asr_text = asr_text
        run.state.last_voice_error = error

    def get_pending_confirmation_by_id(
        self,
        surgery_id: str,
        confirmation_id: str,
    ) -> PendingConsumableConfirmation | None:
        """Return the confirmation with this id if it exists and is still pending."""
        run = self._active.get(surgery_id)
        if run is None:
            return None
        pending = run.state.pending_by_id.get(confirmation_id)
        if pending is None or pending.status != "pending":
            return None
        return pending

    def next_pending_confirmation(
        self, surgery_id: str
    ) -> PendingConsumableConfirmation | None:
        """Return the oldest still-pending confirmation of an active surgery."""
        run = self._active.get(surgery_id)
        if run is None:
            return None
        st = run.state
        for cid in st.pending_fifo:
            pending = st.pending_by_id.get(cid)
            if pending is not None and pending.status == "pending":
                return pending
        return None

    async def resolve_pending_confirmation(
        self,
        surgery_id: str,
        confirmation_id: str,
        *,
        chosen_label: str | None,
        rejected: bool,
    ) -> None:
        """Confirm or reject a pending confirmation and remove it from the queue.

        On confirmation a consumption detail with source ``"voice"`` is
        appended (subject to the per-item cooldown).

        Args:
            surgery_id: Identifier of the active surgery.
            confirmation_id: Identifier of the pending confirmation.
            chosen_label: Label picked by the client; required unless
                ``rejected`` is true, and must be one of the offered options.
            rejected: True to reject the confirmation.

        Raises:
            SurgeryPipelineError: ``CONFIRMATION_NOT_ACTIVE``,
                ``CONFIRMATION_NOT_FOUND``, ``CONFIRMATION_ALREADY_RESOLVED``
                or ``CONFIRMATION_INVALID`` depending on the failed check.
        """
        run = self._active.get(surgery_id)
        if run is None:
            raise SurgeryPipelineError(
                "CONFIRMATION_NOT_ACTIVE",
                "该手术当前不在进行中,无法提交确认。",
            )
        st = run.state
        async with st.lock:
            pending = st.pending_by_id.get(confirmation_id)
            if pending is None:
                raise SurgeryPipelineError(
                    "CONFIRMATION_NOT_FOUND",
                    "未找到该待确认项或已处理。",
                )
            if pending.status != "pending":
                raise SurgeryPipelineError(
                    "CONFIRMATION_ALREADY_RESOLVED",
                    "该待确认项已处理。",
                )
            if rejected and chosen_label:
                raise SurgeryPipelineError(
                    "CONFIRMATION_INVALID",
                    "拒绝确认时不应同时提供 chosen_label。",
                )
            if not rejected and not chosen_label:
                raise SurgeryPipelineError(
                    "CONFIRMATION_INVALID",
                    "请提供 chosen_label 或设置 rejected=true。",
                )

            if rejected:
                pending.status = "rejected"
            else:
                allowed = {lbl.strip() for lbl, _ in pending.options if lbl.strip()}
                label = (chosen_label or "").strip()
                if label not in allowed:
                    raise SurgeryPipelineError(
                        "CONFIRMATION_INVALID",
                        f"所选耗材不在候选列表中:{chosen_label!r}",
                    )
                pending.status = "confirmed"
                self._append_confirmed_detail_locked(
                    state=st,
                    item_id=label,
                    item_name=label,
                    doctor_id=self._s.video_voice_confirm_doctor_id,
                    source="voice",
                )

            # Resolved items leave both the FIFO and the id index.
            try:
                st.pending_fifo.remove(confirmation_id)
            except ValueError:
                pass
            st.pending_by_id.pop(confirmation_id, None)

    def _append_confirmed_detail_locked(
        self,
        *,
        state: SurgerySessionState,
        item_id: str,
        item_name: str,
        doctor_id: str,
        source: str,
    ) -> None:
        """Append one consumption detail; the caller must already hold ``state.lock``.

        The detail is skipped when the same ``item_id`` was recorded less
        than ``video_detail_cooldown_sec`` seconds ago.
        """
        now_m = time.monotonic()
        cooldown = self._s.video_detail_cooldown_sec
        prev = state.last_detail_monotonic.get(item_id)
        if prev is not None and (now_m - prev) < cooldown:
            return
        state.last_detail_monotonic[item_id] = now_m
        state.details.append(
            SurgeryConsumptionDetail(
                item_id=item_id,
                item_name=item_name,
                quantity=1,
                doctor_id=doctor_id,
                timestamp=datetime.now(timezone.utc),
                source=source,
            )
        )

    async def _append_confirmed_detail(
        self,
        *,
        state: SurgerySessionState,
        item_id: str,
        item_name: str,
        doctor_id: str,
        source: str,
    ) -> None:
        """Acquire ``state.lock`` and append one consumption detail."""
        async with state.lock:
            self._append_confirmed_detail_locked(
                state=state,
                item_id=item_id,
                item_name=item_name,
                doctor_id=doctor_id,
                source=source,
            )

    async def _camera_worker(
        self,
        *,
        surgery_id: str,
        camera_id: str,
        stream_ready: asyncio.Event,
        stop_event: asyncio.Event,
        state: SurgerySessionState,
    ) -> None:
        """Read one camera until ``stop_event`` is set and feed frames to inference.

        ``stream_ready`` is set the first time the stream opens. Failed opens
        and repeated read failures trigger a reconnect after a backoff.
        Inference runs at most once per ``video_inference_interval_sec``;
        inference errors skip the frame. On exit the capture, the Hikvision
        login and the SDK init refcount are released.
        """
        kind = self._resolver.backend_for_camera(camera_id)
        cap: RtspCapture | None = None
        hik_user_id: int | None = None
        hik_init_retained = False
        consecutive_failures = 0
        first_ready = True
        try:
            url, hik_user_id, hik_init_retained = await self._resolve_rtsp_url(
                camera_id=camera_id,
                kind=kind,
            )
            assert url is not None
            last_infer = 0.0
            while not stop_event.is_set():
                if cap is None:
                    try:
                        cap = RtspCapture(url, open_timeout_sec=self._s.video_open_timeout_sec)
                        await asyncio.to_thread(cap.open)
                        consecutive_failures = 0
                        if first_ready:
                            stream_ready.set()
                            first_ready = False
                        logger.info(
                            "RTSP stream opened camera={} surgery={}",
                            camera_id,
                            surgery_id,
                        )
                    except Exception as exc:
                        logger.warning(
                            "RTSP open failed camera={} surgery={}: {}",
                            camera_id,
                            surgery_id,
                            exc,
                        )
                        if cap is not None:
                            await asyncio.to_thread(cap.release)
                        cap = None
                        await asyncio.sleep(self._s.video_reconnect_backoff_seconds)
                        continue

                ok, frame = await asyncio.to_thread(cap.read)
                if not ok or frame is None:
                    consecutive_failures += 1
                    if consecutive_failures >= self._s.video_read_failure_reconnect_threshold:
                        logger.warning(
                            "RTSP reconnect camera={} surgery={} after {} read failures",
                            camera_id,
                            surgery_id,
                            consecutive_failures,
                        )
                        await asyncio.to_thread(cap.release)
                        cap = None
                        consecutive_failures = 0
                        await asyncio.sleep(self._s.video_reconnect_backoff_seconds)
                    else:
                        await asyncio.sleep(0.05)
                    continue
                consecutive_failures = 0

                # Throttle inference; frames in between are read and dropped.
                now = time.monotonic()
                if now - last_infer < self._s.video_inference_interval_sec:
                    await asyncio.sleep(0.01)
                    continue
                last_infer = now

                try:
                    jpeg = await asyncio.to_thread(
                        frame_to_jpeg_bytes,
                        frame,
                        quality=self._s.video_jpeg_quality,
                    )
                    cls_res = await self._classifier.predict_image_bytes(jpeg)
                    tear_res = await self._tear.predict_image_bytes(jpeg)
                except Exception as exc:
                    logger.debug(
                        "Inference skip camera={} surgery={}: {}",
                        camera_id,
                        surgery_id,
                        exc,
                    )
                    continue

                await self._handle_classification_result(
                    state=state,
                    cls_res=cls_res,
                    tear_label=tear_res.label,
                )
        finally:
            # Nested try/finally: a failure in one cleanup step must not skip
            # the remaining ones, otherwise the SDK login/refcount would leak.
            try:
                if cap is not None:
                    await asyncio.to_thread(cap.release)
            finally:
                try:
                    if hik_user_id is not None and self._hik is not None:
                        await asyncio.to_thread(self._hik.logout, hik_user_id)
                finally:
                    if hik_init_retained and self._hik is not None:
                        HikvisionInitRefCount.release(self._hik)

    async def _handle_classification_result(
        self,
        *,
        state: SurgerySessionState,
        cls_res: PredictionResult,
        tear_label: str,
    ) -> None:
        """Turn one classification result into a detail or a pending confirmation.

        Results below ``video_voice_confirm_min_confidence`` and sessions
        without candidates are ignored. A top-1 label that is a candidate
        with confidence at or above ``video_auto_confirm_confidence`` is
        recorded directly with source ``"vision"``. Otherwise, if voice
        confirmation is enabled, a pending confirmation is enqueued from the
        candidate-filtered top-k, or from the top-1 label alone when that
        list is empty, the label is a candidate and the confidence is below
        the auto-confirm threshold.
        """
        _ = tear_label  # accepted by the signature but not used here
        conf = cls_res.confidence
        label = (cls_res.label or "").strip()
        if conf < self._s.video_voice_confirm_min_confidence:
            return

        cand_order = [c.strip() for c in state.candidate_consumables if c.strip()]
        if not cand_order:
            return
        ranked = _rank_topk_for_candidates(cls_res.topk, cand_order)
        # cand_order holds only non-empty names, so an allowed label is non-empty.
        label_allowed = label in set(cand_order)
        auto_confirm = conf >= self._s.video_auto_confirm_confidence

        if auto_confirm and label_allowed:
            await self._append_confirmed_detail(
                state=state,
                item_id=label,
                item_name=label,
                doctor_id=self._s.video_result_doctor_id,
                source="vision",
            )
            return

        if not self._s.voice_confirmation_enabled:
            return

        if ranked:
            await self._maybe_enqueue_pending_confirmation(
                state, ranked, top_key=label, top_confidence=conf
            )
        elif not auto_confirm and label_allowed:
            await self._maybe_enqueue_pending_confirmation(
                state,
                [PredictionCandidate(label=label, confidence=conf)],
                top_key=label,
                top_confidence=conf,
            )

    async def _maybe_enqueue_pending_confirmation(
        self,
        state: SurgerySessionState,
        ranked: list[PredictionCandidate],
        *,
        top_key: str,
        top_confidence: float,
    ) -> None:
        """Enqueue a pending confirmation unless an equivalent one is in cooldown.

        Requests are de-duplicated on the pair (``top_key``, first option
        label) using ``video_detail_cooldown_sec``.
        """
        opts = [(c.label.strip(), float(c.confidence)) for c in ranked if c.label.strip()]
        if not opts:
            return
        now_m = time.monotonic()
        cooldown = self._s.video_detail_cooldown_sec
        dedupe_key = f"pending_confirm:{top_key}:{opts[0][0]}"
        async with state.lock:
            prev = state.last_detail_monotonic.get(dedupe_key)
            if prev is not None and (now_m - prev) < cooldown:
                return
            state.last_detail_monotonic[dedupe_key] = now_m
            confirm_id = str(uuid.uuid4())
            prompt = build_prompt_text(opts)
            pending = PendingConsumableConfirmation(
                id=confirm_id,
                status="pending",
                options=list(opts),
                prompt_text=prompt,
                created_at=datetime.now(timezone.utc),
                model_top1_label=top_key,
                model_top1_confidence=top_confidence,
            )
            state.pending_by_id[confirm_id] = pending
            state.pending_fifo.append(confirm_id)
            state.last_pending_prompt_snippet = prompt[:200]
        logger.info(
            "Enqueued pending consumable confirmation id={} top_key={}",
            confirm_id,
            top_key,
        )

    async def _resolve_rtsp_url(
        self,
        *,
        camera_id: str,
        kind: VideoBackendKind,
    ) -> tuple[str, int | None, bool]:
        """Returns (url, hikvision_user_id, whether NET_DVR_Init refcount was retained).

        For non-Hikvision backends the plain RTSP URL is returned. For the
        Hikvision SDK backend a login is attempted; when the SDK is missing,
        credentials are incomplete or the login fails, the plain RTSP URL is
        used if ``hikvision_sdk_fallback_to_rtsp`` is set, otherwise an
        exception is raised.

        Raises:
            RuntimeError: When the SDK backend is requested but the SDK is
                not loaded or credentials are incomplete, and fallback is off.
        """
        if kind != VideoBackendKind.HIKVISION_SDK:
            return self._resolver.rtsp_url_for_camera(camera_id), None, False

        hik = self._hik
        if hik is None:
            if self._s.hikvision_sdk_fallback_to_rtsp:
                logger.warning(
                    "Hikvision SDK not loaded; fallback to RTSP for camera {}",
                    camera_id,
                )
                return self._resolver.rtsp_url_for_camera(camera_id), None, False
            raise RuntimeError("Hikvision SDK requested but libhcnetsdk.so not loaded")

        if not (
            self._s.hikvision_device_ip.strip()
            and self._s.hikvision_user.strip()
            and self._s.hikvision_password.strip()
        ):
            if self._s.hikvision_sdk_fallback_to_rtsp:
                logger.warning(
                    "Hikvision credentials incomplete; fallback to RTSP for camera {}",
                    camera_id,
                )
                return self._resolver.rtsp_url_for_camera(camera_id), None, False
            raise RuntimeError("Hikvision SDK requires HIKVISION_DEVICE_IP, user, password")

        HikvisionInitRefCount.retain(hik)
        try:
            login = await asyncio.to_thread(
                lambda: hik.login_v30(
                    ip=self._s.hikvision_device_ip.strip(),
                    port=int(self._s.hikvision_device_port),
                    username=self._s.hikvision_user.strip(),
                    password=self._s.hikvision_password.strip(),
                )
            )
        except Exception as exc:
            HikvisionInitRefCount.release(hik)
            if self._s.hikvision_sdk_fallback_to_rtsp:
                logger.warning("Hikvision login failed ({}); fallback to RTSP", exc)
                return self._resolver.rtsp_url_for_camera(camera_id), None, False
            raise

        try:
            url = self._resolver.rtsp_url_after_hikvision_login(camera_id)
        except Exception:
            # The caller never learns the user id on this path, so undo the
            # login and the refcount here instead of leaking them.
            try:
                await asyncio.to_thread(hik.logout, login.user_id)
            finally:
                HikvisionInitRefCount.release(hik)
            raise
        return url, login.user_id, True