feat: surgery pipeline API, video inference, voice confirm, and tests

- Add FastAPI routes for surgery start/end, results, pending confirmation (WAV upload), and health checks.
- Implement RTSP/Hikvision capture, consumable classification, session manager, MinIO/Baidu voice resolution, and DB persistence.
- Add documentation (client API, video backends, staging checklist) and sample camera/RTSP config.
- Add pytest suite (API contract, session manager, voice, repositories, pipeline persistence) and httpx dev dependency.
- Replace deprecated HTTP_422_UNPROCESSABLE_ENTITY with HTTP_422_UNPROCESSABLE_CONTENT.
- Fix SurgeryPipeline DB reads to use an explicit transaction with autobegin disabled.

Made-with: Cursor
This commit is contained in:
Kevin
2026-04-21 18:33:54 +08:00
parent d1a3d029ec
commit 04866559db
56 changed files with 7196 additions and 43 deletions

View File

@@ -0,0 +1,5 @@
"""Video capture backends: RTSP (OpenCV) and optional Hikvision HCNetSDK (Linux .so)."""
from app.services.video.session_manager import CameraSessionManager
__all__ = ["CameraSessionManager"]

View File

@@ -0,0 +1,103 @@
from __future__ import annotations
import json
from typing import Any
from loguru import logger
from app.config import Settings
from app.services.video.hikvision_runtime import HikvisionRuntime
from app.services.video.types import VideoBackendKind
class BackendResolver:
    """Resolve per-camera backend (RTSP vs Hikvision SDK) and RTSP URL.

    All configuration is read from ``settings``. The Hikvision runtime is only
    consulted to decide what the ``auto`` default backend resolves to.
    """

    # Override / default values that select the Hikvision SDK backend.
    _SDK_ALIASES = ("hikvision_sdk", "sdk")

    def __init__(
        self,
        settings: Settings,
        *,
        hikvision_runtime: HikvisionRuntime | None,
    ) -> None:
        """Store the collaborators and snapshot the static RTSP URL map."""
        self._s = settings
        self._hik = hikvision_runtime
        # Evaluated once here; later changes to the settings are not picked up.
        self._rtsp_urls_map = settings.video_rtsp_url_map()

    def _parse_json_object(self, raw: str) -> dict[str, Any]:
        """Parse ``raw`` as a JSON object and return it with string keys.

        Blank (or ``None``) input yields an empty dict.

        Raises:
            ValueError: if ``raw`` is not valid JSON or is not a JSON object.
        """
        text = (raw or "").strip()
        if not text:
            return {}
        try:
            data = json.loads(text)
        except json.JSONDecodeError as exc:
            raise ValueError(f"Invalid JSON mapping: {exc}") from exc
        if not isinstance(data, dict):
            raise ValueError("JSON mapping must be an object")
        return {str(k): v for k, v in data.items()}

    def backend_for_camera(self, camera_id: str) -> VideoBackendKind:
        """Return the backend to use for ``camera_id``.

        A recognised per-camera override wins; an unrecognised override value
        is ignored and the default backend setting is used instead. With the
        default set to ``auto`` (or empty) the SDK is chosen only when a
        runtime was supplied and the SDK is enabled in settings.

        Raises:
            ValueError: if the overrides setting is not a valid JSON object.
        """
        overrides = self._parse_json_object(self._s.video_camera_backend_overrides_json)
        if camera_id in overrides:
            value = str(overrides[camera_id]).lower()
            if value in self._SDK_ALIASES:
                return VideoBackendKind.HIKVISION_SDK
            if value == "rtsp":
                return VideoBackendKind.RTSP
            # Any other value falls through to the default below.
        default = self._s.video_default_backend.strip().lower()
        if default in ("auto", ""):
            if self._hik is not None and self._s.hikvision_sdk_enabled:
                return VideoBackendKind.HIKVISION_SDK
            return VideoBackendKind.RTSP
        if default in self._SDK_ALIASES:
            return VideoBackendKind.HIKVISION_SDK
        return VideoBackendKind.RTSP

    def rtsp_url_for_camera(self, camera_id: str) -> str:
        """Return the RTSP URL for ``camera_id`` from the URL map or template.

        The explicit map takes precedence over the ``{camera_id}`` template.

        Raises:
            ValueError: if no URL is configured, or the template references a
                placeholder (named or positional) that is not supplied.
        """
        if camera_id in self._rtsp_urls_map:
            return self._rtsp_urls_map[camera_id]
        tpl = (self._s.video_rtsp_url_template or "").strip()
        if tpl:
            try:
                return tpl.format(camera_id=camera_id)
            except (KeyError, IndexError) as exc:
                # IndexError: the template used a positional field such as
                # "{}" or "{0}", which is never supplied here.
                raise ValueError(
                    f"video_rtsp_url_template missing placeholder: {exc}"
                ) from exc
        raise ValueError(
            f"No RTSP URL for camera_id={camera_id!r}: set VIDEO_RTSP_URLS_JSON_FILE, "
            f"VIDEO_RTSP_URLS_JSON, or VIDEO_RTSP_URL_TEMPLATE"
        )

    def rtsp_url_after_hikvision_login(self, camera_id: str) -> str:
        """RTSP URL used after SDK login (often same as device preview URL).

        Lookup order: per-camera Hikvision URL JSON, then the Hikvision
        preview template, then the generic RTSP map/template (with a warning).

        Raises:
            ValueError: if the JSON setting is invalid, a template placeholder
                is missing, or no URL can be resolved at all.
        """
        urls = self._parse_json_object(self._s.hikvision_camera_rtsp_urls_json)
        if camera_id in urls:
            return str(urls[camera_id])
        tpl = (self._s.hikvision_preview_rtsp_template or "").strip()
        if not tpl:
            logger.warning(
                "Hikvision backend without HIKVISION_PREVIEW_RTSP_TEMPLATE / "
                "HIKVISION_CAMERA_RTSP_URLS_JSON — falling back to generic RTSP map"
            )
            return self.rtsp_url_for_camera(camera_id)
        return self._format_hikvision_rtsp(tpl, camera_id)

    def _format_hikvision_rtsp(self, template: str, camera_id: str) -> str:
        """Fill ``template`` with camera_id, ip, user, password and channel.

        Raises:
            ValueError: if the template references a field that is not one of
                the supplied names (including positional fields).
        """
        # NOTE(review): credentials are inserted verbatim; a password that
        # contains '@', ':' or '/' may yield a URL that is parsed incorrectly.
        # Consider percent-encoding once it is confirmed that no deployment
        # already stores pre-encoded credentials.
        fields = {
            "camera_id": camera_id,
            "ip": self._s.hikvision_device_ip.strip(),
            "user": self._s.hikvision_user.strip(),
            "password": self._s.hikvision_password.strip(),
            "channel": self._s.hikvision_channel,
        }
        try:
            return template.format(**fields)
        except (KeyError, IndexError) as exc:
            raise ValueError(
                f"hikvision_preview_rtsp_template missing key: {exc}"
            ) from exc

View File

@@ -0,0 +1,13 @@
from __future__ import annotations
import cv2
import numpy as np
def frame_to_jpeg_bytes(frame: np.ndarray, *, quality: int = 85) -> bytes:
    """Return ``frame`` (BGR) encoded as JPEG bytes for the model services.

    Args:
        frame: Image array handed to ``cv2.imencode``.
        quality: Value passed as ``cv2.IMWRITE_JPEG_QUALITY``.

    Raises:
        RuntimeError: if OpenCV reports that encoding failed.
    """
    encode_flags = [int(cv2.IMWRITE_JPEG_QUALITY), int(quality)]
    success, encoded = cv2.imencode(".jpg", frame, encode_flags)
    if success and encoded is not None:
        return encoded.tobytes()
    raise RuntimeError("cv2.imencode failed for JPEG")

View File

@@ -0,0 +1,157 @@
from __future__ import annotations
import os
import threading
from ctypes import (
CDLL,
POINTER,
RTLD_GLOBAL,
byref,
c_byte,
c_char_p,
c_int,
c_uint16,
)
from ctypes import Structure
from dataclasses import dataclass
from pathlib import Path
from loguru import logger
@dataclass
class HikvisionLoginResult:
    """Outcome of a successful ``NET_DVR_Login_V30`` call."""

    # Non-negative user handle returned by the SDK; passed back to logout().
    user_id: int
    # Raw bytes of the device-info buffer filled in by the SDK during login.
    device_info_raw: bytes
class NET_DVR_DEVICEINFO_V30(Structure):
    """Opaque device info buffer for NET_DVR_Login_V30 (layout varies by SDK version)."""

    # A single 512-byte blob: the individual SDK fields are not modelled here.
    # NOTE(review): assumes 512 bytes is at least as large as the SDK's real
    # struct — confirm against the HCNetSDK header in use.
    _fields_ = [("data", c_byte * 512)]
@dataclass
class HikvisionRuntime:
    """Handle on a loaded HCNetSDK library exposing Init/Login/Logout/Cleanup."""

    lib: CDLL
    _inited: bool = False

    @classmethod
    def try_load(cls, lib_dir: str | None) -> HikvisionRuntime | None:
        """Load the first HCNetSDK shared object that exists and can be opened.

        Search order: both spellings under ``lib_dir`` (when given), the path
        in ``HIKVISION_LIB_PATH`` (when set), then both spellings under
        ``/opt/hikvision/lib``. Returns ``None`` when nothing could be loaded.
        """
        search: list[Path] = []
        if lib_dir and lib_dir.strip():
            root = Path(lib_dir).expanduser()
            search += [root / "libhcnetsdk.so", root / "libHCNetSDK.so"]
        override = os.environ.get("HIKVISION_LIB_PATH", "").strip()
        if override:
            search.append(Path(override).expanduser())
        search += [
            Path("/opt/hikvision/lib/libhcnetsdk.so"),
            Path("/opt/hikvision/lib/libHCNetSDK.so"),
        ]
        for candidate in search:
            if not candidate.is_file():
                continue
            try:
                handle = CDLL(str(candidate), mode=RTLD_GLOBAL)
                logger.info("Loaded Hikvision SDK: {}", candidate)
                return cls(lib=handle)
            except OSError as exc:
                # Keep trying the remaining candidates.
                logger.warning("Failed CDLL {}: {}", candidate, exc)
        return None

    def init(self) -> None:
        """Call ``NET_DVR_Init`` once; later calls are no-ops.

        Raises:
            RuntimeError: if the symbol is missing or the SDK returns 0.
        """
        if self._inited:
            return
        net_dvr_init = getattr(self.lib, "NET_DVR_Init", None)
        if net_dvr_init is None:
            raise RuntimeError("NET_DVR_Init not found in HCNetSDK")
        net_dvr_init.restype = c_int
        net_dvr_init.argtypes = []
        if int(net_dvr_init()) == 0:
            raise RuntimeError("NET_DVR_Init returned false")
        self._inited = True

    def cleanup(self) -> None:
        """Call ``NET_DVR_Cleanup`` if the symbol exists and clear the init flag."""
        net_dvr_cleanup = getattr(self.lib, "NET_DVR_Cleanup", None)
        if net_dvr_cleanup is not None:
            net_dvr_cleanup.restype = None
            net_dvr_cleanup.argtypes = []
            net_dvr_cleanup()
        self._inited = False

    def login_v30(
        self,
        *,
        ip: str,
        port: int,
        username: str,
        password: str,
    ) -> HikvisionLoginResult:
        """Log in via ``NET_DVR_Login_V30`` and return the user handle.

        String arguments are passed to the SDK UTF-8 encoded.

        Raises:
            RuntimeError: if the symbol is missing or the SDK returns a
                negative user id (the SDK's last error code is included).
        """
        net_dvr_login = getattr(self.lib, "NET_DVR_Login_V30", None)
        if net_dvr_login is None:
            raise RuntimeError("NET_DVR_Login_V30 not found in HCNetSDK")
        net_dvr_login.restype = c_int
        net_dvr_login.argtypes = [
            c_char_p,
            c_uint16,
            c_char_p,
            c_char_p,
            POINTER(NET_DVR_DEVICEINFO_V30),
        ]
        device_info = NET_DVR_DEVICEINFO_V30()
        encoded_ip = ip.encode("utf-8")
        encoded_user = username.encode("utf-8")
        encoded_password = password.encode("utf-8")
        user_id = net_dvr_login(
            encoded_ip,
            c_uint16(port),
            encoded_user,
            encoded_password,
            byref(device_info),
        )
        if user_id < 0:
            err = self._last_error()
            raise RuntimeError(f"NET_DVR_Login_V30 failed (user_id={user_id}, err={err})")
        return HikvisionLoginResult(
            user_id=int(user_id),
            device_info_raw=bytes(device_info.data),
        )

    def logout(self, user_id: int) -> None:
        """Call ``NET_DVR_Logout`` for ``user_id``; silently skipped if the symbol is absent."""
        net_dvr_logout = getattr(self.lib, "NET_DVR_Logout", None)
        if net_dvr_logout is None:
            return
        net_dvr_logout.restype = c_int
        net_dvr_logout.argtypes = [c_int]
        net_dvr_logout(c_int(user_id))

    def _last_error(self) -> str:
        """Return ``NET_DVR_GetLastError`` as a string, or ``"unknown"`` if unavailable."""
        net_dvr_get_last_error = getattr(self.lib, "NET_DVR_GetLastError", None)
        if net_dvr_get_last_error is None:
            return "unknown"
        net_dvr_get_last_error.restype = c_int
        net_dvr_get_last_error.argtypes = []
        return str(int(net_dvr_get_last_error()))
class HikvisionInitRefCount:
    """Process-wide NET_DVR_Init / NET_DVR_Cleanup pairing.

    The first ``retain`` initialises the SDK and the matching last ``release``
    cleans it up. The counter is class-level state guarded by a lock.
    """

    _lock = threading.Lock()
    _count = 0

    @classmethod
    def retain(cls, rt: HikvisionRuntime) -> None:
        """Take one reference, initialising the SDK for the first holder.

        If ``rt.init()`` raises, no reference is recorded, so a later
        ``retain`` attempts initialisation again instead of skipping it.
        """
        with cls._lock:
            if cls._count == 0:
                # Initialise before counting: a failed init must not leave a
                # phantom reference behind.
                rt.init()
            cls._count += 1

    @classmethod
    def release(cls, rt: HikvisionRuntime) -> None:
        """Drop one reference; the last one triggers ``rt.cleanup()``.

        Calls without a matching ``retain`` are ignored.
        """
        with cls._lock:
            if cls._count <= 0:
                return
            cls._count -= 1
            if cls._count == 0:
                rt.cleanup()

View File

@@ -0,0 +1,56 @@
from __future__ import annotations
import time
from dataclasses import dataclass
from typing import Any
import cv2
import numpy as np
from loguru import logger
@dataclass
class RtspCapture:
    """Thin OpenCV RTSP wrapper (blocking). Use from asyncio via to_thread."""

    url: str
    open_timeout_sec: float

    def __post_init__(self) -> None:
        # No capture exists until open() succeeds.
        self._cap: cv2.VideoCapture | None = None

    def open(self) -> None:
        """Open the stream and block until the first frame has been read.

        Any capture left over from an earlier ``open()`` is released first,
        and the new capture is released again if opening fails, so a failed
        call never leaves a handle allocated.

        Raises:
            RuntimeError: if OpenCV reports the capture as not opened.
            TimeoutError: if no frame arrives within ``open_timeout_sec``.
        """
        self.release()
        self._cap = cv2.VideoCapture(self.url, cv2.CAP_FFMPEG)
        try:
            if not self._cap.isOpened():
                raise RuntimeError(f"RTSP open failed (isOpened=False): {self.url!r}")
            # Reduce internal buffering where supported (best effort).
            try:
                self._cap.set(cv2.CAP_PROP_BUFFERSIZE, 1)
            except Exception as exc:
                logger.debug("CAP_PROP_BUFFERSIZE not applied: {}", exc)
            deadline = time.monotonic() + self.open_timeout_sec
            while time.monotonic() < deadline:
                ok, frame = self._cap.read()
                if ok and frame is not None:
                    return
                time.sleep(0.05)
            raise TimeoutError(
                f"RTSP first frame timeout after {self.open_timeout_sec}s: {self.url!r}"
            )
        except BaseException:
            # Do not leak the native handle when the open attempt fails.
            self.release()
            raise

    def read(self) -> tuple[bool, np.ndarray | None]:
        """Read one frame; returns ``(False, None)`` when no capture is open."""
        if self._cap is None:
            return False, None
        return self._cap.read()

    def release(self) -> None:
        """Release the capture if one is open; safe to call repeatedly."""
        if self._cap is not None:
            try:
                self._cap.release()
            except Exception as exc:
                logger.debug("VideoCapture.release: {}", exc)
            self._cap = None

    @property
    def cap(self) -> Any:
        """The underlying capture object, or ``None`` when not open."""
        return self._cap

View File

@@ -0,0 +1,762 @@
from __future__ import annotations
import asyncio
import time
import uuid
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Literal
from loguru import logger
from app.config import Settings
from app.database import AsyncSessionLocal
from app.repositories.surgery_results import SurgeryResultRepository
from app.schemas import SurgeryConsumptionDetail
from app.services.consumable_classifier import (
ConsumableClassifierService,
PredictionCandidate,
PredictionResult,
)
from app.services.tear_action import TearActionService
from app.services.video.backend_resolver import BackendResolver
from app.services.video.frame_encode import frame_to_jpeg_bytes
from app.services.video.hikvision_runtime import HikvisionInitRefCount, HikvisionRuntime
from app.services.video.rtsp_capture import RtspCapture
from app.services.video.types import VideoBackendKind
from app.services.voice_confirm import build_prompt_text
from app.surgery_errors import SurgeryPipelineError
@dataclass
class PendingConsumableConfirmation:
    """A low-confidence recognition awaiting client confirmation (does not block inference on later frames)."""

    # Unique id of this confirmation task (a UUID4 string when enqueued by the manager).
    id: str
    # Lifecycle state; entries start as "pending".
    status: Literal["pending", "confirmed", "rejected"]
    # Selectable options as (label, confidence) pairs.
    options: list[tuple[str, float]]
    # Prompt text generated from the options for the client.
    prompt_text: str
    # Creation time (timezone-aware UTC when enqueued by the manager).
    created_at: datetime
    # Label and confidence of the classifier's top-1 prediction that triggered this entry.
    model_top1_label: str
    model_top1_confidence: float
@dataclass
class SurgerySessionState:
    """Mutable per-surgery state shared by the camera workers and the API-facing methods."""

    # Consumable names that are acceptable results for this surgery.
    candidate_consumables: list[str]
    # Consumption details accumulated so far.
    details: list[SurgeryConsumptionDetail] = field(default_factory=list)
    # Held while appending details or mutating the pending-confirmation structures.
    lock: asyncio.Lock = field(default_factory=asyncio.Lock)
    # Set by the manager once every camera has signalled that its stream is ready.
    ready: asyncio.Event = field(default_factory=asyncio.Event)
    # time.monotonic() of the last accepted event per key; used for cooldown de-duplication.
    last_detail_monotonic: dict[str, float] = field(default_factory=dict)
    #: FIFO of confirmation-task ids; contains only entries whose status is pending.
    pending_fifo: list[str] = field(default_factory=list)
    # Confirmation tasks indexed by id.
    pending_by_id: dict[str, PendingConsumableConfirmation] = field(default_factory=dict)
    # First characters of the most recently enqueued prompt.
    last_pending_prompt_snippet: str | None = None
    #: ASR text of the most recent voice confirmation (written on successful recognition).
    last_asr_text: str | None = None
    #: Error description from the most recent voice confirmation (ASR / parsing failure, etc.).
    last_voice_error: str | None = None
@dataclass
class RunningSurgery:
    """Bookkeeping for one active surgery recording."""

    # Set to ask every camera worker of this surgery to exit its loop.
    stop_event: asyncio.Event
    # State shared by the workers and the manager.
    state: SurgerySessionState
    # One worker task per camera.
    tasks: list[asyncio.Task[None]]
@dataclass
class ArchivedSurgery:
    """Final result of a stopped surgery that is held in memory because it was not persisted."""

    # Snapshot of the consumption details taken when the surgery was stopped.
    details: list[SurgeryConsumptionDetail]
def _rank_topk_for_candidates(
topk: list[PredictionCandidate],
ordered_candidates: list[str],
*,
limit: int = 5,
) -> list[PredictionCandidate]:
if not topk:
return []
stripped_order = [c.strip() for c in ordered_candidates if c.strip()]
if not stripped_order:
return topk[:limit]
order_index = {name: i for i, name in enumerate(stripped_order)}
picked = [c for c in topk if c.label.strip() in order_index]
picked.sort(key=lambda c: order_index[c.label.strip()])
return picked[:limit]
class CameraSessionManager:
"""Per-surgery camera streams, RTSP + optional Hikvision SDK login, inference, client-side human confirm."""
def __init__(
    self,
    *,
    settings: Settings,
    consumable_classifier: ConsumableClassifierService,
    tear_action: TearActionService,
    hikvision_runtime: HikvisionRuntime | None,
    result_repository: SurgeryResultRepository | None = None,
) -> None:
    """Store the collaborators and create empty session/archive registries.

    Args:
        settings: Application settings read throughout the manager.
        consumable_classifier: Service used to classify frames.
        tear_action: Service used for the tear-action prediction on frames.
        hikvision_runtime: Loaded SDK runtime, or ``None`` when unavailable.
        result_repository: Repository used to persist final results; when
            ``None`` nothing is written to the database.
    """
    self._s = settings
    self._classifier = consumable_classifier
    self._tear = tear_action
    self._hik = hikvision_runtime
    self._repo = result_repository
    self._resolver = BackendResolver(settings, hikvision_runtime=hikvision_runtime)
    # Surgeries currently recording, keyed by surgery id.
    self._active: dict[str, RunningSurgery] = {}
    # Stopped surgeries whose final result has not been persisted yet.
    self._archive: dict[str, ArchivedSurgery] = {}
    # Guards mutations of the two registries above.
    self._manager_lock = asyncio.Lock()
    # Background task that retries persisting archived results, and its stop flag.
    self._retry_task: asyncio.Task[None] | None = None
    self._retry_stop = asyncio.Event()
async def start_archive_retry_loop(self) -> None:
    """Start the background archive-persist retry task unless one is still running."""
    existing = self._retry_task
    still_running = existing is not None and not existing.done()
    if still_running:
        return
    # A previous shutdown may have left the stop flag set.
    self._retry_stop.clear()
    retry_coro = self._archive_persist_retry_loop()
    self._retry_task = asyncio.create_task(retry_coro, name="archive_persist_retry")
async def shutdown(self) -> None:
    """Stop the retry task, then stop every surgery that is still active.

    Failures while stopping an individual surgery are logged and do not
    prevent the remaining surgeries from being stopped.
    """
    self._retry_stop.set()
    retry_task = self._retry_task
    if retry_task is not None:
        retry_task.cancel()
        try:
            await retry_task
        except asyncio.CancelledError:
            pass
        except Exception as exc:
            logger.debug("retry task shutdown: {}", exc)
        self._retry_task = None
    # Snapshot the ids under the lock; stop_surgery takes the lock itself.
    async with self._manager_lock:
        active_ids = tuple(self._active)
    for surgery_id in active_ids:
        try:
            await self.stop_surgery(surgery_id, require_active=False)
        except Exception as exc:
            logger.warning("shutdown stop_surgery {}: {}", surgery_id, exc)
async def _archive_persist_retry_loop(self) -> None:
while not self._retry_stop.is_set():
try:
await asyncio.wait_for(
self._retry_stop.wait(),
timeout=self._s.archive_persist_retry_interval_seconds,
)
break
except TimeoutError:
pass
ids = list(self._archive.keys())
for sid in ids:
if self._retry_stop.is_set():
break
await self._try_persist_archive(sid)
async def _try_persist_archive(self, surgery_id: str) -> bool:
if self._repo is None:
return False
async with self._manager_lock:
arch = self._archive.get(surgery_id)
if arch is None:
return True
try:
async with AsyncSessionLocal() as session:
async with session.begin():
await self._repo.save_final_result(
session,
surgery_id=surgery_id,
details=list(arch.details),
)
except Exception as exc:
logger.warning(
"Archive persist retry failed surgery_id={}: {}",
surgery_id,
exc,
)
return False
async with self._manager_lock:
self._archive.pop(surgery_id, None)
logger.info("Archive persisted after retry surgery_id={}", surgery_id)
return True
async def start_surgery(
    self,
    surgery_id: str,
    camera_ids: list[str],
    candidate_consumables: list[str],
) -> None:
    """Start one worker per camera and wait until all streams are ready.

    A result left in the in-memory archive under the same id is persisted
    first; if that fails the archive is restored and the start is refused.

    Raises:
        SurgeryPipelineError: ``RECORDING_CANNOT_START`` when the surgery is
            already active, when a stale archive cannot be persisted, or when
            not every camera signals readiness within the timeout.
    """
    stale_archive: ArchivedSurgery | None = None
    async with self._manager_lock:
        if surgery_id in self._active:
            raise SurgeryPipelineError(
                "RECORDING_CANNOT_START",
                "该手术已在录制中,请勿重复开始。",
            )
        if surgery_id in self._archive:
            logger.warning(
                "surgery_id={} 仍有未落库归档,尝试写入数据库后再开始新会话",
                surgery_id,
            )
            # Take ownership of the archive; it is put back below on failure.
            stale_archive = self._archive.pop(surgery_id)
    # Persisting happens outside the manager lock (it awaits the database).
    if stale_archive is not None:
        if self._repo is None:
            # No repository: the archived result is dropped, only logged.
            logger.error(
                "surgery_id={} 有内存归档但未配置数据库仓库,无法持久化;"
                "开始新会话将丢弃该归档(仅开发/无库模式)",
                surgery_id,
            )
        else:
            ok = await self._persist_archived_details(
                surgery_id, list(stale_archive.details)
            )
            if not ok:
                async with self._manager_lock:
                    self._archive[surgery_id] = stale_archive
                raise SurgeryPipelineError(
                    "RECORDING_CANNOT_START",
                    "该手术号存在尚未写入数据库的历史结果,请修复数据库或等待自动重试成功后再开始。",
                )
    state = SurgerySessionState(
        candidate_consumables=list(candidate_consumables),
    )
    stop_event = asyncio.Event()
    # One readiness event per camera, set by its worker after the first open.
    readies = [asyncio.Event() for _ in camera_ids]
    tasks: list[asyncio.Task[None]] = []
    # Overall wait is the per-stream open timeout plus a 5 s margin.
    open_timeout = self._s.video_open_timeout_sec + 5.0
    for cam_id, ready in zip(camera_ids, readies, strict=True):
        tasks.append(
            asyncio.create_task(
                self._camera_worker(
                    surgery_id=surgery_id,
                    camera_id=cam_id,
                    stream_ready=ready,
                    stop_event=stop_event,
                    state=state,
                ),
                name=f"camera:{surgery_id}:{cam_id}",
            )
        )
    run = RunningSurgery(stop_event=stop_event, state=state, tasks=tasks)
    # Register before waiting so stop_surgery can find and tear down the run.
    async with self._manager_lock:
        self._active[surgery_id] = run
    try:
        await asyncio.wait_for(
            asyncio.gather(*(r.wait() for r in readies)),
            timeout=open_timeout,
        )
        state.ready.set()
    except TimeoutError as exc:
        logger.error(
            "Surgery {} cameras not all ready within {}s",
            surgery_id,
            open_timeout,
        )
        # Tear down the workers that did start before reporting the failure.
        await self.stop_surgery(surgery_id, require_active=True)
        raise SurgeryPipelineError(
            "RECORDING_CANNOT_START",
            "开录未能确认:部分摄像头在超时内未成功拉到首帧。",
        ) from exc
    except Exception:
        await self.stop_surgery(surgery_id, require_active=True)
        raise
async def _persist_archived_details(
self,
surgery_id: str,
details: list[SurgeryConsumptionDetail],
) -> bool:
if self._repo is None:
return True
try:
async with AsyncSessionLocal() as session:
async with session.begin():
await self._repo.save_final_result(
session,
surgery_id=surgery_id,
details=details,
)
except Exception as exc:
logger.exception(
"Persist archived surgery {} failed (will keep archive): {}",
surgery_id,
exc,
)
return False
return True
async def stop_surgery(self, surgery_id: str, *, require_active: bool = True) -> None:
    """Stop the workers of ``surgery_id`` and persist (or archive) its result.

    Args:
        surgery_id: Surgery to stop.
        require_active: When true, a missing active session is an error;
            when false it is silently ignored.

    Raises:
        SurgeryPipelineError: ``RECORDING_NOT_STOPPED`` when there is no
            active session and ``require_active`` is true.
    """
    # Remove the run from the registry first so a second stop cannot see it.
    async with self._manager_lock:
        run = self._active.pop(surgery_id, None)
    if run is None:
        if require_active:
            raise SurgeryPipelineError(
                "RECORDING_NOT_STOPPED",
                "停录未能完成:当前没有该手术的活跃录制会话。",
            )
        return
    run.stop_event.set()
    # Wait for every worker; their errors are logged, not raised.
    results = await asyncio.gather(*run.tasks, return_exceptions=True)
    for res in results:
        if isinstance(res, BaseException):
            logger.warning("surgery task finished with error: {}", res)
    details = list(run.state.details)
    persisted = False
    if self._repo is not None:
        try:
            async with AsyncSessionLocal() as session:
                async with session.begin():
                    await self._repo.save_final_result(
                        session,
                        surgery_id=surgery_id,
                        details=details,
                    )
            persisted = True
        except Exception as exc:
            logger.exception("Persist surgery {} failed: {}", surgery_id, exc)
    async with self._manager_lock:
        if not persisted:
            # Also reached when no repository is configured.
            self._archive[surgery_id] = ArchivedSurgery(details=details)
            logger.error(
                "Surgery {} final result kept in memory archive only; "
                "background retry will attempt persist",
                surgery_id,
            )
def live_consumption_if_active(self, surgery_id: str) -> list[SurgeryConsumptionDetail] | None:
    """Return a copy of the live details of an active, ready surgery.

    ``None`` is returned when the surgery is not active, its streams are not
    ready yet, or no detail has been recorded so far.
    """
    run = self._active.get(surgery_id)
    if run is None or not run.state.ready.is_set():
        return None
    snapshot = list(run.state.details)
    return snapshot or None
def archived_consumption_fallback(self, surgery_id: str) -> list[SurgeryConsumptionDetail] | None:
    """Return a copy of the archived details for ``surgery_id``, or ``None`` if nothing is archived."""
    archived = self._archive.get(surgery_id)
    return None if archived is None else list(archived.details)
def voice_status(self, surgery_id: str) -> dict[str, object] | None:
    """Return a voice-confirmation status summary for an active surgery, else ``None``."""
    run = self._active.get(surgery_id)
    if run is None:
        return None
    session = run.state
    enabled = bool(self._s.voice_confirmation_enabled)
    queued = len(session.pending_fifo)
    return {
        "surgery_id": surgery_id,
        "voice_enabled": enabled,
        "pending_queue_approx": queued,
        "last_prompt_snippet": session.last_pending_prompt_snippet,
        "last_asr_text": session.last_asr_text,
        "last_error": session.last_voice_error,
    }
def record_voice_trace(
    self,
    surgery_id: str,
    *,
    asr_text: str | None,
    error: str | None,
) -> None:
    """Record the latest ASR text and error on an active surgery; no-op when it is not active."""
    run = self._active.get(surgery_id)
    if run is not None:
        session = run.state
        session.last_asr_text = asr_text
        session.last_voice_error = error
def get_pending_confirmation_by_id(
    self,
    surgery_id: str,
    confirmation_id: str,
) -> PendingConsumableConfirmation | None:
    """Return the confirmation with this id if the surgery is active and the entry is still pending."""
    run = self._active.get(surgery_id)
    if run is None:
        return None
    entry = run.state.pending_by_id.get(confirmation_id)
    still_pending = entry is not None and entry.status == "pending"
    return entry if still_pending else None
def next_pending_confirmation(
    self, surgery_id: str
) -> PendingConsumableConfirmation | None:
    """Return the oldest queued confirmation that is still pending, or ``None``."""
    run = self._active.get(surgery_id)
    if run is None:
        return None
    session = run.state
    # Walk the queue in FIFO order, skipping ids without a pending entry.
    queued = (session.pending_by_id.get(cid) for cid in session.pending_fifo)
    return next(
        (entry for entry in queued if entry is not None and entry.status == "pending"),
        None,
    )
async def resolve_pending_confirmation(
    self,
    surgery_id: str,
    confirmation_id: str,
    *,
    chosen_label: str | None,
    rejected: bool,
) -> None:
    """Resolve one pending confirmation as confirmed (with a label) or rejected.

    A confirmed label must be one of the entry's options; it is appended as a
    consumption detail with source ``"voice"``. The entry is removed from the
    queue and the index in both cases.

    Raises:
        SurgeryPipelineError: when the surgery is not active, the entry is
            unknown or already resolved, the arguments are contradictory or
            incomplete, or the chosen label is not among the options.
    """
    run = self._active.get(surgery_id)
    if run is None:
        raise SurgeryPipelineError(
            "CONFIRMATION_NOT_ACTIVE",
            "该手术当前不在进行中,无法提交确认。",
        )
    session = run.state
    async with session.lock:
        entry = session.pending_by_id.get(confirmation_id)
        if entry is None:
            raise SurgeryPipelineError(
                "CONFIRMATION_NOT_FOUND",
                "未找到该待确认项或已处理。",
            )
        if entry.status != "pending":
            raise SurgeryPipelineError(
                "CONFIRMATION_ALREADY_RESOLVED",
                "该待确认项已处理。",
            )
        if rejected and chosen_label:
            raise SurgeryPipelineError(
                "CONFIRMATION_INVALID",
                "拒绝确认时不应同时提供 chosen_label。",
            )
        if not (rejected or chosen_label):
            raise SurgeryPipelineError(
                "CONFIRMATION_INVALID",
                "请提供 chosen_label 或设置 rejected=true。",
            )
        valid_labels = {name.strip() for name, _score in entry.options if name.strip()}
        if rejected:
            entry.status = "rejected"
        else:
            picked = chosen_label.strip() if chosen_label else ""
            if picked not in valid_labels:
                raise SurgeryPipelineError(
                    "CONFIRMATION_INVALID",
                    f"所选耗材不在候选列表中:{chosen_label!r}",
                )
            entry.status = "confirmed"
            # The session lock is already held, so use the *_locked variant.
            self._append_confirmed_detail_locked(
                state=session,
                item_id=picked,
                item_name=picked,
                doctor_id=self._s.video_voice_confirm_doctor_id,
                source="voice",
            )
        # Drop the resolved entry from the queue (if present) and the index.
        try:
            session.pending_fifo.remove(confirmation_id)
        except ValueError:
            pass
        session.pending_by_id.pop(confirmation_id, None)
def _append_confirmed_detail_locked(
self,
*,
state: SurgerySessionState,
item_id: str,
item_name: str,
doctor_id: str,
source: str,
) -> None:
"""在已持有 `state.lock` 时追加一条消耗明细。"""
now_m = time.monotonic()
cooldown = self._s.video_detail_cooldown_sec
prev = state.last_detail_monotonic.get(item_id)
if prev is not None and (now_m - prev) < cooldown:
return
state.last_detail_monotonic[item_id] = now_m
state.details.append(
SurgeryConsumptionDetail(
item_id=item_id,
item_name=item_name,
quantity=1,
doctor_id=doctor_id,
timestamp=datetime.now(timezone.utc),
source=source,
)
)
async def _append_confirmed_detail(
    self,
    *,
    state: SurgerySessionState,
    item_id: str,
    item_name: str,
    doctor_id: str,
    source: str,
) -> None:
    """Acquire ``state.lock`` and append one consumption detail.

    Thin wrapper over ``_append_confirmed_detail_locked`` for callers that do
    not hold the lock yet; all arguments are forwarded unchanged.
    """
    async with state.lock:
        self._append_confirmed_detail_locked(
            state=state,
            item_id=item_id,
            item_name=item_name,
            doctor_id=doctor_id,
            source=source,
        )
async def _camera_worker(
    self,
    *,
    surgery_id: str,
    camera_id: str,
    stream_ready: asyncio.Event,
    stop_event: asyncio.Event,
    state: SurgerySessionState,
) -> None:
    """Read frames from one camera and run inference until ``stop_event`` is set.

    The stream is (re)opened whenever no capture is open; ``stream_ready`` is
    set after the first successful open. Blocking OpenCV and SDK calls run
    via ``asyncio.to_thread``. On exit the capture is released and any
    Hikvision login / init reference taken for this worker is undone.
    """
    # NOTE: resolved outside the try block; an error here ends the worker
    # before stream_ready is ever set.
    kind = self._resolver.backend_for_camera(camera_id)
    cap: RtspCapture | None = None
    hik_user_id: int | None = None
    hik_init_retained = False
    url: str | None = None
    consecutive_failures = 0
    first_ready = True
    try:
        url, hik_user_id, hik_init_retained = await self._resolve_rtsp_url(
            camera_id=camera_id,
            kind=kind,
        )
        assert url is not None
        # 0.0 makes the first successfully read frame eligible for inference.
        last_infer = 0.0
        while not stop_event.is_set():
            if cap is None:
                # (Re)connect; on failure back off and retry on the next pass.
                try:
                    cap = RtspCapture(url, open_timeout_sec=self._s.video_open_timeout_sec)
                    await asyncio.to_thread(cap.open)
                    consecutive_failures = 0
                    if first_ready:
                        stream_ready.set()
                        first_ready = False
                    logger.info(
                        "RTSP stream opened camera={} surgery={}",
                        camera_id,
                        surgery_id,
                    )
                except Exception as exc:
                    logger.warning(
                        "RTSP open failed camera={} surgery={}: {}",
                        camera_id,
                        surgery_id,
                        exc,
                    )
                    if cap is not None:
                        await asyncio.to_thread(cap.release)
                    cap = None
                    await asyncio.sleep(self._s.video_reconnect_backoff_seconds)
                    continue
            ok, frame = await asyncio.to_thread(cap.read)
            if not ok or frame is None:
                consecutive_failures += 1
                if consecutive_failures >= self._s.video_read_failure_reconnect_threshold:
                    # Too many failed reads in a row: drop the capture so the
                    # next pass reconnects.
                    logger.warning(
                        "RTSP reconnect camera={} surgery={} after {} read failures",
                        camera_id,
                        surgery_id,
                        consecutive_failures,
                    )
                    await asyncio.to_thread(cap.release)
                    cap = None
                    consecutive_failures = 0
                    await asyncio.sleep(self._s.video_reconnect_backoff_seconds)
                else:
                    await asyncio.sleep(0.05)
                continue
            consecutive_failures = 0
            now = time.monotonic()
            # Throttle inference: frames read inside the interval are discarded.
            if now - last_infer < self._s.video_inference_interval_sec:
                await asyncio.sleep(0.01)
                continue
            last_infer = now
            try:
                jpeg = await asyncio.to_thread(
                    frame_to_jpeg_bytes,
                    frame,
                    quality=self._s.video_jpeg_quality,
                )
                cls_res = await self._classifier.predict_image_bytes(jpeg)
                tear_res = await self._tear.predict_image_bytes(jpeg)
            except Exception as exc:
                # A failed inference only skips this frame.
                logger.debug(
                    "Inference skip camera={} surgery={}: {}",
                    camera_id,
                    surgery_id,
                    exc,
                )
                continue
            await self._handle_classification_result(
                state=state,
                cls_res=cls_res,
                tear_label=tear_res.label,
            )
    finally:
        # Release in reverse order of acquisition: capture, SDK login, SDK init ref.
        if cap is not None:
            await asyncio.to_thread(cap.release)
        if hik_user_id is not None and self._hik is not None:
            await asyncio.to_thread(self._hik.logout, hik_user_id)
        if hik_init_retained and self._hik is not None:
            HikvisionInitRefCount.release(self._hik)
async def _handle_classification_result(
self,
*,
state: SurgerySessionState,
cls_res: PredictionResult,
tear_label: str,
) -> None:
_ = tear_label
conf = cls_res.confidence
label = (cls_res.label or "").strip()
voice_floor = self._s.video_voice_confirm_min_confidence
if conf < voice_floor:
return
cand_order = [c.strip() for c in state.candidate_consumables if c.strip()]
if not cand_order:
return
cand_set = set(cand_order)
ranked = _rank_topk_for_candidates(cls_res.topk, cand_order)
auto_th = self._s.video_auto_confirm_confidence
def in_allowed(name: str) -> bool:
return name in cand_set
if conf >= auto_th and in_allowed(label):
await self._append_confirmed_detail(
state=state,
item_id=label or "unknown",
item_name=label or "unknown",
doctor_id=self._s.video_result_doctor_id,
source="vision",
)
return
if conf >= auto_th and not in_allowed(label):
if ranked and self._s.voice_confirmation_enabled:
await self._maybe_enqueue_pending_confirmation(
state, ranked, top_key=label, top_confidence=conf
)
return
if not self._s.voice_confirmation_enabled:
return
if ranked:
await self._maybe_enqueue_pending_confirmation(
state, ranked, top_key=label, top_confidence=conf
)
elif in_allowed(label):
await self._maybe_enqueue_pending_confirmation(
state,
[PredictionCandidate(label=label, confidence=conf)],
top_key=label,
top_confidence=conf,
)
async def _maybe_enqueue_pending_confirmation(
self,
state: SurgerySessionState,
ranked: list[PredictionCandidate],
*,
top_key: str,
top_confidence: float,
) -> None:
opts = [(c.label.strip(), float(c.confidence)) for c in ranked if c.label.strip()]
if not opts:
return
now_m = time.monotonic()
cooldown = self._s.video_detail_cooldown_sec
dedupe_key = f"pending_confirm:{top_key}:{opts[0][0]}"
async with state.lock:
prev = state.last_detail_monotonic.get(dedupe_key)
if prev is not None and (now_m - prev) < cooldown:
return
state.last_detail_monotonic[dedupe_key] = now_m
confirm_id = str(uuid.uuid4())
prompt = build_prompt_text(opts)
pending = PendingConsumableConfirmation(
id=confirm_id,
status="pending",
options=list(opts),
prompt_text=prompt,
created_at=datetime.now(timezone.utc),
model_top1_label=top_key,
model_top1_confidence=top_confidence,
)
state.pending_by_id[confirm_id] = pending
state.pending_fifo.append(confirm_id)
state.last_pending_prompt_snippet = prompt[:200]
logger.info(
"Enqueued pending consumable confirmation id={} top_key={}",
confirm_id,
top_key,
)
async def _resolve_rtsp_url(
    self,
    *,
    camera_id: str,
    kind: VideoBackendKind,
) -> tuple[str, int | None, bool]:
    """Returns (url, hikvision_user_id, whether NET_DVR_Init refcount was retained).

    For the RTSP backend only the URL is resolved. For the Hikvision SDK
    backend the SDK is initialised (ref-counted) and logged in first; when the
    SDK is not loaded, credentials are incomplete, or login fails, the generic
    RTSP URL is used instead if fallback is enabled in settings.

    Whenever this method raises, every SDK resource it acquired has already
    been released, because the caller only cleans up what is returned to it.

    Raises:
        RuntimeError: SDK requested but unavailable or credentials incomplete
            (fallback disabled), or login failed (fallback disabled).
        ValueError: no RTSP URL could be resolved for the camera.
    """
    if kind != VideoBackendKind.HIKVISION_SDK:
        return self._resolver.rtsp_url_for_camera(camera_id), None, False
    if self._hik is None:
        if self._s.hikvision_sdk_fallback_to_rtsp:
            logger.warning(
                "Hikvision SDK not loaded; fallback to RTSP for camera {}",
                camera_id,
            )
            return self._resolver.rtsp_url_for_camera(camera_id), None, False
        raise RuntimeError("Hikvision SDK requested but libhcnetsdk.so not loaded")
    if not (
        self._s.hikvision_device_ip.strip()
        and self._s.hikvision_user.strip()
        and self._s.hikvision_password.strip()
    ):
        if self._s.hikvision_sdk_fallback_to_rtsp:
            logger.warning(
                "Hikvision credentials incomplete; fallback to RTSP for camera {}",
                camera_id,
            )
            return self._resolver.rtsp_url_for_camera(camera_id), None, False
        raise RuntimeError("Hikvision SDK requires HIKVISION_DEVICE_IP, user, password")
    HikvisionInitRefCount.retain(self._hik)
    try:
        login = await asyncio.to_thread(
            self._hik.login_v30,
            ip=self._s.hikvision_device_ip.strip(),
            port=int(self._s.hikvision_device_port),
            username=self._s.hikvision_user.strip(),
            password=self._s.hikvision_password.strip(),
        )
    except Exception as exc:
        HikvisionInitRefCount.release(self._hik)
        if self._s.hikvision_sdk_fallback_to_rtsp:
            logger.warning("Hikvision login failed ({}); fallback to RTSP", exc)
            return self._resolver.rtsp_url_for_camera(camera_id), None, False
        raise
    try:
        url = self._resolver.rtsp_url_after_hikvision_login(camera_id)
    except Exception:
        # The login succeeded but the handle is never returned to the caller,
        # so undo the login and the init reference here before re-raising.
        try:
            await asyncio.to_thread(self._hik.logout, login.user_id)
        finally:
            HikvisionInitRefCount.release(self._hik)
        raise
    return url, login.user_id, True

View File

@@ -0,0 +1,19 @@
from __future__ import annotations
from enum import StrEnum
from typing import Protocol, runtime_checkable
class VideoBackendKind(StrEnum):
    """Which transport is used for a camera stream."""

    # Plain RTSP stream.
    RTSP = "rtsp"
    # Stream obtained via the Hikvision HCNetSDK backend.
    HIKVISION_SDK = "hikvision_sdk"
@runtime_checkable
class StreamStopHandle(Protocol):
    """Handle returned after a stream is started; call to release resources."""

    # Structural protocol: any object with an async ``stop()`` method satisfies it.
    async def stop(self) -> None:
        """Stop the stream and release whatever it holds."""
        ...