diff --git a/.dockerignore b/.dockerignore index 95dc54e..b83ddab 100644 --- a/.dockerignore +++ b/.dockerignore @@ -8,6 +8,8 @@ __pycache__ .ruff_cache .env .env.* +!.env.example +!.env.prod refs *.md .dockerignore diff --git a/.env.example b/.env.example index 36eaf89..17fb301 100644 --- a/.env.example +++ b/.env.example @@ -15,6 +15,8 @@ POSTGRES_PORT=35432 # --- HTTP(python -m main / 容器等入口)--- # SERVER_HOST=0.0.0.0 # SERVER_PORT=38080 +# 开发热重载(等价 uvicorn --reload,修改 *.py 后自动重启;生产勿开) +# UVICORN_RELOAD=true # --- Video:RTSP 与按路后端(须与客户端 camera_ids 一致)--- # VIDEO_DEFAULT_BACKEND=rtsp @@ -24,8 +26,7 @@ POSTGRES_PORT=35432 # OR_SITE_CONFIG_JSON_FILE=app/resources/or_site_config.sample.json # VIDEO_RTSP_URL_TEMPLATE=rtsp://user:pass@host:554/path/{camera_id} # -# 语音桌面客户端的「本机是哪一个 voice_terminal_id」不在此文件配置:系统级 voice_client.json -# 或界面填写(保存到用户目录,见 voice_confirmation_client/README.md)。 +# 语音终端 ID 在站点 JSON 的 voice_or_room_bindings 中配置;浏览器语音确认页(web/voice-confirmation)在页面内填写本机终端 ID。 # --- 海康 SDK(Linux x86_64;二进制勿提交仓库)--- # HIKVISION_LIB_DIR=/opt/hikvision/lib diff --git a/.gitignore b/.gitignore index fc3d562..e31f40a 100644 --- a/.gitignore +++ b/.gitignore @@ -22,6 +22,8 @@ logs/ # Demo 一键联调写入的 RTSP 映射(可被覆盖) scripts/demo_client/.runtime/ +# demo server --live 时由 YAML 生成的静态 labels(与 livereload 共用) +scripts/demo_client/labels.json # IDE / OS .idea/ diff --git a/app/api.py b/app/api.py index a4cbc8a..fcf3d97 100644 --- a/app/api.py +++ b/app/api.py @@ -42,6 +42,11 @@ from app.surgery_errors import SurgeryPipelineError router = APIRouter() +# 上传 WAV 后 ASR/解析失败:HTTP 200 + status=failed,待确认项仍留在 FIFO 队首,便于桌面端重试。 +_RECOVERABLE_VOICE_RESOLVE_CODES = frozenset( + {"VOICE_ASR_FAILED", "VOICE_TEXT_EMPTY", "VOICE_PARSE_FAILED"} +) + def _pipeline_error_detail(exc: SurgeryPipelineError, surgery_id: str) -> dict: d: dict = { @@ -367,7 +372,7 @@ async def get_surgery_result( tags=["client"], summary="拉取待确认耗材(含 TTS 音频)", description=( - "返回当前 FIFO 队首的一条低置信度识别;" + "返回当前 FIFO 
队首的一条低置信度识别;`pending_queue_length` 为仍排队中的 pending 条数(含本条)。" "响应内 `prompt_audio_mp3_base64` 为与 `prompt_text` 一致的 MP3(Base64),客户端可直接解码播放。" "无待确认项时返回 404;提示文本为空为 422;未配置百度或 TTS 失败为 503(不返回空音频兜底)。" "医生确认后请使用 `POST .../resolve` 上传 WAV。" @@ -416,6 +421,7 @@ async def get_pending_consumable_confirmation( "multipart/form-data 上传单个 WAV 文件(字段名 `audio`)。" "服务端将音频存入 MinIO、调用百度 ASR 识别、解析候选项并完成确认。" "解析并确认后记一条消耗明细;若语音表示否认全部候选则不记消耗。" + "ASR/解析可重试失败时仍返回 HTTP 200,`status`=`failed`,队首待确认项不弹出,便于桌面端重试。" ), ) async def resolve_pending_consumable_confirmation( @@ -467,6 +473,21 @@ async def resolve_pending_consumable_confirmation( content_type=audio.content_type, ) except SurgeryPipelineError as exc: + if exc.code in _RECOVERABLE_VOICE_RESOLVE_CODES: + extra = exc.extra or {} + asr_txt = extra.get("asr_text") + akey = extra.get("audio_object_key") + return SurgeryPendingConfirmationResolveResponse( + surgery_id=surgery_id, + confirmation_id=confirmation_id, + status="failed", + message=exc.message, + resolved_label=None, + rejected=False, + asr_text=asr_txt if isinstance(asr_txt, str) else None, + audio_object_key=akey if isinstance(akey, str) else None, + error_code=exc.code, + ) _raise_confirmation_http(exc, surgery_id) return SurgeryPendingConfirmationResolveResponse( surgery_id=surgery_id, @@ -477,4 +498,5 @@ async def resolve_pending_consumable_confirmation( rejected=result.rejected, asr_text=result.asr_text, audio_object_key=result.audio_object_key, + error_code=None, ) diff --git a/app/config.py b/app/config.py index b67fddd..eaa0183 100644 --- a/app/config.py +++ b/app/config.py @@ -4,7 +4,7 @@ from pathlib import Path from urllib.parse import quote_plus from typing import Any, Literal -from pydantic import Field +from pydantic import AliasChoices, Field from pydantic_settings import BaseSettings, SettingsConfigDict from app.baked import algorithm as baked_algorithm @@ -111,6 +111,7 @@ class _ServerGroup(_SettingsGroup): _FIELDS = ( "server_host", "server_port", + 
"server_reload", ) _PACKAGE_DIR = Path(__file__).resolve().parent @@ -132,6 +133,11 @@ class Settings(BaseSettings): server_host: str = "0.0.0.0" server_port: int = Field(default=38080, ge=1, le=65535) + #: 开发用:等价 ``uvicorn --reload``,Python 代码变更时重载进程(勿在生产开启)。 + server_reload: bool = Field( + default=False, + validation_alias=AliasChoices("server_reload", "UVICORN_RELOAD"), + ) video_default_backend: Literal["rtsp", "hikvision_sdk", "auto"] = "rtsp" video_camera_backend_overrides_json: str = "" diff --git a/app/dependencies.py b/app/dependencies.py index 9628405..996fae9 100644 --- a/app/dependencies.py +++ b/app/dependencies.py @@ -98,7 +98,18 @@ def build_container( voice_confirmation=voice, session_factory=sf, ) - voice_hub = VoiceTerminalHub(s) + voice_hub = VoiceTerminalHub( + s, + pending_head_fetcher=pipeline.get_pending_confirmation_for_client, + ) + + async def _on_pending_queue_advanced(surgery_id: str) -> None: + tid = camera_mgr.get_voice_terminal_id_if_active(surgery_id) + if tid: + voice_hub.schedule_notify_pending_head(tid, surgery_id) + + voice.set_on_pending_queue_advanced(_on_pending_queue_advanced) + camera_mgr.set_voice_terminal_hub(voice_hub) return AppContainer( settings=s, consumable_vision_algorithm_service=vision, diff --git a/app/resources/or_site_config.sample.json b/app/resources/or_site_config.sample.json index b5220e5..ec414a0 100644 --- a/app/resources/or_site_config.sample.json +++ b/app/resources/or_site_config.sample.json @@ -1,14 +1,19 @@ { "video_rtsp_urls": { - "or-cam-01": "rtsp://127.0.0.1:18554/demo1" + "or-cam-01": "rtsp://admin:Aa183137@192.168.3.2:554/Streaming/Channels/101", + "or-cam-02": "rtsp://admin:Aa183137@192.168.3.3:554/Streaming/Channels/101", + "or-cam-03": "rtsp://admin:Aa183137@192.168.3.4:554/Streaming/Channels/101", + "or-cam-04": "rtsp://admin:Aa183137@192.168.3.5:554/Streaming/Channels/101" }, "voice_or_room_bindings": [ { "camera_ids": [ "or-cam-01", - "or-cam-02" + "or-cam-02", + "or-cam-03", + 
"or-cam-04" ], - "or_room_id": "OR-DEMO", + "or_room_id": "OR-TEST", "voice_terminal_id": "desktop-1" } ] diff --git a/app/schemas.py b/app/schemas.py index 72c8e17..ea7062f 100644 --- a/app/schemas.py +++ b/app/schemas.py @@ -15,7 +15,12 @@ class SurgeryStartRequest(BaseModel): json_schema_extra={ "example": { "surgery_id": "123456", - "camera_ids": ["or-cam-01", "or-cam-02"], + "camera_ids": [ + "or-cam-01", + "or-cam-02", + "or-cam-03", + "or-cam-04", + ], "candidate_consumables": ["纱布", "缝线", "止血钳"], } } @@ -164,6 +169,18 @@ class SurgeryPendingConfirmationResponse(BaseModel): surgery_id: str confirmation_id: str + pending_queue_length: int = Field( + ge=1, + description="本台手术待确认 FIFO 中仍为 pending 的条数(含本条),用于客户端按序播报。", + ) + pending_queue_position: int = Field( + ge=1, + description="本条在当前 FIFO 中的排队序号(1-based,队首为 1,与队尾相隔 pending_queue_length-1 条等待)。", + ) + pending_cumulative_ordinal: int = Field( + ge=1, + description="本场手术中待确认任务自入队以来的累计序号(第几条入队任务)。", + ) prompt_text: str = Field(description="可直接用于展示或无障碍朗读的话术(与 MP3 内容一致)。") prompt_audio_mp3_base64: str = Field( description=( @@ -181,8 +198,17 @@ class SurgeryPendingConfirmationResponse(BaseModel): class SurgeryPendingConfirmationResolveResponse(BaseModel): surgery_id: str confirmation_id: str - status: str = Field(description="accepted") + status: str = Field( + description=( + "``accepted``:已确认或已否认并结案;" + "``failed``:ASR/解析等可重试失败,队首待确认项未移除。" + ), + ) message: str + error_code: str | None = Field( + default=None, + description="仅 status=failed 时与错误码一致(如 VOICE_ASR_FAILED)。", + ) resolved_label: str | None = Field( default=None, description="解析并确认后的耗材名称;否认全部候选时为 null。", diff --git a/app/services/audio_wav.py b/app/services/audio_wav.py index e25cf89..5eace99 100644 --- a/app/services/audio_wav.py +++ b/app/services/audio_wav.py @@ -2,6 +2,7 @@ from __future__ import annotations +import array import io import shutil import subprocess @@ -9,12 +10,57 @@ import wave from typing import Final _BAIDU_RATE: Final[int] = 
16000 +# 诊室麦克风常见音量偏小,百度短语音 3301「语音质量错误」多与有效幅度过低有关。 +_NORM_TARGET_PEAK: Final[int] = 12000 +_NORM_MAX_GAIN: Final[float] = 80.0 class WavDecodeError(ValueError): """Uploaded bytes are not a valid WAV or cannot be converted.""" +def pcm_s16le_to_wav_bytes(pcm: bytes, *, sample_rate: int = _BAIDU_RATE) -> bytes: + """将 raw s16le mono PCM 打成标准 WAV,供百度 ``format=wav`` 重试等场景。""" + if not pcm: + raise WavDecodeError("Empty PCM") + buf = io.BytesIO() + with wave.open(buf, "wb") as wf: + wf.setnchannels(1) + wf.setsampwidth(2) + wf.setframerate(sample_rate) + wf.writeframes(pcm) + return buf.getvalue() + + +def normalize_pcm_s16le_for_baidu(pcm: bytes) -> bytes: + """提升过弱信号幅度,降低 err_no=3301(speech quality)概率;已足够响的音频不改。""" + if len(pcm) < 2 or len(pcm) % 2 != 0: + return pcm + samples = array.array("h") + samples.frombytes(pcm) + if not samples: + return pcm + peak = 0 + for s in samples: + a = abs(int(s)) + if a > peak: + peak = a + if peak == 0 or peak >= _NORM_TARGET_PEAK: + return pcm + scale = min(_NORM_MAX_GAIN, float(_NORM_TARGET_PEAK) / float(peak)) + if scale <= 1.0: + return pcm + out = array.array("h") + for s in samples: + v = int(round(float(s) * scale)) + if v > 32767: + v = 32767 + elif v < -32768: + v = -32768 + out.append(v) + return out.tobytes() + + def wav_bytes_to_pcm16k_mono_s16le(wav_bytes: bytes) -> bytes: """ Prefer ffmpeg for arbitrary channel count / sample rate. 
@@ -57,7 +103,7 @@ def _ffmpeg_to_pcm16k(wav_bytes: bytes, ffmpeg: str) -> bytes: raise WavDecodeError(f"ffmpeg wav decode failed: {err or proc.returncode}") if not proc.stdout: raise WavDecodeError("ffmpeg produced empty PCM") - return proc.stdout + return normalize_pcm_s16le_for_baidu(proc.stdout) def _stdlib_wave_to_pcm16k(wav_bytes: bytes) -> bytes: @@ -96,6 +142,6 @@ def _stdlib_wave_to_pcm16k(wav_bytes: bytes) -> bytes: l_s, r_s = struct.unpack(" dict[str, Any]: + """先按 raw PCM 识别;若返回 err_no=3301(语音质量错误),再用 WAV 封装重试一次。 + + 部分环境下 PCM 与 WAV 路径对边界样本表现不一致,重试可提高成功率。 + """ + r = self.asr(pcm_s16le, "pcm", rate, options) + if not isinstance(r, dict): + return r + if r.get("err_no") != 3301: + return r + if len(pcm_s16le) < 1000: + return r + try: + wav = pcm_s16le_to_wav_bytes(pcm_s16le, sample_rate=rate) + except Exception: + return r + r2 = self.asr(wav, "wav", rate, options) + return r2 if isinstance(r2, dict) else r + def synthesis( self, text: str, diff --git a/app/services/surgery_pipeline.py b/app/services/surgery_pipeline.py index f243b9e..b1ccbf8 100644 --- a/app/services/surgery_pipeline.py +++ b/app/services/surgery_pipeline.py @@ -111,6 +111,10 @@ class SurgeryPipeline: pending = self._sessions.next_pending_confirmation(surgery_id) if pending is None: return None + queue_len = self._sessions.pending_queue_pending_count(surgery_id) + qpos = self._sessions.pending_queue_position_1based(surgery_id, pending.id) + if qpos is None or qpos < 1: + qpos = 1 mp3 = await run_in_threadpool( self._voice.synthesize_prompt_to_mp3, pending.prompt_text, @@ -119,6 +123,9 @@ class SurgeryPipeline: return SurgeryPendingConfirmationResponse( surgery_id=surgery_id, confirmation_id=pending.id, + pending_queue_length=max(1, queue_len), + pending_queue_position=qpos, + pending_cumulative_ordinal=max(1, pending.enqueue_ordinal), prompt_text=pending.prompt_text, prompt_audio_mp3_base64=b64, options=[ diff --git a/app/services/video/classification_handler.py 
b/app/services/video/classification_handler.py index 012efb2..26d0577 100644 --- a/app/services/video/classification_handler.py +++ b/app/services/video/classification_handler.py @@ -14,6 +14,8 @@ from __future__ import annotations import time +from app.services.voice_terminal_hub import VoiceTerminalHub + from app.baked import pipeline as bp from app.services.consumable_vision_algorithm import ( PredictionCandidate, @@ -56,8 +58,13 @@ class VisionClassificationHandler: self, *, registry: SurgerySessionRegistry, + voice_terminal_hub: VoiceTerminalHub | None = None, ) -> None: self._registry = registry + self._voice_hub = voice_terminal_hub + + def attach_voice_terminal_hub(self, hub: VoiceTerminalHub | None) -> None: + self._voice_hub = hub def _append_vision_consumption_window_if_ready( self, @@ -212,3 +219,7 @@ class VisionClassificationHandler: confirmation_id=cid, doctor_id=bp.VIDEO_RESULT_DOCTOR_ID, ) + hub = self._voice_hub + vtid = (state.voice_terminal_id or "").strip() + if hub is not None and vtid and surgery_id: + hub.schedule_notify_pending_head(vtid, surgery_id) diff --git a/app/services/video/session_manager.py b/app/services/video/session_manager.py index ed5a0ae..4624638 100644 --- a/app/services/video/session_manager.py +++ b/app/services/video/session_manager.py @@ -22,6 +22,7 @@ from app.services.consumable_vision_algorithm import ( from app.services.video.archive_persister import ArchivePersister from app.services.video.backend_resolver import BackendResolver from app.services.video.classification_handler import VisionClassificationHandler +from app.services.voice_terminal_hub import VoiceTerminalHub from app.services.video.hikvision_runtime import HikvisionInitRefCount, HikvisionRuntime from app.services.video.inference_aggregator import WindowInferenceAggregator from app.services.video.session_registry import ( @@ -101,6 +102,16 @@ class CameraSessionManager: registry=self._registry, ) + def set_voice_terminal_hub(self, hub: VoiceTerminalHub | 
None) -> None: + self._classifier_handler.attach_voice_terminal_hub(hub) + + def get_voice_terminal_id_if_active(self, surgery_id: str) -> str | None: + run = self._registry.get_running(surgery_id) + if run is None: + return None + tid = (run.state.voice_terminal_id or "").strip() + return tid or None + # ------------------------------------------------------------------ # 生命周期 # ------------------------------------------------------------------ @@ -310,6 +321,16 @@ class CameraSessionManager: ) -> PendingConsumableConfirmation | None: return self._registry.next_pending_confirmation(surgery_id) + def pending_queue_pending_count(self, surgery_id: str) -> int: + return self._registry.pending_queue_pending_count(surgery_id) + + def pending_queue_position_1based( + self, surgery_id: str, confirmation_id: str + ) -> int | None: + return self._registry.pending_queue_position_1based( + surgery_id, confirmation_id + ) + async def resolve_pending_confirmation( self, surgery_id: str, diff --git a/app/services/video/session_registry.py b/app/services/video/session_registry.py index 7c3d1e2..bd96b0d 100644 --- a/app/services/video/session_registry.py +++ b/app/services/video/session_registry.py @@ -52,6 +52,8 @@ class PendingConsumableConfirmation: model_top1_confidence: float #: 本轮待确认在解析失败时累计次数(首败 + 重试),供 API 计算 retry_remaining。 voice_parse_failures: int = 0 + #: 本场手术中待确认任务入队时的累计序号(自 1 起,入队时递增)。 + enqueue_ordinal: int = 1 @dataclass @@ -87,6 +89,8 @@ class SurgerySessionState: surgery_started_wall: float | None = None #: 术间绑定配置解析出的语音桌面终端 ID;停录时用于推送 end。 voice_terminal_id: str | None = None + #: 待确认入队已分配到的最大序号(与 ``pending_by_id`` 中 ``enqueue_ordinal`` 一致递增)。 + pending_ordinal_next: int = 0 @dataclass @@ -203,6 +207,32 @@ class SurgerySessionRegistry: return p return None + def pending_queue_pending_count(self, surgery_id: str) -> int: + """FIFO 中仍为 pending 的条数(与 ``next_pending_confirmation`` 遍历规则一致)。""" + run = self._active.get(surgery_id) + if run is None: + return 0 + st = 
run.state + n = 0 + for cid in st.pending_fifo: + p = st.pending_by_id.get(cid) + if p is not None and p.status == "pending": + n += 1 + return n + + def pending_queue_position_1based( + self, surgery_id: str, confirmation_id: str + ) -> int | None: + """``confirmation_id`` 在当前 ``pending_fifo`` 中的 1-based 位置(队首为 1)。""" + run = self._active.get(surgery_id) + if run is None: + return None + st = run.state + try: + return st.pending_fifo.index(confirmation_id) + 1 + except ValueError: + return None + async def resolve_pending_confirmation( self, surgery_id: str, @@ -436,6 +466,8 @@ class SurgerySessionRegistry: return None state.last_detail_monotonic[dedupe_key] = now_m + state.pending_ordinal_next += 1 + ordinal = state.pending_ordinal_next confirm_id = str(uuid.uuid4()) prompt = build_prompt_text(opts) pending = PendingConsumableConfirmation( @@ -446,6 +478,7 @@ class SurgerySessionRegistry: created_at=datetime.now(timezone.utc), model_top1_label=top_key, model_top1_confidence=top_confidence, + enqueue_ordinal=ordinal, ) state.pending_by_id[confirm_id] = pending state.pending_fifo.append(confirm_id) diff --git a/app/services/voice_confirm.py b/app/services/voice_confirm.py index f4b824c..2278b17 100644 --- a/app/services/voice_confirm.py +++ b/app/services/voice_confirm.py @@ -183,7 +183,7 @@ def is_rejection_phrase(asr_text: str) -> bool: def build_prompt_text(options: list[tuple[str, float]]) -> str: - parts = ["请确认刚才使用的耗材是下面哪一项。"] + parts = ["请确认。"] for i, (name, _conf) in enumerate(options, start=1): parts.append(f"第{i}个,{name}。") return "".join(parts) diff --git a/app/services/voice_resolution.py b/app/services/voice_resolution.py index 7a194f5..ace00e8 100644 --- a/app/services/voice_resolution.py +++ b/app/services/voice_resolution.py @@ -7,6 +7,7 @@ from __future__ import annotations import json +from collections.abc import Awaitable, Callable from dataclasses import dataclass from fastapi.concurrency import run_in_threadpool @@ -52,6 +53,7 @@ class 
VoiceConfirmationService: audits: VoiceAuditRepository, session_factory: async_sessionmaker | None = None, audit_emitter: VoiceAuditEmitter | None = None, + on_pending_queue_advanced: Callable[[str], Awaitable[None]] | None = None, ) -> None: self._s = settings self._sessions = sessions @@ -64,6 +66,21 @@ class VoiceConfirmationService: audits=audits, session_factory=self._session_factory, ) + self._on_pending_queue_advanced = on_pending_queue_advanced + + def set_on_pending_queue_advanced( + self, cb: Callable[[str], Awaitable[None]] | None + ) -> None: + self._on_pending_queue_advanced = cb + + async def _notify_pending_queue_advanced(self, surgery_id: str) -> None: + cb = self._on_pending_queue_advanced + if cb is None: + return + try: + await cb(surgery_id) + except Exception as exc: + logger.warning("on_pending_queue_advanced 回调失败: {}", exc) # ------------------------------------------------------------------ # TTS:保持对外接口不变 @@ -240,6 +257,7 @@ class VoiceConfirmationService: chosen_label=chosen, rejected=rejected, ) + await self._notify_pending_queue_advanced(surgery_id) final_status = "rejected" if rejected else "recognized" await self._emitter.success( source="wav", @@ -344,6 +362,7 @@ class VoiceConfirmationService: chosen_label=chosen, rejected=rejected, ) + await self._notify_pending_queue_advanced(surgery_id) final_status = "rejected" if rejected else "recognized" await self._emitter.success( source="text", @@ -448,7 +467,9 @@ class VoiceConfirmationService: session_trace, ) -> object: try: - return await run_in_threadpool(self._baidu.asr, pcm, "pcm", 16000, None) + return await run_in_threadpool( + self._baidu.asr_16k_mono_pcm_or_wav_fallback, pcm + ) except BaiduSpeechNotConfiguredError as exc: raise await self._emitter.fail( source="wav", @@ -600,5 +621,6 @@ class VoiceConfirmationService: include_extra={ "confirmation_id": confirmation_id, "retry_remaining": retry_remaining, + "asr_text": text, }, ) diff --git a/app/services/voice_terminal_hub.py 
b/app/services/voice_terminal_hub.py index f2dac64..697acc9 100644 --- a/app/services/voice_terminal_hub.py +++ b/app/services/voice_terminal_hub.py @@ -1,11 +1,13 @@ -"""语音桌面终端:assignment 状态、WebSocket 推送与 HTTP 轮询兜底。""" +"""语音桌面终端:assignment 状态、WebSocket 推送与 HTTP 拉取兜底。""" from __future__ import annotations +import asyncio import json from asyncio import Lock from collections import defaultdict -from collections.abc import Callable +from collections.abc import Awaitable, Callable +from typing import Any from fastapi import WebSocket from loguru import logger @@ -14,6 +16,8 @@ from starlette.websockets import WebSocketDisconnect from app.config import Settings from app.services.voice_terminal_binding import VoiceTerminalBindingIndex +PendingHeadFetcher = Callable[[str], Awaitable[Any]] + async def assign_voice_terminal_after_recording_started( hub: VoiceTerminalHub, @@ -45,12 +49,18 @@ async def assign_voice_terminal_after_recording_started( class VoiceTerminalHub: """进程内终端连接与当前手术分配(多 worker 需另行同步)。""" - def __init__(self, settings: Settings) -> None: + def __init__( + self, + settings: Settings, + *, + pending_head_fetcher: PendingHeadFetcher | None = None, + ) -> None: cfg = settings.load_or_site_config() self._bindings = cfg.voice_bindings if cfg else None self._assignments: dict[str, str] = {} self._lock = Lock() self._connections: dict[str, set[WebSocket]] = defaultdict(set) + self._pending_head_fetcher = pending_head_fetcher @property def bindings(self) -> VoiceTerminalBindingIndex | None: @@ -81,6 +91,15 @@ class VoiceTerminalHub: tid, surgery_id, ) + self.schedule_notify_pending_head(tid, surgery_id) + + def schedule_notify_pending_head(self, terminal_id: str, surgery_id: str) -> None: + """异步推送队首(含 TTS),不阻塞调用方。""" + tid = terminal_id.strip() + sid = (surgery_id or "").strip() + if not tid or not sid: + return + asyncio.create_task(self._notify_pending_head_safe(tid, sid)) async def notify_end(self, terminal_id: str | None, surgery_id: str) -> None: if not 
terminal_id: @@ -103,6 +122,50 @@ class VoiceTerminalHub: surgery_id, ) + async def notify_pending_head(self, terminal_id: str, surgery_id: str) -> None: + """向终端推送当前 FIFO 队首(含 TTS),无队首时推送 voice_pending_empty。""" + fetcher = self._pending_head_fetcher + tid = terminal_id.strip() + sid = (surgery_id or "").strip() + if not fetcher or not tid or not sid: + return + try: + payload = await fetcher(sid) + except Exception as exc: + logger.warning( + "voice_pending 构建失败 surgery_id={} terminal_id={}: {}", + sid, + tid, + exc, + ) + return + if payload is None: + await self._broadcast( + tid, + {"type": "voice_pending_empty", "surgery_id": sid}, + ) + return + try: + data = payload.model_dump(mode="json") + except Exception as exc: + logger.warning("voice_pending 序列化失败 surgery_id={}: {}", sid, exc) + return + data["type"] = "voice_pending" + await self._broadcast(tid, data) + + async def _notify_pending_head_safe( + self, terminal_id: str, surgery_id: str + ) -> None: + try: + await self.notify_pending_head(terminal_id, surgery_id) + except Exception as exc: + logger.warning( + "后台 voice_pending 推送失败 terminal_id={} surgery_id={}: {}", + terminal_id, + surgery_id, + exc, + ) + async def handle_websocket(self, websocket: WebSocket, terminal_id: str) -> None: tid = terminal_id.strip() if not tid: @@ -125,6 +188,7 @@ class VoiceTerminalHub: ensure_ascii=False, ) ) + self.schedule_notify_pending_head(tid, sid) # 不能用 receive_text():桌面端 websocket-client 会发 ping/二进制控制帧, # ASGI 可能呈现为无 "text" 的 websocket.receive,receive_text 会 KeyError 并掐断连接。 while True: diff --git a/build_voice_confirmation_client.bat b/build_voice_confirmation_client.bat deleted file mode 100644 index 6195ad2..0000000 --- a/build_voice_confirmation_client.bat +++ /dev/null @@ -1,30 +0,0 @@ -@echo off -REM 在 Windows 上将语音确认客户端打成 PyInstaller 目录包(内含 .exe)。 -REM 需在仓库根目录双击运行,或在 cmd 中执行;首次会自动 uv sync。 -REM 可选:build_voice_confirmation_client.bat --clean (先清空 build、dist) - -setlocal EnableExtensions -cd /d "%~dp0" - -echo 
[1/2] uv sync --group voice-client-build -uv sync --group voice-client-build -if errorlevel 1 goto :failed - -echo [2/2] PyInstaller ^(voice_client.spec^) -if /i "%~1"=="--clean" ( - uv run --group voice-client-build python scripts\build_voice_client.py --clean -) else ( - uv run --group voice-client-build python scripts\build_voice_client.py -) -if errorlevel 1 goto :failed - -echo. -echo 完成。输出目录: %CD%\dist\voice-confirmation-client\ -echo 主程序: dist\voice-confirmation-client\voice-confirmation-client.exe -pause -exit /b 0 - -:failed -echo 构建失败(见上方日志)。 -pause -exit /b 1 diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml index 11ddbfe..eb65970 100644 --- a/docker-compose.dev.yml +++ b/docker-compose.dev.yml @@ -1,5 +1,5 @@ # 本地仅起 PostgreSQL + MinIO;FastAPI 在宿主机跑(如 ./start.sh、uv run uvicorn)。 -# 基础镜像经 DaoCloud 公开镜像(大陆可访问):docker.io → docker.m.daocloud.io +# 基础镜像:DaoCloud「增加前缀」m.daocloud.io/docker.io/...(见 public-image-mirror README) # # docker compose -f docker-compose.dev.yml up -d # @@ -9,7 +9,7 @@ services: db: - image: docker.m.daocloud.io/library/postgres:16-alpine + image: m.daocloud.io/docker.io/library/postgres:16-alpine environment: POSTGRES_USER: ${POSTGRES_USER:-postgres} POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-postgres} @@ -27,7 +27,7 @@ services: # S3 兼容:语音确认原始 WAV;与本项目 .env 中 MINIO_ACCESS_KEY / MINIO_SECRET_KEY 一致 minio: - image: docker.m.daocloud.io/minio/minio:latest + image: m.daocloud.io/docker.io/minio/minio:latest command: server /data --console-address ":9001" environment: MINIO_ROOT_USER: ${MINIO_ACCESS_KEY:-minioadmin} diff --git a/docs/客户端手术通信接口说明.md b/docs/客户端手术通信接口说明.md index 44239d7..7eb3a33 100644 --- a/docs/客户端手术通信接口说明.md +++ b/docs/客户端手术通信接口说明.md @@ -8,7 +8,7 @@ | **开始手术** | `POST /client/surgeries/start`,只有在开录确认成功后才返回 `200`。 | | **结束手术** | `POST /client/surgeries/end`,只有在停录确认成功后才返回 `200`。 | | **查询结果** | `GET /client/surgeries/{surgery_id}/result`,至少存在一条消耗明细时返回 `200`;否则返回 `503`,常见错误码为 `RESULT_NOT_READY`。 | -| **待确认播报** | `GET 
/client/surgeries/{surgery_id}/pending-confirmation`,拉取队首低置信度任务,返回话术文本和 MP3 Base64。 | +| **待确认播报** | **官方浏览器客户端**(仓库 `web/voice-confirmation/`,可独立部署):同一 WebSocket 上推送 `voice_pending`(载荷与 `GET .../pending-confirmation` 成功体一致,另含 `type`);无队首时 `voice_pending_empty`;**不轮询 GET**。**第三方**仍可用 `GET .../pending-confirmation` 拉取队首。 | | **待确认答复** | `POST /client/surgeries/{surgery_id}/pending-confirmation/{confirmation_id}/resolve`,上传医生答复的 WAV 录音,服务端完成 ASR 后入账或关闭。该录音与播报音频无关。 | ## 1. 服务与基础信息 @@ -21,15 +21,18 @@ | **`start` / `end` 请求体** | JSON | | **`resolve` 请求体** | `multipart/form-data`,字段名为 `audio` | | **在线文档** | `/docs`、`/redoc` | +| **语音确认官方页面** | 仓库 `web/voice-confirmation/`(静态资源,与 API 分宿;需为浏览器配置 CORS,见该目录 `README.md`) | ## 2. 摄像头 ID 与 RTSP RTSP 地址、账号、口令等由客户端对接工程师提供给服务端运维,运维再写入服务端环境。客户端只在 `POST /client/surgeries/start` 中传 `camera_ids`。 -| **camera_id** | **RTSP** | **备注** | -| ------------- | -------------------------------- | ------ | -| `or-cam-01` | `rtsp://...`(由现场或 NVR 文档整理后交给运维) | 术间、机位 | -| `or-cam-02` | `...` | `...` | +| **camera_id** | **RTSP** | **备注** | +| ------------- | -------- | -------- | +| `or-cam-01` | `rtsp://admin:Aa183137@192.168.3.2:554/Streaming/Channels/101` | 测试手术室,主码流 ch.101 | +| `or-cam-02` | `rtsp://admin:Aa183137@192.168.3.3:554/Streaming/Channels/101` | 同上 | +| `or-cam-03` | `rtsp://admin:Aa183137@192.168.3.4:554/Streaming/Channels/101` | 同上 | +| `or-cam-04` | `rtsp://admin:Aa183137@192.168.3.5:554/Streaming/Channels/101` | 同上 | ## 3. 
HTTP 路由一览 @@ -41,8 +44,8 @@ RTSP 地址、账号、口令等由客户端对接工程师提供给服务端运 | 4 | `GET` | `/client/surgeries/{surgery_id}/result` | 查询手术结果 | | 5 | `GET` | `/client/surgeries/{surgery_id}/pending-confirmation` | 拉取待确认耗材 | | 6 | `POST` | `/client/surgeries/{surgery_id}/pending-confirmation/{confirmation_id}/resolve` | 提交医生答复 | -| 7 | `GET` | `/client/voice-terminals/{terminal_id}/assignment` | 可选:查询当前指派(调试或简易集成;**官方桌面客户端仅用 WebSocket**) | -| 8 | `WS` | `/client/voice-terminals/ws?terminal_id=...` | 语音桌面终端长连接,接收开录/停录指派(**推荐**) | +| 7 | `GET` | `/client/voice-terminals/{terminal_id}/assignment` | 可选:查询当前指派(调试或简易集成;**官方浏览器客户端仅用 WebSocket**) | +| 8 | `WS` | `/client/voice-terminals/ws?terminal_id=...` | 语音终端长连接,接收开录/停录指派(**推荐**;与 `web/voice-confirmation` 共用) | | | | | | **术间与语音终端绑定(服务端配置)** @@ -51,6 +54,7 @@ RTSP 地址、账号、口令等由客户端对接工程师提供给服务端运 - **`POST /client/surgeries/start`** 在 **HTTP 200 且开录已成功** 后:用请求体中的 `camera_ids` 在 `voice_or_room_bindings` 中解析终端(**精确匹配**术间 camera 集合,或 **开录路集为某术间 camera 集合的子集** 时匹配该术间);命中则向对应 `voice_terminal_id` 推送 **`action":"start"`**(并更新 assignment);未配置站点文件、或数组为空、或未命中则仅打日志,不影响 200。 - **`POST /client/surgeries/end`** 在停录 **HTTP 200** 后:向该手术会话记录的终端推送 **`action":"end"`**(并清除 assignment)。 - 推送 JSON 形如:`{"type":"voice_assignment","action":"start"|"end","surgery_id":"123456"}`。 +- **待确认队列(与 HTTP GET 成功体对齐)**:有新队首或队首变化时推送 `{"type":"voice_pending", "surgery_id":"...", "confirmation_id":"...", "pending_queue_length":1, "pending_queue_position":1, "pending_cumulative_ordinal":1, "prompt_text":"...", "prompt_audio_mp3_base64":"...", "options":[...], "model_top1_label":"...", "model_top1_confidence":0.0, "created_at":"..."}`(字段与 `GET /client/surgeries/{surgery_id}/pending-confirmation` 的 200 响应相同,并多一个 `type`)。FIFO 无待确认项时推送 `{"type":"voice_pending_empty","surgery_id":"123456"}`。触发时机包括:开录指派后(后台任务)、视觉入队待确认后、医生 `resolve` 成功弹出队首后、终端 **WebSocket 刚连接且当前已有指派** 时(先补发 `voice_assignment` start,再补发队首或 empty)。 - **多 worker**:当前实现为进程内内存;多 Uvicorn worker 时需 sticky session 或 Redis 等另行同步。 ## 
4. 流程 @@ -189,7 +193,7 @@ flowchart LR ``` { "surgery_id": "123456", - "camera_ids": ["or-cam-01", "or-cam-02"], + "camera_ids": ["or-cam-01", "or-cam-02", "or-cam-03", "or-cam-04"], "candidate_consumables": ["纱布", "缝线", "止血钳"] } ``` @@ -352,6 +356,9 @@ flowchart LR | ------------------------- | -------- | ------------------------- | | `surgery_id` | `string` | 手术号 | | `confirmation_id` | `string` | 待确认项 ID;提交 5.6 节接口时原样放入路径 | +| `pending_queue_length` | `int` | 当前 FIFO 中仍为 pending 的条数(含本条) | +| `pending_queue_position` | `int` | 本条在 `pending_fifo` 中的排队序号(1-based,队首为 1) | +| `pending_cumulative_ordinal` | `int` | 本场手术中待确认任务累计入队序号(第几条入队) | | `prompt_text` | `string` | 播报或展示用语,与 MP3 内容一致 | | `prompt_audio_mp3_base64` | `string` | MP3 的 Base64 | | `options` | `array` | 候选项列表,字段见下文 | @@ -437,7 +444,7 @@ curl -sS -X POST \ **路径** `GET /client/voice-terminals/{terminal_id}/assignment` -仓库内 **手术室耗材语音确认桌面客户端** 仅通过 **§5.8 WebSocket** 接收指派,**不调用**本接口。此处供运维脚本、未实现 WS 的第三方临时拉取 `active_surgery_id`。 +仓库内 **手术室耗材语音确认浏览器客户端**(`web/voice-confirmation/`)仅通过 **§5.8 WebSocket** 接收指派,**不调用**本接口。此处供运维脚本、未实现 WS 的第三方临时拉取 `active_surgery_id`。 **响应 200** diff --git a/main.py b/main.py index 43bdb5b..b1458de 100644 --- a/main.py +++ b/main.py @@ -1,5 +1,7 @@ +import logging import sys from contextlib import asynccontextmanager +from pathlib import Path import uvicorn from fastapi import FastAPI @@ -12,6 +14,22 @@ from app.database import check_database, engine from app.dependencies import build_container +def _configure_uvicorn_access_log_filters() -> None: + """第三方或 Demo 若轮询 pending-confirmation,无条目时 404 为常态;压低 uvicorn access 刷屏。""" + + class _SuppressPendingPoll404(logging.Filter): + def filter(self, record: logging.LogRecord) -> bool: + try: + msg = record.getMessage() + except Exception: + return True + if "/pending-confirmation" in msg and "GET" in msg and " 404 " in msg: + return False + return True + + logging.getLogger("uvicorn.access").addFilter(_SuppressPendingPoll404()) + + def 
configure_logging() -> None: """集中配置 loguru sink;由 create_app 显式调用,避免 import-time 副作用。""" logger.remove() @@ -45,6 +63,7 @@ async def lifespan(app: FastAPI): def create_app() -> FastAPI: configure_logging() + _configure_uvicorn_access_log_filters() application = FastAPI( title="Operation Room Monitor", lifespan=lifespan, @@ -81,12 +100,23 @@ app = create_app() def main() -> None: - uvicorn.run( - "main:app", - host=settings.server_host, - port=settings.server_port, - reload=False, - ) + root = Path(__file__).resolve().parent + kwargs: dict = { + "host": settings.server_host, + "port": settings.server_port, + } + if settings.server_reload: + kwargs["reload"] = True + kwargs["reload_dirs"] = [str(root)] + kwargs["reload_includes"] = ["*.py"] + kwargs["reload_excludes"] = [ + "**/.venv/**", + "**/__pycache__/**", + "**/web/**", + "**/scripts/demo_client/**", + "**/.git/**", + ] + uvicorn.run("main:app", **kwargs) if __name__ == "__main__": diff --git a/pyproject.toml b/pyproject.toml index 85d5419..7954f5d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,6 @@ dependencies = [ [project.scripts] operation-room-monitor-server = "main:main" -voice-confirmation-client = "voice_confirmation_client.__main__:main" # Use PyTorch CPU wheels from the official index so: # - Linux Docker builds (incl. Docker Desktop on Mac) do not install NVIDIA CUDA pip bundles. 
@@ -48,23 +47,7 @@ dev = [ "pytest-asyncio>=0.25.0", "aiosqlite>=0.21.0", "alembic>=1.14.0", -] -voice-client = [ - "httpx>=0.28.0", - "loguru>=0.7.3", - "numpy>=2.0.0", - "PySide6>=6.8.0", - "sounddevice>=0.5.0", - "websocket-client>=1.8.0", -] -voice-client-build = [ - "httpx>=0.28.0", - "loguru>=0.7.3", - "numpy>=2.0.0", - "PySide6>=6.8.0", - "sounddevice>=0.5.0", - "websocket-client>=1.8.0", - "pyinstaller>=6.0.0", + "livereload>=2.7.1", ] [tool.pytest.ini_options] diff --git a/scripts/build_voice_client.py b/scripts/build_voice_client.py deleted file mode 100644 index 0344f09..0000000 --- a/scripts/build_voice_client.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python3 -"""Build the voice confirmation desktop client with PyInstaller (run on target OS).""" - -from __future__ import annotations - -import argparse -import shutil -import subprocess -import sys -from pathlib import Path - - -def main() -> None: - root = Path(__file__).resolve().parents[1] - spec = root / "voice_client.spec" - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument( - "--clean", - action="store_true", - help="Remove build/ and dist/ before building", - ) - args = parser.parse_args() - if args.clean: - for name in ("build", "dist"): - p = root / name - if p.is_dir(): - shutil.rmtree(p) - if not spec.is_file(): - print(f"Missing {spec}", file=sys.stderr) - sys.exit(1) - cmd = [sys.executable, "-m", "PyInstaller", str(spec), "--noconfirm"] - print("Running:", " ".join(cmd)) - raise SystemExit(subprocess.call(cmd, cwd=root)) - - -if __name__ == "__main__": - main() diff --git a/scripts/demo_client/README.md b/scripts/demo_client/README.md index 4210978..0f6c2f3 100644 --- a/scripts/demo_client/README.md +++ b/scripts/demo_client/README.md @@ -46,7 +46,7 @@ python3 scripts/demo_client/fake_rtsp_from_file.py --port 18554 \ - 给该服务容器加 `--add-host=host.docker.internal:host-gateway`(Docker 20.10+),或 - 直接把 URL 写成宿主在 **docker0/桥接网** 上可达的局域网 IP(如 `192.168.x.x`),保证从容器内 
`curl`/`ffprobe` 能通 -生产/容器环境请使用 **`OR_SITE_CONFIG_JSON_FILE`** 指向完整站点 JSON(含 `video_rtsp_urls` 与 `voice_or_room_bindings`)。**不要**在仅容器可解析的配置里写 `127.0.0.1` 去指宿主机上的 RTSP(`127.0.0.1` 在容器内是容器自己)。 +生产/容器环境请使用 `**OR_SITE_CONFIG_JSON_FILE`** 指向完整站点 JSON(含 `video_rtsp_urls` 与 `voice_or_room_bindings`)。**不要**在仅容器可解析的配置里写 `127.0.0.1` 去指宿主机上的 RTSP(`127.0.0.1` 在容器内是容器自己)。 若监控与假 RTSP **都在宿主机同一系统**里直接跑(非容器),则用 `rtsp://127.0.0.1:...` 即可;否则应使用上面「容器连宿主」的写法。 @@ -64,27 +64,30 @@ Demo 页面「调试:两路视频」中可用 **选择视频** / **拖放** 在 §4.1 勾选 **「一键联调」** 后,在「调试」里为**路1/路2**各选一段视频,再点 **开始手术**,浏览器会把两路视频 **multipart 上传到监控 API**(`POST /internal/demo/orchestrate-and-start`),由服务进程依次: -1. 落盘两路视频到临时目录 -2. 用 Docker 起 MediaMTX、两路 ffmpeg 推 RTSP(与 `fake_rtsp_from_file.py` 等效) -3. 把当前假流的 **video_rtsp_urls** 合并写入 `OR_SITE_CONFIG_JSON_FILE`(保留已有 `voice_or_room_bindings`;与开录/拉流同进程,固定本机回环) -4. 调用与普通开录相同逻辑 +1. 落盘两路视频到临时目录 +2. 用 Docker 起 MediaMTX、两路 ffmpeg 推 RTSP(与 `fake_rtsp_from_file.py` 等效) +3. 把当前假流的 **video_rtsp_urls** 合并写入 `OR_SITE_CONFIG_JSON_FILE`(保留已有 `voice_or_room_bindings`;与开录/拉流同进程,固定本机回环) +4. 
调用与普通开录相同逻辑 **需同时满足**: - `.env` 中 `DEMO_ORCHESTRATOR_ENABLED=true`(并重启 API) - 已设置 `OR_SITE_CONFIG_JSON_FILE` 指向**可写**的站点 JSON;Docker 中请用 **bind-mount** 到容器内同一路径 -- **运行 `main.py` 的进程**能执行本机 `docker` 与 `ffmpeg`(与手动跑 `fake_rtsp_from_file` 相同)。**仅将 API 放 Docker、且不挂载** ` /var/run/docker.sock` 时,容器内往往无法为你在宿主机起 MediaMTX,此时请继续用手动假流方式。 +- **运行 `main.py` 的进程**能执行本机 `docker` 与 `ffmpeg`(与手动跑 `fake_rtsp_from_file` 相同)。**仅将 API 放 Docker、且不挂载** `/var/run/docker.sock` 时,容器内往往无法为你在宿主机起 MediaMTX,此时请继续用手动假流方式。 由于每次解析都会重新读取 `video_rtsp_url_map()`,覆盖 JSON 后**无需重启**主服务即可被下一次开录用到。 ## 运行方式 ```bash -# 1) 启动后端(默认 38080)。CORS 中间件在 settings.demo_cors_enabled=True 时自动挂载。 +# 1) 启动后端(默认 38080)。开发时建议开启热重载(.env: UVICORN_RELOAD=true 与 python main.py 等效),或: +# uv run uvicorn main:app --host 0.0.0.0 --port 38080 --reload uv run python main.py -# 2) 启动 demo 客户端静态服务(默认 127.0.0.1:38081)。 +# 2) 启动 demo 客户端静态服务(默认 127.0.0.1:38081) python scripts/demo_client/server.py +# 浏览器热重载(需 dev 依赖;会生成本目录 labels.json 供静态托管): +uv run --group dev python scripts/demo_client/server.py --live # 或指定端口: python scripts/demo_client/server.py -p 9000 --host 0.0.0.0 @@ -94,6 +97,12 @@ open http://localhost:38081/ 页面顶部的「服务端 Base URL」默认是 `http://localhost:38080`;如果后端部署在其他主机/端口,直接改这里即可。 +### 与「语音确认」页一致的热重载 + +- **API**:环境变量 `UVICORN_RELOAD=true` 或 `server_reload=true`(`python -m main` 使用 `app.config.Settings`)。 +- **本 Demo**:`--live`(livereload,监视本目录与 `app/resources/consumable_classifier_labels.yaml`)。 +- **web/voice-confirmation**:`./start_voice_confirmation_web.sh` 或 [web/voice-confirmation/README.md](../../web/voice-confirmation/README.md)。 + ## 页面包含什么 - `GET /health` 连通性检查 @@ -130,3 +139,4 @@ open http://localhost:38081/ - `DEMO_CORS_ENABLED`(默认 `True`,生产请在 `.env` 里置 `false`) - `DEMO_CORS_ORIGINS`(默认 `*`,可写 `http://my-host:38081,https://or-demo.example.com`) + diff --git a/scripts/demo_client/server.py b/scripts/demo_client/server.py index 5c171c7..8f95277 100644 --- a/scripts/demo_client/server.py +++ 
b/scripts/demo_client/server.py @@ -4,10 +4,13 @@ - Exposes `GET /labels.json`, which parses the repo's `app/resources/consumable_classifier_labels.yaml` and returns its label list so the page can prefill the candidate-consumables input. +- ``--live`` 使用 dev 依赖 ``livereload``:修改本目录下 HTML/JS/CSS + 或更新 ``labels.json``/YAML 后,浏览器自动刷新(需 ``uv run --group dev``)。 Run: python scripts/demo_client/server.py # 127.0.0.1:38081 python scripts/demo_client/server.py -p 9000 # custom port + uv run --group dev python scripts/demo_client/server.py --live """ from __future__ import annotations @@ -16,6 +19,8 @@ import argparse import json import re import sys +import threading +import time from http import HTTPStatus from http.server import SimpleHTTPRequestHandler, ThreadingHTTPServer from pathlib import Path @@ -24,6 +29,8 @@ from typing import Any SCRIPT_DIR = Path(__file__).resolve().parent REPO_ROOT = SCRIPT_DIR.parents[1] LABELS_YAML = REPO_ROOT / "app" / "resources" / "consumable_classifier_labels.yaml" +# ``--live`` 模式写入静态 `labels.json`,与 livereload 兼容(勿提交,见 .gitignore) +LABELS_JSON_ARTIFACT = SCRIPT_DIR / "labels.json" def _load_labels_with_pyyaml(path: Path) -> list[str] | None: @@ -85,6 +92,66 @@ def load_labels() -> list[str]: return labels +def write_labels_json_artifact() -> None: + body = json.dumps({"labels": load_labels()}, ensure_ascii=False) + "\n" + LABELS_JSON_ARTIFACT.write_text(body, encoding="utf-8") + + +def _spawn_labels_yaml_poll() -> None: + p = LABELS_YAML + state: list[float] = [0.0] + if p.is_file(): + state[0] = p.stat().st_mtime + + def loop() -> None: + while True: + time.sleep(1.0) + if not p.is_file(): + continue + m = p.stat().st_mtime + if m > state[0]: + state[0] = m + try: + write_labels_json_artifact() + except OSError as e: + print( + f"[demo-client] labels.json 写入失败: {e}", + file=sys.stderr, + ) + + threading.Thread(target=loop, daemon=True, name="DemoClientLabelsYamlWatch").start() + + +def run_livereload(host: str, port: int) -> None: + 
try: + from livereload import Server + except ImportError as exc: # pragma: no cover + print( + "``--live`` 需要 dev 依赖。请执行: uv sync --group dev", + file=sys.stderr, + ) + raise SystemExit(1) from exc + + write_labels_json_artifact() + _spawn_labels_yaml_poll() + server = Server() + server.watch(str(SCRIPT_DIR)) + if LABELS_YAML.is_file(): + server.watch(str(LABELS_YAML)) + url = f"http://{host}:{port}/" + print(f"Demo client (livereload) at {url}") + print(f" static dir : {SCRIPT_DIR}") + print(f" labels yaml: {LABELS_YAML} -> {LABELS_JSON_ARTIFACT.name}") + print("Press Ctrl+C to stop.") + server.serve( + root=str(SCRIPT_DIR), + host=host, + port=port, + open_url=False, + debug=False, + ) + + class DemoHandler(SimpleHTTPRequestHandler): def __init__(self, *args: Any, **kwargs: Any) -> None: super().__init__(*args, directory=str(SCRIPT_DIR), **kwargs) @@ -110,14 +177,9 @@ class DemoHandler(SimpleHTTPRequestHandler): ) -def main() -> None: - parser = argparse.ArgumentParser(description="Operation room demo client server") - parser.add_argument("--host", default="127.0.0.1") - parser.add_argument("-p", "--port", type=int, default=38081) - args = parser.parse_args() - - server = ThreadingHTTPServer((args.host, args.port), DemoHandler) - url = f"http://{args.host}:{args.port}/" +def run_plain(host: str, port: int) -> None: + server = ThreadingHTTPServer((host, port), DemoHandler) + url = f"http://{host}:{port}/" print(f"Demo client serving at {url}") print(f" static dir : {SCRIPT_DIR}") print(f" labels yaml: {LABELS_YAML}") @@ -130,5 +192,21 @@ def main() -> None: server.server_close() +def main() -> None: + parser = argparse.ArgumentParser(description="Operation room demo client server") + parser.add_argument("--host", default="127.0.0.1") + parser.add_argument("-p", "--port", type=int, default=38081) + parser.add_argument( + "--live", + action="store_true", + help="浏览器热重载(需 uv sync --group dev;会生成同目录 labels.json)", + ) + args = parser.parse_args() + if args.live: + 
run_livereload(args.host, args.port) + else: + run_plain(args.host, args.port) + + if __name__ == "__main__": main() diff --git a/scripts/dev_static_livereload.py b/scripts/dev_static_livereload.py new file mode 100644 index 0000000..c77fb7f --- /dev/null +++ b/scripts/dev_static_livereload.py @@ -0,0 +1,70 @@ +"""以 livereload 提供静态资源目录:保存 HTML/JS/CSS 时浏览器自动刷新。 + +需 dev 依赖:``uv sync --group dev``(``livereload``)。 + +用法: + uv run --group dev python scripts/dev_static_livereload.py --root web/voice-confirmation + uv run --group dev python scripts/dev_static_livereload.py --root web/voice-confirmation -p 8080 --host 127.0.0.1 +""" + +from __future__ import annotations + +import argparse +import sys +from pathlib import Path + + +def main() -> None: + try: + from livereload import Server + except ImportError as exc: # pragma: no cover + print( + "需要安装 dev 依赖: uv sync --group dev (需含 livereload)", + file=sys.stderr, + ) + raise SystemExit(1) from exc + + p = argparse.ArgumentParser(description="Static site + browser live reload (livereload)") + p.add_argument( + "--root", + type=Path, + required=True, + help="要托管的目录(如 web/voice-confirmation)", + ) + p.add_argument("--host", default="127.0.0.1") + p.add_argument("-p", "--port", type=int, default=8080) + p.add_argument( + "--extra-watch", + type=Path, + action="append", + default=[], + help="额外监视路径(可重复),变更时触发自动刷新", + ) + args = p.parse_args() + root: Path = args.root.resolve() + if not root.is_dir(): + print(f"不是目录: {root}", file=sys.stderr) + raise SystemExit(2) + + server = Server() + server.watch(str(root)) + for w in args.extra_watch: + wp = w.resolve() + if wp.exists(): + server.watch(str(wp if wp.is_file() else wp)) + + url = f"http://{args.host}:{args.port}/" + print(f"Livereload 静态根目录: {root}") + print(f"访问: {url} (编辑目录内文件后应自动刷新浏览器;首次请手动打开)") + print("按 Ctrl+C 停止。") + server.serve( + host=args.host, + port=args.port, + root=str(root), + open_url=False, + debug=False, + ) + + +if __name__ == "__main__": + main() 
diff --git a/start_voice_confirmation_client.bat b/start_voice_confirmation_client.bat deleted file mode 100644 index da89896..0000000 --- a/start_voice_confirmation_client.bat +++ /dev/null @@ -1,5 +0,0 @@ -@echo off -REM 启动手术室耗材语音确认桌面客户端。需已安装 uv 并完成 uv sync --group voice-client -setlocal -cd /d "%~dp0" -uv run --group voice-client python -m voice_confirmation_client %* diff --git a/start_voice_confirmation_client.sh b/start_voice_confirmation_client.sh deleted file mode 100755 index 4839769..0000000 --- a/start_voice_confirmation_client.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env bash -# 启动手术室耗材语音确认桌面客户端(PySide6)。 -# 依赖:本机已安装 uv,并已执行过 uv sync --group voice-client -# 用法:./start_voice_confirmation_client.sh - -set -euo pipefail - -ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -cd "$ROOT" - -exec uv run --group voice-client python -m voice_confirmation_client "$@" diff --git a/start_voice_confirmation_web.bat b/start_voice_confirmation_web.bat new file mode 100644 index 0000000..230c13f --- /dev/null +++ b/start_voice_confirmation_web.bat @@ -0,0 +1,17 @@ +@echo off +REM 用法: start_voice_confirmation_web.bat [端口] +REM 或: start_voice_confirmation_web.bat --plain [端口] 无热重载 +setlocal +cd /d "%~dp0" +set "PLA=0" +if /I "%~1"=="--plain" set "PLA=1" & shift +set "PORT=%~1" +if "%PORT%"=="" set "PORT=8080" +if "%PLA%"=="1" ( + cd web\voice-confirmation + echo 语音确认(无热重载) http://127.0.0.1:%PORT%/ + python -m http.server %PORT% --bind 127.0.0.1 + exit /b 0 +) +echo 语音确认(livereload) http://127.0.0.1:%PORT%/ +uv run --group dev python scripts\dev_static_livereload.py --root web\voice-confirmation --host 127.0.0.1 --port %PORT% diff --git a/start_voice_confirmation_web.sh b/start_voice_confirmation_web.sh new file mode 100755 index 0000000..3a19783 --- /dev/null +++ b/start_voice_confirmation_web.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +# 本地启动「语音确认」静态页(勿用 file:// 打开) +# 用法: ./start_voice_confirmation_web.sh [端口] (默认 livereload 热重载,需 uv sync --group dev) +# 或: 
./start_voice_confirmation_web.sh --plain [端口] (无热重载,仅 stdlib http.server) +set -euo pipefail +ROOT="$(cd "$(dirname "$0")" && pwd)" +cd "$ROOT" +PLAIN=0 +if [ "${1:-}" = "--plain" ]; then + PLAIN=1 + shift +fi +PORT="${1:-8080}" +if [ "$PLAIN" = 1 ]; then + cd web/voice-confirmation + echo "语音确认页面(无热重载): http://127.0.0.1:${PORT}/" + echo "按 Ctrl+C 停止" + exec python3 -m http.server "$PORT" --bind 127.0.0.1 +fi +echo "语音确认(livereload 热重载,需: uv sync --group dev): http://127.0.0.1:${PORT}/" +echo "按 Ctrl+C 停止" +exec uv run --group dev python scripts/dev_static_livereload.py --root web/voice-confirmation --host 127.0.0.1 --port "$PORT" diff --git a/tests/test_api_contract.py b/tests/test_api_contract.py index b8ecc1e..1b38f1e 100644 --- a/tests/test_api_contract.py +++ b/tests/test_api_contract.py @@ -6,7 +6,7 @@ import asyncio import json from datetime import datetime, timezone from pathlib import Path -from unittest.mock import AsyncMock, MagicMock +from unittest.mock import AsyncMock, MagicMock, patch import pytest from fastapi import FastAPI @@ -107,6 +107,43 @@ def test_assign_voice_terminal_helper_matches_start_surgery_behavior( assert hub.get_assignment("TERM-X") == "123456" +async def test_voice_terminal_hub_notify_pending_head_payload() -> None: + payload = SurgeryPendingConfirmationResponse( + surgery_id="123456", + confirmation_id="cid-a", + pending_queue_length=1, + pending_queue_position=1, + pending_cumulative_ordinal=1, + prompt_text="请确认", + prompt_audio_mp3_base64="QQ==", + options=[], + model_top1_label="x", + model_top1_confidence=0.5, + created_at=datetime.now(timezone.utc), + ) + fetcher = AsyncMock(return_value=payload) + hub = VoiceTerminalHub(Settings(), pending_head_fetcher=fetcher) + with patch.object(hub, "_broadcast", new_callable=AsyncMock) as bc: + await hub.notify_pending_head("TERM-1", "123456") + fetcher.assert_awaited_once_with("123456") + sent = bc.await_args[0][1] + assert sent["type"] == "voice_pending" + assert 
sent["confirmation_id"] == "cid-a" + assert sent["surgery_id"] == "123456" + + +async def test_voice_terminal_hub_notify_pending_head_empty() -> None: + fetcher = AsyncMock(return_value=None) + hub = VoiceTerminalHub(Settings(), pending_head_fetcher=fetcher) + with patch.object(hub, "_broadcast", new_callable=AsyncMock) as bc: + await hub.notify_pending_head("TERM-1", "123456") + fetcher.assert_awaited_once_with("123456") + assert bc.await_args[0][1] == { + "type": "voice_pending_empty", + "surgery_id": "123456", + } + + def test_start_surgery_notifies_voice_terminal_when_binding_matches( api_app: FastAPI, instant_sleep: None, tmp_path: Path ) -> None: @@ -265,6 +302,9 @@ def test_pending_confirmation_200_and_404(api_app: FastAPI) -> None: payload = SurgeryPendingConfirmationResponse( surgery_id="123456", confirmation_id="cid", + pending_queue_length=1, + pending_queue_position=1, + pending_cumulative_ordinal=1, prompt_text="请确认", prompt_audio_mp3_base64="//uQ", options=[], @@ -280,6 +320,9 @@ def test_pending_confirmation_200_and_404(api_app: FastAPI) -> None: assert r.status_code == 200 body_ok = r.json() assert body_ok["confirmation_id"] == "cid" + assert body_ok["pending_queue_length"] == 1 + assert body_ok["pending_queue_position"] == 1 + assert body_ok["pending_cumulative_ordinal"] == 1 assert body_ok["prompt_audio_mp3_base64"] == "//uQ" pipeline_none = MagicMock() @@ -333,11 +376,34 @@ def test_resolve_200(api_app: FastAPI) -> None: ) assert r.status_code == 200 body = r.json() + assert body["status"] == "accepted" + assert body["error_code"] is None assert body["resolved_label"] == "纱布" assert body["rejected"] is False assert body["asr_text"] == "第一个" +def test_resolve_voice_recoverable_error_returns_200_failed(api_app: FastAPI) -> None: + pipeline = MagicMock() + pipeline.resolve_pending_confirmation_from_audio = AsyncMock( + side_effect=SurgeryPipelineError( + "VOICE_ASR_FAILED", + "asr_err_3301: speech quality error.", + ) + ) + 
api_app.dependency_overrides[get_surgery_pipeline] = lambda: pipeline + client = TestClient(api_app) + r = client.post( + "/client/surgeries/123456/pending-confirmation/cid/resolve", + files={"audio": ("a.wav", b"RIFF", "audio/wav")}, + ) + assert r.status_code == 200 + body = r.json() + assert body["status"] == "failed" + assert body["error_code"] == "VOICE_ASR_FAILED" + assert "3301" in body["message"] + + def test_resolve_maps_surgery_pipeline_error_to_http(api_app: FastAPI) -> None: pipeline = MagicMock() pipeline.resolve_pending_confirmation_from_audio = AsyncMock( diff --git a/tests/test_app_integration.py b/tests/test_app_integration.py index 7924d2e..0ad736a 100644 --- a/tests/test_app_integration.py +++ b/tests/test_app_integration.py @@ -259,6 +259,7 @@ def test_full_flow_start_pending_resolve_end_result( assert r3.status_code == 200, r3.text body3 = r3.json() assert body3["confirmation_id"] == cid + assert body3["pending_queue_length"] == 1 import base64 decoded = base64.b64decode(body3["prompt_audio_mp3_base64"].encode("ascii")) @@ -270,6 +271,7 @@ def test_full_flow_start_pending_resolve_end_result( ) assert r4.status_code == 200, r4.text body4 = r4.json() + assert body4["status"] == "accepted" assert body4["resolved_label"] == "纱布" assert body4["rejected"] is False diff --git a/tests/test_audio_wav_normalize.py b/tests/test_audio_wav_normalize.py new file mode 100644 index 0000000..0921b44 --- /dev/null +++ b/tests/test_audio_wav_normalize.py @@ -0,0 +1,45 @@ +"""PCM 归一化与 WAV 封装。""" + +from __future__ import annotations + +import array + +import pytest + +from app.services.audio_wav import ( + WavDecodeError, + normalize_pcm_s16le_for_baidu, + pcm_s16le_to_wav_bytes, + wav_bytes_to_pcm16k_mono_s16le, +) + + +def test_normalize_boosts_quiet_pcm() -> None: + samples = array.array("h", [50, -80, 30] * 500) + pcm = samples.tobytes() + out = normalize_pcm_s16le_for_baidu(pcm) + arr = array.array("h") + arr.frombytes(out) + assert max(abs(x) for x in arr) > 
max(abs(x) for x in samples) + + +def test_normalize_skips_loud_pcm() -> None: + samples = array.array("h", [15000, -14000]) + pcm = samples.tobytes() + out = normalize_pcm_s16le_for_baidu(pcm) + assert out == pcm + + +def test_pcm_wav_roundtrip_is_valid_wav() -> None: + pcm = array.array("h", [100, -200, 300] * 100).tobytes() + wav = pcm_s16le_to_wav_bytes(pcm) + back = wav_bytes_to_pcm16k_mono_s16le(wav) + assert len(back) == len(pcm) + assert max(abs(x) for x in array.array("h", back)) >= max( + abs(x) for x in array.array("h", pcm) + ) + + +def test_pcm_s16le_to_wav_empty_raises() -> None: + with pytest.raises(WavDecodeError): + pcm_s16le_to_wav_bytes(b"") diff --git a/tests/test_baidu_asr_fallback.py b/tests/test_baidu_asr_fallback.py new file mode 100644 index 0000000..e4cd4d6 --- /dev/null +++ b/tests/test_baidu_asr_fallback.py @@ -0,0 +1,29 @@ +"""百度 ASR:3301 时 WAV 重试。""" + +from __future__ import annotations + +import array +from unittest.mock import MagicMock + +from app.config import Settings +from app.services.baidu_speech import BaiduSpeechService + + +def test_asr_pcm_or_wav_fallback_retries_on_3301() -> None: + ok = {"err_no": 0, "result": ["好"]} + pcm = array.array("h", [100] * 800).tobytes() + client = MagicMock() + client.asr = MagicMock(side_effect=[{"err_no": 3301, "err_msg": "q"}, ok]) + svc = BaiduSpeechService( + app_settings=Settings( + BAIDU_APP_ID="1", + BAIDU_API_KEY="k", + BAIDU_SECRET_KEY="s", + ) + ) + svc._client = client # type: ignore[attr-defined] + r = svc.asr_16k_mono_pcm_or_wav_fallback(pcm) + assert r == ok + assert client.asr.call_count == 2 + assert client.asr.call_args_list[0][0][1] == "pcm" + assert client.asr.call_args_list[1][0][1] == "wav" diff --git a/tests/test_pending_resolve_url_encoding.py b/tests/test_pending_resolve_url_encoding.py new file mode 100644 index 0000000..3cbb00a --- /dev/null +++ b/tests/test_pending_resolve_url_encoding.py @@ -0,0 +1,45 @@ +"""resolve 路径中 confirmation_id 的 URL 编码(与浏览器 / 
历史客户端行为一致)。""" + +from __future__ import annotations + +from urllib.parse import quote, urljoin + +import httpx + + +def test_post_resolve_url_encodes_confirmation_id() -> None: + captured: dict = {} + + def handler(request: httpx.Request) -> httpx.Response: + captured["url"] = str(request.url) + return httpx.Response(200, json={"status": "accepted"}) + + transport = httpx.MockTransport(handler) + base = "http://example.test:8080/" + cid = "c/id+here" + path = f"client/surgeries/123456/pending-confirmation/{quote(cid, safe='')}/resolve" + url = urljoin(base, path) + with httpx.Client(transport=transport) as client: + r = client.post(url, files={"audio": ("voice.wav", b"RIFF....", "audio/wav")}) + assert r.status_code == 200 + assert captured["url"].endswith( + "/client/surgeries/123456/pending-confirmation/c%2Fid%2Bhere/resolve" + ) + + +def test_pending_payload_field_names_match_contract() -> None: + raw = { + "surgery_id": "123456", + "confirmation_id": "abc", + "pending_queue_length": 1, + "pending_queue_position": 1, + "pending_cumulative_ordinal": 1, + "prompt_text": "请确认", + "prompt_audio_mp3_base64": "AA", + "options": [{"label": "纱布", "confidence": 0.4}], + "model_top1_label": "x", + "model_top1_confidence": 0.41, + "created_at": "2026-01-01T00:00:00+00:00", + } + assert raw["confirmation_id"] == "abc" + assert raw["prompt_text"] == "请确认" diff --git a/tests/test_session_manager_unit.py b/tests/test_session_manager_unit.py index 6b17c3f..ebffa08 100644 --- a/tests/test_session_manager_unit.py +++ b/tests/test_session_manager_unit.py @@ -463,3 +463,29 @@ async def test_resolve_already_resolved_status() -> None: "123456", pid, chosen_label="纱布", rejected=False ) assert excinfo.value.code == "CONFIRMATION_ALREADY_RESOLVED" + + +def test_pending_queue_pending_count_fifo() -> None: + settings = Settings() + mgr = CameraSessionManager( + settings=settings, + vision_algorithm=MagicMock(), + hikvision_runtime=None, + result_repository=None, + ) + st = 
SurgerySessionState(candidate_consumables=["纱布"]) + for pid in ("p1", "p2"): + st.pending_by_id[pid] = PendingConsumableConfirmation( + id=pid, + status="pending", + options=[("纱布", 0.4)], + prompt_text="x", + created_at=datetime.now(timezone.utc), + model_top1_label="x", + model_top1_confidence=0.4, + ) + st.pending_fifo.append(pid) + mgr._registry._active["123456"] = RunningSurgery( + stop_event=asyncio.Event(), state=st, tasks=[] + ) + assert mgr.pending_queue_pending_count("123456") == 2 diff --git a/tests/test_voice_client_machine_config.py b/tests/test_voice_client_machine_config.py deleted file mode 100644 index e05b45c..0000000 --- a/tests/test_voice_client_machine_config.py +++ /dev/null @@ -1,96 +0,0 @@ -"""语音客户端配置:系统级 + 用户级合并与保存。""" - -from __future__ import annotations - -import json - -import pytest - -from voice_confirmation_client.core.machine_config import ( - http_base_url_from_config, - load_voice_client_config, - machine_config_file_path, - save_user_voice_client_config, - user_voice_client_config_path, - voice_terminal_id_from_config, -) - - -def test_fields_from_system_file_only(tmp_path, monkeypatch: pytest.MonkeyPatch) -> None: - cfg = tmp_path / "voice_client.json" - cfg.write_text( - json.dumps( - {"voice_terminal_id": "t-1", "http_base_url": "http://api.example:38080"}, - ), - encoding="utf-8", - ) - monkeypatch.setenv("VOICE_CLIENT_MACHINE_CONFIG_FILE", str(cfg)) - monkeypatch.setenv("VOICE_CLIENT_USER_CONFIG_FILE", str(tmp_path / "none.json")) - (tmp_path / "none.json").write_text("{}", encoding="utf-8") - data = load_voice_client_config() - assert voice_terminal_id_from_config(data) == "t-1" - assert http_base_url_from_config(data) == "http://api.example:38080" - - -def test_user_file_overrides_system(tmp_path, monkeypatch: pytest.MonkeyPatch) -> None: - sys_f = tmp_path / "sys.json" - sys_f.write_text( - json.dumps({"voice_terminal_id": "sys", "http_base_url": "http://sys:1"}), - encoding="utf-8", - ) - usr_f = tmp_path / "usr.json" - 
usr_f.write_text( - json.dumps({"voice_terminal_id": "usr", "http_base_url": "http://usr:2"}), - encoding="utf-8", - ) - monkeypatch.setenv("VOICE_CLIENT_MACHINE_CONFIG_FILE", str(sys_f)) - monkeypatch.setenv("VOICE_CLIENT_USER_CONFIG_FILE", str(usr_f)) - data = load_voice_client_config() - assert voice_terminal_id_from_config(data) == "usr" - assert http_base_url_from_config(data) == "http://usr:2" - - -def test_http_base_default_when_missing_key( - tmp_path, monkeypatch: pytest.MonkeyPatch -) -> None: - cfg = tmp_path / "voice_client.json" - cfg.write_text(json.dumps({"voice_terminal_id": "x"}), encoding="utf-8") - monkeypatch.setenv("VOICE_CLIENT_MACHINE_CONFIG_FILE", str(cfg)) - monkeypatch.setenv("VOICE_CLIENT_USER_CONFIG_FILE", str(tmp_path / "empty.json")) - (tmp_path / "empty.json").write_text("{}", encoding="utf-8") - data = load_voice_client_config() - assert http_base_url_from_config(data) == "http://127.0.0.1:38080" - - -def test_machine_config_file_path_respects_override( - tmp_path, monkeypatch: pytest.MonkeyPatch -) -> None: - p = tmp_path / "custom.json" - monkeypatch.setenv("VOICE_CLIENT_MACHINE_CONFIG_FILE", str(p)) - assert machine_config_file_path() == p - - -def test_user_config_file_path_respects_override( - tmp_path, monkeypatch: pytest.MonkeyPatch -) -> None: - p = tmp_path / "u.json" - monkeypatch.setenv("VOICE_CLIENT_USER_CONFIG_FILE", str(p)) - assert user_voice_client_config_path() == p - - -def test_missing_files_return_empty_merge(monkeypatch: pytest.MonkeyPatch) -> None: - monkeypatch.setenv("VOICE_CLIENT_MACHINE_CONFIG_FILE", "/nonexistent/a.json") - monkeypatch.setenv("VOICE_CLIENT_USER_CONFIG_FILE", "/nonexistent/b.json") - assert load_voice_client_config() == {} - - -def test_save_user_voice_client_config(tmp_path, monkeypatch: pytest.MonkeyPatch) -> None: - out = tmp_path / "out.json" - monkeypatch.setenv("VOICE_CLIENT_USER_CONFIG_FILE", str(out)) - save_user_voice_client_config( - voice_terminal_id=" t99 ", - 
http_base_url="http://host:38080/", - ) - assert out.is_file() - data = json.loads(out.read_text(encoding="utf-8")) - assert data == {"voice_terminal_id": "t99", "http_base_url": "http://host:38080"} diff --git a/tests/test_voice_confirm.py b/tests/test_voice_confirm.py index 98eca5b..8b6d43b 100644 --- a/tests/test_voice_confirm.py +++ b/tests/test_voice_confirm.py @@ -34,8 +34,9 @@ def test_parse_voice_choice_negative() -> None: def test_build_prompt_contains_options() -> None: text = build_prompt_text([("纱布", 0.4), ("缝线", 0.3)]) - assert "纱布" in text - assert "缝线" in text + assert text.startswith("请确认以下耗材。") + assert "第1个,纱布。" in text + assert "第2个,缝线。" in text def test_match_voice_against_full_candidate_list() -> None: diff --git a/tests/test_voice_confirmation_client_api.py b/tests/test_voice_confirmation_client_api.py deleted file mode 100644 index a5b21eb..0000000 --- a/tests/test_voice_confirmation_client_api.py +++ /dev/null @@ -1,48 +0,0 @@ -"""Core HTTP client tests (no PySide6).""" - -from __future__ import annotations - -import httpx -import pytest - -from voice_confirmation_client.core.api import ConfirmationApiClient - - -def test_post_resolve_url_encoding(monkeypatch: pytest.MonkeyPatch) -> None: - captured: dict = {} - - def handler(request: httpx.Request) -> httpx.Response: - captured["url"] = str(request.url) - return httpx.Response(200, json={"status": "accepted"}) - - transport = httpx.MockTransport(handler) - client = ConfirmationApiClient("http://example.test:8080") - client._client = httpx.Client(transport=transport) # noqa: SLF001 - - st, body = client.post_resolve("123456", "c/id+here", b"RIFF....", "voice.wav") - assert st == 200 - assert isinstance(body, dict) - assert body.get("status") == "accepted" - assert captured["url"].endswith( - "/client/surgeries/123456/pending-confirmation/c%2Fid%2Bhere/resolve" - ) - - client.close() - - -def test_parse_pending() -> None: - client = ConfirmationApiClient("http://localhost") - raw = { - 
"surgery_id": "123456", - "confirmation_id": "abc", - "prompt_text": "请确认", - "prompt_audio_mp3_base64": "AA", - "options": [{"label": "纱布", "confidence": 0.4}], - "model_top1_label": "x", - "model_top1_confidence": 0.41, - "created_at": "2026-01-01T00:00:00+00:00", - } - p = client.parse_pending(raw) - assert p.confirmation_id == "abc" - assert p.prompt_text == "请确认" - client.close() diff --git a/tests/test_voice_resolution_service.py b/tests/test_voice_resolution_service.py index 045ea97..9a27809 100644 --- a/tests/test_voice_resolution_service.py +++ b/tests/test_voice_resolution_service.py @@ -120,7 +120,7 @@ async def test_resolve_recognized_appends_voice_detail_and_audit( ) baidu = MagicMock() baidu.configured = True - baidu.asr = MagicMock(return_value={"err_no": 0, "result": ["第一个"]}) + baidu.asr_16k_mono_pcm_or_wav_fallback = MagicMock(return_value={"err_no": 0, "result": ["第一个"]}) svc = _make_service( settings=settings, @@ -166,7 +166,7 @@ async def test_resolve_rejected_audit( ) baidu = MagicMock() baidu.configured = True - baidu.asr = MagicMock(return_value={"err_no": 0, "result": ["不是"]}) + baidu.asr_16k_mono_pcm_or_wav_fallback = MagicMock(return_value={"err_no": 0, "result": ["不是"]}) svc = _make_service( settings=settings, @@ -215,7 +215,7 @@ async def test_resolve_recognizes_label_not_in_topk_but_in_surgery_candidates( ) baidu = MagicMock() baidu.configured = True - baidu.asr = MagicMock( + baidu.asr_16k_mono_pcm_or_wav_fallback = MagicMock( return_value={"err_no": 0, "result": ["刚才用的是止血钳"]} ) svc = _make_service( @@ -355,7 +355,7 @@ async def test_asr_failed_audit( ) baidu = MagicMock() baidu.configured = True - baidu.asr = MagicMock(return_value={"err_no": 3300, "err_msg": "bad"}) + baidu.asr_16k_mono_pcm_or_wav_fallback = MagicMock(return_value={"err_no": 3300, "err_msg": "bad"}) svc = _make_service( settings=settings, sessions=sessions, @@ -395,7 +395,7 @@ async def test_parse_failed_audit( baidu = MagicMock() baidu.configured = True # Avoid 
substrings like 「无」that trigger `is_rejection_phrase`. - baidu.asr = MagicMock(return_value={"err_no": 0, "result": ["西红柿土豆"]}) + baidu.asr_16k_mono_pcm_or_wav_fallback = MagicMock(return_value={"err_no": 0, "result": ["西红柿土豆"]}) svc = _make_service( settings=settings, sessions=sessions, diff --git a/uv.lock b/uv.lock index 21b1c4f..905ad25 100644 --- a/uv.lock +++ b/uv.lock @@ -33,15 +33,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d2/29/6533c317b74f707ea28f8d633734dbda2119bbadfc61b2f3640ba835d0f7/alembic-1.18.4-py3-none-any.whl", hash = "sha256:a5ed4adcf6d8a4cb575f3d759f071b03cd6e5c7618eb796cb52497be25bfe19a", size = 263893, upload-time = "2026-02-10T16:00:49.997Z" }, ] -[[package]] -name = "altgraph" -version = "0.17.5" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/7e/f8/97fdf103f38fed6792a1601dbc16cc8aac56e7459a9fff08c812d8ae177a/altgraph-0.17.5.tar.gz", hash = "sha256:c87b395dd12fabde9c99573a9749d67da8d29ef9de0125c7f536699b4a9bc9e7", size = 48428, upload-time = "2025-11-21T20:35:50.583Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a9/ba/000a1996d4308bc65120167c21241a3b205464a2e0b58deda26ae8ac21d1/altgraph-0.17.5-py2.py3-none-any.whl", hash = "sha256:f3a22400bce1b0c701683820ac4f3b159cd301acab067c51c653e06961600597", size = 21228, upload-time = "2025-11-21T20:35:49.444Z" }, -] - [[package]] name = "annotated-doc" version = "0.0.4" @@ -637,6 +628,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/99/a2/ca7dc962848040befed12732dff6acae7fb3c4f6fc4272b3f6c9a30b8713/kiwisolver-1.5.0-cp314-cp314t-win_arm64.whl", hash = "sha256:58f812017cd2985c21fbffb4864d59174d4903dd66fa23815e74bbc7a0e2dd57", size = 70032, upload-time = "2026-03-09T13:15:34.411Z" }, ] +[[package]] +name = "livereload" +version = "2.7.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "tornado" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/43/6e/f2748665839812a9bbe5c75d3f983edbf3ab05fa5cd2f7c2f36fffdf65bd/livereload-2.7.1.tar.gz", hash = "sha256:3d9bf7c05673df06e32bea23b494b8d36ca6d10f7d5c3c8a6989608c09c986a9", size = 22255, upload-time = "2024-12-18T13:42:01.461Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e4/3e/de54dc7f199e85e6ca37e2e5dae2ec3bce2151e9e28f8eb9076d71e83d56/livereload-2.7.1-py3-none-any.whl", hash = "sha256:5201740078c1b9433f4b2ba22cd2729a39b9d0ec0a2cc6b4d3df257df5ad0564", size = 22657, upload-time = "2024-12-18T13:41:56.35Z" }, +] + [[package]] name = "loguru" version = "0.7.3" @@ -650,18 +653,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0c/29/0348de65b8cc732daa3e33e67806420b2ae89bdce2b04af740289c5c6c8c/loguru-0.7.3-py3-none-any.whl", hash = "sha256:31a33c10c8e1e10422bfd431aeb5d351c7cf7fa671e3c4df004162264b28220c", size = 61595, upload-time = "2024-12-06T11:20:54.538Z" }, ] -[[package]] -name = "macholib" -version = "1.16.4" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "altgraph", marker = "sys_platform == 'darwin'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/10/2f/97589876ea967487978071c9042518d28b958d87b17dceb7cdc1d881f963/macholib-1.16.4.tar.gz", hash = "sha256:f408c93ab2e995cd2c46e34fe328b130404be143469e41bc366c807448979362", size = 59427, upload-time = "2025-11-22T08:28:38.373Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c7/d1/a9f36f8ecdf0fb7c9b1e78c8d7af12b8c8754e74851ac7b94a8305540fc7/macholib-1.16.4-py2.py3-none-any.whl", hash = "sha256:da1a3fa8266e30f0ce7e97c6a54eefaae8edd1e5f86f3eb8b95457cae90265ea", size = 38117, upload-time = "2025-11-22T08:28:36.939Z" }, -] - [[package]] name = "mako" version = "1.3.11" @@ -895,26 +886,10 @@ dev = [ { name = "aiosqlite" }, { name = "alembic" }, { name = "httpx" }, + { name = "livereload" }, { name = "pytest" }, { name = "pytest-asyncio" }, ] -voice-client = [ - { name = "httpx" }, 
- { name = "loguru" }, - { name = "numpy" }, - { name = "pyside6" }, - { name = "sounddevice" }, - { name = "websocket-client" }, -] -voice-client-build = [ - { name = "httpx" }, - { name = "loguru" }, - { name = "numpy" }, - { name = "pyinstaller" }, - { name = "pyside6" }, - { name = "sounddevice" }, - { name = "websocket-client" }, -] [package.metadata] requires-dist = [ @@ -941,26 +916,10 @@ dev = [ { name = "aiosqlite", specifier = ">=0.21.0" }, { name = "alembic", specifier = ">=1.14.0" }, { name = "httpx", specifier = ">=0.28.0" }, + { name = "livereload", specifier = ">=2.7.1" }, { name = "pytest", specifier = ">=8.3.0" }, { name = "pytest-asyncio", specifier = ">=0.25.0" }, ] -voice-client = [ - { name = "httpx", specifier = ">=0.28.0" }, - { name = "loguru", specifier = ">=0.7.3" }, - { name = "numpy", specifier = ">=2.0.0" }, - { name = "pyside6", specifier = ">=6.8.0" }, - { name = "sounddevice", specifier = ">=0.5.0" }, - { name = "websocket-client", specifier = ">=1.8.0" }, -] -voice-client-build = [ - { name = "httpx", specifier = ">=0.28.0" }, - { name = "loguru", specifier = ">=0.7.3" }, - { name = "numpy", specifier = ">=2.0.0" }, - { name = "pyinstaller", specifier = ">=6.0.0" }, - { name = "pyside6", specifier = ">=6.8.0" }, - { name = "sounddevice", specifier = ">=0.5.0" }, - { name = "websocket-client", specifier = ">=1.8.0" }, -] [[package]] name = "packaging" @@ -971,15 +930,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7a/c2/920ef838e2f0028c8262f16101ec09ebd5969864e5a64c4c05fad0617c56/packaging-26.1-py3-none-any.whl", hash = "sha256:5d9c0669c6285e491e0ced2eee587eaf67b670d94a19e94e3984a481aba6802f", size = 95831, upload-time = "2026-04-14T21:12:47.56Z" }, ] -[[package]] -name = "pefile" -version = "2024.8.26" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/03/4f/2750f7f6f025a1507cd3b7218691671eecfd0bbebebe8b39aa0fe1d360b8/pefile-2024.8.26.tar.gz", hash = 
"sha256:3ff6c5d8b43e8c37bb6e6dd5085658d658a7a0bdcd20b6a07b1fcfc1c4e9d632", size = 76008, upload-time = "2024-08-26T20:58:38.155Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/54/16/12b82f791c7f50ddec566873d5bdd245baa1491bac11d15ffb98aecc8f8b/pefile-2024.8.26-py3-none-any.whl", hash = "sha256:76f8b485dcd3b1bb8166f1128d395fa3d87af26360c2358fb75b80019b957c6f", size = 74766, upload-time = "2024-08-26T21:01:02.632Z" }, -] - [[package]] name = "pillow" version = "12.2.0" @@ -1282,47 +1232,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f4/7e/a72dd26f3b0f4f2bf1dd8923c85f7ceb43172af56d63c7383eb62b332364/pygments-2.20.0-py3-none-any.whl", hash = "sha256:81a9e26dd42fd28a23a2d169d86d7ac03b46e2f8b59ed4698fb4785f946d0176", size = 1231151, upload-time = "2026-03-29T13:29:30.038Z" }, ] -[[package]] -name = "pyinstaller" -version = "6.20.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "altgraph" }, - { name = "macholib", marker = "sys_platform == 'darwin'" }, - { name = "packaging" }, - { name = "pefile", marker = "sys_platform == 'win32'" }, - { name = "pyinstaller-hooks-contrib" }, - { name = "pywin32-ctypes", marker = "sys_platform == 'win32'" }, - { name = "setuptools" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/46/60/d03d52e6690d4e9caf333dcd14550cde634ce6c118b3bc8fa3112c3186fd/pyinstaller-6.20.0.tar.gz", hash = "sha256:95c5c7e03d5d61e9dfb8ef259c699cf492bb1041beb6dbe83696608cec07347a", size = 4048728, upload-time = "2026-04-22T20:59:36.96Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d0/e4/e228d6d1bbb7fd62dc660a8fb202a583b023d3a3624ca95d1a9290ee4d6a/pyinstaller-6.20.0-py3-none-macosx_10_13_universal2.whl", hash = "sha256:bf3be4e1284ee78ddccba5e29f99443a12a7b4673168288ffc4c9d38c6f7b90e", size = 1047642, upload-time = "2026-04-22T20:58:32.006Z" }, - { url = 
"https://files.pythonhosted.org/packages/ce/bd/afb631bcb3f9040efebd4f6d067f0828b51710818f69fb41a2d4b7787f52/pyinstaller-6.20.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:72ae9c1fdea134afa791f58bdc9a1934d5c7609753c111e0026bfc272b32b712", size = 742494, upload-time = "2026-04-22T20:58:36.285Z" }, - { url = "https://files.pythonhosted.org/packages/76/08/0729a5bac14754150e5d83b39d87d842eb42b0bffcaa03dbad6252e23a39/pyinstaller-6.20.0-py3-none-manylinux2014_i686.whl", hash = "sha256:1031bcc307f3fbeffd4e162723e64d46dbf591c82dd0997413afb2a07328b941", size = 754191, upload-time = "2026-04-22T20:58:40.603Z" }, - { url = "https://files.pythonhosted.org/packages/e6/82/bc0ee4c7b97db1958eb651e0da9fb1e672e5ae53ca8867fd97701de52906/pyinstaller-6.20.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:8df3b3f347659fa2562d8d193a98ad4600133b8b8d07c268df89e4154376750e", size = 751902, upload-time = "2026-04-22T20:58:44.7Z" }, - { url = "https://files.pythonhosted.org/packages/3d/e7/770002d6aaa54173881cb2c49bb195ba67b97bf39bac1cdf320f28401629/pyinstaller-6.20.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:b0d3cc9dd8120d448459bd3880a12e2f9774c51443af49047801446377999a59", size = 748634, upload-time = "2026-04-22T20:58:48.579Z" }, - { url = "https://files.pythonhosted.org/packages/fe/db/68ba1fccb71278b2124fb90b37b7c8c0bc4c1173fba45b94466df3d9cb7f/pyinstaller-6.20.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:03696bb6350177c6bc23bcaf78e71a33c4a89b6754dd90d1be2f318e978c918b", size = 748490, upload-time = "2026-04-22T20:58:52.749Z" }, - { url = "https://files.pythonhosted.org/packages/03/0f/ac77ffa996a56be3d5c8f85734a007f8347240691657f9704e7de2527fa3/pyinstaller-6.20.0-py3-none-musllinux_1_1_aarch64.whl", hash = "sha256:6357f1699f6af84f37e7367f031d4f68abdba65543b83990c9e8f5a4cebed0b7", size = 747650, upload-time = "2026-04-22T20:58:57.093Z" }, - { url = 
"https://files.pythonhosted.org/packages/e0/56/1ee91c3a2bc10ca1f36da10a6fd55ff7efc4dec367171eb25992a827874f/pyinstaller-6.20.0-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:0ab39c690abad26ba148e8f664f0478acc82a733997f4f22e757774832802da9", size = 747413, upload-time = "2026-04-22T20:59:01.174Z" }, - { url = "https://files.pythonhosted.org/packages/d7/55/ae264339996953c4cdf9d89d916a0a8fa26a83cf917a742fff8b9d5f3fe8/pyinstaller-6.20.0-py3-none-win32.whl", hash = "sha256:9a7637e8e44b4387b13667fdcaac86ab6b29c446c16d34d8401539b81838759c", size = 1331584, upload-time = "2026-04-22T20:59:07.201Z" }, - { url = "https://files.pythonhosted.org/packages/76/8c/300f57578882cce259bfb5ae56fda3b69caa3fe9df40a176c719920ea6e2/pyinstaller-6.20.0-py3-none-win_amd64.whl", hash = "sha256:d588844e890ee80c4365867f98146636e1849bbca8e4284bbf0c809aff0f161a", size = 1391851, upload-time = "2026-04-22T20:59:14.024Z" }, - { url = "https://files.pythonhosted.org/packages/8a/ea/b2f8e1642aecda78c0b75c7321f708e49e10bb3c00dd4f148c40761a1527/pyinstaller-6.20.0-py3-none-win_arm64.whl", hash = "sha256:bd53282c0a73e5c95573e1ddc8e5d564d4932bec91efbaed4dc5fdff9c2ae7f2", size = 1332259, upload-time = "2026-04-22T20:59:20.509Z" }, -] - -[[package]] -name = "pyinstaller-hooks-contrib" -version = "2026.4" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "packaging" }, - { name = "setuptools" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/c7/fe/9278c29394bf69169febc21f96b4252c3ee7c8ec22c2fc545004bed47e71/pyinstaller_hooks_contrib-2026.4.tar.gz", hash = "sha256:766c281acb1ecc32e21c8c667056d7ebf5da0aabd5e30c219f9c2a283620eeaa", size = 173050, upload-time = "2026-03-31T14:10:51.188Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/88/f4/035fb8c06deff827f540a9a4ed9122c54e5376fca3e42eddf0c263730775/pyinstaller_hooks_contrib-2026.4-py3-none-any.whl", hash = "sha256:1de1a5e49a878122010b88c7e295502bc69776c157c4a4dc78741a4e6178b00f", size = 455496, 
upload-time = "2026-03-31T14:10:49.867Z" }, -] - [[package]] name = "pyparsing" version = "3.3.2" @@ -1332,54 +1241,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/10/bd/c038d7cc38edc1aa5bf91ab8068b63d4308c66c4c8bb3cbba7dfbc049f9c/pyparsing-3.3.2-py3-none-any.whl", hash = "sha256:850ba148bd908d7e2411587e247a1e4f0327839c40e2e5e6d05a007ecc69911d", size = 122781, upload-time = "2026-01-21T03:57:55.912Z" }, ] -[[package]] -name = "pyside6" -version = "6.11.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pyside6-addons" }, - { name = "pyside6-essentials" }, - { name = "shiboken6" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/f4/95/f3f5a2799163b6658126d78a85bc1dec9eda88c75c26780556b26071a1d8/pyside6-6.11.0-cp310-abi3-macosx_13_0_universal2.whl", hash = "sha256:1f2735dc4f2bd4ec452ae50502c8a22128bba0aced35358a2bbc58384b820c6f", size = 571544, upload-time = "2026-03-23T12:47:20.263Z" }, - { url = "https://files.pythonhosted.org/packages/da/89/9a1f521051714e6694ebbe2b979ded279845ec8e25cb309ca3960158d74f/pyside6-6.11.0-cp310-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:c642e2d25704ca746fd37f56feacf25c5aecc4cd40bef23d18eec81f87d9dc00", size = 571725, upload-time = "2026-03-23T12:47:21.727Z" }, - { url = "https://files.pythonhosted.org/packages/c2/3d/f779d8bba00fcde31a7d7fb6b59347a70773c9cc8135592dea9972579877/pyside6-6.11.0-cp310-abi3-manylinux_2_39_aarch64.whl", hash = "sha256:267b344c73580ac938ca63c611881fb42a3922ebfe043e271005f4f06c372c4e", size = 571722, upload-time = "2026-03-23T12:47:22.761Z" }, - { url = "https://files.pythonhosted.org/packages/ac/98/150e01a026df3e9697310236821fa825319bb4b9d6137539cb25a3032968/pyside6-6.11.0-cp310-abi3-win_amd64.whl", hash = "sha256:9092cb002ca43c64006afb2e0d0f6f51aef17aa737c33a45e502326a081ddcbc", size = 577988, upload-time = "2026-03-23T12:47:23.795Z" }, - { url = 
"https://files.pythonhosted.org/packages/50/e7/55960f7c6b41d058e95cb4af02652c46c48702c506c8bbf12e99550e1fb3/pyside6-6.11.0-cp310-abi3-win_arm64.whl", hash = "sha256:b15f39acc2b8f46251a630acad0d97f9a0a0461f2baffcd66d7adfada8eb641e", size = 561372, upload-time = "2026-03-23T12:47:25.073Z" }, -] - -[[package]] -name = "pyside6-addons" -version = "6.11.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pyside6-essentials" }, - { name = "shiboken6" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/c0/df/241f311c61a46b7b1195927da77b2537692ee3442aa9ccd87981164ff78d/pyside6_addons-6.11.0-cp310-abi3-macosx_13_0_universal2.whl", hash = "sha256:d5eaa4643302e3a0fa94c5766234bee4073d7d5ab9c2b7fd222692a176faf182", size = 331554157, upload-time = "2026-03-23T12:40:40.497Z" }, - { url = "https://files.pythonhosted.org/packages/31/b9/e81172835ccc9d8b9792cc6bf7524a252a0db9a76ddd693de230402697f9/pyside6_addons-6.11.0-cp310-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:ac6fe3d4ef4497dde3efc5e896b0acd53ff6c93be4bf485f045690f919419f35", size = 174948482, upload-time = "2026-03-23T12:41:05.379Z" }, - { url = "https://files.pythonhosted.org/packages/a8/a4/426d9333782bf65ab2a20257d6b4b3af9b8d5d7a710da719865fab49d492/pyside6_addons-6.11.0-cp310-abi3-manylinux_2_39_aarch64.whl", hash = "sha256:8ffb40222456078930816ebcac2f2511716d2acbc11716dd5acc5c365179a753", size = 170430798, upload-time = "2026-03-23T12:41:38.134Z" }, - { url = "https://files.pythonhosted.org/packages/35/9a/46d271fedfabad8c6dce2ebb69bb593745487ed33753a56a47c3ba4fdb1c/pyside6_addons-6.11.0-cp310-abi3-win_amd64.whl", hash = "sha256:413e6121c24f5ffdce376298059eddecff74aa6d638e94e0f6015b33d29b889e", size = 168723088, upload-time = "2026-03-23T12:42:00.668Z" }, - { url = "https://files.pythonhosted.org/packages/16/cd/1b28264f7dc9a642da2e4e7c02f67418d0949eb7ce329ae20869703c2630/pyside6_addons-6.11.0-cp310-abi3-win_arm64.whl", hash = 
"sha256:aaaee83385977a0fe134b2f4fbfb92b45a880d5b656e4d90a708eef10b1b6de8", size = 35698324, upload-time = "2026-03-23T12:42:13.748Z" }, -] - -[[package]] -name = "pyside6-essentials" -version = "6.11.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "shiboken6" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/d2/00/8a8583d3429c737cc20e61a43eba8ab1ec13ddb101e99802c2ffeedf3b41/pyside6_essentials-6.11.0-cp310-abi3-macosx_13_0_universal2.whl", hash = "sha256:85d6ca87ef35fa6565d385ede72ae48420dd3f63113929d10fc800f6b0360e01", size = 108085251, upload-time = "2026-03-23T12:42:52.872Z" }, - { url = "https://files.pythonhosted.org/packages/f3/a9/07c9e5c014b871c1b19caf8f994bcd50b345559b81f81671217b49559b67/pyside6_essentials-6.11.0-cp310-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:dc20e7afd5fc6fe51297db91cef997ce60844be578f7a49fc61b7ab9657a8849", size = 78316055, upload-time = "2026-03-23T12:43:04.19Z" }, - { url = "https://files.pythonhosted.org/packages/7c/35/f06b1b641d7600ec46374c16cd37c66fa4a22870326b4eb073a95471035f/pyside6_essentials-6.11.0-cp310-abi3-manylinux_2_39_aarch64.whl", hash = "sha256:4854cb0a1b061e7a576d8fb7bb7cf9f49540d558b1acb7df0742a7afefe61e4e", size = 77380821, upload-time = "2026-03-23T12:43:24.649Z" }, - { url = "https://files.pythonhosted.org/packages/ff/37/ba95c6262836d2b286b4e05a9d16a5e870995d5d2503ac6adc6312208049/pyside6_essentials-6.11.0-cp310-abi3-win_amd64.whl", hash = "sha256:3b3362882ad9389357a80504e600180006a957731fec05786fced7b038461fdf", size = 75793322, upload-time = "2026-03-23T12:43:35.575Z" }, - { url = "https://files.pythonhosted.org/packages/53/27/d17f25e45820e633a70e6109b35991eda09a5e8000c2a306f0ab7538d48c/pyside6_essentials-6.11.0-cp310-abi3-win_arm64.whl", hash = "sha256:81ca603dbf21bc39f89bb42db215c25ebe0c879a1a4c387625c321d2730ec187", size = 56337457, upload-time = "2026-03-23T12:43:43.573Z" }, -] - [[package]] name = "pytest" version = "9.0.3" @@ -1438,15 +1299,6 @@ 
wheels = [ { url = "https://files.pythonhosted.org/packages/9a/22/f1925cdda983ab66fc8ec6ec8014b959262747e58bdca26a4e3d1da29d56/python_multipart-0.0.26-py3-none-any.whl", hash = "sha256:c0b169f8c4484c13b0dcf2ef0ec3a4adb255c4b7d18d8e420477d2b1dd03f185", size = 28847, upload-time = "2026-04-10T14:09:58.131Z" }, ] -[[package]] -name = "pywin32-ctypes" -version = "0.2.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/85/9f/01a1a99704853cb63f253eea009390c88e7131c67e66a0a02099a8c917cb/pywin32-ctypes-0.2.3.tar.gz", hash = "sha256:d162dc04946d704503b2edc4d55f3dba5c1d539ead017afa00142c38b9885755", size = 29471, upload-time = "2024-08-14T10:15:34.626Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/de/3d/8161f7711c017e01ac9f008dfddd9410dff3674334c233bde66e7ba65bbf/pywin32_ctypes-0.2.3-py3-none-any.whl", hash = "sha256:8a1513379d709975552d202d942d9837758905c8d01eb82b8bcc30918929e7b8", size = 30756, upload-time = "2024-08-14T10:15:33.187Z" }, -] - [[package]] name = "pyyaml" version = "6.0.3" @@ -1571,18 +1423,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e1/e3/c164c88b2e5ce7b24d667b9bd83589cf4f3520d97cad01534cd3c4f55fdb/setuptools-81.0.0-py3-none-any.whl", hash = "sha256:fdd925d5c5d9f62e4b74b30d6dd7828ce236fd6ed998a08d81de62ce5a6310d6", size = 1062021, upload-time = "2026-02-06T21:10:37.175Z" }, ] -[[package]] -name = "shiboken6" -version = "6.11.0" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/82/1d/b56b7b694fbc871496435488d1f41c5068de546334850d722756511cef65/shiboken6-6.11.0-cp310-abi3-macosx_13_0_universal2.whl", hash = "sha256:d88e8a1eb705f2b9ad21db08a61ae1dc0c773e5cd86a069de0754c4cf1f9b43b", size = 476085, upload-time = "2026-03-23T12:47:05.724Z" }, - { url = 
"https://files.pythonhosted.org/packages/65/cb/4bb0c76011166230daa7c0074aeb3fdb3935c83ac1fef3789b85fcd1a8fc/shiboken6-6.11.0-cp310-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:ad54e64f8192ddbdff0c54ac82b89edcd62ed623f502ea21c960541d19514053", size = 271055, upload-time = "2026-03-23T12:47:07.349Z" }, - { url = "https://files.pythonhosted.org/packages/f5/96/771a6e2b530f725303d16d78a321fa4876b98b4f3615c9851880df8c1a43/shiboken6-6.11.0-cp310-abi3-manylinux_2_39_aarch64.whl", hash = "sha256:a10dc7718104ea2dc15d5b0b96909b77162ce1c76fcc6968e6df692b947a00e9", size = 267456, upload-time = "2026-03-23T12:47:08.689Z" }, - { url = "https://files.pythonhosted.org/packages/72/f7/44c0c42c3f5f29dec457fd46ea0552174bcb8aa75becf03bbd90308ba07b/shiboken6-6.11.0-cp310-abi3-win_amd64.whl", hash = "sha256:483ff78a73c7b3189ca924abc694318084f078bcfeaffa68e32024ff2d025ee1", size = 1222132, upload-time = "2026-03-23T12:47:10.143Z" }, - { url = "https://files.pythonhosted.org/packages/fb/99/6e5ee21db2d6af84bbbd7d871d441dafeb069c6de5667b1aa49891a77c66/shiboken6-6.11.0-cp310-abi3-win_arm64.whl", hash = "sha256:3bd76cf56105ab2d62ecaff630366f11264f69b88d488f10f048da9a065781f4", size = 1783186, upload-time = "2026-03-23T12:47:11.832Z" }, -] - [[package]] name = "six" version = "1.17.0" @@ -1592,22 +1432,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, ] -[[package]] -name = "sounddevice" -version = "0.5.5" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "cffi" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/2a/f9/2592608737553638fca98e21e54bfec40bf577bb98a61b2770c912aab25e/sounddevice-0.5.5.tar.gz", hash = "sha256:22487b65198cb5bf2208755105b524f78ad173e5ab6b445bdab1c989f6698df3", size = 143191, 
upload-time = "2026-01-23T18:36:43.529Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/1e/0a/478e441fd049002cf308520c0d62dd8333e7c6cc8d997f0dda07b9fbcc46/sounddevice-0.5.5-py3-none-any.whl", hash = "sha256:30ff99f6c107f49d25ad16a45cacd8d91c25a1bcdd3e81a206b921a3a6405b1f", size = 32807, upload-time = "2026-01-23T18:36:35.649Z" }, - { url = "https://files.pythonhosted.org/packages/56/f9/c037c35f6d0b6bc3bc7bfb314f1d6f1f9a341328ef47cd63fc4f850a7b27/sounddevice-0.5.5-py3-none-macosx_10_6_x86_64.macosx_10_6_universal2.whl", hash = "sha256:05eb9fd6c54c38d67741441c19164c0dae8ce80453af2d8c4ad2e7823d15b722", size = 108557, upload-time = "2026-01-23T18:36:37.41Z" }, - { url = "https://files.pythonhosted.org/packages/88/a1/d19dd9889cd4bce2e233c4fac007cd8daaf5b9fe6e6a5d432cf17be0b807/sounddevice-0.5.5-py3-none-win32.whl", hash = "sha256:1234cc9b4c9df97b6cbe748146ae0ec64dd7d6e44739e8e42eaa5b595313a103", size = 317765, upload-time = "2026-01-23T18:36:39.047Z" }, - { url = "https://files.pythonhosted.org/packages/c3/0e/002ed7c4c1c2ab69031f78989d3b789fee3a7fba9e586eb2b81688bf4961/sounddevice-0.5.5-py3-none-win_amd64.whl", hash = "sha256:cfc6b2c49fb7f555591c78cb8ecf48d6a637fd5b6e1db5fec6ed9365d64b3519", size = 365324, upload-time = "2026-01-23T18:36:40.496Z" }, - { url = "https://files.pythonhosted.org/packages/4e/39/a61d4b83a7746b70d23d9173be688c0c6bfc7173772344b7442c2c155497/sounddevice-0.5.5-py3-none-win_arm64.whl", hash = "sha256:3861901ddd8230d2e0e8ae62ac320cdd4c688d81df89da036dcb812f757bb3e6", size = 317115, upload-time = "2026-01-23T18:36:42.235Z" }, -] - [[package]] name = "sqlalchemy" version = "2.0.49" @@ -1783,6 +1607,23 @@ wheels = [ { url = "https://download-r2.pytorch.org/whl/cpu/torchvision-0.26.0%2Bcpu-cp314-cp314t-win_amd64.whl", hash = "sha256:6139108231a29ffb607931360ee24594553a939467c65530f734a2ed9918f011", upload-time = "2026-03-23T15:36:09Z" }, ] +[[package]] +name = "tornado" +version = "6.5.5" +source = { registry = 
"https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f8/f1/3173dfa4a18db4a9b03e5d55325559dab51ee653763bb8745a75af491286/tornado-6.5.5.tar.gz", hash = "sha256:192b8f3ea91bd7f1f50c06955416ed76c6b72f96779b962f07f911b91e8d30e9", size = 516006, upload-time = "2026-03-10T21:31:02.067Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/59/8c/77f5097695f4dd8255ecbd08b2a1ed8ba8b953d337804dd7080f199e12bf/tornado-6.5.5-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:487dc9cc380e29f58c7ab88f9e27cdeef04b2140862e5076a66fb6bb68bb1bfa", size = 445983, upload-time = "2026-03-10T21:30:44.28Z" }, + { url = "https://files.pythonhosted.org/packages/ab/5e/7625b76cd10f98f1516c36ce0346de62061156352353ef2da44e5c21523c/tornado-6.5.5-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:65a7f1d46d4bb41df1ac99f5fcb685fb25c7e61613742d5108b010975a9a6521", size = 444246, upload-time = "2026-03-10T21:30:46.571Z" }, + { url = "https://files.pythonhosted.org/packages/b2/04/7b5705d5b3c0fab088f434f9c83edac1573830ca49ccf29fb83bf7178eec/tornado-6.5.5-cp39-abi3-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:e74c92e8e65086b338fd56333fb9a68b9f6f2fe7ad532645a290a464bcf46be5", size = 447229, upload-time = "2026-03-10T21:30:48.273Z" }, + { url = "https://files.pythonhosted.org/packages/34/01/74e034a30ef59afb4097ef8659515e96a39d910b712a89af76f5e4e1f93c/tornado-6.5.5-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:435319e9e340276428bbdb4e7fa732c2d399386d1de5686cb331ec8eee754f07", size = 448192, upload-time = "2026-03-10T21:30:51.22Z" }, + { url = "https://files.pythonhosted.org/packages/be/00/fe9e02c5a96429fce1a1d15a517f5d8444f9c412e0bb9eadfbe3b0fc55bf/tornado-6.5.5-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:3f54aa540bdbfee7b9eb268ead60e7d199de5021facd276819c193c0fb28ea4e", size = 448039, upload-time = "2026-03-10T21:30:53.52Z" }, + { url = 
"https://files.pythonhosted.org/packages/82/9e/656ee4cec0398b1d18d0f1eb6372c41c6b889722641d84948351ae19556d/tornado-6.5.5-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:36abed1754faeb80fbd6e64db2758091e1320f6bba74a4cf8c09cd18ccce8aca", size = 447445, upload-time = "2026-03-10T21:30:55.541Z" }, + { url = "https://files.pythonhosted.org/packages/5a/76/4921c00511f88af86a33de770d64141170f1cfd9c00311aea689949e274e/tornado-6.5.5-cp39-abi3-win32.whl", hash = "sha256:dd3eafaaeec1c7f2f8fdcd5f964e8907ad788fe8a5a32c4426fbbdda621223b7", size = 448582, upload-time = "2026-03-10T21:30:57.142Z" }, + { url = "https://files.pythonhosted.org/packages/2c/23/f6c6112a04d28eed765e374435fb1a9198f73e1ec4b4024184f21faeb1ad/tornado-6.5.5-cp39-abi3-win_amd64.whl", hash = "sha256:6443a794ba961a9f619b1ae926a2e900ac20c34483eea67be4ed8f1e58d3ef7b", size = 448990, upload-time = "2026-03-10T21:30:58.857Z" }, + { url = "https://files.pythonhosted.org/packages/b7/c8/876602cbc96469911f0939f703453c1157b0c826ecb05bdd32e023397d4e/tornado-6.5.5-cp39-abi3-win_arm64.whl", hash = "sha256:2c9a876e094109333f888539ddb2de4361743e5d21eece20688e3e351e4990a6", size = 448016, upload-time = "2026-03-10T21:31:00.43Z" }, +] + [[package]] name = "typing-extensions" version = "4.15.0" @@ -1967,15 +1808,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e3/bd/fa9bb053192491b3867ba07d2343d9f2252e00811567d30ae8d0f78136fe/watchfiles-1.1.1-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:a916a2932da8f8ab582f242c065f5c81bed3462849ca79ee357dd9551b0e9b01", size = 622112, upload-time = "2025-10-14T15:05:50.941Z" }, ] -[[package]] -name = "websocket-client" -version = "1.9.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/2c/41/aa4bf9664e4cda14c3b39865b12251e8e7d239f4cd0e3cc1b6c2ccde25c1/websocket_client-1.9.0.tar.gz", hash = "sha256:9e813624b6eb619999a97dc7958469217c3176312b3a16a4bd1bc7e08a46ec98", size = 70576, upload-time = 
"2025-10-07T21:16:36.495Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/34/db/b10e48aa8fff7407e67470363eac595018441cf32d5e1001567a7aeba5d2/websocket_client-1.9.0-py3-none-any.whl", hash = "sha256:af248a825037ef591efbf6ed20cc5faa03d3b47b9e5a2230a529eeee1c1fc3ef", size = 82616, upload-time = "2025-10-07T21:16:34.951Z" }, -] - [[package]] name = "websockets" version = "16.0" diff --git a/voice_client.spec b/voice_client.spec deleted file mode 100644 index 519fd75..0000000 --- a/voice_client.spec +++ /dev/null @@ -1,56 +0,0 @@ -# PyInstaller spec for the voice confirmation desktop client. -# Build on the target OS (Windows / macOS / Linux). -# Usage: uv run --group voice-client-build pyinstaller voice_client.spec - -from PyInstaller.utils.hooks import collect_all - -datas, binaries, hiddenimports = collect_all("PySide6") - -block_cipher = None - -a = Analysis( - ["voice_confirmation_client/__main__.py"], - pathex=[], - binaries=binaries, - datas=datas, - hiddenimports=hiddenimports + ["sounddevice", "numpy", "websocket"], - hookspath=[], - hooksconfig={}, - runtime_hooks=[], - excludes=[], - win_no_prefer_redirects=False, - win_private_assemblies=False, - cipher=block_cipher, - noarchive=False, -) - -pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher) - -exe = EXE( - pyz, - a.scripts, - [], - exclude_binaries=True, - name="voice-confirmation-client", - debug=False, - bootloader_ignore_signals=False, - strip=False, - upx=False, - console=False, - disable_windowed_traceback=False, - argv_emulation=False, - target_arch=None, - codesign_identity=None, - entitlements_file=None, -) - -coll = COLLECT( - exe, - a.binaries, - a.zipfiles, - a.datas, - strip=False, - upx=False, - upx_exclude=[], - name="voice-confirmation-client", -) diff --git a/voice_confirmation_client/README.md b/voice_confirmation_client/README.md deleted file mode 100644 index 45f1cf4..0000000 --- a/voice_confirmation_client/README.md +++ /dev/null @@ -1,99 +0,0 @@ -# 手术室耗材语音确认客户端(桌面版) 
- -独立桌面程序:按可配置间隔(默认 **5 秒**)轮询 `GET /client/surgeries/{surgery_id}/pending-confirmation`,播放服务端返回的 **MP3 话术**,录制医生麦克风为 **16 kHz 单声道 WAV**,并调用 `POST .../pending-confirmation/{confirmation_id}/resolve`(`multipart` 字段名 `audio`)。协议与 `[docs/客户端手术通信接口说明.md](../docs/客户端手术通信接口说明.md)` 一致。 - -## 环境 - -- Python **3.13+**(与主项目一致) -- 安装可选依赖组 **voice-client**(PySide6、httpx、numpy、sounddevice、websocket-client) - -```bash -cd /path/to/operation-room-monitor-server -uv sync --group voice-client -``` - -## 运行(开发态) - -未配置项目 `build-system` 时,`uv` 可能不会注册 `voice-confirmation-client` 命令,推荐: - -```bash -./start_voice_confirmation_client.sh -``` - -或在仓库根目录: - -```bash -uv run --group voice-client python -m voice_confirmation_client -``` - -Windows(仓库根目录): - -```bat -start_voice_confirmation_client.bat -``` - -若 entry point 已可用,也可: - -```bash -uv run --group voice-client voice-confirmation-client -``` - -**术间 / 摄像头 / 语音终端对应关系**只在服务端 `**OR_SITE_CONFIG_JSON_FILE`** 里维护一份(`voice_or_room_bindings`);桌面程序不读该文件。 - -本机要做的只有两件事:**服务端 Base URL**,以及 **本机语音终端 ID**(须等于 JSON 里某条 `voice_terminal_id`)。**手术号不在客户端输入**:勾选 **启用服务端自动指派** 后,开录/停录仅通过 **WebSocket** `voice_assignment` 下发(断线后自动重连,不用 HTTP 轮询);当前手术号在**窗口标题**中显示;停录后自动停止。可用 **停止监控(本机)** 做本地紧急中断。 - -### 配置文件(系统级 + 用户级) - -字段均为 **UTF-8 JSON**(`voice_terminal_id`、`http_base_url`,均可选)。启动时 **合并** 两层配置:**用户级覆盖系统级** 同名字段。 - -| 层级 | 用途 | 默认路径 | -|------|------|----------| -| **系统级** | 运维装机下发(只读亦可) | **Windows** `%ProgramData%\OperationRoomMonitor\voice_client.json`;**macOS** `/Library/Application Support/OperationRoomMonitor/voice_client.json`;**Linux** `/etc/operation-room-monitor/voice_client.json` | -| **用户级** | 在界面修改「服务端 Base URL」或「本机语音终端 ID」并 **离开输入框** 后自动保存 | **Windows** `%LOCALAPPDATA%\OperationRoomMonitor\voice_client.json`;**macOS** `~/Library/Application Support/OperationRoomMonitor/voice_client.json`;**Linux** `~/.config/operation-room-monitor/voice_client.json` | - -测试或定制安装可用环境变量 
**`VOICE_CLIENT_MACHINE_CONFIG_FILE`**、**`VOICE_CLIENT_USER_CONFIG_FILE`** 分别覆盖上述两个路径。 - -示例见 `voice_confirmation_client/resources/voice_client.sample.json`(通常用作系统级模板)。 - -## 日志(loguru) - -客户端使用 **loguru**:**右侧日志区**与**启动终端 stderr** 会同时输出。开录无反应时请看是否出现「本机语音终端 ID 为空」、`WebSocket 已连接`、`收到 voice_assignment start`、或反复「WebSocket 断开」等行。 - -## 音频说明 - -- **播放 MP3**:优先使用本机 `ffplay`(ffmpeg),其次 macOS 使用 `afplay`;可将 `ffplay` 放到 `voice_confirmation_client/bin/`(与包同级目录下的 `bin/`)以便离线环境使用。 -- **录音**:默认使用 **sounddevice** 录制并重采样为 16 kHz 单声道 WAV(与浏览器 Demo 一致)。可选勾选 **优先使用 ffmpeg 录音**(依赖本机 ffmpeg 及可用的设备参数;Windows 默认设备名可能需按现场调整,见 `voice_confirmation_client/core/record.py` 中 `default_ffmpeg_input_args`)。 - -## 打包(PyInstaller) - -在 **目标操作系统** 上构建(不要交叉编译 Qt 桌面程序)。 - -```bash -uv sync --group voice-client-build -uv run --group voice-client-build pyinstaller voice_client.spec --noconfirm -# 或 -uv run --group voice-client-build python scripts/build_voice_client.py -``` - -**Windows 一键打包(仓库根目录)**:双击或在 `cmd` 中执行 `build_voice_confirmation_client.bat`;需要干净构建时加参数 `--clean`(会先删除 `build/`、`dist/`)。 - -产物目录:`dist/voice-confirmation-client/`(目录分发,内含可执行文件)。Windows 下可执行文件为 `voice-confirmation-client.exe`。 - -**说明**: - -- 体积较大(含 PySide6);杀毒软件可能对 PyInstaller 打包的 exe 误报,可向医院 IT 申请加白。 -- **macOS**:未签名/未公证的 `.app` 可能需在「隐私与安全性」中手动允许;正式发布需 Apple 开发者签名与公证。 -- **可选**:将 `ffmpeg`/`ffplay` 二进制放入打包目录下的 `voice_confirmation_bin/`,程序会优先使用(需在 spec 中增加 `datas` 将该目录打入包内,或手动复制到分发目录)。 - -## 术间排查 - -1. **网络**:客户端机器能访问监控服务 HTTP/HTTPS 端口(默认文档为 `38080`)。 -2. **麦克风**:在「输入设备」中选择正确设备;无列表时检查系统隐私权限(麦克风)。 -3. **无待确认**:轮询返回 404 为常态;可关闭「隐藏 404 轮询日志」观察请求节奏。 -4. 
**解析失败**:使用 **重试本轮** 重新播放 + 录音 + 上传;或使用 **仅重播话术** 听清提示。 - -## 与浏览器 Demo 的差异 - -- 浏览器 Demo(`scripts/demo_client/`)默认 **10 秒** 轮询;本客户端默认 **5 秒**,可在界面修改。 -- 本客户端无「开始/结束手术」按钮;手术需由既有流程或他端调用 `POST /client/surgeries/start` 启动;若启用自动指派,开录成功后本机将自动开始待确认轮询。 - diff --git a/voice_confirmation_client/__init__.py b/voice_confirmation_client/__init__.py deleted file mode 100644 index bf0b1eb..0000000 --- a/voice_confirmation_client/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -"""Desktop voice confirmation client for OR monitor API (pending-confirmation loop).""" - -__version__ = "0.1.0" diff --git a/voice_confirmation_client/__main__.py b/voice_confirmation_client/__main__.py deleted file mode 100644 index 2c6831d..0000000 --- a/voice_confirmation_client/__main__.py +++ /dev/null @@ -1,35 +0,0 @@ -"""Entry: `python -m voice_confirmation_client` or `voice-confirmation-client`.""" - -from __future__ import annotations - -import signal -import sys - - -def main() -> None: - from PySide6.QtCore import QTimer - from PySide6.QtWidgets import QApplication - - from voice_confirmation_client.gui.main_window import MainWindow - - app = QApplication(sys.argv) - win = MainWindow() - win.show() - - app.aboutToQuit.connect(win.shutdown) - - # Qt 事件循环长时间跑在 native 代码里时,Python 无法处理 SIGINT;定时器让解释器周期性醒来。 - _pulse = QTimer() - _pulse.timeout.connect(lambda: None) - _pulse.start(200) - - def _on_sigint(_signum: int, _frame: object | None) -> None: - app.quit() - - signal.signal(signal.SIGINT, _on_sigint) - - raise SystemExit(app.exec()) - - -if __name__ == "__main__": - main() diff --git a/voice_confirmation_client/core/__init__.py b/voice_confirmation_client/core/__init__.py deleted file mode 100644 index a31dd22..0000000 --- a/voice_confirmation_client/core/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from voice_confirmation_client.core.monitor_worker import MonitorWorker - -__all__ = ["MonitorWorker"] diff --git a/voice_confirmation_client/core/api.py b/voice_confirmation_client/core/api.py deleted file 
mode 100644 index 1906747..0000000 --- a/voice_confirmation_client/core/api.py +++ /dev/null @@ -1,87 +0,0 @@ -"""HTTP client for pending-confirmation and resolve endpoints.""" - -from __future__ import annotations - -import json -from dataclasses import dataclass -from typing import Any -from urllib.parse import quote, urljoin - -import httpx - - -@dataclass -class PendingConfirmationPayload: - surgery_id: str - confirmation_id: str - prompt_text: str - prompt_audio_mp3_base64: str - options: list[dict[str, Any]] - model_top1_label: str - model_top1_confidence: float - created_at: str - raw: dict[str, Any] - - -class ConfirmationApiClient: - def __init__(self, base_url: str, timeout: float = 60.0) -> None: - self._base = base_url.rstrip("/") + "/" - self._timeout = timeout - self._client = httpx.Client(timeout=timeout) - - @property - def base_url_normalized(self) -> str: - return self._base - - def close(self) -> None: - self._client.close() - - def _url(self, path: str) -> str: - return urljoin(self._base, path.lstrip("/")) - - def get_pending(self, surgery_id: str) -> tuple[int, dict[str, Any] | str]: - url = self._url(f"client/surgeries/{surgery_id}/pending-confirmation") - r = self._client.get(url) - text = r.text - if not text: - return r.status_code, {} - try: - body: dict[str, Any] | str = json.loads(text) - except json.JSONDecodeError: - body = text - return r.status_code, body - - def parse_pending(self, body: dict[str, Any]) -> PendingConfirmationPayload: - return PendingConfirmationPayload( - surgery_id=str(body.get("surgery_id", "")), - confirmation_id=str(body["confirmation_id"]), - prompt_text=str(body.get("prompt_text", "")), - prompt_audio_mp3_base64=str(body.get("prompt_audio_mp3_base64", "")), - options=list(body.get("options") or []), - model_top1_label=str(body.get("model_top1_label", "")), - model_top1_confidence=float(body.get("model_top1_confidence", 0.0)), - created_at=str(body.get("created_at", "")), - raw=body, - ) - - def post_resolve( 
- self, - surgery_id: str, - confirmation_id: str, - wav_bytes: bytes, - filename: str = "voice.wav", - ) -> tuple[int, dict[str, Any] | str]: - cid_enc = quote(confirmation_id, safe="") - url = self._url( - f"client/surgeries/{surgery_id}/pending-confirmation/{cid_enc}/resolve" - ) - files = {"audio": (filename, wav_bytes, "audio/wav")} - r = self._client.post(url, files=files) - text = r.text - if not text: - return r.status_code, {} - try: - body: dict[str, Any] | str = json.loads(text) - except json.JSONDecodeError: - body = text - return r.status_code, body diff --git a/voice_confirmation_client/core/assignment_listener.py b/voice_confirmation_client/core/assignment_listener.py deleted file mode 100644 index 8408912..0000000 --- a/voice_confirmation_client/core/assignment_listener.py +++ /dev/null @@ -1,143 +0,0 @@ -"""WebSocket:接收服务端 voice_assignment(开录/停录自动启停监控)。""" - -from __future__ import annotations - -import json -import threading -import time -from collections.abc import Callable -from typing import Any -from urllib.parse import quote, urlparse, urlunparse - -from loguru import logger - - -def http_base_to_ws_root(http_base: str) -> str: - p = urlparse(http_base.strip()) - scheme = "wss" if p.scheme == "https" else "ws" - return urlunparse((scheme, p.netloc, "", "", "", "")) - - -class VoiceAssignmentListener: - """后台线程:仅 WebSocket;断线后等待一小段时间再重连。""" - - def __init__( - self, - *, - http_base_url: str, - terminal_id: str, - on_start: Callable[[str], None], - on_end: Callable[[str], None], - reconnect_delay_sec: float = 2.0, - ) -> None: - self._http_base = http_base_url.rstrip("/") - self._terminal_id = terminal_id.strip() - self._on_start = on_start - self._on_end = on_end - self._reconnect_delay = reconnect_delay_sec - self._stop = threading.Event() - self._thread: threading.Thread | None = None - self._last_assignment: str | None = None - - @property - def terminal_id(self) -> str: - return self._terminal_id - - def start(self) -> None: - if not 
self._terminal_id: - logger.warning("未配置 terminal_id,跳过语音终端指派监听") - return - if self._thread and self._thread.is_alive(): - logger.debug("指派监听线程已在运行,忽略重复 start") - return - self._stop.clear() - self._thread = threading.Thread(target=self._run, name="VoiceAssignment", daemon=True) - self._thread.start() - logger.info( - "已启动指派监听(仅 WebSocket)terminal_id={!r} base={!r}", - self._terminal_id, - self._http_base, - ) - - def stop(self) -> None: - self._stop.set() - logger.debug("已请求停止指派监听线程") - - def _run(self) -> None: - import websocket - - ws_root = http_base_to_ws_root(self._http_base) - path = f"/client/voice-terminals/ws?terminal_id={quote(self._terminal_id)}" - ws_url = ws_root.rstrip("/") + path - logger.info("WebSocket 目标: {}", ws_url) - - while not self._stop.is_set(): - try: - - def _on_open(_ws: Any) -> None: - logger.info("WebSocket 已连接 terminal_id={!r}", self._terminal_id) - logger.info( - "若开录后仍无 voice_assignment:核对本机 ID 与 OR_SITE_CONFIG「voice_or_room_bindings」、" - "开录请求 camera_ids 能否解析到该终端;开录须 POST /client/surgeries/start " - "或(联调)POST /internal/demo/orchestrate-and-start。" - ) - - def _on_close(_ws: Any, close_status_code: Any, close_msg: Any) -> None: - logger.warning( - "WebSocket 断开 code={} msg={!r}", - close_status_code, - close_msg, - ) - - def _on_error(_ws: Any, err: Any) -> None: - if err is None: - return - if type(err).__name__ == "ABNF": - logger.debug("WebSocket 内部帧回调(已忽略): {}", type(err).__name__) - return - logger.warning("WebSocket 错误: {}", err) - - ws = websocket.WebSocketApp( - ws_url, - on_open=_on_open, - on_close=_on_close, - on_message=self._ws_on_message, - on_error=_on_error, - ) - ws.run_forever(ping_interval=None, ping_timeout=None) - except Exception as exc: - logger.exception("WebSocket run_forever 异常: {}", exc) - - if self._stop.is_set(): - break - - logger.info( - "{:.1f}s 后重连 WebSocket…", - self._reconnect_delay, - ) - time.sleep(self._reconnect_delay) - - def _ws_on_message(self, _ws: Any, message: str) -> None: - try: - 
data = json.loads(message) - except json.JSONDecodeError: - logger.debug("WebSocket 非 JSON 消息(已忽略): {!r}", message[:200]) - return - if data.get("type") != "voice_assignment": - logger.debug("WebSocket 非 voice_assignment 消息 type={!r}", data.get("type")) - return - action = data.get("action") - sid = str(data.get("surgery_id") or "") - if not sid: - return - if action == "start": - logger.info("收到 voice_assignment start surgery_id={!r}", sid) - self._last_assignment = sid - self._on_start(sid) - elif action == "end": - logger.info("收到 voice_assignment end surgery_id={!r}", sid) - if self._last_assignment == sid: - self._last_assignment = None - self._on_end(sid) - else: - logger.debug("忽略 voice_assignment action={!r}", action) diff --git a/voice_confirmation_client/core/machine_config.py b/voice_confirmation_client/core/machine_config.py deleted file mode 100644 index 5fc50c5..0000000 --- a/voice_confirmation_client/core/machine_config.py +++ /dev/null @@ -1,104 +0,0 @@ -"""语音客户端配置:系统级(运维下发)+ 用户级(界面保存,覆盖同名字段)。 - -- ``VOICE_CLIENT_MACHINE_CONFIG_FILE``:仅改变**系统级**配置文件路径(测试或定制)。 -- ``VOICE_CLIENT_USER_CONFIG_FILE``:仅改变**用户级**配置文件路径(测试或定制)。 -""" - -from __future__ import annotations - -import json -import os -import sys -from pathlib import Path -from typing import Any - -from loguru import logger - -_DEFAULT_HTTP_BASE = "http://127.0.0.1:38080" -_CONFIG_FILENAME = "voice_client.json" - - -def machine_config_file_path() -> Path: - """系统级配置:运维部署;只读亦可。""" - override = (os.environ.get("VOICE_CLIENT_MACHINE_CONFIG_FILE") or "").strip() - if override: - return Path(override).expanduser() - if sys.platform == "win32": - base = os.environ.get("PROGRAMDATA", r"C:\ProgramData") - return Path(base) / "OperationRoomMonitor" / _CONFIG_FILENAME - if sys.platform == "darwin": - return ( - Path("/Library/Application Support/OperationRoomMonitor") - / _CONFIG_FILENAME - ) - return Path("/etc/operation-room-monitor") / _CONFIG_FILENAME - - -def user_voice_client_config_path() -> Path: - 
"""用户级配置:当前登录用户可写,界面编辑后保存到此。""" - override = (os.environ.get("VOICE_CLIENT_USER_CONFIG_FILE") or "").strip() - if override: - return Path(override).expanduser() - if sys.platform == "win32": - base = os.environ.get("LOCALAPPDATA", str(Path.home() / "AppData/Local")) - return Path(base) / "OperationRoomMonitor" / _CONFIG_FILENAME - if sys.platform == "darwin": - return ( - Path.home() / "Library/Application Support/OperationRoomMonitor" / _CONFIG_FILENAME - ) - return Path.home() / ".config/operation-room-monitor" / _CONFIG_FILENAME - - -def _read_json_object(path: Path) -> dict[str, Any]: - if not path.is_file(): - return {} - try: - raw = path.read_text(encoding="utf-8") - data = json.loads(raw) - except OSError as exc: - logger.warning("无法读取语音客户端配置 {}: {}", path, exc) - return {} - except json.JSONDecodeError as exc: - logger.warning("语音客户端配置 JSON 无效 {}: {}", path, exc) - return {} - if not isinstance(data, dict): - logger.warning("语音客户端配置须为 JSON 对象: {}", path) - return {} - return data - - -def load_voice_client_config() -> dict[str, Any]: - """合并系统配置与用户配置;同键时用户覆盖系统。""" - system = _read_json_object(machine_config_file_path()) - user = _read_json_object(user_voice_client_config_path()) - merged: dict[str, Any] = dict(system) - merged.update(user) - return merged - - -def save_user_voice_client_config(*, voice_terminal_id: str, http_base_url: str) -> None: - """将当前界面上的连接参数写入用户级配置文件。""" - path = user_voice_client_config_path() - path.parent.mkdir(parents=True, exist_ok=True) - payload = { - "voice_terminal_id": voice_terminal_id.strip(), - "http_base_url": http_base_url.strip().rstrip("/"), - } - path.write_text( - json.dumps(payload, ensure_ascii=False, indent=2) + "\n", - encoding="utf-8", - ) - - -def voice_terminal_id_from_config(file_data: dict[str, Any]) -> str: - """合并后 dict 中的 ``voice_terminal_id``;缺省为空串。""" - v = file_data.get("voice_terminal_id") - return str(v).strip() if v is not None else "" - - -def http_base_url_from_config(file_data: dict[str, Any]) 
-> str: - """合并后 dict 中的 ``http_base_url``;缺省为 ``http://127.0.0.1:38080``。""" - v = file_data.get("http_base_url") - if v is not None and str(v).strip(): - return str(v).strip().rstrip("/") - return _DEFAULT_HTTP_BASE diff --git a/voice_confirmation_client/core/monitor_worker.py b/voice_confirmation_client/core/monitor_worker.py deleted file mode 100644 index 6725749..0000000 --- a/voice_confirmation_client/core/monitor_worker.py +++ /dev/null @@ -1,354 +0,0 @@ -"""Background polling + play + record + resolve (threaded, Qt-free).""" - -from __future__ import annotations - -import re -import threading -import time -from collections.abc import Callable -from dataclasses import dataclass, field -from typing import Any - -from loguru import logger - -from voice_confirmation_client.core.api import ConfirmationApiClient -from voice_confirmation_client.core.playback import play_mp3_from_base64 -from voice_confirmation_client.core.record import record_wav_16k_mono - - -@dataclass -class MonitorSettings: - base_url: str = "http://127.0.0.1:38080" - surgery_id: str = "" - interval_sec: float = 5.0 - record_seconds: float = 8.0 - dry_run: bool = False - hide_404_logs: bool = True - prefer_ffmpeg_record: bool = False - sounddevice_device: int | str | None = None - - -@dataclass -class _MutableState: - generation: int = 0 - busy: bool = False - spoken_cid: str | None = None - failed_resolve_cid: str | None = None - force_retry: bool = False - last_payload: dict[str, Any] | None = None - - -class MonitorWorker: - """Polls pending-confirmation; on new item plays MP3, records WAV, POSTs resolve.""" - - def __init__( - self, - *, - on_log: Callable[[str], None] | None = None, - on_state: Callable[[str], None] | None = None, - on_pending: Callable[[dict[str, Any] | None], None] | None = None, - ) -> None: - self._on_log = on_log - self._on_state = on_state - self._on_pending = on_pending - self._settings = MonitorSettings() - self._settings_lock = threading.Lock() - self._state = 
_MutableState() - self._state_lock = threading.Lock() - self._stop = threading.Event() - self._wake = threading.Event() - self._monitoring = threading.Event() - self._thread: threading.Thread | None = None - self._api: ConfirmationApiClient | None = None - self._api_base: str | None = None - self._api_lock = threading.Lock() - - def set_settings(self, **kwargs: Any) -> None: - with self._settings_lock: - old_sid = self._settings.surgery_id - for k, v in kwargs.items(): - if hasattr(self._settings, k): - setattr(self._settings, k, v) - sid_changed = ( - "surgery_id" in kwargs and self._settings.surgery_id != old_sid - ) - with self._state_lock: - self._state.generation += 1 - if sid_changed: - self._state.spoken_cid = None - self._state.failed_resolve_cid = None - self._state.last_payload = None - self._state.force_retry = False - self._emit_pending(None) - - def start_thread(self) -> None: - if self._thread and self._thread.is_alive(): - return - self._stop.clear() - self._thread = threading.Thread(target=self._run, name="VoiceMonitor", daemon=True) - self._thread.start() - - def stop_thread(self) -> None: - self._stop.set() - self._wake.set() - if self._thread: - self._thread.join(timeout=8.0) - self._thread = None - with self._api_lock: - if self._api: - self._api.close() - self._api = None - self._api_base = None - - def set_monitoring(self, active: bool) -> None: - if active: - with self._settings_lock: - sid = self._settings.surgery_id - logger.info("监控已开启 surgery_id={!r}", sid) - self._monitoring.set() - self._wake.set() - else: - logger.info("监控已关闭") - self._monitoring.clear() - with self._state_lock: - self._state.generation += 1 - - def retry_failed(self) -> None: - with self._state_lock: - self._state.force_retry = True - self._wake.set() - - def replay_prompt_only(self) -> None: - """Play last pending MP3 again (GUI button); no record/upload.""" - threading.Thread(target=self._replay_prompt_job, name="ReplayPrompt", daemon=True).start() - - def 
_replay_prompt_job(self) -> None: - with self._state_lock: - payload = self._state.last_payload - if not payload: - self._log("没有可重播的待确认数据") - return - b64 = payload.get("prompt_audio_mp3_base64") or "" - if not b64: - self._log("当前任务无 MP3 数据") - return - self._emit_state("播放话术(手动重播)…") - try: - play_mp3_from_base64(str(b64)) - except Exception as e: - self._log(f"重播失败: {e}") - finally: - self._emit_state("待机") - - def _log(self, msg: str) -> None: - logger.info("{}", msg) - if self._on_log: - self._on_log(msg) - - def _emit_state(self, s: str) -> None: - if self._on_state: - self._on_state(s) - - def _emit_pending(self, p: dict[str, Any] | None) -> None: - if self._on_pending: - self._on_pending(p) - - def _get_api(self, base_url: str) -> ConfirmationApiClient: - norm = base_url.rstrip("/") + "/" - with self._api_lock: - if self._api is None or self._api_base != norm: - if self._api: - self._api.close() - self._api = ConfirmationApiClient(base_url) - self._api_base = norm - return self._api - - def _run(self) -> None: - while not self._stop.is_set(): - if not self._monitoring.is_set(): - time.sleep(0.15) - continue - - with self._settings_lock: - cfg = MonitorSettings( - base_url=self._settings.base_url, - surgery_id=self._settings.surgery_id, - interval_sec=self._settings.interval_sec, - record_seconds=self._settings.record_seconds, - dry_run=self._settings.dry_run, - hide_404_logs=self._settings.hide_404_logs, - prefer_ffmpeg_record=self._settings.prefer_ffmpeg_record, - sounddevice_device=self._settings.sounddevice_device, - ) - - if not re.fullmatch(r"\d{6}", cfg.surgery_id or ""): - self._emit_state("手术号无效(需 6 位数字)") - self._wake.wait(timeout=1.0) - self._wake.clear() - continue - - api = self._get_api(cfg.base_url) - - with self._state_lock: - if self._state.busy: - self._wake.wait(timeout=0.5) - self._wake.clear() - continue - gen_before = self._state.generation - - try: - status, body = api.get_pending(cfg.surgery_id) - except Exception as e: - 
self._log(f"GET pending 失败: {e}") - self._wait_interval(cfg.interval_sec) - continue - - with self._state_lock: - if self._state.generation != gen_before: - continue - if self._state.busy: - continue - - if status == 404: - with self._state_lock: - self._state.last_payload = None - self._state.spoken_cid = None - self._state.failed_resolve_cid = None - self._emit_pending(None) - if not cfg.hide_404_logs: - self._log("暂无待确认") - self._emit_state("轮询中(无待确认)") - self._wait_interval(cfg.interval_sec) - continue - - if status != 200 or not isinstance(body, dict): - self._log(f"GET pending 异常 HTTP {status}: {body}") - self._wait_interval(cfg.interval_sec) - continue - - cid = str(body.get("confirmation_id") or "") - if not cid: - self._wait_interval(cfg.interval_sec) - continue - - with self._state_lock: - self._state.last_payload = body - failed = self._state.failed_resolve_cid - force = self._state.force_retry - spoken = self._state.spoken_cid - - if failed is not None and failed != cid: - self._state.failed_resolve_cid = None - self._state.force_retry = False - failed = None - - if failed == cid and not force: - self._emit_pending(body) - self._wait_interval(cfg.interval_sec) - continue - - if spoken == cid and failed is None and not force: - # Already completed pipeline for this cid without failure; server still returns same id? 
- self._emit_pending(body) - self._wait_interval(cfg.interval_sec) - continue - - self._state.force_retry = False - self._state.busy = True - self._state.spoken_cid = cid - - self._emit_pending(body) - - try: - self._pipeline_play_record_resolve(cfg, api, body, cid) - finally: - with self._state_lock: - self._state.busy = False - - self._wake.clear() - self._wait_interval(cfg.interval_sec) - - def _wait_interval(self, interval_sec: float) -> None: - self._wake.wait(timeout=max(0.5, interval_sec)) - self._wake.clear() - - def _pipeline_play_record_resolve( - self, - cfg: MonitorSettings, - api: ConfirmationApiClient, - body: dict[str, Any], - cid: str, - ) -> None: - gen_lock = self._state_lock - with gen_lock: - gen_run = self._state.generation - - try: - self._emit_state("播放话术…") - play_mp3_from_base64(str(body.get("prompt_audio_mp3_base64") or "")) - except Exception as e: - self._log(f"播放失败: {e}") - with gen_lock: - self._state.failed_resolve_cid = cid - self._emit_state("播放失败(可重试)") - return - - with gen_lock: - if self._state.generation != gen_run: - return - - try: - self._emit_state("录音中…") - wav = record_wav_16k_mono( - cfg.record_seconds, - device=cfg.sounddevice_device, - prefer_ffmpeg=cfg.prefer_ffmpeg_record, - ) - except Exception as e: - self._log(f"录音失败: {e}") - with gen_lock: - self._state.failed_resolve_cid = cid - self._emit_state("录音失败(可重试)") - return - - with gen_lock: - if self._state.generation != gen_run: - return - - if cfg.dry_run: - self._log(f"[dry-run] 已录音 {len(wav)} 字节,跳过上传") - with gen_lock: - self._state.failed_resolve_cid = None - self._state.spoken_cid = None - self._state.generation += 1 - self._emit_state("待机(dry-run)") - return - - try: - self._emit_state("上传识别…") - st, res = api.post_resolve(cfg.surgery_id, cid, wav) - except Exception as e: - self._log(f"POST resolve 失败: {e}") - with gen_lock: - self._state.failed_resolve_cid = cid - self._emit_state("上传失败(可重试)") - return - - if st == 200 and isinstance(res, dict) and 
res.get("status") == "accepted": - self._log( - f"已确认: {res.get('message', '')} " - f"(resolved_label={res.get('resolved_label')!r})" - ) - with gen_lock: - self._state.failed_resolve_cid = None - self._state.spoken_cid = None - self._state.last_payload = None - self._state.generation += 1 - self._emit_pending(None) - self._emit_state("待机") - return - - self._log(f"resolve 未接受 HTTP {st}: {res}") - with gen_lock: - self._state.failed_resolve_cid = cid - self._emit_state("解析/上传被拒(可重试)") diff --git a/voice_confirmation_client/core/paths.py b/voice_confirmation_client/core/paths.py deleted file mode 100644 index 134e0a3..0000000 --- a/voice_confirmation_client/core/paths.py +++ /dev/null @@ -1,47 +0,0 @@ -"""Resolve bundled helper binaries (ffplay/ffmpeg) next to the package or PyInstaller extract dir.""" - -from __future__ import annotations - -import sys -from pathlib import Path - - -def package_root() -> Path: - """Directory containing `voice_confirmation_client` package.""" - return Path(__file__).resolve().parent.parent - - -def frozen_base() -> Path | None: - """PyInstaller onefile/onedir: sys._MEIPASS or executable dir.""" - if getattr(sys, "frozen", False): - meipass = getattr(sys, "_MEIPASS", None) - if meipass: - return Path(meipass) - return Path(sys.executable).resolve().parent - return None - - -def bin_dir() -> Path: - """Optional `bin/` next to package (dev) or under _MEIPASS (frozen).""" - fb = frozen_base() - if fb is not None: - d = fb / "voice_confirmation_bin" - if d.is_dir(): - return d - return package_root() / "bin" - - -def find_ffplay() -> Path | None: - for name in ("ffplay", "ffplay.exe"): - p = bin_dir() / name - if p.is_file(): - return p - return None - - -def find_ffmpeg() -> Path | None: - for name in ("ffmpeg", "ffmpeg.exe"): - p = bin_dir() / name - if p.is_file(): - return p - return None diff --git a/voice_confirmation_client/core/playback.py b/voice_confirmation_client/core/playback.py deleted file mode 100644 index 
1739db1..0000000 --- a/voice_confirmation_client/core/playback.py +++ /dev/null @@ -1,61 +0,0 @@ -"""Play MP3 bytes via system player or bundled ffplay.""" - -from __future__ import annotations - -import base64 -import os -import shutil -import subprocess -import sys -import tempfile -from pathlib import Path - -from voice_confirmation_client.core.paths import find_ffplay - - -def play_mp3_from_base64(b64: str) -> None: - raw_b64 = "".join((b64 or "").split()) - if not raw_b64: - raise ValueError("empty prompt_audio_mp3_base64") - data = base64.b64decode(raw_b64, validate=False) - with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f: - f.write(data) - tmp = f.name - try: - _play_mp3_path(Path(tmp)) - finally: - try: - os.unlink(tmp) - except OSError: - pass - - -def _play_mp3_path(path: Path) -> None: - bundled = find_ffplay() - if bundled and bundled.is_file(): - subprocess.run( - [str(bundled), "-nodisp", "-autoexit", "-loglevel", "quiet", str(path)], - check=True, - timeout=600, - ) - return - ffplay = shutil.which("ffplay") - if ffplay: - subprocess.run( - [ffplay, "-nodisp", "-autoexit", "-loglevel", "quiet", str(path)], - check=True, - timeout=600, - ) - return - if sys.platform == "darwin": - subprocess.run(["afplay", str(path)], check=True, timeout=600) - return - if os.name == "nt": - os.startfile(str(path)) # type: ignore[attr-defined] - import time - - time.sleep(5) - return - raise RuntimeError( - "No MP3 player found. Install ffmpeg (ffplay) or run on macOS with afplay." 
- ) diff --git a/voice_confirmation_client/core/record.py b/voice_confirmation_client/core/record.py deleted file mode 100644 index dc70a7a..0000000 --- a/voice_confirmation_client/core/record.py +++ /dev/null @@ -1,94 +0,0 @@ -"""Record microphone to 16 kHz mono WAV (sounddevice or ffmpeg).""" - -from __future__ import annotations - -import io -import subprocess -import sys -import tempfile -import wave -from pathlib import Path - -import numpy as np - -from voice_confirmation_client.core.paths import find_ffmpeg - - -def record_wav_16k_mono( - duration_sec: float, - *, - device: int | str | None = None, - prefer_ffmpeg: bool = False, - ffmpeg_input_args: list[str] | None = None, -) -> bytes: - """Return WAV file bytes (16-bit PCM, 16 kHz, mono).""" - if prefer_ffmpeg: - bundled = find_ffmpeg() - ffmpeg_bin = str(bundled) if bundled and bundled.is_file() else shutil_which_ffmpeg() - if ffmpeg_bin: - return _record_ffmpeg(ffmpeg_bin, duration_sec, ffmpeg_input_args) - return _record_sounddevice(duration_sec, device=device) - - -def shutil_which_ffmpeg() -> str | None: - import shutil - - return shutil.which("ffmpeg") - - -def _record_sounddevice(duration_sec: float, device: int | str | None) -> bytes: - import sounddevice as sd - - samplerate = 16000 - frames = int(duration_sec * samplerate) - kwargs: dict = {"samplerate": samplerate, "channels": 1, "dtype": "float32"} - if device is not None and device != "": - kwargs["device"] = device - recording = sd.rec(frames, **kwargs) - sd.wait() - mono = np.clip(recording.reshape(-1), -1.0, 1.0) - pcm = (mono * 32767.0).astype(np.int16) - buf = io.BytesIO() - with wave.open(buf, "wb") as wf: - wf.setnchannels(1) - wf.setsampwidth(2) - wf.setframerate(samplerate) - wf.writeframes(pcm.tobytes()) - return buf.getvalue() - - -def default_ffmpeg_input_args() -> list[str]: - if sys.platform == "darwin": - return ["-f", "avfoundation", "-i", ":0"] - if sys.platform == "win32": - return ["-f", "dshow", "-i", "audio=Microphone"] - 
return ["-f", "alsa", "-i", "default"] - - -def _record_ffmpeg( - ffmpeg_bin: str, duration_sec: float, ffmpeg_input_args: list[str] | None -) -> bytes: - input_args = ffmpeg_input_args if ffmpeg_input_args else default_ffmpeg_input_args() - with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp: - out = tmp.name - try: - cmd = [ - ffmpeg_bin, - "-y", - "-loglevel", - "error", - *input_args, - "-t", - str(duration_sec), - "-ar", - "16000", - "-ac", - "1", - "-sample_fmt", - "s16", - out, - ] - subprocess.run(cmd, check=True, timeout=int(duration_sec) + 45) - return Path(out).read_bytes() - finally: - Path(out).unlink(missing_ok=True) diff --git a/voice_confirmation_client/gui/__init__.py b/voice_confirmation_client/gui/__init__.py deleted file mode 100644 index 97c1277..0000000 --- a/voice_confirmation_client/gui/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""PySide6 desktop GUI.""" diff --git a/voice_confirmation_client/gui/main_window.py b/voice_confirmation_client/gui/main_window.py deleted file mode 100644 index f43bd02..0000000 --- a/voice_confirmation_client/gui/main_window.py +++ /dev/null @@ -1,364 +0,0 @@ -"""Main PySide6 window for the voice confirmation client.""" - -from __future__ import annotations - -import json -from typing import Any - -from PySide6.QtCore import Qt, QObject, QTimer, Signal -from PySide6.QtGui import QCloseEvent, QFont, QShowEvent -from PySide6.QtWidgets import ( - QCheckBox, - QComboBox, - QDoubleSpinBox, - QFormLayout, - QFrame, - QGroupBox, - QHBoxLayout, - QLabel, - QLineEdit, - QMainWindow, - QPushButton, - QPlainTextEdit, - QSplitter, - QVBoxLayout, - QWidget, -) - -from loguru import logger - -from voice_confirmation_client.core.assignment_listener import VoiceAssignmentListener -from voice_confirmation_client.core.machine_config import ( - http_base_url_from_config, - load_voice_client_config, - machine_config_file_path, - save_user_voice_client_config, - user_voice_client_config_path, - 
voice_terminal_id_from_config, -) -from voice_confirmation_client.core.monitor_worker import MonitorWorker -from voice_confirmation_client.logging_config import setup_voice_client_logging - -# 待确认接口仍为轮询;界面不再暴露,固定默认间隔。 -_DEFAULT_PENDING_POLL_INTERVAL_SEC = 5.0 - - -class _Bridge(QObject): - log_line = Signal(str) - state_text = Signal(str) - pending_payload = Signal(object) - voice_assign_start = Signal(str) - voice_assign_end = Signal(str) - - -class MainWindow(QMainWindow): - def __init__(self) -> None: - super().__init__() - self.setWindowTitle("手术室耗材语音确认客户端") - self.resize(920, 640) - self._assignment_listener: VoiceAssignmentListener | None = None - self._assigned_surgery_id: str = "" - self._rec_banner_pulse_phase: bool = False - self._rec_pulse_timer = QTimer(self) - self._rec_pulse_timer.setInterval(550) - self._rec_pulse_timer.timeout.connect(self._pulse_recording_banner) - - self._bridge = _Bridge() - self._bridge.pending_payload.connect(self._show_pending) - self._bridge.voice_assign_start.connect(self._on_server_assign_start) - self._bridge.voice_assign_end.connect(self._on_server_assign_end) - - _mc = load_voice_client_config() - - central = QWidget() - self.setCentralWidget(central) - root = QVBoxLayout(central) - - form_box = QGroupBox("连接") - form = QFormLayout(form_box) - self._base_url = QLineEdit() - self._record_sec = QDoubleSpinBox() - self._record_sec.setRange(2.0, 60.0) - self._record_sec.setValue(8.0) - self._record_sec.setSuffix(" s") - form.addRow("服务端 Base URL", self._base_url) - self._base_url.setText(http_base_url_from_config(_mc)) - self._terminal_id = QLineEdit(voice_terminal_id_from_config(_mc)) - self._terminal_id.setPlaceholderText("与 OR_SITE_CONFIG 中 voice_terminal_id 一致") - self._auto_assign = QCheckBox("启用服务端自动指派(开录后自动监控该手术)") - self._auto_assign.setChecked(True) - form.addRow("本机语音终端 ID", self._terminal_id) - form.addRow(self._auto_assign) - self._terminal_id.editingFinished.connect(self._on_connection_fields_edited) - 
self._base_url.editingFinished.connect(self._on_connection_fields_edited) - self._auto_assign.toggled.connect(self._on_auto_assign_toggled) - form.addRow("录音时长", self._record_sec) - self._record_sec.valueChanged.connect(lambda _: self._apply_settings_silent()) - root.addWidget(form_box) - - adv = QGroupBox("音频 / 调试") - adv_l = QFormLayout(adv) - self._device_combo = QComboBox() - self._device_combo.addItem("系统默认麦克风", None) - self._populate_input_devices() - self._prefer_ffmpeg = QCheckBox("优先使用 ffmpeg 录音(需本机 ffmpeg 且设备参数可用)") - self._hide_404 = QCheckBox("隐藏 404 轮询日志(推荐)") - self._hide_404.setChecked(True) - self._dry_run = QCheckBox("Dry-run:录音后不上传") - adv_l.addRow("输入设备", self._device_combo) - adv_l.addRow(self._prefer_ffmpeg) - adv_l.addRow(self._hide_404) - adv_l.addRow(self._dry_run) - root.addWidget(adv) - - btn_row = QHBoxLayout() - self._btn_stop = QPushButton("停止监控(本机)") - self._btn_stop.setEnabled(False) - self._btn_retry = QPushButton("重试本轮(播放+录音+上传)") - self._btn_replay = QPushButton("仅重播话术") - btn_row.addWidget(self._btn_stop) - btn_row.addWidget(self._btn_retry) - btn_row.addWidget(self._btn_replay) - btn_row.addStretch() - root.addLayout(btn_row) - - self._recording_banner = QFrame() - self._recording_banner.setObjectName("recordingBanner") - self._recording_banner.setVisible(False) - bl = QHBoxLayout(self._recording_banner) - bl.setContentsMargins(14, 12, 14, 12) - self._recording_banner_label = QLabel("● 正在录音 — 请对着麦克风清晰作答") - self._recording_banner_label.setAlignment(Qt.AlignmentFlag.AlignCenter) - bf = QFont() - bf.setPointSize(15) - bf.setBold(True) - self._recording_banner_label.setFont(bf) - self._recording_banner_label.setStyleSheet("color: #ffffff; background: transparent;") - bl.addWidget(self._recording_banner_label, stretch=1) - self._apply_recording_banner_style("#b71c1c") - root.addWidget(self._recording_banner) - - self._status_label = QLabel("待机") - root.addWidget(self._status_label) - 
self._bridge.state_text.connect(self._on_worker_state) - - split = QSplitter(Qt.Orientation.Horizontal) - self._pending_view = QPlainTextEdit() - self._pending_view.setReadOnly(True) - self._pending_view.setPlaceholderText("待确认内容将显示在这里…") - self._log = QPlainTextEdit() - self._log.setReadOnly(True) - self._log.setPlaceholderText("日志…") - split.addWidget(self._pending_view) - split.addWidget(self._log) - split.setSizes([360, 520]) - root.addWidget(split, stretch=1) - - self._bridge.log_line.connect(self._append_log_plain) - setup_voice_client_logging( - gui_line_sink=lambda m: self._bridge.log_line.emit(m.rstrip("\n")), - ) - logger.info( - "语音确认客户端已启动;本机终端 ID 须与 OR_SITE_CONFIG 中 voice_terminal_id 一致" - ) - _sys_cfg = machine_config_file_path() - if _sys_cfg.is_file(): - logger.info("系统级配置: {}", _sys_cfg) - _user_cfg = user_voice_client_config_path() - if _user_cfg.is_file(): - logger.info("用户级配置(界面保存): {}", _user_cfg) - - self._worker = MonitorWorker( - on_log=None, - on_state=lambda s: self._bridge.state_text.emit(s), - on_pending=lambda p: self._bridge.pending_payload.emit(p), - ) - self._worker.start_thread() - - self._btn_stop.clicked.connect(self._stop_monitoring) - self._btn_retry.clicked.connect(self._worker.retry_failed) - self._btn_replay.clicked.connect(self._worker.replay_prompt_only) - - self._apply_settings_silent() - - def showEvent(self, event: QShowEvent) -> None: - super().showEvent(event) - self._sync_assignment_listener() - - def _apply_recording_banner_style(self, bg_hex: str) -> None: - self._recording_banner.setStyleSheet( - f"QFrame#recordingBanner {{ background-color: {bg_hex}; border-radius: 8px; }}" - ) - - def _pulse_recording_banner(self) -> None: - self._rec_banner_pulse_phase = not self._rec_banner_pulse_phase - self._apply_recording_banner_style( - "#c62828" if self._rec_banner_pulse_phase else "#b71c1c" - ) - - def _set_recording_banner_active(self, active: bool) -> None: - if active: - self._recording_banner.setVisible(True) - 
self._rec_banner_pulse_phase = False - self._apply_recording_banner_style("#b71c1c") - if not self._rec_pulse_timer.isActive(): - self._rec_pulse_timer.start() - else: - self._rec_pulse_timer.stop() - self._recording_banner.setVisible(False) - self._apply_recording_banner_style("#b71c1c") - - def _on_worker_state(self, s: str) -> None: - self._status_label.setText(s) - self._set_recording_banner_active("录音中" in s) - - def _refresh_window_title(self) -> None: - base = "手术室耗材语音确认客户端" - if self._assigned_surgery_id: - self.setWindowTitle(f"{base} — 手术 {self._assigned_surgery_id}") - else: - self.setWindowTitle(base) - - def _on_auto_assign_toggled(self, _checked: bool) -> None: - if not self._auto_assign.isChecked(): - self._worker.set_monitoring(False) - self._assigned_surgery_id = "" - self._refresh_window_title() - self._btn_stop.setEnabled(False) - self._on_worker_state("已关闭自动指派") - self._apply_settings_silent() - self._sync_assignment_listener() - - def _on_connection_fields_edited(self) -> None: - try: - save_user_voice_client_config( - voice_terminal_id=self._terminal_id.text(), - http_base_url=self._base_url.text(), - ) - except OSError as exc: - logger.warning("无法保存用户级配置: {}", exc) - self._sync_assignment_listener() - - def _sync_assignment_listener(self) -> None: - if self._assignment_listener: - self._assignment_listener.stop() - self._assignment_listener = None - if not self._auto_assign.isChecked(): - logger.info("未勾选「启用服务端自动指派」,不连接 WebSocket") - self._worker.set_monitoring(False) - self._assigned_surgery_id = "" - self._refresh_window_title() - self._btn_stop.setEnabled(False) - self._apply_settings_silent() - return - tid = self._terminal_id.text().strip() - base = self._base_url.text().strip() - if not tid: - logger.warning( - "「本机语音终端 ID」为空,无法接收开录指派;请在每机配置文件或界面中填写(须与 OR_SITE_CONFIG 中 voice_terminal_id 一致)" - ) - return - if not base: - logger.warning("服务端 Base URL 为空,无法连接指派接口") - return - self._assignment_listener = VoiceAssignmentListener( - 
http_base_url=base, - terminal_id=tid, - on_start=lambda s: self._bridge.voice_assign_start.emit(s), - on_end=lambda s: self._bridge.voice_assign_end.emit(s), - ) - self._assignment_listener.start() - - def _on_server_assign_start(self, sid: str) -> None: - if len(sid) != 6 or not sid.isdigit(): - logger.warning("服务端指派无效手术号: {!r}(须为 6 位数字)", sid) - return - self._assigned_surgery_id = sid - self._refresh_window_title() - self._apply_settings_silent() - self._worker.set_monitoring(True) - self._btn_stop.setEnabled(True) - logger.info("服务端已指派手术 {},已自动开始监控(WebSocket 指派)", sid) - - def _on_server_assign_end(self, sid: str) -> None: - self._worker.set_monitoring(False) - self._assigned_surgery_id = "" - self._refresh_window_title() - self._btn_stop.setEnabled(False) - self._apply_settings_silent() - self._on_worker_state("已停止(服务端结束)") - logger.info("服务端已结束手术 {},已自动停止监控", sid) - - def _show_pending(self, payload: object) -> None: - if payload is None: - self._pending_view.clear() - return - if not isinstance(payload, dict): - self._pending_view.setPlainText(str(payload)) - return - try: - text = json.dumps(payload, ensure_ascii=False, indent=2) - except (TypeError, ValueError): - text = str(payload) - self._pending_view.setPlainText(text) - - def _populate_input_devices(self) -> None: - try: - import sounddevice as sd - except ImportError: - return - try: - devices = sd.query_devices() - hostapis = sd.query_hostapis() - except Exception: - return - for i, d in enumerate(devices): - if d.get("max_input_channels", 0) <= 0: - continue - ha = hostapis[d["hostapi"]]["name"] if d.get("hostapi") is not None else "" - label = f"{i}: {d.get('name', '')} ({ha})" - self._device_combo.addItem(label, i) - - def _apply_settings_silent(self) -> None: - dev_data = self._device_combo.currentData() - self._worker.set_settings( - base_url=self._base_url.text().strip(), - surgery_id=self._assigned_surgery_id, - interval_sec=_DEFAULT_PENDING_POLL_INTERVAL_SEC, - 
record_seconds=float(self._record_sec.value()), - dry_run=self._dry_run.isChecked(), - hide_404_logs=self._hide_404.isChecked(), - prefer_ffmpeg_record=self._prefer_ffmpeg.isChecked(), - sounddevice_device=dev_data, - ) - - def _stop_monitoring(self) -> None: - self._worker.set_monitoring(False) - self._assigned_surgery_id = "" - self._refresh_window_title() - self._btn_stop.setEnabled(False) - self._apply_settings_silent() - logger.info("—— 本地已停止监控;服务端结束手术或再次开录后将自动恢复指派 ——") - self._on_worker_state("已停止(本地)") - - def _append_log_plain(self, line: str) -> None: - """由 loguru GUI sink 写入,已含时间与级别,不再加前缀。""" - w = getattr(self, "_log", None) - if w is None: - return - w.appendPlainText(line) - sb = w.verticalScrollBar() - sb.setValue(sb.maximum()) - - def shutdown(self) -> None: - """停止后台线程;窗口关闭与 Ctrl+C(aboutToQuit)共用。""" - self._rec_pulse_timer.stop() - self._set_recording_banner_active(False) - if self._assignment_listener: - self._assignment_listener.stop() - self._assignment_listener = None - self._worker.stop_thread() - - def closeEvent(self, event: QCloseEvent) -> None: - self.shutdown() - event.accept() diff --git a/voice_confirmation_client/logging_config.py b/voice_confirmation_client/logging_config.py deleted file mode 100644 index 2026770..0000000 --- a/voice_confirmation_client/logging_config.py +++ /dev/null @@ -1,35 +0,0 @@ -"""桌面客户端 loguru:终端 + 可选 GUI 日志区。""" - -from __future__ import annotations - -import sys -from collections.abc import Callable - -from loguru import logger - - -def setup_voice_client_logging( - *, - gui_line_sink: Callable[[str], None] | None = None, - stderr_level: str = "INFO", - gui_level: str = "DEBUG", -) -> None: - """初始化 sink:彩色 stderr + 可选一行一条写入 Qt(由 sink 内 Signal 线程安全投递)。""" - logger.remove() - logger.add( - sys.stderr, - format=( - "{time:HH:mm:ss} | " - "{level:<8} | " - "{name}:{function} | " - "{message}" - ), - level=stderr_level, - colorize=sys.stderr.isatty(), - ) - if gui_line_sink: - logger.add( - gui_line_sink, - 
format="{time:HH:mm:ss.SSS} | {level:<8} | {message}", - level=gui_level, - ) diff --git a/voice_confirmation_client/resources/voice_client.sample.json b/voice_confirmation_client/resources/voice_client.sample.json deleted file mode 100644 index 51df23b..0000000 --- a/voice_confirmation_client/resources/voice_client.sample.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "voice_terminal_id": "desktop-1", - "http_base_url": "http://127.0.0.1:38080" -} diff --git a/web/voice-confirmation/README.md b/web/voice-confirmation/README.md new file mode 100644 index 0000000..2e01832 --- /dev/null +++ b/web/voice-confirmation/README.md @@ -0,0 +1,36 @@ +# 手术室耗材语音确认(浏览器端) + +与 FastAPI **分宿部署**的纯静态页面(HTML + JavaScript + [Tailwind CSS](https://tailwindcss.com) CDN):配置 **服务端 Base URL** 与 **本机语音终端 ID** 即可使用,协议与 [`docs/客户端手术通信接口说明.md`](../../docs/客户端手术通信接口说明.md) 一致(WebSocket 收 `voice_assignment` / `voice_pending` / `voice_pending_empty`,HTTP `POST .../resolve` 上传 WAV)。 + +## 本地预览 + +勿使用 `file://` 打开(麦克风与跨域行为异常)。在仓库根目录先执行 `uv sync --group dev`(**livereload** 在 dev 组中)。然后: + +```bash +chmod +x start_voice_confirmation_web.sh # 仅首次 +./start_voice_confirmation_web.sh # 默认 8080;编辑 HTML/JS 后浏览器自动刷新 +# 若不需要热重载(仅 stdlib 静态服务): +./start_voice_confirmation_web.sh --plain 8080 +``` + +Windows:`start_voice_confirmation_web.bat`;无热重载时首参 `--plain` 再跟端口。亦可: + +```bash +uv run --group dev python scripts/dev_static_livereload.py --root web/voice-confirmation -p 8080 +``` + +浏览器访问 `http://127.0.0.1:8080`(或所选端口),将 **Base URL** 指向监控 API(如 `http://127.0.0.1:38080`)。 + +## 生产部署 + +- 将本目录原样部署到任意静态托管(Nginx、对象存储、CDN 等)。 +- **HTTPS**:非 localhost 下浏览器通常要求安全上下文才允许麦克风;API 须使用 **https** 与 **wss**,避免混合内容被拦截。 +- **CORS**:后端需对浏览器 `fetch` 放行;默认可开启环境变量 **`DEMO_CORS_ENABLED=true`**,并将 **`DEMO_CORS_ORIGINS`** 设为静态页来源(或联调期 `*`,见 [`main.py`](../../main.py))。 + +## 与 `scripts/demo_client` 的关系 + +本目录与 [`scripts/demo_client/`](../../scripts/demo_client/) **相互独立**:Demo 含 HTTP 轮询待确认等联调能力;本页与官方语音确认语义一致,**不轮询** `GET 
.../pending-confirmation`。 + +## 推荐浏览器 + +录音与 `AudioContext` 行为以 **Chrome / Edge 新版** 为优先验证目标。 diff --git a/web/voice-confirmation/index.html b/web/voice-confirmation/index.html new file mode 100644 index 0000000..c125c9c --- /dev/null +++ b/web/voice-confirmation/index.html @@ -0,0 +1,259 @@ + + + + + + 手术室耗材 — 语音确认 + + + + + +
+
+

+ 语音确认 +

+

+ 独立网页客户端:仅 WebSocket 收队首、HTTP 上传答复。请通过 http(s) 服务打开本页(勿用 + file://)。生产环境请为静态页与 API 配置 HTTPS / WSS。 +

+
+ +
+
+

连接

+
+
+ + +
+
+ + +
+ +
+
+ + +
+ +
+
+
+ +
+

操作

+

+ 待机 +

+ +
+ + + +
+
+
+ +
+
+

排队序号(当前 FIFO)

+

+

累积序号(本场入队)

+

+

+ 排队:pending_queue_position / + pending_queue_length;累积:pending_cumulative_ordinal(均由服务端在 voice_pending 与 GET 中下发)。 +

+
+
+

服务端语音确认结果(最近一次 HTTP 响应)

+
+
+
+ +
+
+

队首待确认(JSON)

+

+        
+
+

日志

+

+        
+
+
/**
 * web/voice-confirmation/voice_app.js
 *
 * Operating-room voice confirmation: browser client. The state machine mirrors
 * the behavior of the former desktop MonitorWorker.
 * Pending items arrive only over WebSocket (voice_pending / voice_pending_empty);
 * HTTP is used solely for POST .../resolve.
 */

// localStorage keys for the persisted form fields.
const LS_BASE = "vc_http_base_url";
const LS_TID = "vc_terminal_id";
const LS_SEC = "vc_record_sec";
const LS_AUTO = "vc_auto_assign";
const LS_DRY = "vc_dry_run";

// Delay before an unexpected WebSocket close triggers a reconnect attempt.
const WS_RECONNECT_DELAY_MS = 2000;

/**
 * Resolve after `ms` milliseconds.
 * @param {number} ms
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((r) => setTimeout(r, ms));
}

/**
 * Map an HTTP(S) base URL to the matching WebSocket origin (ws:// or wss://).
 * Only scheme and host[:port] are kept; any path on the base URL is dropped.
 * @param {string} httpBase
 * @returns {string|null} WebSocket origin, or null when `httpBase` is not a valid URL.
 */
function httpBaseToWsRoot(httpBase) {
  let u;
  try {
    u = new URL(httpBase.trim());
  } catch {
    return null;
  }
  const scheme = u.protocol === "https:" ? "wss:" : "ws:";
  return `${scheme}//${u.host}`;
}

/**
 * Build the voice-terminal WebSocket URL for a terminal ID.
 * NOTE(review): the path is rooted at the origin, whereas postResolve() keeps
 * any path prefix of the base URL — confirm whether a prefixed deployment
 * (e.g. https://host/api/) is expected to serve the WebSocket under the prefix too.
 * @param {string} httpBase
 * @param {string} terminalId
 * @returns {string|null} Full URL, or null when `httpBase` is invalid.
 */
function buildVoiceWsUrl(httpBase, terminalId) {
  const root = httpBaseToWsRoot(httpBase);
  if (!root) return null;
  const path = `/client/voice-terminals/ws?terminal_id=${encodeURIComponent(terminalId.trim())}`;
  return root + path;
}

// --- WAV ---

/**
 * Linear-interpolation resampler for mono float samples.
 * @param {Float32Array} input
 * @param {number} inRate source sample rate (Hz)
 * @param {number} outRate target sample rate (Hz)
 * @returns {Float32Array} `input` itself when the rates match, otherwise a new array.
 */
function resampleFloat32(input, inRate, outRate) {
  if (inRate === outRate) return input;
  const outLen = Math.max(1, Math.floor((input.length * outRate) / inRate));
  const out = new Float32Array(outLen);
  for (let i = 0; i < outLen; i++) {
    const pos = (i * inRate) / outRate;
    const i0 = Math.floor(pos);
    const f = pos - i0;
    // Samples past the end of the input are treated as silence.
    out[i] = (1 - f) * (input[i0] ?? 0) + f * (input[i0 + 1] ?? 0);
  }
  return out;
}

/**
 * Convert float samples to signed 16-bit PCM, clamping to [-1, 1] first.
 * @param {Float32Array} floats
 * @returns {Int16Array}
 */
function floatToPcmS16le(floats) {
  const n = floats.length;
  const buf = new Int16Array(n);
  for (let i = 0; i < n; i++) {
    const s = Math.max(-1, Math.min(1, floats[i]));
    // Asymmetric scale so that -1 maps to -32768 and +1 to 32767.
    buf[i] = s < 0 ? s * 0x8000 : s * 0x7fff;
  }
  return buf;
}

/**
 * Wrap mono 16-bit PCM in a 44-byte RIFF/WAVE header.
 * @param {Int16Array} pcm
 * @param {number} sampleRate
 * @returns {Uint8Array} complete WAV file bytes
 */
function pcmS16leToWav(pcm, sampleRate) {
  const n = pcm.byteLength;
  const ab = new ArrayBuffer(44 + n);
  const dv = new DataView(ab);
  const enc = (o, s) => {
    for (let i = 0; i < s.length; i++) dv.setUint8(o + i, s.charCodeAt(i));
  };
  enc(0, "RIFF");
  dv.setUint32(4, 36 + n, true);
  enc(8, "WAVE");
  enc(12, "fmt ");
  dv.setUint32(16, 16, true); // fmt chunk size
  dv.setUint16(20, 1, true); // PCM
  dv.setUint16(22, 1, true); // mono
  dv.setUint32(24, sampleRate, true);
  dv.setUint32(28, sampleRate * 2, true); // byte rate = rate * 2 bytes * 1 channel
  dv.setUint16(32, 2, true); // block align
  dv.setUint16(34, 16, true); // bits per sample
  enc(36, "data");
  dv.setUint32(40, n, true);
  new Uint8Array(ab, 44).set(new Uint8Array(pcm.buffer, pcm.byteOffset, pcm.byteLength));
  return new Uint8Array(ab);
}

/**
 * Capture audio from an existing MediaStream (getUserMedia must already have
 * succeeded) and return it as 16 kHz mono 16-bit WAV bytes.
 * The AudioContext must be resumed; otherwise the ScriptProcessor does not run
 * in most browsers and the WAV is (nearly) silent.
 * The stream's tracks are always stopped before returning.
 * @param {MediaStream} stream
 * @param {number} durationSec
 * @returns {Promise<Uint8Array>}
 */
async function recordWav16kFromStream(stream, durationSec) {
  const targetRate = 16000;
  const chunks = [];
  let ac;
  let source;
  let proc;
  try {
    try {
      ac = new AudioContext({ sampleRate: targetRate });
    } catch {
      // Requested rate not supported: use the default rate and resample below.
      ac = new AudioContext();
    }
    const inRate = ac.sampleRate;
    source = ac.createMediaStreamSource(stream);
    const bufferSize = 4096;
    proc = ac.createScriptProcessor(bufferSize, 1, 1);
    proc.onaudioprocess = (e) => {
      const ch0 = e.inputBuffer.getChannelData(0);
      // Copy: the engine reuses the underlying buffer between callbacks.
      chunks.push(new Float32Array(ch0));
    };
    source.connect(proc);
    proc.connect(ac.destination);
    if (ac.state === "suspended") {
      await ac.resume();
    }
    await sleep(Math.max(0, durationSec) * 1000);
    let total = 0;
    for (const c of chunks) total += c.length;
    const merged = new Float32Array(total);
    let o = 0;
    for (const c of chunks) {
      merged.set(c, o);
      o += c.length;
    }
    const atTarget = resampleFloat32(merged, inRate, targetRate);
    const pcm = floatToPcmS16le(atTarget);
    return pcmS16leToWav(pcm, targetRate);
  } finally {
    // Best-effort teardown; each step is independent of the others.
    try {
      proc && proc.disconnect();
    } catch {
      /* */
    }
    try {
      source && source.disconnect();
    } catch {
      /* */
    }
    try {
      stream.getTracks().forEach((t) => t.stop());
    } catch {
      /* */
    }
    if (ac) {
      try {
        await ac.close();
      } catch {
        /* */
      }
    }
  }
}

/**
 * Record `durationSec` seconds of 16 kHz mono 16-bit PCM WAV from the default
 * microphone (native 16 kHz AudioContext when available, otherwise resampled).
 * @param {number} durationSec
 * @returns {Promise<Uint8Array>}
 */
async function recordWav16kMono(durationSec) {
  const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  return await recordWav16kFromStream(stream, durationSec);
}

/**
 * Decode a Base64 MP3 payload into a Blob.
 * @param {string} b64
 * @returns {Blob}
 * @throws {Error} when the payload is empty (message contains "empty").
 */
function mp3BlobFromBase64(b64) {
  const raw = (b64 || "").replace(/\s/g, "");
  if (!raw) throw new Error("empty prompt_audio_mp3_base64");
  const binary = atob(raw);
  const u8 = new Uint8Array(binary.length);
  for (let i = 0; i < binary.length; i++) u8[i] = binary.charCodeAt(i);
  return new Blob([u8], { type: "audio/mpeg" });
}

/**
 * Read the duration of an audio object URL via a metadata-only probe element.
 * @param {string} objectUrl
 * @returns {Promise<number>} duration in seconds; rejects when the metadata
 *   cannot be loaded or the duration is not a positive finite number.
 */
function getMp3DurationSecFromObjectUrl(objectUrl) {
  return new Promise((resolve, reject) => {
    const probe = new Audio();
    probe.preload = "metadata";
    // Detach handlers and release the source so the probe can be collected.
    const cleanup = () => {
      try {
        probe.onloadedmetadata = null;
        probe.onerror = null;
        probe.src = "";
        probe.remove?.();
      } catch {
        /* */
      }
    };
    probe.onloadedmetadata = () => {
      const d = probe.duration;
      cleanup();
      if (!Number.isFinite(d) || d <= 0) {
        reject(new Error("invalid TTS duration"));
        return;
      }
      resolve(d);
    };
    probe.onerror = () => {
      cleanup();
      reject(new Error("TTS metadata load failed"));
    };
    probe.src = objectUrl;
  });
}

/**
 * Play the TTS prompt and record the microphone at the same time.
 * Total capture length = TTS duration + `postAfterTtsSec` (minimum 2 s).
 * If the TTS metadata cannot be read, falls back to play-then-record and
 * captures only max(2, postAfterTtsSec) seconds.
 * @param {string} b64 Base64 MP3 prompt
 * @param {number} postAfterTtsSec extra seconds to record after the prompt ends
 * @returns {Promise<Uint8Array>} recorded WAV bytes
 */
async function playTtsParallelRecord(b64, postAfterTtsSec) {
  const post = Math.max(0, Number(postAfterTtsSec) || 0);
  const blob = mp3BlobFromBase64(b64);
  const url = URL.createObjectURL(blob);
  let ttsSec;
  try {
    ttsSec = await getMp3DurationSecFromObjectUrl(url);
  } catch {
    URL.revokeObjectURL(url);
    // Without metadata the total length cannot be aligned with the TTS: play first, then record.
    await playMp3FromBase64(b64);
    return await recordWav16kMono(Math.max(2, post));
  }
  const totalSec = Math.max(2, ttsSec + post);
  let stream;
  try {
    stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  } catch (e) {
    URL.revokeObjectURL(url);
    throw e;
  }
  // Start the capture promise first (getUserMedia has already been awaited);
  // its internal `await ac.resume()` settles in a microtask. play() is deferred
  // with setTimeout(0) to a macrotask so it cannot run before the resume,
  // otherwise onaudioprocess never fires and the WAV is silent.
  const recP = recordWav16kFromStream(stream, totalSec);
  const playP = new Promise((resolve, reject) => {
    const audio = new Audio();
    audio.preload = "auto";
    audio.onended = () => {
      URL.revokeObjectURL(url);
      resolve();
    };
    audio.onerror = () => {
      URL.revokeObjectURL(url);
      reject(new Error("audio play failed"));
    };
    audio.src = url;
    setTimeout(() => {
      audio.play().catch((err) => {
        URL.revokeObjectURL(url);
        reject(err);
      });
    }, 0);
  });
  // Wait for both so the recorder's cleanup always finishes before we report.
  const settled = await Promise.allSettled([playP, recP]);
  if (settled[0].status === "rejected") throw settled[0].reason;
  if (settled[1].status === "rejected") throw settled[1].reason;
  return settled[1].value;
}

/**
 * Play a Base64 MP3 and resolve when playback ends.
 * @param {string} b64
 * @returns {Promise<void>} rejects on an empty payload or playback failure.
 */
function playMp3FromBase64(b64) {
  let blob;
  try {
    blob = mp3BlobFromBase64(b64);
  } catch (e) {
    return Promise.reject(e);
  }
  const url = URL.createObjectURL(blob);
  return new Promise((resolve, reject) => {
    const audio = new Audio();
    audio.preload = "auto";
    audio.onended = () => {
      URL.revokeObjectURL(url);
      resolve();
    };
    audio.onerror = () => {
      URL.revokeObjectURL(url);
      reject(new Error("audio play failed"));
    };
    audio.src = url;
    audio.play().catch((e) => {
      URL.revokeObjectURL(url);
      reject(e);
    });
  });
}

/**
 * Upload the recorded answer as multipart field `audio` to
 * `client/surgeries/{surgeryId}/pending-confirmation/{confirmationId}/resolve`,
 * resolved relative to `baseUrl` (a path prefix on the base URL is preserved).
 * @param {string} baseUrl
 * @param {string} surgeryId
 * @param {string} confirmationId
 * @param {Uint8Array} wavBytes
 * @returns {Promise<{status: number, body: object|string}>} `body` is the parsed
 *   JSON when the response parses, otherwise the raw response text.
 */
async function postResolve(baseUrl, surgeryId, confirmationId, wavBytes) {
  const enc = encodeURIComponent(confirmationId);
  const u = new URL(
    `client/surgeries/${surgeryId}/pending-confirmation/${enc}/resolve`,
    baseUrl.endsWith("/") ? baseUrl : baseUrl + "/"
  );
  const form = new FormData();
  form.append("audio", new Blob([wavBytes], { type: "audio/wav" }), "voice.wav");
  const res = await fetch(u.toString(), { method: "POST", body: form });
  const text = await res.text();
  let body = text;
  try {
    body = text ? JSON.parse(text) : {};
  } catch {
    /* keep string */
  }
  return { status: res.status, body };
}

// --- State machine (aligned with monitor_worker.MonitorWorker) ---

/**
 * Drives the play-prompt / record / upload cycle for the head-of-queue pending
 * confirmation. Inputs are WebSocket events injected by the page; outputs go
 * through the optional hooks `onLog`, `onState`, `onPending`, `onResolveResult`.
 */
class VoiceMonitorEngine {
  /**
   * @param {{onLog?: Function, onState?: Function, onPending?: Function, onResolveResult?: Function}} hooks
   */
  constructor(hooks) {
    this.onLog = hooks.onLog;
    this.onState = hooks.onState;
    this.onPending = hooks.onPending;
    this.onResolveResult = hooks.onResolveResult;
    this.settings = {
      base_url: "http://127.0.0.1:38080",
      surgery_id: "",
      record_seconds: 5,
      dry_run: false,
    };
    this._state = {
      generation: 0, // bumped on settings/monitoring changes to invalidate in-flight work
      busy: false,
      spoken_cid: null, // confirmation_id already prompted
      failed_resolve_cid: null, // confirmation_id whose last attempt failed
      force_retry: false, // set by retryFailed()
      last_payload: null,
    };
    this._stop = false;
    this._monitoring = false;
    this._wsEventQueue = [];
    this._wakeResolvers = [];
  }

  /** Queue a `voice_pending` message (shallow-copied) and wake the loop. */
  injectWsPending(payload) {
    this._wsEventQueue.push(["pending", { ...payload }]);
    this._wake();
  }

  /** Queue a `voice_pending_empty` notice for `surgeryId` and wake the loop. */
  injectWsPendingEmpty(surgeryId) {
    this._wsEventQueue.push(["empty", (surgeryId || "").trim()]);
    this._wake();
  }

  /**
   * Consume queued events for `surgeryId`; events for other surgeries stay queued.
   * Only the most recent matching event wins.
   * @param {string} surgeryId
   * @returns {[("none"|"payload"|"empty"), (object|null)]}
   */
  _drainWsEvents(surgeryId) {
    const requeue = [];
    let lastKind = "none";
    let lastBody = null;
    for (const [kind, data] of this._wsEventQueue) {
      if (kind === "pending") {
        const row = { ...data };
        if (String(row.surgery_id || "") !== surgeryId) {
          requeue.push([kind, data]);
          continue;
        }
        delete row.type; // strip the WebSocket envelope field
        lastKind = "payload";
        lastBody = row;
      } else if (kind === "empty") {
        if (String(data) !== surgeryId) {
          requeue.push([kind, data]);
          continue;
        }
        lastKind = "empty";
        lastBody = null;
      }
    }
    this._wsEventQueue = requeue;
    return [lastKind, lastBody];
  }

  /**
   * Merge known keys from `updates` into the settings. Every call bumps the
   * generation; a changed `surgery_id` additionally resets per-item state.
   * @param {object} updates
   */
  setSettings(updates) {
    const oldSid = this.settings.surgery_id;
    for (const [k, v] of Object.entries(updates)) {
      if (k in this.settings) this.settings[k] = v;
    }
    const sidChanged =
      Object.prototype.hasOwnProperty.call(updates, "surgery_id") &&
      this.settings.surgery_id !== oldSid;
    this._state.generation += 1;
    if (sidChanged) {
      this._state.spoken_cid = null;
      this._state.failed_resolve_cid = null;
      this._state.last_payload = null;
      this._state.force_retry = false;
      this._emitPending(null);
    }
    this._wake();
  }

  /** Enable or disable monitoring; disabling invalidates in-flight work. */
  setMonitoring(active) {
    this._monitoring = !!active;
    if (active) {
      this._log("监控已开启 surgery_id=" + JSON.stringify(this.settings.surgery_id));
    } else {
      this._log("监控已关闭");
      this._state.generation += 1;
    }
    this._wake();
  }

  /** Ask runLoop() to exit at its next iteration. */
  stop() {
    this._stop = true;
    this._wake();
  }

  /** Request a re-run (play + record + upload) of the item that last failed. */
  retryFailed() {
    this._state.force_retry = true;
    this._wake();
  }

  _log(msg) {
    if (this.onLog) this.onLog(msg);
  }

  _emitState(s) {
    if (this.onState) this.onState(s);
  }

  _onPending(p) {
    if (this.onPending) this.onPending(p);
  }

  /** Release every waiter currently parked in _waitForWake(). */
  _wake() {
    const rs = this._wakeResolvers.splice(0);
    rs.forEach((r) => r());
  }

  /** Park until the next _wake() call. */
  async _waitForWake() {
    await new Promise((resolve) => {
      this._wakeResolvers.push(resolve);
    });
  }

  _emitPending(p) {
    this._onPending(p);
  }

  _emitResolveResult(payload) {
    if (this.onResolveResult) this.onResolveResult(payload);
  }

  /** Replay the prompt audio of the last payload without recording. */
  async _replayPromptJob() {
    const p = this._state.last_payload;
    if (!p) {
      this._log("没有可重播的待确认数据");
      return;
    }
    const b64 = p.prompt_audio_mp3_base64 || "";
    if (!b64) {
      this._log("当前任务无 MP3 数据");
      return;
    }
    this._emitState("播放话术(手动重播)…");
    try {
      await playMp3FromBase64(b64);
    } catch (e) {
      this._log("重播失败: " + e);
    } finally {
      this._emitState("待机");
    }
  }

  /** Fire-and-forget wrapper; _replayPromptJob handles its own errors. */
  replayPromptOnly() {
    this._replayPromptJob();
  }

  /**
   * Main loop: runs until stop(). While monitoring with a valid 6-digit
   * surgery_id, it drains WebSocket events, decides whether the head item
   * needs (re)processing, and runs the play/record/resolve pipeline.
   */
  async runLoop() {
    while (!this._stop) {
      if (!this._monitoring) {
        await sleep(150);
        continue;
      }
      const cfg = { ...this.settings };
      if (!/^\d{6}$/.test(cfg.surgery_id || "")) {
        this._emitState("手术号无效(需 6 位数字)");
        await sleep(1000);
        continue;
      }
      if (this._state.busy) {
        await sleep(500);
        continue;
      }
      const genBefore = this._state.generation;
      const [wsKind, wsBody] = this._drainWsEvents(cfg.surgery_id);

      if (wsKind === "empty") {
        if (this._state.generation !== genBefore) continue;
        if (this._state.busy) continue;
        this._state.last_payload = null;
        this._state.spoken_cid = null;
        this._state.failed_resolve_cid = null;
        this._emitPending(null);
        this._emitState("待机(无待确认)");
        await this._waitForWake();
        continue;
      }

      let body = null;
      if (wsKind === "payload" && wsBody) body = wsBody;

      if (body == null) {
        // No fresh event: decide from the remembered payload.
        const lp = this._state.last_payload;
        const failed = this._state.failed_resolve_cid;
        const force = this._state.force_retry;
        const cidLp = typeof lp === "object" && lp ? String(lp.confirmation_id || "") : "";
        if (lp && typeof lp === "object" && force && failed === cidLp) {
          // Manual retry of the failed item.
          body = { ...lp };
        } else if (lp && typeof lp === "object" && failed === cidLp && !force) {
          this._emitPending(lp);
          this._emitState("请重试录音或检查麦克风");
          await this._waitForWake();
          continue;
        } else if (
          lp &&
          typeof lp === "object" &&
          failed == null &&
          !force &&
          this._state.spoken_cid === cidLp &&
          cidLp
        ) {
          this._emitPending(lp);
          this._emitState("待机(等待 WebSocket 推送)");
          await this._waitForWake();
          continue;
        }
      }

      if (body == null) {
        this._emitState("待机(等待 WebSocket 推送)");
        await this._waitForWake();
        continue;
      }

      if (this._state.generation !== genBefore) continue;
      if (this._state.busy) continue;

      const cid = String(body.confirmation_id || "");
      if (!cid) {
        await this._waitForWake();
        continue;
      }

      this._state.last_payload = body;
      let failed = this._state.failed_resolve_cid;
      const force = this._state.force_retry;
      const spoken = this._state.spoken_cid;
      if (failed != null && failed !== cid) {
        // The queue head moved on: the old failure no longer applies.
        this._state.failed_resolve_cid = null;
        this._state.force_retry = false;
        failed = null;
      }
      if (failed === cid && !force) {
        this._emitPending(body);
        await this._waitForWake();
        continue;
      }
      if (spoken === cid && failed == null && !force) {
        // Already prompted for this item; do not speak it twice.
        this._emitPending(body);
        await this._waitForWake();
        continue;
      }
      this._state.force_retry = false;
      this._state.busy = true;
      this._state.spoken_cid = cid;

      const qn = body.pending_queue_length;
      if (typeof qn === "number" && qn > 1) {
        this._log("待确认队列共 " + qn + " 条(按 FIFO 队首依次处理)");
      }
      this._emitPending(body);

      try {
        const genRun = this._state.generation;
        await this._pipelinePlayRecordResolve(cfg, body, cid, genRun);
      } finally {
        this._state.busy = false;
      }
    }
  }

  /**
   * One attempt for a pending item: play TTS while recording, then upload.
   * Aborts silently after recording if the generation changed meanwhile.
   * @param {object} cfg settings snapshot taken at the start of the iteration
   * @param {object} body pending payload (reads `prompt_audio_mp3_base64`)
   * @param {string} cid confirmation_id
   * @param {number} genRun generation captured when the attempt started
   */
  async _pipelinePlayRecordResolve(cfg, body, cid, genRun) {
    let wav;
    try {
      this._emitState("TTS 播报中,同时录音中…");
      wav = await playTtsParallelRecord(
        String(body.prompt_audio_mp3_base64 || ""),
        cfg.record_seconds
      );
    } catch (e) {
      this._log("播报/录音失败: " + e);
      this._state.failed_resolve_cid = cid;
      const msg = String(e || "");
      // mp3BlobFromBase64 reports a missing prompt with "empty ..." in its message.
      this._emitState(msg.includes("empty") ? "无 TTS 音频(可重试)" : "播放或录音失败(可重试)");
      return;
    }
    if (this._state.generation !== genRun) return;
    if (cfg.dry_run) {
      this._log("[dry-run] 已录音 " + wav.length + " 字节,跳过上传");
      this._state.failed_resolve_cid = null;
      this._state.spoken_cid = null;
      this._state.generation += 1;
      this._emitState("待机(dry-run)");
      this._emitResolveResult({
        httpStatus: null,
        surgery_id: cfg.surgery_id,
        confirmation_id: cid,
        body: { note: "未请求服务端(dry-run)", recorded_bytes: wav.length },
      });
      return;
    }
    try {
      this._emitState("上传识别…");
      const { status: st, body: res } = await postResolve(cfg.base_url, cfg.surgery_id, cid, wav);
      const baseMeta = { httpStatus: st, surgery_id: cfg.surgery_id, confirmation_id: cid };
      if (st === 200 && typeof res === "object" && res) {
        const rstatus = res.status;
        if (rstatus === "accepted") {
          this._emitResolveResult({ ...baseMeta, body: res });
          this._log(
            "已确认: " +
              (res.message || "") +
              " (resolved_label=" +
              JSON.stringify(res.resolved_label) +
              ")"
          );
          this._state.failed_resolve_cid = null;
          this._state.spoken_cid = null;
          this._state.last_payload = null;
          this._state.generation += 1;
          this._emitPending(null);
          this._emitState("待机");
          return;
        }
        if (rstatus === "failed") {
          // Recoverable ASR/parse failure: the server keeps the item at the queue head.
          this._emitResolveResult({ ...baseMeta, body: res });
          const code = res.error_code || "";
          const msg = String(res.message || "");
          this._log("语音未通过(可重试)" + (code ? "[" + code + "] " : "") + msg);
          this._state.failed_resolve_cid = cid;
          this._emitState("请重试录音或检查麦克风");
          return;
        }
      }
      if (st === 422 && typeof res === "object" && res && res.detail) {
        // Older server contract: recoverable voice errors came back as HTTP 422.
        const d = res.detail;
        if (typeof d === "object" && d.code) {
          const c = d.code;
          if (c === "VOICE_ASR_FAILED" || c === "VOICE_TEXT_EMPTY" || c === "VOICE_PARSE_FAILED") {
            this._emitResolveResult({ ...baseMeta, body: res });
            this._log("语音未通过(可重试,旧接口)[" + c + "]: " + (d.message || ""));
            this._state.failed_resolve_cid = cid;
            this._emitState("请重试录音或检查麦克风");
            return;
          }
        }
      }
      this._emitResolveResult({ ...baseMeta, body: res });
      this._log("resolve 未接受 HTTP " + st + ": " + JSON.stringify(res));
      this._state.failed_resolve_cid = cid;
      this._emitState("解析/上传被拒(可重试)");
    } catch (e) {
      this._emitResolveResult({
        httpStatus: null,
        surgery_id: cfg.surgery_id,
        confirmation_id: cid,
        body: null,
        error: String(e),
      });
      this._log("POST resolve 失败: " + e);
      this._state.failed_resolve_cid = cid;
      this._emitState("上传失败(可重试)");
    }
  }
}

// --- Page wiring ---
let ws = null; // the current WebSocket, or null
let wsManualClose = false; // true while the current socket is being closed on purpose
let wsReconnectTimer = null; // pending auto-reconnect timer id, or null
let assignedSurgeryId = "";

/** Append a timestamped line to the #log element and keep it scrolled to the end. */
function appendLog(msg) {
  const el = document.getElementById("log");
  if (!el) return;
  const t = new Date().toLocaleTimeString();
  el.textContent += (el.textContent ? "\n" : "") + `[${t}] ${msg}`;
  el.scrollTop = el.scrollHeight;
}

/** Show `s` in the #status element. */
function setStatus(s) {
  const el = document.getElementById("status");
  if (el) el.textContent = s;
}

/**
 * Render queue position / cumulative ordinal hints from a pending payload.
 * @param {object|null} p pending payload, or null when nothing is pending
 */
function updateQueueHint(p) {
  const posEl = document.getElementById("queuePositionHint");
  const cumEl = document.getElementById("cumulativeHint");
  if (p == null) {
    if (posEl) posEl.textContent = "无待确认";
    if (cumEl) cumEl.textContent = "无待确认";
    return;
  }
  const n = p.pending_queue_length;
  const pos = p.pending_queue_position;
  const cum = p.pending_cumulative_ordinal;
  const nOk = typeof n === "number" && n >= 1;
  const posOk = typeof pos === "number" && pos >= 1;
  const cumOk = typeof cum === "number" && cum >= 1;
  if (posEl) {
    if (posOk && nOk) {
      posEl.textContent = `第 ${pos} / 共 ${n} 条(FIFO 内排队)`;
    } else if (nOk) {
      posEl.textContent = `共 ${n} 条待确认(服务端未给 pending_queue_position 时仅显示条数)`;
    } else {
      posEl.textContent = "待确认";
    }
  }
  if (cumEl) {
    cumEl.textContent = cumOk
      ? `第 ${cum} 条(本场手术累计入队序号)`
      : "待确认(服务端未返回 pending_cumulative_ordinal)";
  }
}

/** Pretty-print the most recent resolve result into #resolveResult. */
function setResolveResultDisplay(payload) {
  const el = document.getElementById("resolveResult");
  if (!el) return;
  el.textContent = JSON.stringify(payload, null, 2);
}

const engine = new VoiceMonitorEngine({
  onLog: (m) => appendLog(m),
  onState: (s) => {
    setStatus(s);
    const banner = document.getElementById("recBanner");
    if (banner) {
      // The recording banner is driven by the state text itself.
      const recording = String(s).includes("录音中");
      banner.classList.toggle("hidden", !recording);
      banner.classList.toggle("rec-banner-active", recording);
    }
  },
  onPending: (p) => {
    const el = document.getElementById("pendingJson");
    if (el) el.textContent = p == null ? "" : JSON.stringify(p, null, 2);
    updateQueueHint(p);
  },
  onResolveResult: (payload) => setResolveResultDisplay(payload),
});

/** Populate the form from localStorage (with defaults) and push it to the engine. */
function loadForm() {
  const base = localStorage.getItem(LS_BASE) || "http://127.0.0.1:38080";
  const tid = localStorage.getItem(LS_TID) || "";
  const sec = localStorage.getItem(LS_SEC) || "5";
  const auto = localStorage.getItem(LS_AUTO) !== "0";
  const dry = localStorage.getItem(LS_DRY) === "1";
  const baseEl = document.getElementById("baseUrl");
  const tidEl = document.getElementById("terminalId");
  const secEl = document.getElementById("recordSec");
  const autoEl = document.getElementById("autoAssign");
  const dryEl = document.getElementById("dryRun");
  if (baseEl) baseEl.value = base;
  if (tidEl) tidEl.value = tid;
  if (secEl) secEl.value = sec;
  if (autoEl) autoEl.checked = auto;
  if (dryEl) dryEl.checked = dry;
  applySettings();
}

/** Persist the current form values to localStorage. */
function saveForm() {
  const base = document.getElementById("baseUrl")?.value?.trim() || "";
  const tid = document.getElementById("terminalId")?.value?.trim() || "";
  const sec = document.getElementById("recordSec")?.value || "5";
  const auto = document.getElementById("autoAssign")?.checked ?? true;
  const dry = document.getElementById("dryRun")?.checked ?? false;
  localStorage.setItem(LS_BASE, base);
  localStorage.setItem(LS_TID, tid);
  localStorage.setItem(LS_SEC, sec);
  localStorage.setItem(LS_AUTO, auto ? "1" : "0");
  localStorage.setItem(LS_DRY, dry ? "1" : "0");
}

/** Save the form and push base URL, surgery ID, record length and dry-run to the engine. */
function applySettings() {
  saveForm();
  const base = document.getElementById("baseUrl")?.value?.trim() || "";
  const recordSec = parseFloat(document.getElementById("recordSec")?.value || "5") || 5;
  const dry = document.getElementById("dryRun")?.checked ?? false;
  engine.setSettings({
    base_url: base,
    surgery_id: assignedSurgeryId,
    record_seconds: recordSec,
    dry_run: dry,
  });
}

/** Close the current socket on purpose (no auto-reconnect) and forget it. */
function closeCurrentWs() {
  if (!ws) return;
  wsManualClose = true;
  try {
    ws.close();
  } catch {
    /* ignore */
  }
  ws = null;
}

/**
 * (Re)connect the assignment WebSocket from the current form values.
 * Any existing socket and any pending reconnect timer are discarded first.
 * Handlers are bound to the socket they were created for and ignore events
 * once that socket has been superseded, so a late `close` from an old socket
 * can neither clear the new socket reference nor schedule a second connection.
 */
function connectWs() {
  if (wsReconnectTimer !== null) {
    clearTimeout(wsReconnectTimer);
    wsReconnectTimer = null;
  }
  closeCurrentWs();
  const auto = document.getElementById("autoAssign")?.checked ?? true;
  if (!auto) {
    engine.setMonitoring(false);
    assignedSurgeryId = "";
    updateTitle();
    document.getElementById("btnStop")?.setAttribute("disabled", "disabled");
    return;
  }
  const base = document.getElementById("baseUrl")?.value?.trim() || "";
  const tid = document.getElementById("terminalId")?.value?.trim() || "";
  if (!tid || !base) {
    appendLog("未配置 Base URL 或终端 ID,跳过 WebSocket");
    return;
  }
  const url = buildVoiceWsUrl(base, tid);
  if (!url) {
    appendLog("无效的 Base URL");
    return;
  }
  wsManualClose = false;
  appendLog("正在连接 " + url);
  let sock;
  try {
    sock = new WebSocket(url);
  } catch (e) {
    appendLog("WebSocket 构造失败: " + e);
    return;
  }
  ws = sock;
  sock.onopen = () => appendLog("WebSocket 已连接 terminal_id=" + JSON.stringify(tid));
  sock.onclose = (ev) => {
    appendLog("WebSocket 断开 code=" + ev.code);
    // Superseded or deliberately closed socket: the globals belong to someone else now.
    if (ws !== sock) return;
    ws = null;
    if (!wsManualClose && document.getElementById("autoAssign")?.checked) {
      wsReconnectTimer = setTimeout(connectWs, WS_RECONNECT_DELAY_MS);
    }
  };
  sock.onerror = () => appendLog("WebSocket 错误");
  sock.onmessage = (ev) => {
    if (ws !== sock) return;
    let data;
    try {
      data = JSON.parse(ev.data);
    } catch {
      return;
    }
    if (!data || typeof data !== "object") return;
    const t = data.type;
    if (t === "voice_pending") {
      engine.injectWsPending(data);
      return;
    }
    if (t === "voice_pending_empty") {
      engine.injectWsPendingEmpty(String(data.surgery_id || ""));
      return;
    }
    if (t === "voice_assignment") {
      const action = data.action;
      const sid = String(data.surgery_id || "");
      if (action === "start") {
        if (!/^\d{6}$/.test(sid)) {
          appendLog("服务端指派无效手术号: " + JSON.stringify(sid));
          return;
        }
        assignedSurgeryId = sid;
        updateTitle();
        applySettings();
        engine.setMonitoring(true);
        const btn = document.getElementById("btnStop");
        if (btn) btn.removeAttribute("disabled");
        appendLog("服务端已指派手术 " + sid + ",已自动开始监控");
        return;
      }
      if (action === "end") {
        engine.setMonitoring(false);
        assignedSurgeryId = "";
        updateTitle();
        applySettings();
        setStatus("已停止(服务端结束)");
        const btn = document.getElementById("btnStop");
        if (btn) btn.setAttribute("disabled", "disabled");
        appendLog("服务端已结束手术 " + sid + ",已自动停止监控");
      }
    }
  };
}

/** Reflect the assigned surgery (if any) in the #pageTitle element. */
function updateTitle() {
  const t = document.getElementById("pageTitle");
  if (t) t.textContent = assignedSurgeryId ? "语音确认 — 手术 " + assignedSurgeryId : "语音确认";
}

/** Handle the auto-assign checkbox: disconnect and stop when off, connect when on. */
function onAutoAssignChange() {
  if (!document.getElementById("autoAssign")?.checked) {
    engine.setMonitoring(false);
    assignedSurgeryId = "";
    updateTitle();
    document.getElementById("btnStop")?.setAttribute("disabled", "disabled");
    applySettings();
    setStatus("已关闭自动指派");
    if (wsReconnectTimer !== null) {
      clearTimeout(wsReconnectTimer);
      wsReconnectTimer = null;
    }
    closeCurrentWs();
  } else {
    connectWs();
  }
}

/** Stop monitoring locally; the WebSocket stays connected for future assignments. */
function stopMonitoringLocal() {
  engine.setMonitoring(false);
  assignedSurgeryId = "";
  updateTitle();
  document.getElementById("btnStop")?.setAttribute("disabled", "disabled");
  applySettings();
  appendLog("—— 本地已停止监控;服务端结束手术或再次开录后将自动恢复指派 ——");
  setStatus("已停止(本地)");
}

/** Page bootstrap: restore the form, start the engine loop, connect, bind UI events. */
function init() {
  loadForm();
  updateQueueHint(null);
  engine.runLoop().catch((e) => appendLog("runLoop: " + e));
  const auto = document.getElementById("autoAssign");
  if (auto?.checked) {
    connectWs();
  } else {
    applySettings();
  }
  // Connection fields: a change requires a new WebSocket.
  ["baseUrl", "terminalId"].forEach((id) => {
    document.getElementById(id)?.addEventListener("change", () => {
      applySettings();
      if (auto?.checked) connectWs();
    });
  });
  // Record length and dry-run only affect the engine; the connection is left untouched.
  ["recordSec", "dryRun"].forEach((id) => {
    document.getElementById(id)?.addEventListener("change", () => applySettings());
  });
  if (auto) auto.addEventListener("change", onAutoAssignChange);
  document.getElementById("btnStop")?.addEventListener("click", stopMonitoringLocal);
  document.getElementById("btnRetry")?.addEventListener("click", () => engine.retryFailed());
  document.getElementById("btnReplay")?.addEventListener("click", () => engine.replayPromptOnly());
  window.addEventListener("beforeunload", () => {
    engine.stop();
    if (wsReconnectTimer !== null) {
      clearTimeout(wsReconnectTimer);
      wsReconnectTimer = null;
    }
    closeCurrentWs();
  });
}

// Only bootstrap inside a browser document, so the helpers can be loaded elsewhere.
if (typeof document !== "undefined") {
  if (document.readyState === "loading") {
    document.addEventListener("DOMContentLoaded", init);
  } else {
    init();
  }
}