From 4c3f9a367b585c6b0f9d5cdd3d25b19c2972e473 Mon Sep 17 00:00:00 2001 From: Kevin Date: Mon, 27 Apr 2026 09:52:10 +0800 Subject: [PATCH] feat(voice-client): PySide6 desktop client and Windows build scripts Add voice_confirmation_client (poll, TTS MP3 playback, mic WAV resolve), PyInstaller spec, start/build helpers, and API unit tests. Pending manual testing: end-to-end on OR workstations and packaged exe. Made-with: Cursor --- build_voice_confirmation_client.bat | 30 ++ pyproject.toml | 14 + scripts/build_voice_client.py | 37 ++ start_voice_confirmation_client.bat | 5 + start_voice_confirmation_client.sh | 11 + tests/test_voice_confirmation_client_api.py | 48 +++ uv.lock | 182 +++++++++ voice_client.spec | 56 +++ voice_confirmation_client/README.md | 80 ++++ voice_confirmation_client/__init__.py | 3 + voice_confirmation_client/__main__.py | 20 + voice_confirmation_client/core/__init__.py | 3 + voice_confirmation_client/core/api.py | 87 +++++ .../core/monitor_worker.py | 347 ++++++++++++++++++ voice_confirmation_client/core/paths.py | 47 +++ voice_confirmation_client/core/playback.py | 61 +++ voice_confirmation_client/core/record.py | 94 +++++ voice_confirmation_client/gui/__init__.py | 1 + voice_confirmation_client/gui/main_window.py | 198 ++++++++++ 19 files changed, 1324 insertions(+) create mode 100644 build_voice_confirmation_client.bat create mode 100644 scripts/build_voice_client.py create mode 100644 start_voice_confirmation_client.bat create mode 100755 start_voice_confirmation_client.sh create mode 100644 tests/test_voice_confirmation_client_api.py create mode 100644 voice_client.spec create mode 100644 voice_confirmation_client/README.md create mode 100644 voice_confirmation_client/__init__.py create mode 100644 voice_confirmation_client/__main__.py create mode 100644 voice_confirmation_client/core/__init__.py create mode 100644 voice_confirmation_client/core/api.py create mode 100644 voice_confirmation_client/core/monitor_worker.py create mode 100644 
voice_confirmation_client/core/paths.py create mode 100644 voice_confirmation_client/core/playback.py create mode 100644 voice_confirmation_client/core/record.py create mode 100644 voice_confirmation_client/gui/__init__.py create mode 100644 voice_confirmation_client/gui/main_window.py diff --git a/build_voice_confirmation_client.bat b/build_voice_confirmation_client.bat new file mode 100644 index 0000000..6195ad2 --- /dev/null +++ b/build_voice_confirmation_client.bat @@ -0,0 +1,30 @@ +@echo off +REM 在 Windows 上将语音确认客户端打成 PyInstaller 目录包(内含 .exe)。 +REM 需在仓库根目录双击运行,或在 cmd 中执行;首次会自动 uv sync。 +REM 可选:build_voice_confirmation_client.bat --clean (先清空 build、dist) + +setlocal EnableExtensions +cd /d "%~dp0" + +echo [1/2] uv sync --group voice-client-build +uv sync --group voice-client-build +if errorlevel 1 goto :failed + +echo [2/2] PyInstaller ^(voice_client.spec^) +if /i "%~1"=="--clean" ( + uv run --group voice-client-build python scripts\build_voice_client.py --clean +) else ( + uv run --group voice-client-build python scripts\build_voice_client.py +) +if errorlevel 1 goto :failed + +echo. +echo 完成。输出目录: %CD%\dist\voice-confirmation-client\ +echo 主程序: dist\voice-confirmation-client\voice-confirmation-client.exe +pause +exit /b 0 + +:failed +echo 构建失败(见上方日志)。 +pause +exit /b 1 diff --git a/pyproject.toml b/pyproject.toml index f21c499..e8cfa8b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ dependencies = [ [project.scripts] operation-room-monitor-server = "main:main" +voice-confirmation-client = "voice_confirmation_client.__main__:main" # Use PyTorch CPU wheels from the official index so: # - Linux Docker builds (incl. Docker Desktop on Mac) do not install NVIDIA CUDA pip bundles. 
@@ -48,6 +49,19 @@ dev = [ "aiosqlite>=0.21.0", "alembic>=1.14.0", ] +voice-client = [ + "httpx>=0.28.0", + "numpy>=2.0.0", + "PySide6>=6.8.0", + "sounddevice>=0.5.0", +] +voice-client-build = [ + "httpx>=0.28.0", + "numpy>=2.0.0", + "PySide6>=6.8.0", + "sounddevice>=0.5.0", + "pyinstaller>=6.0.0", +] [tool.pytest.ini_options] asyncio_mode = "auto" diff --git a/scripts/build_voice_client.py b/scripts/build_voice_client.py new file mode 100644 index 0000000..0344f09 --- /dev/null +++ b/scripts/build_voice_client.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python3 +"""Build the voice confirmation desktop client with PyInstaller (run on target OS).""" + +from __future__ import annotations + +import argparse +import shutil +import subprocess +import sys +from pathlib import Path + + +def main() -> None: + root = Path(__file__).resolve().parents[1] + spec = root / "voice_client.spec" + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--clean", + action="store_true", + help="Remove build/ and dist/ before building", + ) + args = parser.parse_args() + if args.clean: + for name in ("build", "dist"): + p = root / name + if p.is_dir(): + shutil.rmtree(p) + if not spec.is_file(): + print(f"Missing {spec}", file=sys.stderr) + sys.exit(1) + cmd = [sys.executable, "-m", "PyInstaller", str(spec), "--noconfirm"] + print("Running:", " ".join(cmd)) + raise SystemExit(subprocess.call(cmd, cwd=root)) + + +if __name__ == "__main__": + main() diff --git a/start_voice_confirmation_client.bat b/start_voice_confirmation_client.bat new file mode 100644 index 0000000..da89896 --- /dev/null +++ b/start_voice_confirmation_client.bat @@ -0,0 +1,5 @@ +@echo off +REM 启动手术室耗材语音确认桌面客户端。需已安装 uv 并完成 uv sync --group voice-client +setlocal +cd /d "%~dp0" +uv run --group voice-client python -m voice_confirmation_client %* diff --git a/start_voice_confirmation_client.sh b/start_voice_confirmation_client.sh new file mode 100755 index 0000000..4839769 --- /dev/null +++ 
b/start_voice_confirmation_client.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# 启动手术室耗材语音确认桌面客户端(PySide6)。 +# 依赖:本机已安装 uv,并已执行过 uv sync --group voice-client +# 用法:./start_voice_confirmation_client.sh + +set -euo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "$ROOT" + +exec uv run --group voice-client python -m voice_confirmation_client "$@" diff --git a/tests/test_voice_confirmation_client_api.py b/tests/test_voice_confirmation_client_api.py new file mode 100644 index 0000000..a5b21eb --- /dev/null +++ b/tests/test_voice_confirmation_client_api.py @@ -0,0 +1,48 @@ +"""Core HTTP client tests (no PySide6).""" + +from __future__ import annotations + +import httpx +import pytest + +from voice_confirmation_client.core.api import ConfirmationApiClient + + +def test_post_resolve_url_encoding(monkeypatch: pytest.MonkeyPatch) -> None: + captured: dict = {} + + def handler(request: httpx.Request) -> httpx.Response: + captured["url"] = str(request.url) + return httpx.Response(200, json={"status": "accepted"}) + + transport = httpx.MockTransport(handler) + client = ConfirmationApiClient("http://example.test:8080") + client._client = httpx.Client(transport=transport) # noqa: SLF001 + + st, body = client.post_resolve("123456", "c/id+here", b"RIFF....", "voice.wav") + assert st == 200 + assert isinstance(body, dict) + assert body.get("status") == "accepted" + assert captured["url"].endswith( + "/client/surgeries/123456/pending-confirmation/c%2Fid%2Bhere/resolve" + ) + + client.close() + + +def test_parse_pending() -> None: + client = ConfirmationApiClient("http://localhost") + raw = { + "surgery_id": "123456", + "confirmation_id": "abc", + "prompt_text": "请确认", + "prompt_audio_mp3_base64": "AA", + "options": [{"label": "纱布", "confidence": 0.4}], + "model_top1_label": "x", + "model_top1_confidence": 0.41, + "created_at": "2026-01-01T00:00:00+00:00", + } + p = client.parse_pending(raw) + assert p.confirmation_id == "abc" + assert p.prompt_text == "请确认" + 
client.close() diff --git a/uv.lock b/uv.lock index a17f696..bc730da 100644 --- a/uv.lock +++ b/uv.lock @@ -33,6 +33,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d2/29/6533c317b74f707ea28f8d633734dbda2119bbadfc61b2f3640ba835d0f7/alembic-1.18.4-py3-none-any.whl", hash = "sha256:a5ed4adcf6d8a4cb575f3d759f071b03cd6e5c7618eb796cb52497be25bfe19a", size = 263893, upload-time = "2026-02-10T16:00:49.997Z" }, ] +[[package]] +name = "altgraph" +version = "0.17.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7e/f8/97fdf103f38fed6792a1601dbc16cc8aac56e7459a9fff08c812d8ae177a/altgraph-0.17.5.tar.gz", hash = "sha256:c87b395dd12fabde9c99573a9749d67da8d29ef9de0125c7f536699b4a9bc9e7", size = 48428, upload-time = "2025-11-21T20:35:50.583Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a9/ba/000a1996d4308bc65120167c21241a3b205464a2e0b58deda26ae8ac21d1/altgraph-0.17.5-py2.py3-none-any.whl", hash = "sha256:f3a22400bce1b0c701683820ac4f3b159cd301acab067c51c653e06961600597", size = 21228, upload-time = "2025-11-21T20:35:49.444Z" }, +] + [[package]] name = "annotated-doc" version = "0.0.4" @@ -641,6 +650,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0c/29/0348de65b8cc732daa3e33e67806420b2ae89bdce2b04af740289c5c6c8c/loguru-0.7.3-py3-none-any.whl", hash = "sha256:31a33c10c8e1e10422bfd431aeb5d351c7cf7fa671e3c4df004162264b28220c", size = 61595, upload-time = "2024-12-06T11:20:54.538Z" }, ] +[[package]] +name = "macholib" +version = "1.16.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "altgraph", marker = "sys_platform == 'darwin'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/10/2f/97589876ea967487978071c9042518d28b958d87b17dceb7cdc1d881f963/macholib-1.16.4.tar.gz", hash = "sha256:f408c93ab2e995cd2c46e34fe328b130404be143469e41bc366c807448979362", size = 59427, upload-time = "2025-11-22T08:28:38.373Z" } +wheels = [ + { 
url = "https://files.pythonhosted.org/packages/c7/d1/a9f36f8ecdf0fb7c9b1e78c8d7af12b8c8754e74851ac7b94a8305540fc7/macholib-1.16.4-py2.py3-none-any.whl", hash = "sha256:da1a3fa8266e30f0ce7e97c6a54eefaae8edd1e5f86f3eb8b95457cae90265ea", size = 38117, upload-time = "2025-11-22T08:28:36.939Z" }, +] + [[package]] name = "mako" version = "1.3.11" @@ -877,6 +898,19 @@ dev = [ { name = "pytest" }, { name = "pytest-asyncio" }, ] +voice-client = [ + { name = "httpx" }, + { name = "numpy" }, + { name = "pyside6" }, + { name = "sounddevice" }, +] +voice-client-build = [ + { name = "httpx" }, + { name = "numpy" }, + { name = "pyinstaller" }, + { name = "pyside6" }, + { name = "sounddevice" }, +] [package.metadata] requires-dist = [ @@ -906,6 +940,19 @@ dev = [ { name = "pytest", specifier = ">=8.3.0" }, { name = "pytest-asyncio", specifier = ">=0.25.0" }, ] +voice-client = [ + { name = "httpx", specifier = ">=0.28.0" }, + { name = "numpy", specifier = ">=2.0.0" }, + { name = "pyside6", specifier = ">=6.8.0" }, + { name = "sounddevice", specifier = ">=0.5.0" }, +] +voice-client-build = [ + { name = "httpx", specifier = ">=0.28.0" }, + { name = "numpy", specifier = ">=2.0.0" }, + { name = "pyinstaller", specifier = ">=6.0.0" }, + { name = "pyside6", specifier = ">=6.8.0" }, + { name = "sounddevice", specifier = ">=0.5.0" }, +] [[package]] name = "packaging" @@ -916,6 +963,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7a/c2/920ef838e2f0028c8262f16101ec09ebd5969864e5a64c4c05fad0617c56/packaging-26.1-py3-none-any.whl", hash = "sha256:5d9c0669c6285e491e0ced2eee587eaf67b670d94a19e94e3984a481aba6802f", size = 95831, upload-time = "2026-04-14T21:12:47.56Z" }, ] +[[package]] +name = "pefile" +version = "2024.8.26" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/03/4f/2750f7f6f025a1507cd3b7218691671eecfd0bbebebe8b39aa0fe1d360b8/pefile-2024.8.26.tar.gz", hash = 
"sha256:3ff6c5d8b43e8c37bb6e6dd5085658d658a7a0bdcd20b6a07b1fcfc1c4e9d632", size = 76008, upload-time = "2024-08-26T20:58:38.155Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/16/12b82f791c7f50ddec566873d5bdd245baa1491bac11d15ffb98aecc8f8b/pefile-2024.8.26-py3-none-any.whl", hash = "sha256:76f8b485dcd3b1bb8166f1128d395fa3d87af26360c2358fb75b80019b957c6f", size = 74766, upload-time = "2024-08-26T21:01:02.632Z" }, +] + [[package]] name = "pillow" version = "12.2.0" @@ -1218,6 +1274,47 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f4/7e/a72dd26f3b0f4f2bf1dd8923c85f7ceb43172af56d63c7383eb62b332364/pygments-2.20.0-py3-none-any.whl", hash = "sha256:81a9e26dd42fd28a23a2d169d86d7ac03b46e2f8b59ed4698fb4785f946d0176", size = 1231151, upload-time = "2026-03-29T13:29:30.038Z" }, ] +[[package]] +name = "pyinstaller" +version = "6.20.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "altgraph" }, + { name = "macholib", marker = "sys_platform == 'darwin'" }, + { name = "packaging" }, + { name = "pefile", marker = "sys_platform == 'win32'" }, + { name = "pyinstaller-hooks-contrib" }, + { name = "pywin32-ctypes", marker = "sys_platform == 'win32'" }, + { name = "setuptools" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/46/60/d03d52e6690d4e9caf333dcd14550cde634ce6c118b3bc8fa3112c3186fd/pyinstaller-6.20.0.tar.gz", hash = "sha256:95c5c7e03d5d61e9dfb8ef259c699cf492bb1041beb6dbe83696608cec07347a", size = 4048728, upload-time = "2026-04-22T20:59:36.96Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d0/e4/e228d6d1bbb7fd62dc660a8fb202a583b023d3a3624ca95d1a9290ee4d6a/pyinstaller-6.20.0-py3-none-macosx_10_13_universal2.whl", hash = "sha256:bf3be4e1284ee78ddccba5e29f99443a12a7b4673168288ffc4c9d38c6f7b90e", size = 1047642, upload-time = "2026-04-22T20:58:32.006Z" }, + { url = 
"https://files.pythonhosted.org/packages/ce/bd/afb631bcb3f9040efebd4f6d067f0828b51710818f69fb41a2d4b7787f52/pyinstaller-6.20.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:72ae9c1fdea134afa791f58bdc9a1934d5c7609753c111e0026bfc272b32b712", size = 742494, upload-time = "2026-04-22T20:58:36.285Z" }, + { url = "https://files.pythonhosted.org/packages/76/08/0729a5bac14754150e5d83b39d87d842eb42b0bffcaa03dbad6252e23a39/pyinstaller-6.20.0-py3-none-manylinux2014_i686.whl", hash = "sha256:1031bcc307f3fbeffd4e162723e64d46dbf591c82dd0997413afb2a07328b941", size = 754191, upload-time = "2026-04-22T20:58:40.603Z" }, + { url = "https://files.pythonhosted.org/packages/e6/82/bc0ee4c7b97db1958eb651e0da9fb1e672e5ae53ca8867fd97701de52906/pyinstaller-6.20.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:8df3b3f347659fa2562d8d193a98ad4600133b8b8d07c268df89e4154376750e", size = 751902, upload-time = "2026-04-22T20:58:44.7Z" }, + { url = "https://files.pythonhosted.org/packages/3d/e7/770002d6aaa54173881cb2c49bb195ba67b97bf39bac1cdf320f28401629/pyinstaller-6.20.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:b0d3cc9dd8120d448459bd3880a12e2f9774c51443af49047801446377999a59", size = 748634, upload-time = "2026-04-22T20:58:48.579Z" }, + { url = "https://files.pythonhosted.org/packages/fe/db/68ba1fccb71278b2124fb90b37b7c8c0bc4c1173fba45b94466df3d9cb7f/pyinstaller-6.20.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:03696bb6350177c6bc23bcaf78e71a33c4a89b6754dd90d1be2f318e978c918b", size = 748490, upload-time = "2026-04-22T20:58:52.749Z" }, + { url = "https://files.pythonhosted.org/packages/03/0f/ac77ffa996a56be3d5c8f85734a007f8347240691657f9704e7de2527fa3/pyinstaller-6.20.0-py3-none-musllinux_1_1_aarch64.whl", hash = "sha256:6357f1699f6af84f37e7367f031d4f68abdba65543b83990c9e8f5a4cebed0b7", size = 747650, upload-time = "2026-04-22T20:58:57.093Z" }, + { url = 
"https://files.pythonhosted.org/packages/e0/56/1ee91c3a2bc10ca1f36da10a6fd55ff7efc4dec367171eb25992a827874f/pyinstaller-6.20.0-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:0ab39c690abad26ba148e8f664f0478acc82a733997f4f22e757774832802da9", size = 747413, upload-time = "2026-04-22T20:59:01.174Z" }, + { url = "https://files.pythonhosted.org/packages/d7/55/ae264339996953c4cdf9d89d916a0a8fa26a83cf917a742fff8b9d5f3fe8/pyinstaller-6.20.0-py3-none-win32.whl", hash = "sha256:9a7637e8e44b4387b13667fdcaac86ab6b29c446c16d34d8401539b81838759c", size = 1331584, upload-time = "2026-04-22T20:59:07.201Z" }, + { url = "https://files.pythonhosted.org/packages/76/8c/300f57578882cce259bfb5ae56fda3b69caa3fe9df40a176c719920ea6e2/pyinstaller-6.20.0-py3-none-win_amd64.whl", hash = "sha256:d588844e890ee80c4365867f98146636e1849bbca8e4284bbf0c809aff0f161a", size = 1391851, upload-time = "2026-04-22T20:59:14.024Z" }, + { url = "https://files.pythonhosted.org/packages/8a/ea/b2f8e1642aecda78c0b75c7321f708e49e10bb3c00dd4f148c40761a1527/pyinstaller-6.20.0-py3-none-win_arm64.whl", hash = "sha256:bd53282c0a73e5c95573e1ddc8e5d564d4932bec91efbaed4dc5fdff9c2ae7f2", size = 1332259, upload-time = "2026-04-22T20:59:20.509Z" }, +] + +[[package]] +name = "pyinstaller-hooks-contrib" +version = "2026.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "packaging" }, + { name = "setuptools" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c7/fe/9278c29394bf69169febc21f96b4252c3ee7c8ec22c2fc545004bed47e71/pyinstaller_hooks_contrib-2026.4.tar.gz", hash = "sha256:766c281acb1ecc32e21c8c667056d7ebf5da0aabd5e30c219f9c2a283620eeaa", size = 173050, upload-time = "2026-03-31T14:10:51.188Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/f4/035fb8c06deff827f540a9a4ed9122c54e5376fca3e42eddf0c263730775/pyinstaller_hooks_contrib-2026.4-py3-none-any.whl", hash = "sha256:1de1a5e49a878122010b88c7e295502bc69776c157c4a4dc78741a4e6178b00f", size = 455496, 
upload-time = "2026-03-31T14:10:49.867Z" }, +] + [[package]] name = "pyparsing" version = "3.3.2" @@ -1227,6 +1324,54 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/10/bd/c038d7cc38edc1aa5bf91ab8068b63d4308c66c4c8bb3cbba7dfbc049f9c/pyparsing-3.3.2-py3-none-any.whl", hash = "sha256:850ba148bd908d7e2411587e247a1e4f0327839c40e2e5e6d05a007ecc69911d", size = 122781, upload-time = "2026-01-21T03:57:55.912Z" }, ] +[[package]] +name = "pyside6" +version = "6.11.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyside6-addons" }, + { name = "pyside6-essentials" }, + { name = "shiboken6" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/f4/95/f3f5a2799163b6658126d78a85bc1dec9eda88c75c26780556b26071a1d8/pyside6-6.11.0-cp310-abi3-macosx_13_0_universal2.whl", hash = "sha256:1f2735dc4f2bd4ec452ae50502c8a22128bba0aced35358a2bbc58384b820c6f", size = 571544, upload-time = "2026-03-23T12:47:20.263Z" }, + { url = "https://files.pythonhosted.org/packages/da/89/9a1f521051714e6694ebbe2b979ded279845ec8e25cb309ca3960158d74f/pyside6-6.11.0-cp310-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:c642e2d25704ca746fd37f56feacf25c5aecc4cd40bef23d18eec81f87d9dc00", size = 571725, upload-time = "2026-03-23T12:47:21.727Z" }, + { url = "https://files.pythonhosted.org/packages/c2/3d/f779d8bba00fcde31a7d7fb6b59347a70773c9cc8135592dea9972579877/pyside6-6.11.0-cp310-abi3-manylinux_2_39_aarch64.whl", hash = "sha256:267b344c73580ac938ca63c611881fb42a3922ebfe043e271005f4f06c372c4e", size = 571722, upload-time = "2026-03-23T12:47:22.761Z" }, + { url = "https://files.pythonhosted.org/packages/ac/98/150e01a026df3e9697310236821fa825319bb4b9d6137539cb25a3032968/pyside6-6.11.0-cp310-abi3-win_amd64.whl", hash = "sha256:9092cb002ca43c64006afb2e0d0f6f51aef17aa737c33a45e502326a081ddcbc", size = 577988, upload-time = "2026-03-23T12:47:23.795Z" }, + { url = 
"https://files.pythonhosted.org/packages/50/e7/55960f7c6b41d058e95cb4af02652c46c48702c506c8bbf12e99550e1fb3/pyside6-6.11.0-cp310-abi3-win_arm64.whl", hash = "sha256:b15f39acc2b8f46251a630acad0d97f9a0a0461f2baffcd66d7adfada8eb641e", size = 561372, upload-time = "2026-03-23T12:47:25.073Z" }, +] + +[[package]] +name = "pyside6-addons" +version = "6.11.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyside6-essentials" }, + { name = "shiboken6" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/c0/df/241f311c61a46b7b1195927da77b2537692ee3442aa9ccd87981164ff78d/pyside6_addons-6.11.0-cp310-abi3-macosx_13_0_universal2.whl", hash = "sha256:d5eaa4643302e3a0fa94c5766234bee4073d7d5ab9c2b7fd222692a176faf182", size = 331554157, upload-time = "2026-03-23T12:40:40.497Z" }, + { url = "https://files.pythonhosted.org/packages/31/b9/e81172835ccc9d8b9792cc6bf7524a252a0db9a76ddd693de230402697f9/pyside6_addons-6.11.0-cp310-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:ac6fe3d4ef4497dde3efc5e896b0acd53ff6c93be4bf485f045690f919419f35", size = 174948482, upload-time = "2026-03-23T12:41:05.379Z" }, + { url = "https://files.pythonhosted.org/packages/a8/a4/426d9333782bf65ab2a20257d6b4b3af9b8d5d7a710da719865fab49d492/pyside6_addons-6.11.0-cp310-abi3-manylinux_2_39_aarch64.whl", hash = "sha256:8ffb40222456078930816ebcac2f2511716d2acbc11716dd5acc5c365179a753", size = 170430798, upload-time = "2026-03-23T12:41:38.134Z" }, + { url = "https://files.pythonhosted.org/packages/35/9a/46d271fedfabad8c6dce2ebb69bb593745487ed33753a56a47c3ba4fdb1c/pyside6_addons-6.11.0-cp310-abi3-win_amd64.whl", hash = "sha256:413e6121c24f5ffdce376298059eddecff74aa6d638e94e0f6015b33d29b889e", size = 168723088, upload-time = "2026-03-23T12:42:00.668Z" }, + { url = "https://files.pythonhosted.org/packages/16/cd/1b28264f7dc9a642da2e4e7c02f67418d0949eb7ce329ae20869703c2630/pyside6_addons-6.11.0-cp310-abi3-win_arm64.whl", hash = 
"sha256:aaaee83385977a0fe134b2f4fbfb92b45a880d5b656e4d90a708eef10b1b6de8", size = 35698324, upload-time = "2026-03-23T12:42:13.748Z" }, +] + +[[package]] +name = "pyside6-essentials" +version = "6.11.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "shiboken6" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/d2/00/8a8583d3429c737cc20e61a43eba8ab1ec13ddb101e99802c2ffeedf3b41/pyside6_essentials-6.11.0-cp310-abi3-macosx_13_0_universal2.whl", hash = "sha256:85d6ca87ef35fa6565d385ede72ae48420dd3f63113929d10fc800f6b0360e01", size = 108085251, upload-time = "2026-03-23T12:42:52.872Z" }, + { url = "https://files.pythonhosted.org/packages/f3/a9/07c9e5c014b871c1b19caf8f994bcd50b345559b81f81671217b49559b67/pyside6_essentials-6.11.0-cp310-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:dc20e7afd5fc6fe51297db91cef997ce60844be578f7a49fc61b7ab9657a8849", size = 78316055, upload-time = "2026-03-23T12:43:04.19Z" }, + { url = "https://files.pythonhosted.org/packages/7c/35/f06b1b641d7600ec46374c16cd37c66fa4a22870326b4eb073a95471035f/pyside6_essentials-6.11.0-cp310-abi3-manylinux_2_39_aarch64.whl", hash = "sha256:4854cb0a1b061e7a576d8fb7bb7cf9f49540d558b1acb7df0742a7afefe61e4e", size = 77380821, upload-time = "2026-03-23T12:43:24.649Z" }, + { url = "https://files.pythonhosted.org/packages/ff/37/ba95c6262836d2b286b4e05a9d16a5e870995d5d2503ac6adc6312208049/pyside6_essentials-6.11.0-cp310-abi3-win_amd64.whl", hash = "sha256:3b3362882ad9389357a80504e600180006a957731fec05786fced7b038461fdf", size = 75793322, upload-time = "2026-03-23T12:43:35.575Z" }, + { url = "https://files.pythonhosted.org/packages/53/27/d17f25e45820e633a70e6109b35991eda09a5e8000c2a306f0ab7538d48c/pyside6_essentials-6.11.0-cp310-abi3-win_arm64.whl", hash = "sha256:81ca603dbf21bc39f89bb42db215c25ebe0c879a1a4c387625c321d2730ec187", size = 56337457, upload-time = "2026-03-23T12:43:43.573Z" }, +] + [[package]] name = "pytest" version = "9.0.3" @@ -1285,6 +1430,15 @@ 
wheels = [ { url = "https://files.pythonhosted.org/packages/9a/22/f1925cdda983ab66fc8ec6ec8014b959262747e58bdca26a4e3d1da29d56/python_multipart-0.0.26-py3-none-any.whl", hash = "sha256:c0b169f8c4484c13b0dcf2ef0ec3a4adb255c4b7d18d8e420477d2b1dd03f185", size = 28847, upload-time = "2026-04-10T14:09:58.131Z" }, ] +[[package]] +name = "pywin32-ctypes" +version = "0.2.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/85/9f/01a1a99704853cb63f253eea009390c88e7131c67e66a0a02099a8c917cb/pywin32-ctypes-0.2.3.tar.gz", hash = "sha256:d162dc04946d704503b2edc4d55f3dba5c1d539ead017afa00142c38b9885755", size = 29471, upload-time = "2024-08-14T10:15:34.626Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/de/3d/8161f7711c017e01ac9f008dfddd9410dff3674334c233bde66e7ba65bbf/pywin32_ctypes-0.2.3-py3-none-any.whl", hash = "sha256:8a1513379d709975552d202d942d9837758905c8d01eb82b8bcc30918929e7b8", size = 30756, upload-time = "2024-08-14T10:15:33.187Z" }, +] + [[package]] name = "pyyaml" version = "6.0.3" @@ -1409,6 +1563,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e1/e3/c164c88b2e5ce7b24d667b9bd83589cf4f3520d97cad01534cd3c4f55fdb/setuptools-81.0.0-py3-none-any.whl", hash = "sha256:fdd925d5c5d9f62e4b74b30d6dd7828ce236fd6ed998a08d81de62ce5a6310d6", size = 1062021, upload-time = "2026-02-06T21:10:37.175Z" }, ] +[[package]] +name = "shiboken6" +version = "6.11.0" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/82/1d/b56b7b694fbc871496435488d1f41c5068de546334850d722756511cef65/shiboken6-6.11.0-cp310-abi3-macosx_13_0_universal2.whl", hash = "sha256:d88e8a1eb705f2b9ad21db08a61ae1dc0c773e5cd86a069de0754c4cf1f9b43b", size = 476085, upload-time = "2026-03-23T12:47:05.724Z" }, + { url = 
"https://files.pythonhosted.org/packages/65/cb/4bb0c76011166230daa7c0074aeb3fdb3935c83ac1fef3789b85fcd1a8fc/shiboken6-6.11.0-cp310-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:ad54e64f8192ddbdff0c54ac82b89edcd62ed623f502ea21c960541d19514053", size = 271055, upload-time = "2026-03-23T12:47:07.349Z" }, + { url = "https://files.pythonhosted.org/packages/f5/96/771a6e2b530f725303d16d78a321fa4876b98b4f3615c9851880df8c1a43/shiboken6-6.11.0-cp310-abi3-manylinux_2_39_aarch64.whl", hash = "sha256:a10dc7718104ea2dc15d5b0b96909b77162ce1c76fcc6968e6df692b947a00e9", size = 267456, upload-time = "2026-03-23T12:47:08.689Z" }, + { url = "https://files.pythonhosted.org/packages/72/f7/44c0c42c3f5f29dec457fd46ea0552174bcb8aa75becf03bbd90308ba07b/shiboken6-6.11.0-cp310-abi3-win_amd64.whl", hash = "sha256:483ff78a73c7b3189ca924abc694318084f078bcfeaffa68e32024ff2d025ee1", size = 1222132, upload-time = "2026-03-23T12:47:10.143Z" }, + { url = "https://files.pythonhosted.org/packages/fb/99/6e5ee21db2d6af84bbbd7d871d441dafeb069c6de5667b1aa49891a77c66/shiboken6-6.11.0-cp310-abi3-win_arm64.whl", hash = "sha256:3bd76cf56105ab2d62ecaff630366f11264f69b88d488f10f048da9a065781f4", size = 1783186, upload-time = "2026-03-23T12:47:11.832Z" }, +] + [[package]] name = "six" version = "1.17.0" @@ -1418,6 +1584,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, ] +[[package]] +name = "sounddevice" +version = "0.5.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/2a/f9/2592608737553638fca98e21e54bfec40bf577bb98a61b2770c912aab25e/sounddevice-0.5.5.tar.gz", hash = "sha256:22487b65198cb5bf2208755105b524f78ad173e5ab6b445bdab1c989f6698df3", size = 143191, 
upload-time = "2026-01-23T18:36:43.529Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/0a/478e441fd049002cf308520c0d62dd8333e7c6cc8d997f0dda07b9fbcc46/sounddevice-0.5.5-py3-none-any.whl", hash = "sha256:30ff99f6c107f49d25ad16a45cacd8d91c25a1bcdd3e81a206b921a3a6405b1f", size = 32807, upload-time = "2026-01-23T18:36:35.649Z" }, + { url = "https://files.pythonhosted.org/packages/56/f9/c037c35f6d0b6bc3bc7bfb314f1d6f1f9a341328ef47cd63fc4f850a7b27/sounddevice-0.5.5-py3-none-macosx_10_6_x86_64.macosx_10_6_universal2.whl", hash = "sha256:05eb9fd6c54c38d67741441c19164c0dae8ce80453af2d8c4ad2e7823d15b722", size = 108557, upload-time = "2026-01-23T18:36:37.41Z" }, + { url = "https://files.pythonhosted.org/packages/88/a1/d19dd9889cd4bce2e233c4fac007cd8daaf5b9fe6e6a5d432cf17be0b807/sounddevice-0.5.5-py3-none-win32.whl", hash = "sha256:1234cc9b4c9df97b6cbe748146ae0ec64dd7d6e44739e8e42eaa5b595313a103", size = 317765, upload-time = "2026-01-23T18:36:39.047Z" }, + { url = "https://files.pythonhosted.org/packages/c3/0e/002ed7c4c1c2ab69031f78989d3b789fee3a7fba9e586eb2b81688bf4961/sounddevice-0.5.5-py3-none-win_amd64.whl", hash = "sha256:cfc6b2c49fb7f555591c78cb8ecf48d6a637fd5b6e1db5fec6ed9365d64b3519", size = 365324, upload-time = "2026-01-23T18:36:40.496Z" }, + { url = "https://files.pythonhosted.org/packages/4e/39/a61d4b83a7746b70d23d9173be688c0c6bfc7173772344b7442c2c155497/sounddevice-0.5.5-py3-none-win_arm64.whl", hash = "sha256:3861901ddd8230d2e0e8ae62ac320cdd4c688d81df89da036dcb812f757bb3e6", size = 317115, upload-time = "2026-01-23T18:36:42.235Z" }, +] + [[package]] name = "sqlalchemy" version = "2.0.49" diff --git a/voice_client.spec b/voice_client.spec new file mode 100644 index 0000000..a57b99c --- /dev/null +++ b/voice_client.spec @@ -0,0 +1,56 @@ +# PyInstaller spec for the voice confirmation desktop client. +# Build on the target OS (Windows / macOS / Linux). 
+# Usage: uv run --group voice-client-build pyinstaller voice_client.spec + +from PyInstaller.utils.hooks import collect_all + +datas, binaries, hiddenimports = collect_all("PySide6") + +block_cipher = None + +a = Analysis( + ["voice_confirmation_client/__main__.py"], + pathex=[], + binaries=binaries, + datas=datas, + hiddenimports=hiddenimports + ["sounddevice", "numpy"], + hookspath=[], + hooksconfig={}, + runtime_hooks=[], + excludes=[], + win_no_prefer_redirects=False, + win_private_assemblies=False, + cipher=block_cipher, + noarchive=False, +) + +pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher) + +exe = EXE( + pyz, + a.scripts, + [], + exclude_binaries=True, + name="voice-confirmation-client", + debug=False, + bootloader_ignore_signals=False, + strip=False, + upx=False, + console=False, + disable_windowed_traceback=False, + argv_emulation=False, + target_arch=None, + codesign_identity=None, + entitlements_file=None, +) + +coll = COLLECT( + exe, + a.binaries, + a.zipfiles, + a.datas, + strip=False, + upx=False, + upx_exclude=[], + name="voice-confirmation-client", +) diff --git a/voice_confirmation_client/README.md b/voice_confirmation_client/README.md new file mode 100644 index 0000000..a319638 --- /dev/null +++ b/voice_confirmation_client/README.md @@ -0,0 +1,80 @@ +# 手术室耗材语音确认客户端(桌面版) + +独立桌面程序:按可配置间隔(默认 **5 秒**)轮询 `GET /client/surgeries/{surgery_id}/pending-confirmation`,播放服务端返回的 **MP3 话术**,录制医生麦克风为 **16 kHz 单声道 WAV**,并调用 `POST .../pending-confirmation/{confirmation_id}/resolve`(`multipart` 字段名 `audio`)。协议与 [docs/客户端手术通信接口说明.md](../docs/客户端手术通信接口说明.md) 一致。 + +## 环境 + +- Python **3.13+**(与主项目一致) +- 安装可选依赖组 **`voice-client`**(PySide6、httpx、numpy、sounddevice) + +```bash +cd /path/to/operation-room-monitor-server +uv sync --group voice-client +``` + +## 运行(开发态) + +未配置项目 `build-system` 时,`uv` 可能不会注册 `voice-confirmation-client` 命令,推荐: + +```bash +./start_voice_confirmation_client.sh +``` + +或在仓库根目录: + +```bash +uv run --group voice-client python -m 
voice_confirmation_client +``` + +Windows(仓库根目录): + +```bat +start_voice_confirmation_client.bat +``` + +若 entry point 已可用,也可: + +```bash +uv run --group voice-client voice-confirmation-client +``` + +在界面中填写 **服务端 Base URL**、**6 位手术号**,点击 **开始监控**。 + +## 音频说明 + +- **播放 MP3**:优先使用本机 `ffplay`(ffmpeg),其次 macOS 使用 `afplay`;可将 `ffplay` 放到 `voice_confirmation_client/bin/`(与包同级目录下的 `bin/`)以便离线环境使用。 +- **录音**:默认使用 **sounddevice** 录制并重采样为 16 kHz 单声道 WAV(与浏览器 Demo 一致)。可选勾选 **优先使用 ffmpeg 录音**(依赖本机 ffmpeg 及可用的设备参数;Windows 默认设备名可能需按现场调整,见 `voice_confirmation_client/core/record.py` 中 `default_ffmpeg_input_args`)。 + +## 打包(PyInstaller) + +在 **目标操作系统** 上构建(不要交叉编译 Qt 桌面程序)。 + +```bash +uv sync --group voice-client-build +uv run --group voice-client-build pyinstaller voice_client.spec --noconfirm +# 或 +uv run --group voice-client-build python scripts/build_voice_client.py +``` + +**Windows 一键打包(仓库根目录)**:双击或在 `cmd` 中执行 `build_voice_confirmation_client.bat`;需要干净构建时加参数 `--clean`(会先删除 `build/`、`dist/`)。 + +产物目录:`dist/voice-confirmation-client/`(目录分发,内含可执行文件)。Windows 下可执行文件为 `voice-confirmation-client.exe`。 + +**说明**: + +- 体积较大(含 PySide6);杀毒软件可能对 PyInstaller 打包的 exe 误报,可向医院 IT 申请加白。 +- **macOS**:未签名/未公证的 `.app` 可能需在「隐私与安全性」中手动允许;正式发布需 Apple 开发者签名与公证。 +- **可选**:将 `ffmpeg`/`ffplay` 二进制放入打包目录下的 `voice_confirmation_bin/`,程序会优先使用(需在 spec 中增加 `datas` 将该目录打入包内,或手动复制到分发目录)。 + +## 术间排查 + +1. **网络**:客户端机器能访问监控服务 HTTP/HTTPS 端口(默认文档为 `38080`)。 +2. **麦克风**:在「输入设备」中选择正确设备;无列表时检查系统隐私权限(麦克风)。 +3. **无待确认**:轮询返回 404 为常态;可关闭「隐藏 404 轮询日志」观察请求节奏。 +4. 
**解析失败**:使用 **重试本轮** 重新播放 + 录音 + 上传;或使用 **仅重播话术** 听清提示。 + +## 与浏览器 Demo 的差异 + +- 浏览器 Demo(`scripts/demo_client/`)默认 **10 秒** 轮询;本客户端默认 **5 秒**,可在界面修改。 +- 本客户端无「开始/结束手术」按钮;手术需由既有流程或他端调用 `POST /client/surgeries/start` 启动。 + diff --git a/voice_confirmation_client/__init__.py b/voice_confirmation_client/__init__.py new file mode 100644 index 0000000..bf0b1eb --- /dev/null +++ b/voice_confirmation_client/__init__.py @@ -0,0 +1,3 @@ +"""Desktop voice confirmation client for OR monitor API (pending-confirmation loop).""" + +__version__ = "0.1.0" diff --git a/voice_confirmation_client/__main__.py b/voice_confirmation_client/__main__.py new file mode 100644 index 0000000..0a133fc --- /dev/null +++ b/voice_confirmation_client/__main__.py @@ -0,0 +1,20 @@ +"""Entry: `python -m voice_confirmation_client` or `voice-confirmation-client`.""" + +from __future__ import annotations + +import sys + + +def main() -> None: + from PySide6.QtWidgets import QApplication + + from voice_confirmation_client.gui.main_window import MainWindow + + app = QApplication(sys.argv) + win = MainWindow() + win.show() + raise SystemExit(app.exec()) + + +if __name__ == "__main__": + main() diff --git a/voice_confirmation_client/core/__init__.py b/voice_confirmation_client/core/__init__.py new file mode 100644 index 0000000..a31dd22 --- /dev/null +++ b/voice_confirmation_client/core/__init__.py @@ -0,0 +1,3 @@ +from voice_confirmation_client.core.monitor_worker import MonitorWorker + +__all__ = ["MonitorWorker"] diff --git a/voice_confirmation_client/core/api.py b/voice_confirmation_client/core/api.py new file mode 100644 index 0000000..1906747 --- /dev/null +++ b/voice_confirmation_client/core/api.py @@ -0,0 +1,87 @@ +"""HTTP client for pending-confirmation and resolve endpoints.""" + +from __future__ import annotations + +import json +from dataclasses import dataclass +from typing import Any +from urllib.parse import quote, urljoin + +import httpx + + +@dataclass +class PendingConfirmationPayload: + 
surgery_id: str + confirmation_id: str + prompt_text: str + prompt_audio_mp3_base64: str + options: list[dict[str, Any]] + model_top1_label: str + model_top1_confidence: float + created_at: str + raw: dict[str, Any] + + +class ConfirmationApiClient: + def __init__(self, base_url: str, timeout: float = 60.0) -> None: + self._base = base_url.rstrip("/") + "/" + self._timeout = timeout + self._client = httpx.Client(timeout=timeout) + + @property + def base_url_normalized(self) -> str: + return self._base + + def close(self) -> None: + self._client.close() + + def _url(self, path: str) -> str: + return urljoin(self._base, path.lstrip("/")) + + def get_pending(self, surgery_id: str) -> tuple[int, dict[str, Any] | str]: + url = self._url(f"client/surgeries/{surgery_id}/pending-confirmation") + r = self._client.get(url) + text = r.text + if not text: + return r.status_code, {} + try: + body: dict[str, Any] | str = json.loads(text) + except json.JSONDecodeError: + body = text + return r.status_code, body + + def parse_pending(self, body: dict[str, Any]) -> PendingConfirmationPayload: + return PendingConfirmationPayload( + surgery_id=str(body.get("surgery_id", "")), + confirmation_id=str(body["confirmation_id"]), + prompt_text=str(body.get("prompt_text", "")), + prompt_audio_mp3_base64=str(body.get("prompt_audio_mp3_base64", "")), + options=list(body.get("options") or []), + model_top1_label=str(body.get("model_top1_label", "")), + model_top1_confidence=float(body.get("model_top1_confidence", 0.0)), + created_at=str(body.get("created_at", "")), + raw=body, + ) + + def post_resolve( + self, + surgery_id: str, + confirmation_id: str, + wav_bytes: bytes, + filename: str = "voice.wav", + ) -> tuple[int, dict[str, Any] | str]: + cid_enc = quote(confirmation_id, safe="") + url = self._url( + f"client/surgeries/{surgery_id}/pending-confirmation/{cid_enc}/resolve" + ) + files = {"audio": (filename, wav_bytes, "audio/wav")} + r = self._client.post(url, files=files) + text = r.text + 
if not text: + return r.status_code, {} + try: + body: dict[str, Any] | str = json.loads(text) + except json.JSONDecodeError: + body = text + return r.status_code, body diff --git a/voice_confirmation_client/core/monitor_worker.py b/voice_confirmation_client/core/monitor_worker.py new file mode 100644 index 0000000..0583a01 --- /dev/null +++ b/voice_confirmation_client/core/monitor_worker.py @@ -0,0 +1,347 @@ +"""Background polling + play + record + resolve (threaded, Qt-free).""" + +from __future__ import annotations + +import re +import threading +import time +from collections.abc import Callable +from dataclasses import dataclass, field +from typing import Any + +from voice_confirmation_client.core.api import ConfirmationApiClient +from voice_confirmation_client.core.playback import play_mp3_from_base64 +from voice_confirmation_client.core.record import record_wav_16k_mono + + +@dataclass +class MonitorSettings: + base_url: str = "http://127.0.0.1:38080" + surgery_id: str = "" + interval_sec: float = 5.0 + record_seconds: float = 8.0 + dry_run: bool = False + hide_404_logs: bool = True + prefer_ffmpeg_record: bool = False + sounddevice_device: int | str | None = None + + +@dataclass +class _MutableState: + generation: int = 0 + busy: bool = False + spoken_cid: str | None = None + failed_resolve_cid: str | None = None + force_retry: bool = False + last_payload: dict[str, Any] | None = None + + +class MonitorWorker: + """Polls pending-confirmation; on new item plays MP3, records WAV, POSTs resolve.""" + + def __init__( + self, + *, + on_log: Callable[[str], None] | None = None, + on_state: Callable[[str], None] | None = None, + on_pending: Callable[[dict[str, Any] | None], None] | None = None, + ) -> None: + self._on_log = on_log + self._on_state = on_state + self._on_pending = on_pending + self._settings = MonitorSettings() + self._settings_lock = threading.Lock() + self._state = _MutableState() + self._state_lock = threading.Lock() + self._stop = 
threading.Event() + self._wake = threading.Event() + self._monitoring = threading.Event() + self._thread: threading.Thread | None = None + self._api: ConfirmationApiClient | None = None + self._api_base: str | None = None + self._api_lock = threading.Lock() + + def set_settings(self, **kwargs: Any) -> None: + with self._settings_lock: + old_sid = self._settings.surgery_id + for k, v in kwargs.items(): + if hasattr(self._settings, k): + setattr(self._settings, k, v) + sid_changed = ( + "surgery_id" in kwargs and self._settings.surgery_id != old_sid + ) + with self._state_lock: + self._state.generation += 1 + if sid_changed: + self._state.spoken_cid = None + self._state.failed_resolve_cid = None + self._state.last_payload = None + self._state.force_retry = False + self._emit_pending(None) + + def start_thread(self) -> None: + if self._thread and self._thread.is_alive(): + return + self._stop.clear() + self._thread = threading.Thread(target=self._run, name="VoiceMonitor", daemon=True) + self._thread.start() + + def stop_thread(self) -> None: + self._stop.set() + self._wake.set() + if self._thread: + self._thread.join(timeout=8.0) + self._thread = None + with self._api_lock: + if self._api: + self._api.close() + self._api = None + self._api_base = None + + def set_monitoring(self, active: bool) -> None: + if active: + self._monitoring.set() + self._wake.set() + else: + self._monitoring.clear() + with self._state_lock: + self._state.generation += 1 + + def retry_failed(self) -> None: + with self._state_lock: + self._state.force_retry = True + self._wake.set() + + def replay_prompt_only(self) -> None: + """Play last pending MP3 again (GUI button); no record/upload.""" + threading.Thread(target=self._replay_prompt_job, name="ReplayPrompt", daemon=True).start() + + def _replay_prompt_job(self) -> None: + with self._state_lock: + payload = self._state.last_payload + if not payload: + self._log("没有可重播的待确认数据") + return + b64 = payload.get("prompt_audio_mp3_base64") or "" + if 
not b64: + self._log("当前任务无 MP3 数据") + return + self._emit_state("播放话术(手动重播)…") + try: + play_mp3_from_base64(str(b64)) + except Exception as e: + self._log(f"重播失败: {e}") + finally: + self._emit_state("待机") + + def _log(self, msg: str) -> None: + if self._on_log: + self._on_log(msg) + + def _emit_state(self, s: str) -> None: + if self._on_state: + self._on_state(s) + + def _emit_pending(self, p: dict[str, Any] | None) -> None: + if self._on_pending: + self._on_pending(p) + + def _get_api(self, base_url: str) -> ConfirmationApiClient: + norm = base_url.rstrip("/") + "/" + with self._api_lock: + if self._api is None or self._api_base != norm: + if self._api: + self._api.close() + self._api = ConfirmationApiClient(base_url) + self._api_base = norm + return self._api + + def _run(self) -> None: + while not self._stop.is_set(): + if not self._monitoring.is_set(): + time.sleep(0.15) + continue + + with self._settings_lock: + cfg = MonitorSettings( + base_url=self._settings.base_url, + surgery_id=self._settings.surgery_id, + interval_sec=self._settings.interval_sec, + record_seconds=self._settings.record_seconds, + dry_run=self._settings.dry_run, + hide_404_logs=self._settings.hide_404_logs, + prefer_ffmpeg_record=self._settings.prefer_ffmpeg_record, + sounddevice_device=self._settings.sounddevice_device, + ) + + if not re.fullmatch(r"\d{6}", cfg.surgery_id or ""): + self._emit_state("手术号无效(需 6 位数字)") + self._wake.wait(timeout=1.0) + self._wake.clear() + continue + + api = self._get_api(cfg.base_url) + + with self._state_lock: + if self._state.busy: + self._wake.wait(timeout=0.5) + self._wake.clear() + continue + gen_before = self._state.generation + + try: + status, body = api.get_pending(cfg.surgery_id) + except Exception as e: + self._log(f"GET pending 失败: {e}") + self._wait_interval(cfg.interval_sec) + continue + + with self._state_lock: + if self._state.generation != gen_before: + continue + if self._state.busy: + continue + + if status == 404: + with 
self._state_lock: + self._state.last_payload = None + self._state.spoken_cid = None + self._state.failed_resolve_cid = None + self._emit_pending(None) + if not cfg.hide_404_logs: + self._log("暂无待确认") + self._emit_state("轮询中(无待确认)") + self._wait_interval(cfg.interval_sec) + continue + + if status != 200 or not isinstance(body, dict): + self._log(f"GET pending 异常 HTTP {status}: {body}") + self._wait_interval(cfg.interval_sec) + continue + + cid = str(body.get("confirmation_id") or "") + if not cid: + self._wait_interval(cfg.interval_sec) + continue + + with self._state_lock: + self._state.last_payload = body + failed = self._state.failed_resolve_cid + force = self._state.force_retry + spoken = self._state.spoken_cid + + if failed is not None and failed != cid: + self._state.failed_resolve_cid = None + self._state.force_retry = False + failed = None + + if failed == cid and not force: + self._emit_pending(body) + self._wait_interval(cfg.interval_sec) + continue + + if spoken == cid and failed is None and not force: + # Already completed pipeline for this cid without failure; server still returns same id? 
+ self._emit_pending(body) + self._wait_interval(cfg.interval_sec) + continue + + self._state.force_retry = False + self._state.busy = True + self._state.spoken_cid = cid + + self._emit_pending(body) + + try: + self._pipeline_play_record_resolve(cfg, api, body, cid) + finally: + with self._state_lock: + self._state.busy = False + + self._wake.clear() + self._wait_interval(cfg.interval_sec) + + def _wait_interval(self, interval_sec: float) -> None: + self._wake.wait(timeout=max(0.5, interval_sec)) + self._wake.clear() + + def _pipeline_play_record_resolve( + self, + cfg: MonitorSettings, + api: ConfirmationApiClient, + body: dict[str, Any], + cid: str, + ) -> None: + gen_lock = self._state_lock + with gen_lock: + gen_run = self._state.generation + + try: + self._emit_state("播放话术…") + play_mp3_from_base64(str(body.get("prompt_audio_mp3_base64") or "")) + except Exception as e: + self._log(f"播放失败: {e}") + with gen_lock: + self._state.failed_resolve_cid = cid + self._emit_state("播放失败(可重试)") + return + + with gen_lock: + if self._state.generation != gen_run: + return + + try: + self._emit_state("录音中…") + wav = record_wav_16k_mono( + cfg.record_seconds, + device=cfg.sounddevice_device, + prefer_ffmpeg=cfg.prefer_ffmpeg_record, + ) + except Exception as e: + self._log(f"录音失败: {e}") + with gen_lock: + self._state.failed_resolve_cid = cid + self._emit_state("录音失败(可重试)") + return + + with gen_lock: + if self._state.generation != gen_run: + return + + if cfg.dry_run: + self._log(f"[dry-run] 已录音 {len(wav)} 字节,跳过上传") + with gen_lock: + self._state.failed_resolve_cid = None + self._state.spoken_cid = None + self._state.generation += 1 + self._emit_state("待机(dry-run)") + return + + try: + self._emit_state("上传识别…") + st, res = api.post_resolve(cfg.surgery_id, cid, wav) + except Exception as e: + self._log(f"POST resolve 失败: {e}") + with gen_lock: + self._state.failed_resolve_cid = cid + self._emit_state("上传失败(可重试)") + return + + if st == 200 and isinstance(res, dict) and 
res.get("status") == "accepted": + self._log( + f"已确认: {res.get('message', '')} " + f"(resolved_label={res.get('resolved_label')!r})" + ) + with gen_lock: + self._state.failed_resolve_cid = None + self._state.spoken_cid = None + self._state.last_payload = None + self._state.generation += 1 + self._emit_pending(None) + self._emit_state("待机") + return + + self._log(f"resolve 未接受 HTTP {st}: {res}") + with gen_lock: + self._state.failed_resolve_cid = cid + self._emit_state("解析/上传被拒(可重试)") diff --git a/voice_confirmation_client/core/paths.py b/voice_confirmation_client/core/paths.py new file mode 100644 index 0000000..134e0a3 --- /dev/null +++ b/voice_confirmation_client/core/paths.py @@ -0,0 +1,47 @@ +"""Resolve bundled helper binaries (ffplay/ffmpeg) inside the package dir or PyInstaller extract dir.""" + +from __future__ import annotations + +import sys +from pathlib import Path + + +def package_root() -> Path: + """The `voice_confirmation_client` package directory itself (parent of `core/`).""" + return Path(__file__).resolve().parent.parent + + +def frozen_base() -> Path | None: + """PyInstaller onefile/onedir: sys._MEIPASS or executable dir.""" + if getattr(sys, "frozen", False): + meipass = getattr(sys, "_MEIPASS", None) + if meipass: + return Path(meipass) + return Path(sys.executable).resolve().parent + return None + + +def bin_dir() -> Path: + """`voice_confirmation_bin/` under the frozen base if it exists, else `bin/` inside the package dir.""" + fb = frozen_base() + if fb is not None: + d = fb / "voice_confirmation_bin" + if d.is_dir(): + return d + return package_root() / "bin" + + +def find_ffplay() -> Path | None: + for name in ("ffplay", "ffplay.exe"): + p = bin_dir() / name + if p.is_file(): + return p + return None + + +def find_ffmpeg() -> Path | None: + for name in ("ffmpeg", "ffmpeg.exe"): + p = bin_dir() / name + if p.is_file(): + return p + return None diff --git a/voice_confirmation_client/core/playback.py b/voice_confirmation_client/core/playback.py new file mode 100644 index 0000000..1739db1 --- 
/dev/null +++ b/voice_confirmation_client/core/playback.py @@ -0,0 +1,61 @@ +"""Play MP3 bytes via system player or bundled ffplay.""" + +from __future__ import annotations + +import base64 +import os +import shutil +import subprocess +import sys +import tempfile +from pathlib import Path + +from voice_confirmation_client.core.paths import find_ffplay + + +def play_mp3_from_base64(b64: str) -> None: + raw_b64 = "".join((b64 or "").split()) + if not raw_b64: + raise ValueError("empty prompt_audio_mp3_base64") + data = base64.b64decode(raw_b64, validate=False) + with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f: + f.write(data) + tmp = f.name + try: + _play_mp3_path(Path(tmp)) + finally: + try: + os.unlink(tmp) + except OSError: + pass + + +def _play_mp3_path(path: Path) -> None: + bundled = find_ffplay() + if bundled and bundled.is_file(): + subprocess.run( + [str(bundled), "-nodisp", "-autoexit", "-loglevel", "quiet", str(path)], + check=True, + timeout=600, + ) + return + ffplay = shutil.which("ffplay") + if ffplay: + subprocess.run( + [ffplay, "-nodisp", "-autoexit", "-loglevel", "quiet", str(path)], + check=True, + timeout=600, + ) + return + if sys.platform == "darwin": + subprocess.run(["afplay", str(path)], check=True, timeout=600) + return + if os.name == "nt": + os.startfile(str(path)) # type: ignore[attr-defined] + import time + + time.sleep(5) + return + raise RuntimeError( + "No MP3 player found. Install ffmpeg (ffplay) or run on macOS with afplay." 
+ ) diff --git a/voice_confirmation_client/core/record.py b/voice_confirmation_client/core/record.py new file mode 100644 index 0000000..dc70a7a --- /dev/null +++ b/voice_confirmation_client/core/record.py @@ -0,0 +1,94 @@ +"""Record microphone to 16 kHz mono WAV (sounddevice or ffmpeg).""" + +from __future__ import annotations + +import io +import subprocess +import sys +import tempfile +import wave +from pathlib import Path + +import numpy as np + +from voice_confirmation_client.core.paths import find_ffmpeg + + +def record_wav_16k_mono( + duration_sec: float, + *, + device: int | str | None = None, + prefer_ffmpeg: bool = False, + ffmpeg_input_args: list[str] | None = None, +) -> bytes: + """Return WAV file bytes (16-bit PCM, 16 kHz, mono).""" + if prefer_ffmpeg: + bundled = find_ffmpeg() + ffmpeg_bin = str(bundled) if bundled and bundled.is_file() else shutil_which_ffmpeg() + if ffmpeg_bin: + return _record_ffmpeg(ffmpeg_bin, duration_sec, ffmpeg_input_args) + return _record_sounddevice(duration_sec, device=device) + + +def shutil_which_ffmpeg() -> str | None: + import shutil + + return shutil.which("ffmpeg") + + +def _record_sounddevice(duration_sec: float, device: int | str | None) -> bytes: + import sounddevice as sd + + samplerate = 16000 + frames = int(duration_sec * samplerate) + kwargs: dict = {"samplerate": samplerate, "channels": 1, "dtype": "float32"} + if device is not None and device != "": + kwargs["device"] = device + recording = sd.rec(frames, **kwargs) + sd.wait() + mono = np.clip(recording.reshape(-1), -1.0, 1.0) + pcm = (mono * 32767.0).astype(np.int16) + buf = io.BytesIO() + with wave.open(buf, "wb") as wf: + wf.setnchannels(1) + wf.setsampwidth(2) + wf.setframerate(samplerate) + wf.writeframes(pcm.tobytes()) + return buf.getvalue() + + +def default_ffmpeg_input_args() -> list[str]: + if sys.platform == "darwin": + return ["-f", "avfoundation", "-i", ":0"] + if sys.platform == "win32": + return ["-f", "dshow", "-i", "audio=Microphone"] + 
return ["-f", "alsa", "-i", "default"] + + +def _record_ffmpeg( + ffmpeg_bin: str, duration_sec: float, ffmpeg_input_args: list[str] | None +) -> bytes: + input_args = ffmpeg_input_args if ffmpeg_input_args else default_ffmpeg_input_args() + with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp: + out = tmp.name + try: + cmd = [ + ffmpeg_bin, + "-y", + "-loglevel", + "error", + *input_args, + "-t", + str(duration_sec), + "-ar", + "16000", + "-ac", + "1", + "-sample_fmt", + "s16", + out, + ] + subprocess.run(cmd, check=True, timeout=int(duration_sec) + 45) + return Path(out).read_bytes() + finally: + Path(out).unlink(missing_ok=True) diff --git a/voice_confirmation_client/gui/__init__.py b/voice_confirmation_client/gui/__init__.py new file mode 100644 index 0000000..97c1277 --- /dev/null +++ b/voice_confirmation_client/gui/__init__.py @@ -0,0 +1 @@ +"""PySide6 desktop GUI.""" diff --git a/voice_confirmation_client/gui/main_window.py b/voice_confirmation_client/gui/main_window.py new file mode 100644 index 0000000..b9392ec --- /dev/null +++ b/voice_confirmation_client/gui/main_window.py @@ -0,0 +1,198 @@ +"""Main PySide6 window for the voice confirmation client.""" + +from __future__ import annotations + +import json +from datetime import datetime +from typing import Any + +from PySide6.QtCore import Qt, Signal, QObject +from PySide6.QtGui import QCloseEvent +from PySide6.QtWidgets import ( + QCheckBox, + QComboBox, + QDoubleSpinBox, + QFormLayout, + QGroupBox, + QHBoxLayout, + QLabel, + QLineEdit, + QMainWindow, + QMessageBox, + QPushButton, + QPlainTextEdit, + QSplitter, + QVBoxLayout, + QWidget, +) + +from voice_confirmation_client.core.monitor_worker import MonitorWorker + + +class _Bridge(QObject): + log_line = Signal(str) + state_text = Signal(str) + pending_payload = Signal(object) + + +class MainWindow(QMainWindow): + def __init__(self) -> None: + super().__init__() + self.setWindowTitle("手术室耗材语音确认客户端") + self.resize(920, 640) + + self._bridge 
= _Bridge() + self._bridge.log_line.connect(self._append_log) + self._bridge.pending_payload.connect(self._show_pending) + + self._worker = MonitorWorker( + on_log=lambda m: self._bridge.log_line.emit(m), + on_state=lambda s: self._bridge.state_text.emit(s), + on_pending=lambda p: self._bridge.pending_payload.emit(p), + ) + self._worker.start_thread() + + central = QWidget() + self.setCentralWidget(central) + root = QVBoxLayout(central) + + form_box = QGroupBox("连接与手术") + form = QFormLayout(form_box) + self._base_url = QLineEdit("http://127.0.0.1:38080") + self._surgery_id = QLineEdit("") + self._surgery_id.setPlaceholderText("6 位数字,如 123456") + self._interval = QDoubleSpinBox() + self._interval.setRange(1.0, 120.0) + self._interval.setValue(5.0) + self._interval.setSuffix(" s") + self._record_sec = QDoubleSpinBox() + self._record_sec.setRange(2.0, 60.0) + self._record_sec.setValue(8.0) + self._record_sec.setSuffix(" s") + form.addRow("服务端 Base URL", self._base_url) + form.addRow("手术号 surgery_id", self._surgery_id) + form.addRow("轮询间隔", self._interval) + form.addRow("录音时长", self._record_sec) + root.addWidget(form_box) + + adv = QGroupBox("音频 / 调试") + adv_l = QFormLayout(adv) + self._device_combo = QComboBox() + self._device_combo.addItem("系统默认麦克风", None) + self._populate_input_devices() + self._prefer_ffmpeg = QCheckBox("优先使用 ffmpeg 录音(需本机 ffmpeg 且设备参数可用)") + self._hide_404 = QCheckBox("隐藏 404 轮询日志(推荐)") + self._hide_404.setChecked(True) + self._dry_run = QCheckBox("Dry-run:录音后不上传") + adv_l.addRow("输入设备", self._device_combo) + adv_l.addRow(self._prefer_ffmpeg) + adv_l.addRow(self._hide_404) + adv_l.addRow(self._dry_run) + root.addWidget(adv) + + btn_row = QHBoxLayout() + self._btn_start = QPushButton("开始监控") + self._btn_stop = QPushButton("停止监控") + self._btn_stop.setEnabled(False) + self._btn_retry = QPushButton("重试本轮(播放+录音+上传)") + self._btn_replay = QPushButton("仅重播话术") + btn_row.addWidget(self._btn_start) + btn_row.addWidget(self._btn_stop) + 
btn_row.addWidget(self._btn_retry) + btn_row.addWidget(self._btn_replay) + btn_row.addStretch() + root.addLayout(btn_row) + + self._status_label = QLabel("待机") + root.addWidget(self._status_label) + self._bridge.state_text.connect(self._status_label.setText) + + split = QSplitter(Qt.Orientation.Horizontal) + self._pending_view = QPlainTextEdit() + self._pending_view.setReadOnly(True) + self._pending_view.setPlaceholderText("待确认内容将显示在这里…") + self._log = QPlainTextEdit() + self._log.setReadOnly(True) + self._log.setPlaceholderText("日志…") + split.addWidget(self._pending_view) + split.addWidget(self._log) + split.setSizes([360, 520]) + root.addWidget(split, stretch=1) + + self._btn_start.clicked.connect(self._start_monitoring) + self._btn_stop.clicked.connect(self._stop_monitoring) + self._btn_retry.clicked.connect(self._worker.retry_failed) + self._btn_replay.clicked.connect(self._worker.replay_prompt_only) + + self._apply_settings_silent() + + def _show_pending(self, payload: object) -> None: + if payload is None: + self._pending_view.clear() + return + if not isinstance(payload, dict): + self._pending_view.setPlainText(str(payload)) + return + try: + text = json.dumps(payload, ensure_ascii=False, indent=2) + except (TypeError, ValueError): + text = str(payload) + self._pending_view.setPlainText(text) + + def _populate_input_devices(self) -> None: + try: + import sounddevice as sd + except ImportError: + return + try: + devices = sd.query_devices() + hostapis = sd.query_hostapis() + except Exception: + return + for i, d in enumerate(devices): + if d.get("max_input_channels", 0) <= 0: + continue + ha = hostapis[d["hostapi"]]["name"] if d.get("hostapi") is not None else "" + label = f"{i}: {d.get('name', '')} ({ha})" + self._device_combo.addItem(label, i) + + def _apply_settings_silent(self) -> None: + dev_data = self._device_combo.currentData() + self._worker.set_settings( + base_url=self._base_url.text().strip(), + surgery_id=self._surgery_id.text().strip(), + 
interval_sec=float(self._interval.value()), + record_seconds=float(self._record_sec.value()), + dry_run=self._dry_run.isChecked(), + hide_404_logs=self._hide_404.isChecked(), + prefer_ffmpeg_record=self._prefer_ffmpeg.isChecked(), + sounddevice_device=dev_data, + ) + + def _start_monitoring(self) -> None: + sid = self._surgery_id.text().strip() + if len(sid) != 6 or not sid.isdigit(): + QMessageBox.warning(self, "校验失败", "手术号必须为 6 位数字。") + return + self._apply_settings_silent() + self._worker.set_monitoring(True) + self._btn_start.setEnabled(False) + self._btn_stop.setEnabled(True) + self._append_log("—— 开始监控 ——") + + def _stop_monitoring(self) -> None: + self._worker.set_monitoring(False) + self._btn_start.setEnabled(True) + self._btn_stop.setEnabled(False) + self._append_log("—— 已停止监控 ——") + self._status_label.setText("已停止") + + def _append_log(self, line: str) -> None: + ts = datetime.now().strftime("%H:%M:%S") + self._log.appendPlainText(f"[{ts}] {line}") + sb = self._log.verticalScrollBar() + sb.setValue(sb.maximum()) + + def closeEvent(self, event: QCloseEvent) -> None: + self._worker.stop_thread() + event.accept()