feat: surgery pipeline API, video inference, voice confirm, and tests
- Add FastAPI routes for surgery start/end, results, pending confirmation (WAV upload), and health checks.
- Implement RTSP/Hikvision capture, consumable classification, session manager, MinIO/Baidu voice resolution, and DB persistence.
- Add documentation (client API, video backends, staging checklist) and sample camera/RTSP config.
- Add pytest suite (API contract, session manager, voice, repositories, pipeline persistence) and httpx dev dependency.
- Replace deprecated HTTP_422_UNPROCESSABLE_ENTITY with HTTP_422_UNPROCESSABLE_CONTENT.
- Fix SurgeryPipeline DB reads to use an explicit transaction with autobegin disabled.
Made-with: Cursor
2026-04-21 18:33:54 +08:00
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
|
|
import asyncio
|
|
|
|
|
|
import os
|
|
|
|
|
|
import platform
|
|
|
|
|
|
import re
|
|
|
|
|
|
import shutil
|
|
|
|
|
|
import subprocess
|
|
|
|
|
|
import tempfile
|
|
|
|
|
|
from dataclasses import dataclass
|
|
|
|
|
|
|
|
|
|
|
|
from fastapi.concurrency import run_in_threadpool
|
|
|
|
|
|
from loguru import logger
|
|
|
|
|
|
|
|
|
|
|
|
from app.config import Settings
|
|
|
|
|
|
from app.services.baidu_speech import BaiduSpeechNotConfiguredError, BaiduSpeechService
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Numeric value of every single Chinese numeral character accepted in spoken
# ordinals. "两" is an alternative spoken form of two and maps to the same
# value as "二".
_CN_DIGITS = {
    "零": 0, "一": 1, "二": 2, "两": 2,
    "三": 3, "四": 4, "五": 5, "六": 6,
    "七": 7, "八": 8, "九": 9, "十": 10,
}
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-04-23 14:24:20 +08:00
|
|
|
|
def _parse_ordinal_index_1based(token: str) -> int | None:
    """Parse an ordinal token into a 1-based index in the range 1..99.

    Accepted forms:
      * decimal digits, e.g. "1", "03", "12";
      * a single Chinese numeral worth 1..9 ("一" .. "九", including "两");
      * "十" (10), "十X" (11-19), "X十" (20, 30, ... 90) and "X十Y" (21-99),
        where X and Y are numerals worth 1..9.

    Returns:
        The parsed value, or None for empty input, for values outside 1..99
        and for anything that is not a well-formed numeral (for example
        "十十一", "二十零" or "²").
    """
    t = (token or "").strip()
    if not t:
        return None
    # isdecimal() only accepts characters int() can parse; isdigit() would also
    # admit superscripts such as "²", which make int() raise ValueError.
    if t.isdecimal():
        v = int(t)
        return v if 1 <= v <= 99 else None
    if t == "十":
        return 10

    def unit(ch: str) -> int | None:
        """Return the value of a numeral character worth 1..9, else None."""
        value = _CN_DIGITS.get(ch)
        return value if value is not None and 1 <= value <= 9 else None

    if len(t) == 1:
        return unit(t)
    if len(t) == 2:
        if t[0] == "十":  # 十X -> 11..19
            ones = unit(t[1])
            return None if ones is None else 10 + ones
        if t[1] == "十":  # X十 -> 20, 30, ... 90
            tens = unit(t[0])
            return None if tens is None else tens * 10
        return None
    if len(t) == 3 and t[1] == "十":  # X十Y -> 21..99
        tens, ones = unit(t[0]), unit(t[2])
        if tens is not None and ones is not None:
            return tens * 10 + ones
    return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _label_from_ordinal_1based(n1: int, options: list[str]) -> str | None:
    """Return the option at 1-based position ``n1``, or None when out of range."""
    position = n1 - 1
    in_range = n1 >= 1 and position < len(options)
    return options[position] if in_range else None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _choose_from_ordinal_text(raw: str, options: list[str]) -> str | None:
    """Resolve an ordinal phrase such as "第一个", "第2个", "选3" or "1号".

    The patterns are tried in a fixed order and the first one that yields an
    in-range option wins. Returns None when this function recognises nothing.
    """
    if not options:
        return None

    def label_for(number: int | None) -> str | None:
        """Map a parsed 1-based number to its option, tolerating None."""
        if number is None:
            return None
        return _label_from_ordinal_1based(number, options)

    # 1) Explicit "第N" with an optional measure word; trailing noise such as
    #    "第一个对" is tolerated because the pattern is not anchored.
    for hit in re.finditer(
        r"第([0-9]+|[一二两三四五六七八九十百]+)(?:个|项|款|的|种|名)?", raw
    ):
        label = label_for(_parse_ordinal_index_1based(hit.group(1)))
        if label is not None:
            return label

    # "选3" / "要 2 号" / "就1个" at the start or after whitespace/punctuation.
    picked = re.search(
        r"(?:^|[\s,,;;::])(?:选|要|就)\s*0*([1-9]\d?)(?:\s*号|个|项|款)?",
        raw,
    )
    if picked:
        label = label_for(int(picked.group(1)))
        if label is not None:
            return label

    # "option 2" / "选项:2", matched on the space-free, lower-cased text.
    compact = raw.replace(" ", "")
    option_ref = re.search(
        r"(?:option|选项)\s*[::]?\s*(\d+)", compact.lower(), re.IGNORECASE
    )
    if option_ref:
        label = label_for(int(option_ref.group(1)))
        if label is not None:
            return label

    # 2) A lone numeral character as the whole utterance, only honoured when
    #    there are few candidates.
    if len(options) <= 3:
        lone = re.match(r"^([一二两三四])$", compact)
        if lone:
            numeral = lone.group(1)
            if numeral in _CN_DIGITS and numeral not in ("零", "十"):
                label = label_for(int(_CN_DIGITS[numeral]))
                if label is not None:
                    return label

    # Utterance ending in "N号".
    # NOTE(review): the source lost its indentation; this check is assumed to
    # apply regardless of the number of options - confirm.
    tail = re.search(r"([0-9一二两三四五六七八九十]+)\s*号$", compact)
    if tail:
        label = label_for(_parse_ordinal_index_1based(tail.group(1)))
        if label is not None:
            return label

    return None
|
|
|
|
|
|
|
|
|
|
|
|
|
feat: surgery pipeline API, video inference, voice confirm, and tests
- Add FastAPI routes for surgery start/end, results, pending confirmation (WAV upload), and health checks.
- Implement RTSP/Hikvision capture, consumable classification, session manager, MinIO/Baidu voice resolution, and DB persistence.
- Add documentation (client API, video backends, staging checklist) and sample camera/RTSP config.
- Add pytest suite (API contract, session manager, voice, repositories, pipeline persistence) and httpx dev dependency.
- Replace deprecated HTTP_422_UNPROCESSABLE_ENTITY with HTTP_422_UNPROCESSABLE_CONTENT.
- Fix SurgeryPipeline DB reads to use an explicit transaction with autobegin disabled.
Made-with: Cursor
2026-04-21 18:33:54 +08:00
|
|
|
|
def parse_voice_choice(asr_text: str, options: list[str]) -> str | None:
    """Resolve which consumable the doctor picked from recognised speech.

    Matching steps, first hit wins:
      1. An option name contained in the text. When several options match and
         one is contained in another (e.g. "纱布" inside "纱布块"), the more
         specific name is preferred; otherwise list order decides.
      2. Ordinal phrases handled by ``_choose_from_ordinal_text``
         ("第一个", "选2", "3号", ...).
      3. The first run of digits in the text, read as a 1-based index.
      4. "第N" / "N号" for an in-range N anywhere in the text, matched on
         digit boundaries so that "第12个" is never read as "第1".

    Args:
        asr_text: Raw recognised text; None or blank yields None.
        options: Candidate labels in the order they were presented.

    Returns:
        The selected label, or None when nothing could be resolved.
    """
    # Drop leading/trailing punctuation and whitespace added by the recogniser.
    raw = re.sub(
        r"^[。,、;:!?\s]+|[。,、;:!?\s]+$",
        "",
        (asr_text or "").strip(),
    )
    if not raw:
        return None

    # 1) Name match. A short name that is merely part of a longer matched name
    #    must not shadow it, so such candidates are skipped.
    named = [opt for opt in options if opt and opt in raw]
    for opt in named:
        if not any(opt != other and opt in other for other in named):
            return opt

    # 2) Ordinal phrases.
    chosen_ord = _choose_from_ordinal_text(raw, options)
    if chosen_ord is not None:
        return chosen_ord

    # 3) First bare number in the text, taken as a 1-based position.
    m_num = re.search(r"(\d+)", raw)
    if m_num:
        idx = int(m_num.group(1)) - 1
        if 0 <= idx < len(options):
            return options[idx]

    # 4) "第N" / "N号" later in the text. Look-arounds keep N from matching as
    #    a prefix or suffix of a longer number.
    normalized = raw.replace(" ", "").lower()
    for n, opt in enumerate(options, start=1):
        if not opt:
            continue
        if re.search(rf"第{n}(?!\d)|(?<!\d){n}号", normalized):
            return opt

    return None
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-04-22 16:31:12 +08:00
|
|
|
|
def match_voice_choice_against_candidates(
    asr_text: str, candidates: list[str]
) -> str | None:
    """Match the recognised text against the surgery's candidate consumable list.

    Meant as a fallback when the text did not match the top-k options that were
    shown for the pending confirmation. Candidate names are tried as substrings
    of the text, longest first, so that a short name shared by several
    consumables does not win over a longer full name.

    Returns the (whitespace-stripped) candidate name, or None when the text is
    blank or no candidate occurs in it.
    """
    text = (asr_text or "").strip()
    if not text:
        return None
    names = [c.strip() for c in candidates if c and str(c).strip()]
    longest_first = sorted(names, key=len, reverse=True)
    return next((name for name in longest_first if name in text), None)
|
|
|
|
|
|
|
|
|
|
|
|
|
feat: surgery pipeline API, video inference, voice confirm, and tests
- Add FastAPI routes for surgery start/end, results, pending confirmation (WAV upload), and health checks.
- Implement RTSP/Hikvision capture, consumable classification, session manager, MinIO/Baidu voice resolution, and DB persistence.
- Add documentation (client API, video backends, staging checklist) and sample camera/RTSP config.
- Add pytest suite (API contract, session manager, voice, repositories, pipeline persistence) and httpx dev dependency.
- Replace deprecated HTTP_422_UNPROCESSABLE_ENTITY with HTTP_422_UNPROCESSABLE_CONTENT.
- Fix SurgeryPipeline DB reads to use an explicit transaction with autobegin disabled.
Made-with: Cursor
2026-04-21 18:33:54 +08:00
|
|
|
|
def is_rejection_phrase(asr_text: str) -> bool:
    """Return True when the doctor explicitly rejects all candidates.

    Must be called before ``parse_voice_choice``.

    The text counts as a rejection when it contains one of the negative
    markers "不是", "没有", "否", "无" or "错". Compounds that merely contain a
    marker character but do not express rejection are masked first:
    the affirmations "没错" / "没有错" / "不错" / "无误" and the product prefix
    "无菌" (sterile), which would otherwise turn "没错" or "无菌纱布" into a
    rejection.

    Args:
        asr_text: Recognised text; None or blank yields False.
    """
    raw = (asr_text or "").strip()
    if not raw:
        return False
    non_rejections = ("没有错", "没错", "不错", "无误", "无菌")
    for phrase in non_rejections:
        # Mask with a space rather than deleting, so the neighbouring
        # characters cannot join up into a new negative marker.
        raw = raw.replace(phrase, " ")
    negatives = ("不是", "没有", "否", "无", "错")
    return any(n in raw for n in negatives)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def build_prompt_text(options: list[tuple[str, float]]) -> str:
    """Build the spoken prompt that enumerates the candidate consumables.

    Args:
        options: ``(name, confidence)`` pairs in presentation order; only the
            name is spoken, numbered from 1.

    Returns:
        The fixed question followed by one numbered sentence per option.
    """
    question = "请确认刚才使用的耗材是下面哪一项。"
    numbered = (
        f"第{rank}个,{label}。"
        for rank, (label, _score) in enumerate(options, start=1)
    )
    return question + "".join(numbered)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
class VoiceAttemptResult:
    """Outcome of a single voice-confirmation attempt."""

    # Option label resolved from the recognised speech; None when no option
    # was resolved or the attempt failed.
    chosen_label: str | None
    # Text returned by speech recognition; None when recognition did not
    # produce any text.
    asr_text: str | None
    # Short failure description; None when the attempt ran to completion.
    error: str | None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class VoiceConfirmationOrchestrator:
    """Server-side TTS prompt + ffmpeg capture + Baidu ASR + text parsing.

    All audio work is funnelled through one ``asyncio.Lock`` so that prompts
    and recordings started from this instance do not overlap. Blocking calls
    (Baidu client, ffmpeg/ffplay subprocesses) run via ``run_in_threadpool``.
    """

    def __init__(self, settings: Settings, baidu: BaiduSpeechService) -> None:
        """Store the settings and speech client and create the audio lock."""
        self._s = settings
        self._baidu = baidu
        self._lock = asyncio.Lock()

    def _ffplay_path(self) -> str | None:
        """Return the ``ffplay`` executable found on PATH, or None."""
        return shutil.which("ffplay")

    def _ffmpeg_path(self) -> str | None:
        """Return the ``ffmpeg`` executable found on PATH, or None."""
        return shutil.which("ffmpeg")

    @staticmethod
    def _discard_file(path: str) -> None:
        """Best-effort removal of a temporary file; a failed delete is ignored."""
        try:
            os.unlink(path)
        except OSError:
            pass

    def _record_pcm_ffmpeg(self, seconds: float) -> tuple[bytes | None, str | None]:
        """Record ``seconds`` of microphone audio as 16 kHz mono s16le PCM.

        Uses the ``avfoundation`` input on macOS and ``alsa`` elsewhere; the
        device comes from ``settings.voice_ffmpeg_input`` with ":0" / "default"
        as the respective fallback.

        Returns:
            ``(pcm_bytes, None)`` on success, ``(None, error_message)`` when
            ffmpeg is missing, times out or exits with a non-zero status.
        """
        ffmpeg = self._ffmpeg_path()
        if not ffmpeg:
            return None, "ffmpeg not found in PATH"

        configured_input = self._s.voice_ffmpeg_input.strip()
        if platform.system() == "Darwin":
            input_args = ["-f", "avfoundation", "-i", configured_input or ":0"]
        else:
            input_args = ["-f", "alsa", "-i", configured_input or "default"]

        cmd = [
            ffmpeg,
            "-nostdin",
            "-loglevel",
            "error",
            "-y",
            *input_args,
            "-t",
            str(seconds),
            "-ar",
            "16000",
            "-ac",
            "1",
            "-f",
            "s16le",
            "-acodec",
            "pcm_s16le",
            "pipe:1",  # raw PCM is delivered on stdout
        ]
        try:
            proc = subprocess.run(
                cmd,
                capture_output=True,
                # Allow a few seconds beyond the recording length for start-up.
                timeout=seconds + 5.0,
                check=False,
            )
        except subprocess.TimeoutExpired:
            return None, "ffmpeg record timeout"
        if proc.returncode != 0:
            err = (proc.stderr or b"").decode("utf-8", errors="replace")
            return None, f"ffmpeg failed: {err or proc.returncode}"
        return proc.stdout, None

    def _play_mp3_file(self, path: str) -> str | None:
        """Play ``path`` with ffplay and block until playback ends.

        Returns:
            None on success, otherwise a short error message.
        """
        ffplay = self._ffplay_path()
        if not ffplay:
            return "ffplay not found in PATH"
        try:
            proc = subprocess.run(
                [
                    ffplay,
                    "-nodisp",
                    "-autoexit",
                    "-loglevel",
                    "quiet",
                    path,
                ],
                capture_output=True,
                timeout=120.0,
                check=False,
            )
        except subprocess.TimeoutExpired:
            return "ffplay timeout"
        if proc.returncode != 0:
            return f"ffplay exit {proc.returncode}"
        return None

    def _synthesize_to_temp_mp3(self, text: str) -> tuple[str | None, str | None]:
        """Synthesize ``text`` with Baidu TTS into a temporary ``.mp3`` file.

        The caller owns the returned file and must delete it.

        Returns:
            ``(path, None)`` on success, ``(None, error_message)`` when the
            speech client is not configured, the service returns something
            other than audio bytes, or the file cannot be written. No
            temporary file is left behind on failure.
        """
        try:
            audio = self._baidu.synthesis(
                text,
                "zh",
                1,
                {"spd": 5, "pit": 5, "vol": 9, "per": 0},
            )
        except BaiduSpeechNotConfiguredError as exc:
            return None, str(exc)
        # Anything that is not raw audio (an error dict, None, ...) is reported
        # before a temp file is created, so nothing can leak.
        if not isinstance(audio, (bytes, bytearray, memoryview)):
            return None, f"TTS error: {audio!r}"

        path: str | None = None
        try:
            with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
                path = tmp.name
                tmp.write(audio)
        except OSError as exc:
            if path is not None:
                self._discard_file(path)
            return None, f"TTS temp file write failed: {exc}"
        return path, None

    async def speak_prompt(self, text: str) -> None:
        """Speak ``text`` via Baidu TTS + ffplay without recording.

        Used to alert the operating room when a pending confirmation is queued.
        Does nothing when the text is blank, when
        ``voice_tts_on_pending_enqueued`` or ``voice_confirmation_enabled`` is
        off, or when the Baidu client is not configured. Failures are logged,
        never raised.
        """
        if not (text or "").strip():
            return
        if not self._s.voice_tts_on_pending_enqueued:
            return
        if not self._s.voice_confirmation_enabled:
            return
        if not self._baidu.configured:
            logger.debug("speak_prompt skipped: baidu_speech not configured")
            return
        async with self._lock:
            mp3_path, err = await run_in_threadpool(self._synthesize_to_temp_mp3, text)
            if err or not mp3_path:
                logger.warning("TTS synthesis failed: {}", err)
                return
            try:
                play_err = await run_in_threadpool(self._play_mp3_file, mp3_path)
                if play_err:
                    logger.warning("TTS play failed: {}", play_err)
            finally:
                self._discard_file(mp3_path)

    async def run_confirmation(
        self,
        *,
        surgery_id: str,
        options: list[tuple[str, float]],
    ) -> VoiceAttemptResult:
        """Run one full voice confirmation: prompt, record, recognise, parse.

        Args:
            surgery_id: Identifier used for logging only.
            options: ``(label, confidence)`` candidates, in presentation order.

        Returns:
            A ``VoiceAttemptResult``. ``error`` is set when the attempt could
            not complete (feature disabled, no options, Baidu not configured,
            TTS/playback/recording/ASR failure). Otherwise ``asr_text`` holds
            the recognised text and ``chosen_label`` the parsed option, which
            may be None when the text matched nothing.
        """
        if not self._s.voice_confirmation_enabled:
            return VoiceAttemptResult(None, None, "voice_confirmation_disabled")
        if not options:
            return VoiceAttemptResult(None, None, "no_options")
        if not self._baidu.configured:
            return VoiceAttemptResult(None, None, "baidu_speech_not_configured")

        labels = [o[0] for o in options]
        prompt = build_prompt_text(options)
        logger.info("Voice confirm surgery={} prompt_len={}", surgery_id, len(prompt))

        # NOTE(review): the source lost its indentation; recording and
        # recognition are kept under the same lock as playback so that
        # attempts never overlap - confirm the intended lock scope.
        async with self._lock:
            mp3_path, err = await run_in_threadpool(self._synthesize_to_temp_mp3, prompt)
            if err or not mp3_path:
                return VoiceAttemptResult(None, None, err or "tts_failed")
            try:
                play_err = await run_in_threadpool(self._play_mp3_file, mp3_path)
                if play_err:
                    return VoiceAttemptResult(None, None, play_err)
            finally:
                self._discard_file(mp3_path)

            pcm, rec_err = await run_in_threadpool(
                self._record_pcm_ffmpeg, float(self._s.voice_record_seconds)
            )
            if rec_err or not pcm:
                return VoiceAttemptResult(None, None, rec_err or "empty_audio")

            asr_payload = await run_in_threadpool(self._baidu.asr, pcm, "pcm", 16000, None)
            if not isinstance(asr_payload, dict):
                return VoiceAttemptResult(None, None, "asr_invalid_response")
            if asr_payload.get("err_no") != 0:
                return VoiceAttemptResult(
                    None,
                    None,
                    f"asr_err_{asr_payload.get('err_no')}: {asr_payload.get('err_msg')}",
                )

            # "result" is accepted as a list (first entry is used) or a string.
            results = asr_payload.get("result")
            text: str | None = None
            if isinstance(results, list) and results:
                text = str(results[0])
            elif isinstance(results, str):
                text = results
            if not text:
                return VoiceAttemptResult(None, None, "asr_empty_text")

            chosen = parse_voice_choice(text, labels)
            return VoiceAttemptResult(chosen, text, None)
|