feat: 手术视频消耗、待确认与持久化改造
- 新增 Alembic 初始迁移、领域明细模型及归档持久化与重试链路
- 拆分视频会话注册表、分类处理、推理时间窗聚合与流处理
- 消耗日志:TSV/Markdown 含 top2/top3;item_id 优先产品编码;待确认记「待确认」行,语音确认后落正式行并更新汇总
- 待确认时内存/DB 明细为占位行,确认后替换;拒绝时移除占位
- 分类 probs 先 detach/cpu 再转 NumPy,修复 MPS/CUDA 上推理被静默跳过
- 补充集成测试、归档与设备张量等单测

Made-with: Cursor
This commit is contained in:
@@ -1,19 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import platform
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
from dataclasses import dataclass
|
||||
|
||||
from fastapi.concurrency import run_in_threadpool
|
||||
from loguru import logger
|
||||
|
||||
from app.config import Settings
|
||||
from app.services.baidu_speech import BaiduSpeechNotConfiguredError, BaiduSpeechService
|
||||
|
||||
|
||||
_CN_DIGITS = {
|
||||
@@ -200,199 +187,3 @@ def build_prompt_text(options: list[tuple[str, float]]) -> str:
|
||||
for i, (name, _conf) in enumerate(options, start=1):
|
||||
parts.append(f"第{i}个,{name}。")
|
||||
return "".join(parts)
|
||||
|
||||
|
||||
@dataclass
class VoiceAttemptResult:
    """Outcome of one voice-confirmation attempt.

    Exactly what each combination means is decided by the producer; the
    fields simply carry the parsed choice, the raw recognised text and an
    error description, any of which may be absent.
    """

    # Label selected from the offered options, or None when nothing was chosen.
    chosen_label: str | None
    # Raw text returned by speech recognition, or None when ASR never produced text.
    asr_text: str | None
    # Short machine-readable error description, or None when the attempt ran to completion.
    error: str | None
|
||||
|
||||
|
||||
class VoiceConfirmationOrchestrator:
    """Server-side TTS prompt + ffmpeg capture + Baidu ASR + text parsing.

    The blocking steps (speech synthesis, ``ffplay`` playback, ``ffmpeg``
    recording, speech recognition) run in the thread pool. An
    ``asyncio.Lock`` serialises whole prompt/record cycles so that two
    callers never use the speaker or microphone at the same time.
    """

    def __init__(self, settings: Settings, baidu: BaiduSpeechService) -> None:
        """Store the settings object and the Baidu speech client."""
        self._s = settings
        self._baidu = baidu
        # One audio interaction at a time: playback and recording share hardware.
        self._lock = asyncio.Lock()

    def _ffplay_path(self) -> str | None:
        """Return the ``ffplay`` executable found on PATH, or None."""
        return shutil.which("ffplay")

    def _ffmpeg_path(self) -> str | None:
        """Return the ``ffmpeg`` executable found on PATH, or None."""
        return shutil.which("ffmpeg")

    @staticmethod
    def _remove_quietly(path: str) -> None:
        """Delete *path*, ignoring OS errors (best-effort temp-file cleanup)."""
        try:
            os.unlink(path)
        except OSError:
            pass

    def _record_pcm_ffmpeg(self, seconds: float) -> tuple[bytes | None, str | None]:
        """Record *seconds* of audio with ffmpeg and return raw PCM bytes.

        The capture is requested as 16 kHz, mono, signed 16-bit little-endian
        samples written to ffmpeg's stdout.

        Returns:
            ``(pcm, None)`` on success, ``(None, message)`` on any failure
            (ffmpeg missing, could not be started, timed out, or exited
            with a non-zero status).
        """
        ffmpeg = self._ffmpeg_path()
        if not ffmpeg:
            return None, "ffmpeg not found in PATH"

        configured_input = self._s.voice_ffmpeg_input.strip()
        if platform.system() == "Darwin":
            input_args = ["-f", "avfoundation", "-i", configured_input or ":0"]
        else:
            # NOTE(review): every non-macOS host is treated as ALSA; Windows
            # would need a different input format — confirm target platforms.
            input_args = ["-f", "alsa", "-i", configured_input or "default"]

        cmd = [
            ffmpeg,
            "-nostdin",
            "-loglevel",
            "error",
            "-y",
            *input_args,
            "-t",
            str(seconds),
            "-ar",
            "16000",
            "-ac",
            "1",
            "-f",
            "s16le",
            "-acodec",
            "pcm_s16le",
            "pipe:1",
        ]
        try:
            proc = subprocess.run(
                cmd,
                capture_output=True,
                # Grace period on top of the capture length for device start-up.
                timeout=seconds + 5.0,
                check=False,
            )
        except subprocess.TimeoutExpired:
            return None, "ffmpeg record timeout"
        except OSError as exc:
            # The binary vanished or is not executable: report it like any
            # other ffmpeg failure instead of raising out of the worker thread.
            return None, f"ffmpeg failed: {exc}"
        if proc.returncode != 0:
            err = (proc.stderr or b"").decode("utf-8", errors="replace")
            return None, f"ffmpeg failed: {err or proc.returncode}"
        return proc.stdout, None

    def _play_mp3_file(self, path: str) -> str | None:
        """Play the audio file at *path* with ffplay, blocking until it ends.

        Returns:
            None on success, otherwise a short error message.
        """
        ffplay = self._ffplay_path()
        if not ffplay:
            return "ffplay not found in PATH"
        try:
            proc = subprocess.run(
                [ffplay, "-nodisp", "-autoexit", "-loglevel", "quiet", path],
                capture_output=True,
                timeout=120.0,
                check=False,
            )
        except subprocess.TimeoutExpired:
            return "ffplay timeout"
        except OSError as exc:
            # Same error-as-string contract as a non-zero exit status.
            return f"ffplay failed: {exc}"
        if proc.returncode != 0:
            return f"ffplay exit {proc.returncode}"
        return None

    def _play_and_remove(self, mp3_path: str) -> str | None:
        """Play *mp3_path*, then delete it whether or not playback succeeded.

        Returns:
            The result of ``_play_mp3_file``: None on success, else a message.
        """
        try:
            return self._play_mp3_file(mp3_path)
        finally:
            self._remove_quietly(mp3_path)

    def _synthesize_to_temp_mp3(self, text: str) -> tuple[str | None, str | None]:
        """Synthesize *text* with Baidu TTS into a temporary ``.mp3`` file.

        The caller owns the returned file and must delete it.

        Returns:
            ``(path, None)`` on success, ``(None, message)`` when the service
            is not configured, returns anything other than non-empty audio
            bytes, or the temp file cannot be written.
        """
        try:
            audio = self._baidu.synthesis(
                text,
                "zh",
                1,
                {"spd": 5, "pit": 5, "vol": 9, "per": 0},
            )
        except BaiduSpeechNotConfiguredError as exc:
            return None, str(exc)
        # Only non-empty bytes are playable audio. Everything else (error
        # dicts, None, ...) is rejected before a temp file is created.
        if not isinstance(audio, (bytes, bytearray)) or not audio:
            return None, f"TTS error: {audio!r}"

        tmp = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
        try:
            with tmp:
                tmp.write(audio)
        except OSError as exc:
            # delete=False means nobody else will clean this up.
            self._remove_quietly(tmp.name)
            return None, f"TTS error: {exc}"
        return tmp.name, None

    @staticmethod
    def _extract_asr_text(payload: object) -> tuple[str | None, str | None]:
        """Pull the recognised text out of an ASR response.

        Returns:
            ``(text, None)`` when the payload is a dict with ``err_no == 0``
            and a non-empty ``result`` (first list element, or the string
            itself); otherwise ``(None, error_code)``.
        """
        if not isinstance(payload, dict):
            return None, "asr_invalid_response"
        if payload.get("err_no") != 0:
            return None, f"asr_err_{payload.get('err_no')}: {payload.get('err_msg')}"
        results = payload.get("result")
        text: str | None = None
        if isinstance(results, list) and results:
            text = str(results[0])
        elif isinstance(results, str):
            text = results
        if not text:
            return None, "asr_empty_text"
        return text, None

    async def speak_prompt(self, text: str) -> None:
        """Speak *text* via Baidu TTS + ffplay without recording a reply.

        Used to alert the operating room when a pending confirmation is
        enqueued. Does nothing when the text is blank, when either
        ``voice_tts_on_pending_enqueued`` or ``voice_confirmation_enabled``
        is off, or when Baidu speech is not configured. Synthesis and
        playback failures are logged as warnings, not raised.
        """
        if not (text or "").strip():
            return
        if not self._s.voice_tts_on_pending_enqueued:
            return
        if not self._s.voice_confirmation_enabled:
            return
        if not self._baidu.configured:
            logger.debug("speak_prompt skipped: baidu_speech not configured")
            return

        async with self._lock:
            mp3_path, err = await run_in_threadpool(self._synthesize_to_temp_mp3, text)
            if err or not mp3_path:
                logger.warning("TTS synthesis failed: {}", err)
                return
            play_err = await run_in_threadpool(self._play_and_remove, mp3_path)
            if play_err:
                logger.warning("TTS play failed: {}", play_err)

    async def run_confirmation(
        self,
        *,
        surgery_id: str,
        options: list[tuple[str, float]],
    ) -> VoiceAttemptResult:
        """Run one full voice-confirmation round for *surgery_id*.

        Speaks a prompt built from *options*, records the reply, sends it to
        Baidu ASR and maps the recognised text onto one of the option labels.

        Args:
            surgery_id: Identifier used only for logging here.
            options: ``(label, confidence)`` pairs; only the labels are
                matched against the spoken reply.

        Returns:
            A ``VoiceAttemptResult``. Failures before text is recognised are
            reported through its ``error`` field. Once text is recognised,
            ``error`` is None and ``chosen_label`` holds whatever
            ``parse_voice_choice`` returned.
        """
        if not self._s.voice_confirmation_enabled:
            return VoiceAttemptResult(None, None, "voice_confirmation_disabled")
        if not options:
            return VoiceAttemptResult(None, None, "no_options")
        if not self._baidu.configured:
            return VoiceAttemptResult(None, None, "baidu_speech_not_configured")

        labels = [label for label, _conf in options]
        prompt = build_prompt_text(options)
        logger.info("Voice confirm surgery={} prompt_len={}", surgery_id, len(prompt))

        # The lock is held for the whole prompt -> record -> recognise cycle.
        async with self._lock:
            mp3_path, err = await run_in_threadpool(self._synthesize_to_temp_mp3, prompt)
            if err or not mp3_path:
                return VoiceAttemptResult(None, None, err or "tts_failed")

            play_err = await run_in_threadpool(self._play_and_remove, mp3_path)
            if play_err:
                return VoiceAttemptResult(None, None, play_err)

            pcm, rec_err = await run_in_threadpool(
                self._record_pcm_ffmpeg, float(self._s.voice_record_seconds)
            )
            if rec_err or not pcm:
                return VoiceAttemptResult(None, None, rec_err or "empty_audio")

            try:
                asr_payload = await run_in_threadpool(
                    self._baidu.asr, pcm, "pcm", 16000, None
                )
            except BaiduSpeechNotConfiguredError as exc:
                # Mirrors the TTS path; presumably asr() can raise this too.
                return VoiceAttemptResult(None, None, str(exc))

            text, asr_err = self._extract_asr_text(asr_payload)
            if asr_err or not text:
                return VoiceAttemptResult(None, None, asr_err or "asr_empty_text")

            chosen = parse_voice_choice(text, labels)
            return VoiceAttemptResult(chosen, text, None)
|
||||
|
||||
Reference in New Issue
Block a user