feat: 语音确认、联调与运维增强
- 语音:序数解析(第一个/第二个等)、解析失败计数与 API detail.retry_remaining; 百度 ASR 固定 dev_pid 为普通话;SurgeryPipelineError 支持 extra 并入 HTTP detail。 - Demo:demo 路由与假 RTSP、客户端 index 与 README;BackendResolver 与配置调整。 - 可观测:消耗 TSV 日志、语音文件日志、终端 Markdown 辅助;相关测试与依赖更新。 - 注意:.env 仍被 gitignore,本地密钥不会进入本提交。 Made-with: Cursor
This commit is contained in:
@@ -52,8 +52,13 @@ class BaiduSpeechService:
|
||||
rate: int = 16000,
|
||||
options: dict[str, Any] | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""短语音识别。返回百度 JSON(含 `err_no`、`result` 等)。"""
|
||||
return self._client_or_raise().asr(speech, format, rate, options)
|
||||
"""短语音识别。返回百度 JSON(含 `err_no`、`result` 等)。
|
||||
|
||||
固定使用普通话模型(`dev_pid` 来自配置),避免未传参时误用服务端默认导致偏英语等结果。
|
||||
"""
|
||||
merged: dict[str, Any] = dict(options or {})
|
||||
merged["dev_pid"] = int(settings.baidu_speech_asr_dev_pid)
|
||||
return self._client_or_raise().asr(speech, format, rate, merged)
|
||||
|
||||
def synthesis(
|
||||
self,
|
||||
|
||||
241
app/services/consumption_tsv_log.py
Normal file
241
app/services/consumption_tsv_log.py
Normal file
@@ -0,0 +1,241 @@
|
||||
"""每例手术一个文本文件(制表符列):`start_surgery` 时截断并写表头,每次时间窗识别**追加**一行。终端 Markdown 中时间戳为可读形式;落盘行内仍为 ISO 便于程序解析。
|
||||
|
||||
时间戳:在拉流起点记录 `time.time()`,与 `time.monotonic()` 时间窗对齐。直播 RTSP 经 OpenCV 一般无可靠绝对时码,以本机接收时刻为准。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import threading
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from app.config import settings
|
||||
from app.services.consumable_vision_algorithm import ClsTop3
|
||||
from app.terminal_markdown import print_markdown_stderr
|
||||
|
||||
# 制表符分隔;时间范围用 U+2013 连接;Top2/3 仅名称;本窗消耗数量恒为 1
|
||||
HEADER = "物品id\t物品名称\tTop2物品名称\tTop3物品名称\t消耗数量\t医生id\t时间戳\n"
|
||||
_RANGE_SEP = "\u2013" # en dash,与样例 `00:00:00.000–00:00:45.000` 一致
|
||||
|
||||
_lock = threading.Lock()
|
||||
|
||||
|
||||
def _consumption_tzinfo():
    """Resolve the time zone used to stamp consumption log rows.

    Reads ``settings.consumption_log_timezone``:

    * empty / blank -> the machine's local zone, or UTC when the local zone
      cannot be determined;
    * a loadable zone key -> the matching ``ZoneInfo``;
    * anything else -> a warning is logged and UTC is returned.

    Returns:
        A tzinfo object usable as ``datetime.fromtimestamp(..., tz=...)``.
    """
    raw = (settings.consumption_log_timezone or "").strip()
    if not raw:
        local_tz = datetime.now().astimezone().tzinfo
        return local_tz if local_tz is not None else timezone.utc
    try:
        return ZoneInfo(raw)
    except (ZoneInfoNotFoundError, ValueError):
        # ZoneInfo raises ValueError (not ZoneInfoNotFoundError) for malformed
        # keys such as absolute or non-normalised paths; a bad config value
        # must degrade to UTC instead of crashing the logging path.
        logger.warning("无效的 consumption_log_timezone={!r},回退为 UTC", raw)
        return timezone.utc
|
||||
|
||||
|
||||
def format_consumption_timestamp(
    camera_id: str,
    wall_start_epoch: float,
    wall_end_epoch: float,
) -> str:
    """Build the on-disk timestamp cell: ``<cam>@<ISO start><sep><ISO end>``.

    Both epochs are wall-clock seconds. They are rendered in the zone returned
    by ``_consumption_tzinfo()`` with millisecond precision and joined with
    ``_RANGE_SEP``; the camera id is shortened via ``short_camera_label``.
    """
    zone = _consumption_tzinfo()
    start_iso, end_iso = [
        datetime.fromtimestamp(epoch, tz=zone).isoformat(timespec="milliseconds")
        for epoch in (wall_start_epoch, wall_end_epoch)
    ]
    label = short_camera_label(camera_id)
    return f"{label}@{start_iso}{_RANGE_SEP}{end_iso}"
|
||||
|
||||
|
||||
def format_consumption_timestamp_readable(
    camera_id: str,
    wall_start_epoch: float,
    wall_end_epoch: float,
) -> str:
    """Build the human-readable timestamp used for terminal output only.

    Renders ``YYYY-MM-DD HH:MM:SS.mmm`` (no ``T``) for both ends of the window
    in the configured log zone, followed by the short camera label.
    """
    zone = _consumption_tzinfo()
    moments = [
        datetime.fromtimestamp(epoch, tz=zone)
        for epoch in (wall_start_epoch, wall_end_epoch)
    ]
    label = short_camera_label(camera_id)
    # Milliseconds are derived by truncating the microsecond field.
    start_text, end_text = (
        f"{moment:%Y-%m-%d %H:%M:%S}.{moment.microsecond // 1000:03d}"
        for moment in moments
    )
    return f"{start_text} {_RANGE_SEP} {end_text} · {label}"
|
||||
|
||||
|
||||
def short_camera_label(camera_id: str) -> str:
    """Shorten a camera id to a compact label such as ``cam03``.

    Ids shaped like ``or-cam-<n>`` or ``cam[-]<n>`` (case-insensitive) become
    ``cam`` plus the number zero-padded to two digits. Any other id is reduced
    to its word characters and hyphens, truncated to 12 characters; if nothing
    is left, ``"cam"`` is returned.
    """
    text = (camera_id or "").strip()
    for pattern in (r"^or-cam-(\d+)$", r"^cam-?0*(\d+)$"):
        hit = re.match(pattern, text, re.IGNORECASE)
        if hit is not None:
            return f"cam{int(hit.group(1)):02d}"
    fallback = re.sub(r"[^\w-]", "", text)[:12]
    return fallback if fallback else "cam"
|
||||
|
||||
|
||||
def _encode_cell(value: str) -> str:
|
||||
s = (value or "").replace("\r", " ").replace("\n", " ").replace("\t", " ")
|
||||
return s
|
||||
|
||||
|
||||
def _item_id_for_row(name: str, pid: str, name_to_code: dict[str, str]) -> str:
|
||||
p = (pid or "").strip()
|
||||
if p:
|
||||
return p
|
||||
n = (name or "").strip()
|
||||
if n in name_to_code:
|
||||
return (name_to_code.get(n) or n).strip()
|
||||
return n
|
||||
|
||||
|
||||
def build_tsv_line(
    *,
    name_to_code: dict[str, str],
    best: ClsTop3,
    doctor_id: str,
    camera_id: str,
    wall_start_epoch: float,
    wall_end_epoch: float,
) -> str:
    """Render one newline-terminated TSV row for a recognition window.

    Columns: item id, "<top1 name> <confidence>", top-2 name, top-3 name,
    the constant quantity ``1``, doctor id, and the ISO timestamp range.
    """
    item_id = _item_id_for_row(best.t1_name, best.t1_pid, name_to_code)
    # Top-1 cell keeps the historical "<name> <confidence>" layout (4 decimals).
    top1_cell = f"{(best.t1_name or '').strip()} {best.t1_conf:.4f}".strip()
    second = (best.t2_name or "").strip()
    third = (best.t3_name or "").strip()
    stamp = format_consumption_timestamp(camera_id, wall_start_epoch, wall_end_epoch)

    cells = [_encode_cell(text) for text in (item_id, top1_cell, second, third)]
    cells.append("1")
    cells.extend(_encode_cell(text) for text in (doctor_id, stamp))
    return "\t".join(cells) + "\n"
|
||||
|
||||
|
||||
def _safe_surgery_path_segment(surgery_id: str) -> str:
|
||||
s = (surgery_id or "unknown").strip() or "unknown"
|
||||
s = re.sub(r"[^\w\-.@]", "_", s)
|
||||
return s[:200] if len(s) > 200 else s
|
||||
|
||||
|
||||
def resolved_consumption_log_path(surgery_id: str) -> Path:
    """Compute the absolute log file path for one surgery.

    The template comes from ``settings.consumption_tsv_log_path`` (default
    ``logs/consumption_{surgery_id}.txt``). A ``{surgery_id}`` placeholder is
    substituted with the sanitised id; without a placeholder the id is
    appended to the file stem (or ``_<id>.txt`` is appended when the template
    has no suffix). ``~`` is expanded and relative paths are anchored at the
    current working directory.
    """
    template = (
        settings.consumption_tsv_log_path or "logs/consumption_{surgery_id}.txt"
    ).strip()
    segment = _safe_surgery_path_segment(surgery_id)
    placeholder = "{surgery_id}"

    if placeholder in template:
        located = template.replace(placeholder, segment)
    else:
        base = Path(template)
        if base.suffix:
            located = str(base.with_name(f"{base.stem}_{segment}{base.suffix}"))
        else:
            located = f"{template.rstrip('/')}_{segment}.txt"

    target = Path(located).expanduser()
    return target if target.is_absolute() else Path.cwd() / target
|
||||
|
||||
|
||||
def init_consumption_log_file(surgery_id: str) -> None:
    """Start a fresh log for a surgery: truncate its file and write the header.

    Does nothing when ``settings.consumption_tsv_log_enabled`` is false.
    Parent directories are created as needed; the write itself happens under
    the module lock.
    """
    if not settings.consumption_tsv_log_enabled:
        return
    target = resolved_consumption_log_path(surgery_id)
    target.parent.mkdir(parents=True, exist_ok=True)
    with _lock, target.open("w", encoding="utf-8") as handle:
        handle.write(HEADER)
|
||||
|
||||
|
||||
def append_consumption_tsv_line(surgery_id: str, line: str) -> None:
    """Append one pre-rendered row to the surgery's consumption log.

    Does nothing when ``settings.consumption_tsv_log_enabled`` is false.
    ``line`` is written verbatim, so it must already end with a newline.
    """
    if not settings.consumption_tsv_log_enabled:
        return
    target = resolved_consumption_log_path(surgery_id)
    target.parent.mkdir(parents=True, exist_ok=True)
    with _lock, target.open("a", encoding="utf-8") as handle:
        handle.write(line)
|
||||
|
||||
|
||||
def _md_cell(value: str) -> str:
|
||||
"""避免破坏 Markdown 表格的 | 与换行。"""
|
||||
s = (value or "").replace("\r", " ").replace("\n", " ").replace("|", "|")
|
||||
return s
|
||||
|
||||
|
||||
def build_consumption_markdown(
    *,
    name_to_code: dict[str, str],
    best: ClsTop3,
    doctor_id: str,
    camera_id: str,
    wall_start_epoch: float,
    wall_end_epoch: float,
) -> str:
    """Render a one-row Markdown table describing a recognition window.

    Top-1 shows id, name and confidence (4 decimals); top-2/top-3 show the
    name only, or an em dash when absent; quantity is the constant ``1``.
    The returned text ends with a newline.
    """
    top1_id = _item_id_for_row(best.t1_name, best.t1_pid, name_to_code)
    top1_name = (best.t1_name or "").strip()
    second = (best.t2_name or "").strip()
    third = (best.t3_name or "").strip()
    placeholder = "—"
    when = format_consumption_timestamp_readable(camera_id, wall_start_epoch, wall_end_epoch)

    header = "| Top1 物品id | Top1 物品名称 | Top1 置信度 | Top2 物品名称 | Top3 物品名称 | 消耗数量 | 医生id | 时间戳 |"
    alignment = "| :--- | :--- | ---: | :--- | :--- | ---: | :--- | :--- |"
    row = (
        f"| {_md_cell(top1_id)} | {_md_cell(top1_name)} | {best.t1_conf:.4f} "
        f"| {_md_cell(second) if second else placeholder} "
        f"| {_md_cell(third) if third else placeholder} "
        f"| 1 | {_md_cell(doctor_id)} | {_md_cell(when)} |"
    )
    return "\n".join([header, alignment, row, ""])
|
||||
|
||||
|
||||
def append_consumption_window(
    *,
    surgery_id: str,
    name_to_code: dict[str, str],
    best: ClsTop3,
    doctor_id: str,
    camera_id: str,
    wall_start_epoch: float,
    wall_end_epoch: float,
) -> None:
    """Record one recognition window to the enabled sinks.

    When the TSV log is enabled a row is appended to the surgery's file; when
    the Markdown terminal output is enabled a table is printed via
    ``print_markdown_stderr``. With both disabled this is a no-op.
    """
    to_file = settings.consumption_tsv_log_enabled
    if not to_file and not settings.consumption_log_markdown_terminal:
        return

    # Both renderers take the same keyword arguments.
    window = {
        "name_to_code": name_to_code,
        "best": best,
        "doctor_id": doctor_id,
        "camera_id": camera_id,
        "wall_start_epoch": wall_start_epoch,
        "wall_end_epoch": wall_end_epoch,
    }
    if to_file:
        append_consumption_tsv_line(surgery_id, build_tsv_line(**window))
    if settings.consumption_log_markdown_terminal:
        print_markdown_stderr(build_consumption_markdown(**window))
|
||||
@@ -10,6 +10,8 @@ from app.schemas import (
|
||||
SurgeryPendingConfirmationResponse,
|
||||
)
|
||||
from app.services.video.session_manager import CameraSessionManager
|
||||
from fastapi.concurrency import run_in_threadpool
|
||||
|
||||
from app.services.voice_resolution import VoiceConfirmationService, VoiceResolveResult
|
||||
from app.surgery_errors import SurgeryPipelineError
|
||||
|
||||
@@ -79,6 +81,18 @@ class SurgeryPipeline:
|
||||
def voice_status(self, surgery_id: str) -> dict[str, object] | None:
    """Return the session manager's voice status for ``surgery_id`` unchanged."""
    status = self._sessions.voice_status(surgery_id)
    return status
|
||||
|
||||
async def list_voice_audits(
    self,
    surgery_id: str,
    *,
    limit: int = 50,
    offset: int = 0,
):
    """Return one page of persisted voice-confirmation audit rows.

    Delegates to the voice service's ``list_voice_audits_for_surgery``; per
    the original note the rows come from the ``voice_confirmation_audits``
    table and serve tracing, reconciliation and reporting.
    """
    page = await self._voice.list_voice_audits_for_surgery(
        surgery_id,
        limit=limit,
        offset=offset,
    )
    return page
|
||||
|
||||
def get_pending_confirmation_for_client(
|
||||
self, surgery_id: str
|
||||
) -> SurgeryPendingConfirmationResponse | None:
|
||||
@@ -114,3 +128,35 @@ class SurgeryPipeline:
|
||||
filename=filename,
|
||||
content_type=content_type,
|
||||
)
|
||||
|
||||
async def resolve_pending_confirmation_from_client_text(
    self,
    surgery_id: str,
    confirmation_id: str,
    recognized_text: str,
) -> VoiceResolveResult:
    """Resolve a pending confirmation from text recognised on the client.

    Delegates to the voice service's ``resolve_from_recognized_text``. Per the
    original note, the parsing rules match the WAV upload path and neither
    MinIO nor Baidu is needed on this route.
    """
    outcome = await self._voice.resolve_from_recognized_text(
        surgery_id=surgery_id,
        confirmation_id=confirmation_id,
        recognized_text=recognized_text,
    )
    return outcome
|
||||
|
||||
async def get_pending_prompt_audio_mp3(
    self,
    surgery_id: str,
    confirmation_id: str,
) -> bytes:
    """Synthesise the prompt text of a pending confirmation to MP3 bytes.

    The synthesis call is blocking and therefore runs in the thread pool.

    Raises:
        SurgeryPipelineError: code ``CONFIRMATION_NOT_FOUND`` when the
            confirmation does not exist or is no longer ``pending``.
    """
    item = self._sessions.get_pending_confirmation_by_id(surgery_id, confirmation_id)
    still_pending = item is not None and item.status == "pending"
    if not still_pending:
        raise SurgeryPipelineError(
            "CONFIRMATION_NOT_FOUND",
            "未找到该待确认项或已处理。",
        )
    audio = await run_in_threadpool(
        self._voice.synthesize_prompt_to_mp3,
        item.prompt_text,
    )
    return audio
|
||||
|
||||
242
app/services/synthetic_rtsp.py
Normal file
242
app/services/synthetic_rtsp.py
Normal file
@@ -0,0 +1,242 @@
|
||||
"""Start/stop local fake RTSP streams (MediaMTX + ffmpeg) for dev orchestration."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import socket
|
||||
import subprocess
|
||||
import time
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import ClassVar
|
||||
|
||||
from loguru import logger
|
||||
|
||||
MEDIAMTX_IMAGE = os.environ.get("MEDIAMTX_DOCKER_IMAGE", "bluenviron/mediamtx:latest")
|
||||
CONTAINER_NAME_PREFIX = "orm-fake-rtsp-"
|
||||
# 等待 127.0.0.1:host_port 可连接(避免开录时 Connection refused)
|
||||
_MEDIAMTX_TCP_READY_SEC = float(os.environ.get("MEDIAMTX_TCP_READY_SEC", "30"))
|
||||
|
||||
|
||||
def _wait_tcp_listening(host: str, port: int, *, total_timeout: float) -> None:
    """Block until a TCP connection to ``host:port`` succeeds.

    Connection attempts (1.5 s timeout each) are retried every 0.2 s until the
    deadline, which is at least one second away regardless of
    ``total_timeout``.

    Raises:
        RuntimeError: when no attempt succeeded before the deadline; chained
            from the last ``OSError`` if there was one.
    """
    give_up_at = time.monotonic() + max(1.0, total_timeout)
    last_error: OSError | None = None
    while time.monotonic() < give_up_at:
        try:
            with socket.create_connection((host, port), timeout=1.5):
                logger.info("RTSP port ready {}:{}", host, port)
                return
        except OSError as exc:
            last_error = exc
        # Only reached after a failed attempt; back off briefly before retrying.
        time.sleep(0.2)

    hint = " MediaMTX 未监听:检查 docker 是否起成功、18554 是否被占用(orm-fake-rtsp-*) 已 docker ps。"
    if last_error is None:
        raise RuntimeError(
            f"等待 {host}:{port} 可连接超时({total_timeout:g}s).{hint}"
        )
    raise RuntimeError(
        f"等待 {host}:{port} 可连接超时({total_timeout:g}s): {last_error}{hint}"
    ) from last_error
|
||||
|
||||
|
||||
@dataclass
class StreamSpec:
    """One video file to publish as a fake RTSP stream for a camera."""

    camera_id: str
    file_path: Path
    rtsp_path: str  # last segment, e.g. demo1

    def __post_init__(self) -> None:
        """Normalise ``rtsp_path``: trim whitespace and slashes, default to ``demo``."""
        trimmed = (self.rtsp_path or "demo").strip().strip("/")
        self.rtsp_path = trimmed if trimmed else "demo"
|
||||
|
||||
|
||||
@dataclass
class SyntheticRtspRun:
    """Handles for one multi-stream session: publisher processes plus the docker container."""

    container_name: str
    procs: list[subprocess.Popen] = field(default_factory=list)
    work_dir: Path | None = None  # temp dir for uploaded video files; removed on stop

    def stop(self) -> None:
        """Tear the session down; best effort, never raises for cleanup failures.

        Steps: terminate every still-running publisher (kill it if it does
        not exit within 5 s), delete ``work_dir`` if it exists, then
        force-remove the container when the ``docker`` CLI is available.
        """
        for proc in self.procs:
            if proc.poll() is not None:
                continue
            proc.terminate()
            try:
                proc.wait(timeout=5.0)
            except subprocess.TimeoutExpired:
                proc.kill()
                # Reap the killed child so it does not linger as a zombie.
                try:
                    proc.wait(timeout=5.0)
                except subprocess.TimeoutExpired:
                    logger.debug("publisher pid {} still alive after kill", proc.pid)
        self.procs.clear()

        if self.work_dir is not None and self.work_dir.is_dir():
            try:
                shutil.rmtree(self.work_dir, ignore_errors=True)
            except OSError as exc:
                logger.debug("rmtree work_dir: {}", exc)
        self.work_dir = None

        if shutil.which("docker") is not None:
            try:
                subprocess.run(
                    ["docker", "rm", "-f", self.container_name],
                    capture_output=True,
                    timeout=30,
                )
            except (OSError, subprocess.SubprocessError) as exc:
                logger.debug("docker rm: {}", exc)
|
||||
|
||||
|
||||
class SyntheticRtspManager:
    """Process-wide coordinator for the single active fake-RTSP session.

    State is kept on the class (``_instance``, ``_active``) so that the
    classmethods ``active_run`` and ``stop_active`` observe the run created by
    ``start``.
    """

    _instance: ClassVar[SyntheticRtspManager | None] = None
    _active: ClassVar[SyntheticRtspRun | None] = None

    @classmethod
    def get(cls) -> SyntheticRtspManager:
        """Return the shared manager instance, creating it on first use."""
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance

    @classmethod
    def active_run(cls) -> SyntheticRtspRun | None:
        """Return the currently active run, or ``None`` when nothing is running."""
        return cls._active

    @classmethod
    def _cleanup_prefixed_containers(cls) -> None:
        """Remove stale MediaMTX containers left by earlier runs/reloads.

        Lists every container whose name matches ``CONTAINER_NAME_PREFIX`` and
        force-removes it. Best effort: docker failures are logged at debug
        level and swallowed.
        """
        if shutil.which("docker") is None:
            return
        try:
            listed = subprocess.run(
                [
                    "docker",
                    "ps",
                    "-aq",
                    "--filter",
                    f"name={CONTAINER_NAME_PREFIX}",
                ],
                capture_output=True,
                text=True,
                timeout=30,
                check=False,
            )
        except (OSError, subprocess.SubprocessError) as exc:
            logger.debug("docker ps stale cleanup: {}", exc)
            return
        ids = [x.strip() for x in (listed.stdout or "").splitlines() if x.strip()]
        if not ids:
            return
        try:
            subprocess.run(
                ["docker", "rm", "-f", *ids],
                capture_output=True,
                text=True,
                timeout=60,
                check=False,
            )
            logger.info("Removed stale fake RTSP containers: {}", ids)
        except (OSError, subprocess.SubprocessError) as exc:
            logger.debug("docker rm stale cleanup: {}", exc)

    @classmethod
    def stop_active(cls) -> None:
        """Stop the active run (if any), then sweep leftover prefixed containers."""
        if cls._active is not None:
            cls._active.stop()
            cls._active = None
        cls._cleanup_prefixed_containers()

    def start(
        self,
        streams: list[StreamSpec],
        *,
        host_port: int,
        work_dir: Path,
    ) -> tuple[SyntheticRtspRun, dict[str, str]]:
        """Start MediaMTX and one ffmpeg publisher per stream.

        Any previously active run is stopped first.

        Args:
            streams: Files to publish; each ``rtsp_path`` may only contain
                ASCII letters, digits, ``_``, ``.`` and ``-``.
            host_port: Host port (bound on 127.0.0.1) mapped to the
                container's port 8554.
            work_dir: Directory handed to the run once MediaMTX is reachable;
                the run deletes it when stopped.

        Returns:
            ``(run, url_by_camera)`` where the URLs point at 127.0.0.1.

        Raises:
            ValueError: ``streams`` is empty or an RTSP path is invalid.
            FileNotFoundError: a stream's file does not exist.
            RuntimeError: ffmpeg/docker is missing, the container failed to
                start, or the RTSP port never became reachable.
        """
        if not streams:
            raise ValueError("no streams")
        if not shutil.which("ffmpeg"):
            raise RuntimeError("ffmpeg not in PATH")
        if not shutil.which("docker"):
            raise RuntimeError("docker not in PATH (required to run MediaMTX)")

        self.stop_active()

        allowed = frozenset(
            "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_.-"
        )
        for s in streams:
            if not s.file_path.is_file():
                raise FileNotFoundError(str(s.file_path))
            if any(ch not in allowed for ch in s.rtsp_path):
                raise ValueError(f"invalid RTSP path segment: {s.rtsp_path!r}")

        container = CONTAINER_NAME_PREFIX + uuid.uuid4().hex[:12]
        cmd = [
            "docker", "run", "-d", "--name", container,
            "-p", f"127.0.0.1:{host_port}:8554",
            MEDIAMTX_IMAGE,
        ]
        r = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
        if r.returncode != 0:
            # A failed `docker run` can still leave a created container behind.
            try:
                subprocess.run(
                    ["docker", "rm", "-f", container],
                    capture_output=True,
                    text=True,
                    timeout=30,
                    check=False,
                )
            except (OSError, subprocess.SubprocessError) as exc:
                logger.debug("docker rm failed container cleanup: {}", exc)
            err = (r.stderr or r.stdout or "").strip()
            raise RuntimeError(f"MediaMTX docker failed: {err}")

        run = SyntheticRtspRun(container_name=container)
        url_map: dict[str, str] = {}
        try:
            # The readiness wait is inside the guard so that a timeout removes
            # the container instead of leaving it bound to the port.
            time.sleep(0.5)
            _wait_tcp_listening("127.0.0.1", host_port, total_timeout=_MEDIAMTX_TCP_READY_SEC)

            run.work_dir = work_dir
            for s in streams:
                dest = f"rtsp://127.0.0.1:{host_port}/{s.rtsp_path}"
                url_map[s.camera_id] = dest
                pub = [
                    "ffmpeg", "-hide_banner", "-loglevel", "warning",
                    "-re", "-stream_loop", "-1",
                    "-i", str(s.file_path),
                    "-c", "copy", "-f", "rtsp", "-rtsp_transport", "tcp", dest,
                ]
                p = subprocess.Popen(pub)  # noqa: S603
                run.procs.append(p)
        except Exception:
            run.stop()
            raise

        # Give ffmpeg a moment to connect to MediaMTX so that an early client
        # is less likely to connect and then fail on DESCRIBE.
        time.sleep(0.4)

        # Store on the class, not the instance: active_run()/stop_active() are
        # classmethods reading cls._active, so an instance attribute would
        # leave them seeing None and the run would never be stopped.
        type(self)._active = run
        return run, url_map
|
||||
|
||||
|
||||
def write_rtsp_url_json_file(
    path: Path,
    url_map: dict[str, str],
    *,
    replace_host: str,
) -> None:
    """Write the camera-to-URL map as JSON, optionally rewriting the host.

    When ``replace_host`` is neither empty nor ``127.0.0.1``, the first
    ``127.0.0.1`` in each URL is replaced with it (e.g. host.docker.internal).
    The file is written to a sibling ``.tmp`` file and then moved into place.
    """
    keep_loopback = replace_host in ("", "127.0.0.1")
    payload = (
        url_map
        if keep_loopback
        else {
            camera: url.replace("127.0.0.1", replace_host, 1)
            for camera, url in url_map.items()
        }
    )
    path.parent.mkdir(parents=True, exist_ok=True)
    rendered = json.dumps(payload, ensure_ascii=False, indent=2, sort_keys=True) + "\n"
    staging = path.with_name(path.name + ".tmp")
    staging.write_text(rendered, encoding="utf-8")
    staging.replace(path)
    logger.info("Wrote RTSP map to {}", path)
|
||||
@@ -21,7 +21,6 @@ class BackendResolver:
|
||||
) -> None:
|
||||
self._s = settings
|
||||
self._hik = hikvision_runtime
|
||||
self._rtsp_urls_map = settings.video_rtsp_url_map()
|
||||
|
||||
def _parse_json_object(self, raw: str) -> dict[str, Any]:
|
||||
raw = (raw or "").strip()
|
||||
@@ -55,8 +54,10 @@ class BackendResolver:
|
||||
return VideoBackendKind.RTSP
|
||||
|
||||
def rtsp_url_for_camera(self, camera_id: str) -> str:
|
||||
if camera_id in self._rtsp_urls_map:
|
||||
return self._rtsp_urls_map[camera_id]
|
||||
# Re-read on each use so VIDEO_RTSP_URLS_JSON_FILE can be hot-updated (e.g. dev orchestrator).
|
||||
m = self._s.video_rtsp_url_map()
|
||||
if camera_id in m:
|
||||
return m[camera_id]
|
||||
tpl = (self._s.video_rtsp_url_template or "").strip()
|
||||
if tpl:
|
||||
try:
|
||||
|
||||
@@ -26,6 +26,8 @@ from app.services.video.backend_resolver import BackendResolver
|
||||
from app.services.video.hikvision_runtime import HikvisionInitRefCount, HikvisionRuntime
|
||||
from app.services.video.rtsp_capture import RtspCapture
|
||||
from app.services.video.types import VideoBackendKind
|
||||
from app.services.consumption_tsv_log import append_consumption_window, init_consumption_log_file
|
||||
from app.services.voice_file_log import init_voice_log_file
|
||||
from app.services.voice_confirm import build_prompt_text
|
||||
from app.surgery_errors import SurgeryPipelineError
|
||||
|
||||
@@ -41,6 +43,8 @@ class PendingConsumableConfirmation:
|
||||
created_at: datetime
|
||||
model_top1_label: str
|
||||
model_top1_confidence: float
|
||||
#: 本轮待确认在解析失败时累计次数(首败 + 重试),供 API 计算 retry_remaining。
|
||||
voice_parse_failures: int = 0
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -49,6 +53,8 @@ class CameraStreamInferState:
|
||||
|
||||
votes: list[tuple[float, str, ClsTop3]] = field(default_factory=list)
|
||||
stream_t0: float | None = None
|
||||
#: 与 `stream_t0` 同一次初始化时的 `time.time()`,与 monotonic 流逝秒相加得到墙钟时间戳
|
||||
stream_wall_start: float | None = None
|
||||
next_bucket: int = 0
|
||||
|
||||
|
||||
@@ -258,6 +264,8 @@ class CameraSessionManager:
|
||||
)
|
||||
|
||||
run = RunningSurgery(stop_event=stop_event, state=state, tasks=tasks)
|
||||
init_consumption_log_file(surgery_id)
|
||||
init_voice_log_file(surgery_id, self._s)
|
||||
async with self._manager_lock:
|
||||
self._active[surgery_id] = run
|
||||
|
||||
@@ -408,6 +416,22 @@ class CameraSessionManager:
|
||||
return []
|
||||
return list(self._active[surgery_id].state.candidate_consumables)
|
||||
|
||||
async def record_voice_parse_failure(
    self, surgery_id: str, confirmation_id: str
) -> tuple[int, int]:
    """Count one failed parse for a pending confirmation.

    Returns:
        ``(failures_so_far, retries_left)`` where ``retries_left`` is the
        configured ``voice_confirm_max_failed_parse_rounds`` minus the failure
        count, floored at 0. ``(0, 0)`` is returned when the surgery is not
        active or the confirmation is missing / no longer ``pending``.
    """
    nothing = (0, 0)
    if surgery_id not in self._active:
        return nothing
    session_state = self._active[surgery_id].state
    cap = int(self._s.voice_confirm_max_failed_parse_rounds)
    async with session_state.lock:
        pending = session_state.pending_by_id.get(confirmation_id)
        if pending is not None and pending.status == "pending":
            pending.voice_parse_failures += 1
            failures = pending.voice_parse_failures
            return failures, max(0, cap - failures)
    return nothing
|
||||
|
||||
def next_pending_confirmation(
|
||||
self, surgery_id: str
|
||||
) -> PendingConsumableConfirmation | None:
|
||||
@@ -622,6 +646,19 @@ class CameraSessionManager:
|
||||
if snap is None:
|
||||
continue
|
||||
|
||||
if self._s.video_log_inference_results:
|
||||
logger.info(
|
||||
"Vision result surgery={} camera={} top1={}({:.3f}) top2={}({:.3f}) top3={}({:.3f})",
|
||||
surgery_id,
|
||||
camera_id,
|
||||
snap.t1_name,
|
||||
snap.t1_conf,
|
||||
snap.t2_name,
|
||||
snap.t2_conf,
|
||||
snap.t3_name,
|
||||
snap.t3_conf,
|
||||
)
|
||||
|
||||
wsec = self._s.consumable_vision_window_sec
|
||||
pending_preds: list[PredictionResult] = []
|
||||
async with state.lock:
|
||||
@@ -630,6 +667,7 @@ class CameraSessionManager:
|
||||
)
|
||||
if cis.stream_t0 is None:
|
||||
cis.stream_t0 = time.monotonic()
|
||||
cis.stream_wall_start = time.time()
|
||||
t_rel = time.monotonic() - cis.stream_t0
|
||||
cis.votes.append((t_rel, snap.t1_name, snap))
|
||||
current_b = int(t_rel // wsec)
|
||||
@@ -648,7 +686,19 @@ class CameraSessionManager:
|
||||
if not bucket_pts:
|
||||
continue
|
||||
best = window_bucket_to_best_snap(bucket_pts)
|
||||
if best is not None:
|
||||
if best is not None and cis.stream_wall_start is not None:
|
||||
if self._s.consumption_tsv_log_enabled or self._s.consumption_log_markdown_terminal:
|
||||
wall_lo = cis.stream_wall_start + lo
|
||||
wall_hi = cis.stream_wall_start + hi
|
||||
append_consumption_window(
|
||||
surgery_id=surgery_id,
|
||||
name_to_code=state.name_to_code,
|
||||
best=best,
|
||||
doctor_id=self._s.video_result_doctor_id,
|
||||
camera_id=camera_id,
|
||||
wall_start_epoch=wall_lo,
|
||||
wall_end_epoch=wall_hi,
|
||||
)
|
||||
pending_preds.append(
|
||||
cls_top3_to_prediction_result(best)
|
||||
)
|
||||
|
||||
@@ -32,12 +32,100 @@ _CN_DIGITS = {
|
||||
}
|
||||
|
||||
|
||||
def _parse_ordinal_index_1based(token: str) -> int | None:
|
||||
"""将「1」「3」「一」「三」「十一」等解析为 1-based 序数,失败返回 None。"""
|
||||
t = (token or "").strip()
|
||||
if not t:
|
||||
return None
|
||||
if t.isdigit():
|
||||
v = int(t)
|
||||
return v if 1 <= v <= 99 else None
|
||||
if t in _CN_DIGITS and t != "零" and t != "十":
|
||||
return int(_CN_DIGITS[t])
|
||||
if t == "十":
|
||||
return 10
|
||||
if len(t) == 2 and t[0] == "十" and t[1] in _CN_DIGITS and t[1] not in ("零", "十"):
|
||||
return 10 + int(_CN_DIGITS[t[1]])
|
||||
if len(t) == 2 and t[1] == "十" and t[0] in _CN_DIGITS and t[0] != "零":
|
||||
return int(_CN_DIGITS[t[0]]) * 10
|
||||
if len(t) == 3 and t[0] in _CN_DIGITS and t[1] == "十" and t[2] in _CN_DIGITS:
|
||||
return int(_CN_DIGITS[t[0]]) * 10 + int(_CN_DIGITS[t[2]])
|
||||
return None
|
||||
|
||||
|
||||
def _label_from_ordinal_1based(n1: int, options: list[str]) -> str | None:
|
||||
if n1 < 1:
|
||||
return None
|
||||
idx = n1 - 1
|
||||
if 0 <= idx < len(options):
|
||||
return options[idx]
|
||||
return None
|
||||
|
||||
|
||||
def _choose_from_ordinal_text(raw: str, options: list[str]) -> str | None:
|
||||
"""从「第一个」「第2个」「选3」「1号」等表述解析选项。返回 None 表示本函数未识别。"""
|
||||
n_opt = len(options)
|
||||
if n_opt < 1:
|
||||
return None
|
||||
|
||||
# 1) 显式「第N个/项/款/…」,允许夹带后噪声,如「第一个对」
|
||||
for m in re.finditer(
|
||||
r"第([0-9]+|[一二两三四五六七八九十百]+)(?:个|项|款|的|种|名)?", raw
|
||||
):
|
||||
n1 = _parse_ordinal_index_1based(m.group(1))
|
||||
if n1 is not None:
|
||||
ch = _label_from_ordinal_1based(n1, options)
|
||||
if ch is not None:
|
||||
return ch
|
||||
m_pick = re.search(
|
||||
r"(?:^|[\s,,;;::])(?:选|要|就)\s*0*([1-9]\d?)(?:\s*号|个|项|款)?",
|
||||
raw,
|
||||
)
|
||||
if m_pick:
|
||||
n1 = int(m_pick.group(1))
|
||||
ch = _label_from_ordinal_1based(n1, options)
|
||||
if ch is not None:
|
||||
return ch
|
||||
norm_for_opt = raw.replace(" ", "").lower()
|
||||
m_op = re.search(r"(?:option|选项)\s*[::]?\s*(\d+)", norm_for_opt, re.IGNORECASE)
|
||||
if m_op:
|
||||
n1 = int(m_op.group(1))
|
||||
ch = _label_from_ordinal_1based(n1, options)
|
||||
if ch is not None:
|
||||
return ch
|
||||
|
||||
# 2) 行首/句末「一」「二」单字,仅当候选项数较少时
|
||||
s = raw.replace(" ", "")
|
||||
if n_opt <= 3:
|
||||
m_one = re.match(r"^([一二两三四])$", s)
|
||||
if m_one:
|
||||
tok = m_one.group(1)
|
||||
if tok in _CN_DIGITS and tok not in ("零", "十"):
|
||||
n1 = int(_CN_DIGITS[tok])
|
||||
ch = _label_from_ordinal_1based(n1, options)
|
||||
if ch is not None:
|
||||
return ch
|
||||
m_tail = re.search(r"([0-9一二两三四五六七八九十]+)\s*号$", s)
|
||||
if m_tail:
|
||||
n1 = _parse_ordinal_index_1based(m_tail.group(1))
|
||||
if n1 is not None:
|
||||
ch = _label_from_ordinal_1based(n1, options)
|
||||
if ch is not None:
|
||||
return ch
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def parse_voice_choice(asr_text: str, options: list[str]) -> str | None:
|
||||
"""
|
||||
从识别文本中解析医生选择的耗材名称。
|
||||
支持:完全匹配、子串匹配、第 N 个(1/一/第一个)。
|
||||
"""
|
||||
raw = (asr_text or "").strip()
|
||||
raw = re.sub(
|
||||
r"^[。,、;:!?\s]+|[。,、;:!?\s]+$",
|
||||
"",
|
||||
(asr_text or "").strip(),
|
||||
)
|
||||
if not raw:
|
||||
return None
|
||||
normalized = raw.replace(" ", "").lower()
|
||||
@@ -46,6 +134,10 @@ def parse_voice_choice(asr_text: str, options: list[str]) -> str | None:
|
||||
if opt and opt in raw:
|
||||
return opt
|
||||
|
||||
chosen_ord = _choose_from_ordinal_text(raw, options)
|
||||
if chosen_ord is not None:
|
||||
return chosen_ord
|
||||
|
||||
m_num = re.search(r"(\d+)", raw)
|
||||
if m_num:
|
||||
idx = int(m_num.group(1)) - 1
|
||||
@@ -55,14 +147,11 @@ def parse_voice_choice(asr_text: str, options: list[str]) -> str | None:
|
||||
m_cn = re.search(r"第([一二两三四五六七八九十\d]+)个", raw)
|
||||
if m_cn:
|
||||
token = m_cn.group(1)
|
||||
if token.isdigit():
|
||||
idx = int(token) - 1
|
||||
elif token in _CN_DIGITS:
|
||||
idx = _CN_DIGITS[token] - 1
|
||||
else:
|
||||
idx = -1
|
||||
if 0 <= idx < len(options):
|
||||
return options[idx]
|
||||
n1 = int(token) if token.isdigit() else _parse_ordinal_index_1based(token)
|
||||
if n1 is not None:
|
||||
ch = _label_from_ordinal_1based(n1, options)
|
||||
if ch is not None:
|
||||
return ch
|
||||
|
||||
for i, opt in enumerate(options):
|
||||
if not opt:
|
||||
@@ -107,13 +196,9 @@ def is_rejection_phrase(asr_text: str) -> bool:
|
||||
|
||||
|
||||
def build_prompt_text(options: list[tuple[str, float]]) -> str:
|
||||
parts = [
|
||||
"请确认刚才使用的耗材是下面哪一项,可以说序号或名称;"
|
||||
"若是清单内其它耗材,也可以直接说该耗材名称。"
|
||||
]
|
||||
parts = ["请确认刚才使用的耗材是下面哪一项。"]
|
||||
for i, (name, _conf) in enumerate(options, start=1):
|
||||
parts.append(f"第{i}个,{name}。")
|
||||
parts.append("若都不是请说不是。")
|
||||
return "".join(parts)
|
||||
|
||||
|
||||
@@ -228,6 +313,32 @@ class VoiceConfirmationOrchestrator:
|
||||
tmp.close()
|
||||
return path, None
|
||||
|
||||
async def speak_prompt(self, text: str) -> None:
    """Speak ``text`` aloud without recording a reply.

    Skipped silently when the text is blank, when either
    ``voice_tts_on_pending_enqueued`` or ``voice_confirmation_enabled`` is
    off, or when the Baidu speech client is not configured. Synthesis and
    playback run in the thread pool under the orchestrator lock; failures are
    logged as warnings and the temporary MP3 is always removed.
    """
    if not (text or "").strip():
        return
    cfg = self._s
    if not cfg.voice_tts_on_pending_enqueued or not cfg.voice_confirmation_enabled:
        return
    if not self._baidu.configured:
        logger.debug("speak_prompt skipped: baidu_speech not configured")
        return

    async with self._lock:
        mp3_path, err = await run_in_threadpool(self._synthesize_to_temp_mp3, text)
        if err or not mp3_path:
            logger.warning("TTS synthesis failed: {}", err)
            return
        try:
            play_err = await run_in_threadpool(self._play_mp3_file, mp3_path)
            if play_err:
                logger.warning("TTS play failed: {}", play_err)
        finally:
            # Best-effort removal of the temporary file.
            try:
                os.unlink(mp3_path)
            except OSError:
                pass
|
||||
|
||||
async def run_confirmation(
|
||||
self,
|
||||
*,
|
||||
|
||||
167
app/services/voice_file_log.py
Normal file
167
app/services/voice_file_log.py
Normal file
@@ -0,0 +1,167 @@
|
||||
"""语音确认(ASR/解析/审计)的终端 loguru 行 + 每手术 TSV 落盘,与 `consumption_tsv_log` 并列。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import threading
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from app.config import Settings
|
||||
|
||||
_lock = threading.Lock()
|
||||
|
||||
HEADER = (
|
||||
"时间戳(ISO,UTC)\t来源\t状态\tconfirmation_id\tasr/识别文本\t"
|
||||
"resolved_label\trejected\terror\taudio_object_key\n"
|
||||
)
|
||||
|
||||
|
||||
def _ts_iso_utc() -> str:
|
||||
return datetime.now(timezone.utc).isoformat(timespec="milliseconds")
|
||||
|
||||
|
||||
def _encode_cell(value: str) -> str:
|
||||
return (value or "").replace("\r", " ").replace("\n", " ").replace("\t", " ")
|
||||
|
||||
|
||||
def _log_tz_info(settings: Settings) -> object:
|
||||
raw = (settings.consumption_log_timezone or "").strip()
|
||||
if not raw:
|
||||
lt = datetime.now().astimezone().tzinfo
|
||||
return lt if lt is not None else timezone.utc
|
||||
try:
|
||||
return ZoneInfo(raw)
|
||||
except ZoneInfoNotFoundError:
|
||||
return timezone.utc
|
||||
|
||||
|
||||
def _ts_local_for_display(settings: Settings) -> str:
    """Return the current time in the configured log zone as ISO 8601 (milliseconds)."""
    zone = _log_tz_info(settings)
    now_local = datetime.now(zone)
    return now_local.isoformat(timespec="milliseconds")
|
||||
|
||||
|
||||
def _safe_surgery_path_segment(surgery_id: str) -> str:
|
||||
s = (surgery_id or "unknown").strip() or "unknown"
|
||||
s = re.sub(r"[^\w\-.@]", "_", s)
|
||||
return s[:200] if len(s) > 200 else s
|
||||
|
||||
|
||||
def resolved_voice_log_path(surgery_id: str, settings: Settings) -> Path:
    """Resolve the absolute path of the voice log file for one surgery.

    The template comes from ``settings.voice_file_log_path``; when it is
    unset, empty or whitespace-only, ``logs/voice_{surgery_id}.txt`` is used.

    * If the template contains ``{surgery_id}``, the placeholder is replaced by
      the sanitised surgery id.
    * Otherwise the sanitised id is appended to the file stem (before the
      suffix), or, when the template has no suffix, ``_<id>.txt`` is appended.

    ``~`` is expanded and a relative result is anchored at the current working
    directory.
    """
    # Strip before applying the default: a whitespace-only setting is truthy and
    # would otherwise bypass the default and collapse to an empty template.
    template = (settings.voice_file_log_path or "").strip() or "logs/voice_{surgery_id}.txt"
    safe = _safe_surgery_path_segment(surgery_id)
    if "{surgery_id}" in template:
        resolved = template.replace("{surgery_id}", safe)
    else:
        plain = Path(template)
        if plain.suffix:
            resolved = str(plain.with_name(f"{plain.stem}_{safe}{plain.suffix}"))
        else:
            resolved = f"{template.rstrip('/')}_{safe}.txt"
    path = Path(resolved).expanduser()
    if not path.is_absolute():
        path = Path.cwd() / path
    return path
|
||||
|
||||
|
||||
def init_voice_log_file(surgery_id: str, settings: Settings) -> None:
    """Truncate the surgery's voice log file and write the header row.

    Shares the lifecycle of `init_consumption_log_file` (called at
    `start_surgery`). Does nothing when ``settings.voice_file_log_enabled`` is
    falsy. Parent directories are created as needed.
    """
    if settings.voice_file_log_enabled:
        log_path = resolved_voice_log_path(surgery_id, settings)
        log_path.parent.mkdir(parents=True, exist_ok=True)
        # Hold the module lock for the whole open-truncate-write sequence.
        with _lock, log_path.open("w", encoding="utf-8") as handle:
            handle.write(HEADER)
|
||||
|
||||
|
||||
def append_voice_tsv_line(surgery_id: str, line: str, settings: Settings) -> None:
    """Append one already-formatted TSV *line* to the surgery's voice log file.

    Does nothing when ``settings.voice_file_log_enabled`` is falsy. Parent
    directories are created as needed; *line* is written as given (the caller
    supplies the trailing newline).
    """
    if settings.voice_file_log_enabled:
        target = resolved_voice_log_path(surgery_id, settings)
        target.parent.mkdir(parents=True, exist_ok=True)
        # Hold the module lock for the whole open-append sequence.
        with _lock, target.open("a", encoding="utf-8") as handle:
            handle.write(line)
|
||||
|
||||
|
||||
def emit_voice_event(
    settings: Settings,
    *,
    surgery_id: str,
    source: str,
    status: str,
    confirmation_id: str,
    asr_text: str | None = None,
    resolved_label: str | None = None,
    rejected: str | bool | None = None,
    error_message: str | None = None,
    audio_object_key: str | None = None,
) -> None:
    """Record one voice-confirmation event on the terminal and in the TSV file.

    Terminal: a single grep-able ``VoiceConfirm`` loguru line, at INFO for the
    ``recognized`` / ``rejected`` statuses and at WARNING for every other
    status. File: one TSV row, written only when
    ``settings.voice_file_log_enabled`` is truthy.

    The TSV append is best-effort: an ``OSError`` while writing is logged as a
    warning and not re-raised, so a file-system problem cannot replace the
    error (or result) the caller is about to produce.

    :param source: ``wav`` | ``text`` | ``n/a``
    :param status: the audit ``status``, or a descriptive state such as
        ``minio_not_configured``
    :param rejected: ``None`` -> empty cell, bool -> ``"true"`` / ``"false"``,
        anything else -> ``str(rejected)``
    """
    if rejected is None:
        rejected_cell = ""
    elif isinstance(rejected, bool):
        rejected_cell = "true" if rejected else "false"
    else:
        rejected_cell = str(rejected)

    ts_utc = _ts_iso_utc()
    local_hint = _ts_local_for_display(settings)

    # Same message and arguments for both levels; only the severity differs.
    log_line = logger.info if status in ("recognized", "rejected") else logger.warning
    log_line(
        "VoiceConfirm local_ts={!r} surgery_id={} source={} status={} "
        "confirmation_id={} asr_text={!r} resolved_label={!r} rejected={} "
        "error={!r} audio_key={!r}",
        local_hint,
        surgery_id,
        source,
        status,
        confirmation_id,
        asr_text,
        resolved_label,
        rejected_cell,
        error_message,
        audio_object_key,
    )

    if not settings.voice_file_log_enabled:
        return

    # Cell order must match the columns of HEADER.
    cells = (
        ts_utc,
        source,
        status,
        confirmation_id,
        asr_text,
        resolved_label,
        rejected_cell,
        error_message,
        audio_object_key,
    )
    line = "\t".join(_encode_cell("" if cell is None else cell) for cell in cells) + "\n"
    try:
        append_voice_tsv_line(surgery_id, line, settings)
    except OSError as exc:
        logger.warning(
            "VoiceConfirm TSV append failed surgery_id={} error={!r}",
            surgery_id,
            exc,
        )
|
||||
@@ -9,7 +9,9 @@ from fastapi.concurrency import run_in_threadpool
|
||||
from loguru import logger
|
||||
|
||||
from app.config import Settings
|
||||
from app.services.voice_file_log import emit_voice_event
|
||||
from app.database import AsyncSessionLocal
|
||||
from app.db.models import VoiceConfirmationAudit
|
||||
from app.repositories.voice_audits import VoiceAuditRepository
|
||||
from app.services.audio_wav import WavDecodeError, wav_bytes_to_pcm16k_mono_s16le
|
||||
from app.services.baidu_speech import BaiduSpeechNotConfiguredError, BaiduSpeechService
|
||||
@@ -49,6 +51,50 @@ class VoiceConfirmationService:
|
||||
self._minio = minio
|
||||
self._audits = audits
|
||||
|
||||
def _emit_voice_trace(
    self,
    *,
    source: str,
    status: str,
    surgery_id: str,
    confirmation_id: str,
    asr_text: str | None = None,
    resolved_label: str | None = None,
    rejected: bool | str | None = None,
    error_message: str | None = None,
    audio_object_key: str | None = None,
) -> None:
    """Forward one voice event to `emit_voice_event`, passing `self._s` as the settings argument."""
    event_fields = {
        "surgery_id": surgery_id,
        "source": source,
        "status": status,
        "confirmation_id": confirmation_id,
        "asr_text": asr_text,
        "resolved_label": resolved_label,
        "rejected": rejected,
        "error_message": error_message,
        "audio_object_key": audio_object_key,
    }
    emit_voice_event(self._s, **event_fields)
|
||||
|
||||
def synthesize_prompt_to_mp3(self, text: str) -> bytes:
    """Synthesize *text* with Baidu online TTS and return the audio for direct browser playback.

    Uses the same synthesis arguments as `voice_confirm._synthesize_to_temp_mp3`.

    Raises:
        SurgeryPipelineError: ``TTS_TEXT_EMPTY`` when *text* is blank,
            ``BAIDU_NOT_CONFIGURED`` when the Baidu client is not configured,
            ``TTS_ERROR`` when the synthesis call returns a dict.
    """
    prompt = (text or "").strip()
    if prompt == "":
        raise SurgeryPipelineError("TTS_TEXT_EMPTY", "提示文本为空。")

    tts_options = {"spd": 5, "pit": 5, "vol": 9, "per": 0}
    try:
        audio = self._baidu.synthesis(prompt, "zh", 1, tts_options)
    except BaiduSpeechNotConfiguredError as exc:
        raise SurgeryPipelineError(
            "BAIDU_NOT_CONFIGURED",
            "服务端未配置百度语音,无法合成播报音频。",
        ) from exc

    # A dict return value is treated as a failure payload; anything else is returned as the audio.
    if not isinstance(audio, dict):
        return audio
    raise SurgeryPipelineError("TTS_ERROR", f"百度 TTS 失败: {audio!r}")
|
||||
|
||||
async def resolve_from_wav(
|
||||
self,
|
||||
*,
|
||||
@@ -74,18 +120,39 @@ class VoiceConfirmationService:
|
||||
options_snapshot_json=None,
|
||||
error_message="音频超过大小限制",
|
||||
)
|
||||
self._emit_voice_trace(
|
||||
source="wav",
|
||||
status="invalid_audio",
|
||||
surgery_id=surgery_id,
|
||||
confirmation_id=confirmation_id,
|
||||
error_message="音频超过大小限制",
|
||||
)
|
||||
raise SurgeryPipelineError(
|
||||
"VOICE_AUDIO_INVALID",
|
||||
f"音频大小超过限制(最大 {self._s.voice_upload_max_bytes} 字节)。",
|
||||
)
|
||||
|
||||
if not self._minio.configured:
|
||||
self._emit_voice_trace(
|
||||
source="wav",
|
||||
status="minio_not_configured",
|
||||
surgery_id=surgery_id,
|
||||
confirmation_id=confirmation_id,
|
||||
error_message="服务端未配置 MinIO,无法保存语音追溯文件。",
|
||||
)
|
||||
raise SurgeryPipelineError(
|
||||
"MINIO_NOT_CONFIGURED",
|
||||
"服务端未配置 MinIO,无法保存语音追溯文件。",
|
||||
)
|
||||
|
||||
if not self._baidu.configured:
|
||||
self._emit_voice_trace(
|
||||
source="wav",
|
||||
status="baidu_not_configured",
|
||||
surgery_id=surgery_id,
|
||||
confirmation_id=confirmation_id,
|
||||
error_message="服务端未配置百度语音,无法进行语音识别。",
|
||||
)
|
||||
raise SurgeryPipelineError(
|
||||
"BAIDU_NOT_CONFIGURED",
|
||||
"服务端未配置百度语音,无法进行语音识别。",
|
||||
@@ -95,6 +162,13 @@ class VoiceConfirmationService:
|
||||
surgery_id, confirmation_id
|
||||
)
|
||||
if pending is None:
|
||||
self._emit_voice_trace(
|
||||
source="wav",
|
||||
status="confirmation_not_found",
|
||||
surgery_id=surgery_id,
|
||||
confirmation_id=confirmation_id,
|
||||
error_message="未找到该待确认项或已处理。",
|
||||
)
|
||||
raise SurgeryPipelineError(
|
||||
"CONFIRMATION_NOT_FOUND",
|
||||
"未找到该待确认项或已处理。",
|
||||
@@ -133,6 +207,13 @@ class VoiceConfirmationService:
|
||||
error_message=str(exc),
|
||||
)
|
||||
self._sessions.record_voice_trace(surgery_id, asr_text=None, error=str(exc))
|
||||
self._emit_voice_trace(
|
||||
source="wav",
|
||||
status="upload_failed",
|
||||
surgery_id=surgery_id,
|
||||
confirmation_id=confirmation_id,
|
||||
error_message=str(exc),
|
||||
)
|
||||
raise SurgeryPipelineError(
|
||||
"MINIO_UPLOAD_FAILED",
|
||||
f"语音文件上传失败:{exc}",
|
||||
@@ -155,6 +236,14 @@ class VoiceConfirmationService:
|
||||
error_message=str(exc),
|
||||
)
|
||||
self._sessions.record_voice_trace(surgery_id, asr_text=None, error=str(exc))
|
||||
self._emit_voice_trace(
|
||||
source="wav",
|
||||
status="invalid_audio",
|
||||
surgery_id=surgery_id,
|
||||
confirmation_id=confirmation_id,
|
||||
error_message=str(exc),
|
||||
audio_object_key=stored.object_key,
|
||||
)
|
||||
raise SurgeryPipelineError(
|
||||
"VOICE_AUDIO_INVALID",
|
||||
f"无法解析 WAV 音频:{exc}",
|
||||
@@ -165,6 +254,14 @@ class VoiceConfirmationService:
|
||||
self._baidu.asr, pcm, "pcm", 16000, None
|
||||
)
|
||||
except BaiduSpeechNotConfiguredError as exc:
|
||||
self._emit_voice_trace(
|
||||
source="wav",
|
||||
status="baidu_not_configured",
|
||||
surgery_id=surgery_id,
|
||||
confirmation_id=confirmation_id,
|
||||
error_message=str(exc),
|
||||
audio_object_key=stored.object_key,
|
||||
)
|
||||
raise SurgeryPipelineError(
|
||||
"BAIDU_NOT_CONFIGURED",
|
||||
str(exc),
|
||||
@@ -184,6 +281,14 @@ class VoiceConfirmationService:
|
||||
error_message=str(exc),
|
||||
)
|
||||
self._sessions.record_voice_trace(surgery_id, asr_text=None, error=str(exc))
|
||||
self._emit_voice_trace(
|
||||
source="wav",
|
||||
status="asr_failed",
|
||||
surgery_id=surgery_id,
|
||||
confirmation_id=confirmation_id,
|
||||
error_message=str(exc),
|
||||
audio_object_key=stored.object_key,
|
||||
)
|
||||
raise SurgeryPipelineError(
|
||||
"VOICE_ASR_FAILED",
|
||||
f"语音识别调用失败:{exc}",
|
||||
@@ -205,6 +310,14 @@ class VoiceConfirmationService:
|
||||
error_message=msg,
|
||||
)
|
||||
self._sessions.record_voice_trace(surgery_id, asr_text=None, error=msg)
|
||||
self._emit_voice_trace(
|
||||
source="wav",
|
||||
status="asr_failed",
|
||||
surgery_id=surgery_id,
|
||||
confirmation_id=confirmation_id,
|
||||
error_message=msg,
|
||||
audio_object_key=stored.object_key,
|
||||
)
|
||||
raise SurgeryPipelineError("VOICE_ASR_FAILED", msg)
|
||||
|
||||
if asr_payload.get("err_no") != 0:
|
||||
@@ -226,6 +339,14 @@ class VoiceConfirmationService:
|
||||
error_message=msg,
|
||||
)
|
||||
self._sessions.record_voice_trace(surgery_id, asr_text=None, error=msg)
|
||||
self._emit_voice_trace(
|
||||
source="wav",
|
||||
status="asr_failed",
|
||||
surgery_id=surgery_id,
|
||||
confirmation_id=confirmation_id,
|
||||
error_message=msg,
|
||||
audio_object_key=stored.object_key,
|
||||
)
|
||||
raise SurgeryPipelineError("VOICE_ASR_FAILED", msg)
|
||||
|
||||
results = asr_payload.get("result")
|
||||
@@ -252,6 +373,14 @@ class VoiceConfirmationService:
|
||||
error_message=msg,
|
||||
)
|
||||
self._sessions.record_voice_trace(surgery_id, asr_text=None, error=msg)
|
||||
self._emit_voice_trace(
|
||||
source="wav",
|
||||
status="asr_failed",
|
||||
surgery_id=surgery_id,
|
||||
confirmation_id=confirmation_id,
|
||||
error_message=msg,
|
||||
audio_object_key=stored.object_key,
|
||||
)
|
||||
raise SurgeryPipelineError("VOICE_ASR_FAILED", msg)
|
||||
|
||||
self._sessions.record_voice_trace(surgery_id, asr_text=text, error=None)
|
||||
@@ -269,10 +398,24 @@ class VoiceConfirmationService:
|
||||
)
|
||||
|
||||
if not rejected and not chosen:
|
||||
msg = (
|
||||
"无法从语音中匹配候选项或本台手术候选清单中的耗材名称,"
|
||||
"请重试或说「不是」否认全部"
|
||||
_, retry_remaining = await self._sessions.record_voice_parse_failure(
|
||||
surgery_id, confirmation_id
|
||||
)
|
||||
base = (
|
||||
"无法从语音中匹配候选项或本台手术候选清单中的耗材名称,"
|
||||
"请重试或说「不是」否认全部。"
|
||||
)
|
||||
if retry_remaining > 0:
|
||||
msg = (
|
||||
f"{base} 本次未听清或未能解析,"
|
||||
f"您还可重试 {retry_remaining} 次,"
|
||||
"请说「第一个」「第二个」等序号或候选项全名。"
|
||||
)
|
||||
else:
|
||||
msg = (
|
||||
f"{base} 本轮重试机会已用完,"
|
||||
"请再清晰地说序号/全名,或说「不是」否认全部。"
|
||||
)
|
||||
await self._persist_audit(
|
||||
surgery_id=surgery_id,
|
||||
confirmation_id=confirmation_id,
|
||||
@@ -287,7 +430,23 @@ class VoiceConfirmationService:
|
||||
error_message=msg,
|
||||
)
|
||||
self._sessions.record_voice_trace(surgery_id, asr_text=text, error=msg)
|
||||
raise SurgeryPipelineError("VOICE_PARSE_FAILED", msg)
|
||||
self._emit_voice_trace(
|
||||
source="wav",
|
||||
status="parse_failed",
|
||||
surgery_id=surgery_id,
|
||||
confirmation_id=confirmation_id,
|
||||
asr_text=text,
|
||||
error_message=msg,
|
||||
audio_object_key=stored.object_key,
|
||||
)
|
||||
raise SurgeryPipelineError(
|
||||
"VOICE_PARSE_FAILED",
|
||||
msg,
|
||||
extra={
|
||||
"confirmation_id": confirmation_id,
|
||||
"retry_remaining": retry_remaining,
|
||||
},
|
||||
)
|
||||
|
||||
await self._sessions.resolve_pending_confirmation(
|
||||
surgery_id,
|
||||
@@ -310,6 +469,16 @@ class VoiceConfirmationService:
|
||||
options_snapshot_json=options_snapshot,
|
||||
error_message=None,
|
||||
)
|
||||
self._emit_voice_trace(
|
||||
source="wav",
|
||||
status=final_status,
|
||||
surgery_id=surgery_id,
|
||||
confirmation_id=confirmation_id,
|
||||
asr_text=text,
|
||||
resolved_label=chosen if not rejected else None,
|
||||
rejected=rejected,
|
||||
audio_object_key=stored.object_key,
|
||||
)
|
||||
|
||||
if rejected:
|
||||
return VoiceResolveResult(
|
||||
@@ -327,6 +496,186 @@ class VoiceConfirmationService:
|
||||
message="已确认并记一条消耗。",
|
||||
)
|
||||
|
||||
async def resolve_from_recognized_text(
    self,
    *,
    surgery_id: str,
    confirmation_id: str,
    recognized_text: str,
) -> VoiceResolveResult:
    """Resolve a pending confirmation from text already recognized on the client.

    Intended for text produced by e.g. the browser's Web Speech API: no MinIO
    upload and no Baidu ASR call happen here, and the parsing rules are the
    same as in `resolve_from_wav`.

    Raises:
        SurgeryPipelineError: ``CONFIRMATION_NOT_FOUND`` when there is no such
            pending confirmation, ``VOICE_TEXT_EMPTY`` when the text is blank,
            ``VOICE_PARSE_FAILED`` (with ``confirmation_id`` and
            ``retry_remaining`` in ``extra``) when nothing could be matched.
    """
    pending = self._sessions.get_pending_confirmation_by_id(surgery_id, confirmation_id)
    if pending is None:
        not_found = "未找到该待确认项或已处理。"
        self._emit_voice_trace(
            source="text",
            status="confirmation_not_found",
            surgery_id=surgery_id,
            confirmation_id=confirmation_id,
            error_message=not_found,
        )
        raise SurgeryPipelineError("CONFIRMATION_NOT_FOUND", not_found)

    stripped_labels = (label.strip() for label, _ in pending.options)
    option_labels = [label for label in stripped_labels if label]
    options_snapshot = json.dumps(
        [{"label": label, "confidence": confidence} for label, confidence in pending.options],
        ensure_ascii=False,
    )

    # Keyword arguments shared by every audit row / trace event of this method.
    # The text path carries no audio, so all audio_* audit fields are None.
    audit_common = {
        "surgery_id": surgery_id,
        "confirmation_id": confirmation_id,
        "audio_object_key": None,
        "audio_content_type": None,
        "audio_size_bytes": None,
        "audio_sha256": None,
        "options_snapshot_json": options_snapshot,
    }
    trace_common = {
        "source": "text",
        "surgery_id": surgery_id,
        "confirmation_id": confirmation_id,
    }

    text = (recognized_text or "").strip()
    if not text:
        empty_note = "客户端识别文本为空"
        await self._persist_audit(
            status="client_stt_empty",
            asr_text=None,
            resolved_label=None,
            error_message=empty_note,
            **audit_common,
        )
        self._sessions.record_voice_trace(surgery_id, asr_text=None, error="empty text")
        self._emit_voice_trace(
            status="client_stt_empty",
            error_message=empty_note,
            **trace_common,
        )
        raise SurgeryPipelineError("VOICE_TEXT_EMPTY", "recognized_text 为空。")

    self._sessions.record_voice_trace(surgery_id, asr_text=text, error=None)

    # A rejection phrase wins; otherwise try the pending options first and only
    # then the surgery-wide candidate list.
    rejected = is_rejection_phrase(text)
    chosen: str | None = None
    if not rejected:
        chosen = parse_voice_choice(text, option_labels)
        if chosen is None:
            chosen = match_voice_choice_against_candidates(
                text,
                self._sessions.get_surgery_candidate_consumables(surgery_id),
            )

    if not (rejected or chosen):
        _, retry_remaining = await self._sessions.record_voice_parse_failure(
            surgery_id, confirmation_id
        )
        base = (
            "无法从文本中匹配候选项或本台手术候选清单中的耗材名称,"
            "请重试或说「不是」否认全部。"
        )
        if retry_remaining > 0:
            hint = (
                "本次未能解析,"
                f"您还可重试 {retry_remaining} 次,"
                "请输入「第一个」「第二个」等或候选项全名。"
            )
        else:
            hint = (
                "本轮重试机会已用完,"
                "请再输入序号/全名,或说「不是」否认全部。"
            )
        msg = f"{base} {hint}"
        await self._persist_audit(
            status="client_stt_parse_failed",
            asr_text=text,
            resolved_label=None,
            error_message=msg,
            **audit_common,
        )
        self._sessions.record_voice_trace(surgery_id, asr_text=text, error=msg)
        self._emit_voice_trace(
            status="client_stt_parse_failed",
            asr_text=text,
            error_message=msg,
            **trace_common,
        )
        raise SurgeryPipelineError(
            "VOICE_PARSE_FAILED",
            msg,
            extra={
                "confirmation_id": confirmation_id,
                "retry_remaining": retry_remaining,
            },
        )

    await self._sessions.resolve_pending_confirmation(
        surgery_id,
        confirmation_id,
        chosen_label=chosen,
        rejected=rejected,
    )

    final_status = "rejected" if rejected else "recognized"
    final_label = None if rejected else chosen
    await self._persist_audit(
        status=final_status,
        asr_text=text,
        resolved_label=final_label,
        error_message=None,
        **audit_common,
    )
    self._emit_voice_trace(
        status=final_status,
        asr_text=text,
        resolved_label=final_label,
        rejected=rejected,
        **trace_common,
    )

    if not rejected:
        return VoiceResolveResult(
            resolved_label=chosen,
            rejected=False,
            asr_text=text,
            audio_object_key=None,
            message="已确认并记一条消耗。",
        )
    return VoiceResolveResult(
        resolved_label=None,
        rejected=True,
        asr_text=text,
        audio_object_key=None,
        message="已否认全部候选,未记消耗。",
    )
|
||||
|
||||
async def list_voice_audits_for_surgery(
    self,
    surgery_id: str,
    *,
    limit: int = 50,
    offset: int = 0,
) -> tuple[list[VoiceConfirmationAudit], int]:
    """Read one page of `voice_confirmation_audits` rows for a surgery (internal queries and reports).

    Opens a fresh `AsyncSessionLocal` session and returns whatever
    `self._audits.list_by_surgery` returns for the given *limit* / *offset*.
    NOTE(review): the int in the returned tuple is presumably the total row
    count; confirm against the repository.
    """
    async with AsyncSessionLocal() as db_session:
        page = await self._audits.list_by_surgery(
            db_session,
            surgery_id,
            limit=limit,
            offset=offset,
        )
        return page
|
||||
|
||||
async def _persist_audit(
|
||||
self,
|
||||
*,
|
||||
|
||||
Reference in New Issue
Block a user