feat: 语音确认、联调与运维增强

- 语音:序数解析(第一个/第二个等)、解析失败计数与 API detail.retry_remaining;
  百度 ASR 固定 dev_pid 为普通话;SurgeryPipelineError 支持 extra 并入 HTTP detail。
- Demo:demo 路由与假 RTSP、客户端 index 与 README;BackendResolver 与配置调整。
- 可观测:消耗 TSV 日志、语音文件日志、终端 Markdown 辅助;相关测试与依赖更新。
- 注意:.env 仍被 gitignore,本地密钥不会进入本提交。

Made-with: Cursor
This commit is contained in:
Kevin
2026-04-23 14:24:20 +08:00
parent 42720f81cf
commit 0c05463617
39 changed files with 3030 additions and 143 deletions

View File

@@ -52,8 +52,13 @@ class BaiduSpeechService:
rate: int = 16000,
options: dict[str, Any] | None = None,
) -> dict[str, Any]:
"""短语音识别。返回百度 JSON含 `err_no`、`result` 等)。"""
return self._client_or_raise().asr(speech, format, rate, options)
"""短语音识别。返回百度 JSON含 `err_no`、`result` 等)。
固定使用普通话模型(`dev_pid` 来自配置),避免未传参时误用服务端默认导致偏英语等结果。
"""
merged: dict[str, Any] = dict(options or {})
merged["dev_pid"] = int(settings.baidu_speech_asr_dev_pid)
return self._client_or_raise().asr(speech, format, rate, merged)
def synthesis(
self,

View File

@@ -0,0 +1,241 @@
"""每例手术一个文本文件(制表符列):`start_surgery` 时截断并写表头,每次时间窗识别**追加**一行。终端 Markdown 中时间戳为可读形式;落盘行内仍为 ISO 便于程序解析。
时间戳:在拉流起点记录 `time.time()`,与 `time.monotonic()` 时间窗对齐。直播 RTSP 经 OpenCV 一般无可靠绝对时码,以本机接收时刻为准。
"""
from __future__ import annotations
import re
import threading
from datetime import datetime, timezone
from pathlib import Path
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
from loguru import logger
from app.config import settings
from app.services.consumable_vision_algorithm import ClsTop3
from app.terminal_markdown import print_markdown_stderr
# 制表符分隔;时间范围用 U+2013 连接Top2/3 仅名称;本窗消耗数量恒为 1
HEADER = "物品id\t物品名称\tTop2物品名称\tTop3物品名称\t消耗数量\t医生id\t时间戳\n"
_RANGE_SEP = "\u2013" # en dash与样例 `00:00:00.00000:00:45.000` 一致
_lock = threading.Lock()
def _consumption_tzinfo():
    """Resolve the tzinfo used for consumption-log timestamps.

    Empty setting -> host local timezone (UTC if undetectable);
    unknown IANA name -> warn and fall back to UTC.
    """
    configured = (settings.consumption_log_timezone or "").strip()
    if not configured:
        local = datetime.now().astimezone().tzinfo
        return local if local is not None else timezone.utc
    try:
        return ZoneInfo(configured)
    except ZoneInfoNotFoundError:
        logger.warning("无效的 consumption_log_timezone={!r},回退为 UTC", configured)
        return timezone.utc
def format_consumption_timestamp(
    camera_id: str,
    wall_start_epoch: float,
    wall_end_epoch: float,
) -> str:
    """For disk rows: wall-clock epochs + configured tz -> `camXX@ISO8601<en-dash>ISO8601`."""
    tz = _consumption_tzinfo()
    start_dt = datetime.fromtimestamp(wall_start_epoch, tz=tz)
    end_dt = datetime.fromtimestamp(wall_end_epoch, tz=tz)
    label = short_camera_label(camera_id)
    start_iso = start_dt.isoformat(timespec="milliseconds")
    end_iso = end_dt.isoformat(timespec="milliseconds")
    return f"{label}@{start_iso}{_RANGE_SEP}{end_iso}"
def format_consumption_timestamp_readable(
    camera_id: str,
    wall_start_epoch: float,
    wall_end_epoch: float,
) -> str:
    """Terminal (Rich) only: local readable range without `T`, plus short camera label."""
    tz = _consumption_tzinfo()
    label = short_camera_label(camera_id)

    def _human(epoch: float) -> str:
        d = datetime.fromtimestamp(epoch, tz=tz)
        return d.strftime("%Y-%m-%d %H:%M:%S") + f".{d.microsecond // 1000:03d}"

    return f"{_human(wall_start_epoch)} {_RANGE_SEP} {_human(wall_end_epoch)} · {label}"
def short_camera_label(camera_id: str) -> str:
    """Normalize a camera id to a short `camNN` label; fall back to sanitized text."""
    raw = (camera_id or "").strip()
    for pattern in (r"^or-cam-(\d+)$", r"^cam-?0*(\d+)$"):
        hit = re.match(pattern, raw, re.IGNORECASE)
        if hit:
            return f"cam{int(hit.group(1)):02d}"
    # Unknown scheme: keep word characters and dashes, capped at 12 chars.
    cleaned = re.sub(r"[^\w-]", "", raw)[:12]
    return cleaned or "cam"
def _encode_cell(value: str) -> str:
s = (value or "").replace("\r", " ").replace("\n", " ").replace("\t", " ")
return s
def _item_id_for_row(name: str, pid: str, name_to_code: dict[str, str]) -> str:
p = (pid or "").strip()
if p:
return p
n = (name or "").strip()
if n in name_to_code:
return (name_to_code.get(n) or n).strip()
return n
def build_tsv_line(
    *,
    name_to_code: dict[str, str],
    best: ClsTop3,
    doctor_id: str,
    camera_id: str,
    wall_start_epoch: float,
    wall_end_epoch: float,
) -> str:
    """Render one recognition window as a single TSV row (trailing newline included)."""
    item_id = _item_id_for_row(best.t1_name, best.t1_pid, name_to_code)
    # Matches the historical sample format: Top1 is "name confidence" with 4 decimals.
    top1_cell = f"{(best.t1_name or '').strip()} {best.t1_conf:.4f}".strip()
    timestamp = format_consumption_timestamp(camera_id, wall_start_epoch, wall_end_epoch)
    cells = (
        _encode_cell(item_id),
        _encode_cell(top1_cell),
        _encode_cell((best.t2_name or "").strip()),
        _encode_cell((best.t3_name or "").strip()),
        "1",  # per-window consumption count is fixed at 1
        _encode_cell(doctor_id),
        _encode_cell(timestamp),
    )
    return "\t".join(cells) + "\n"
def _safe_surgery_path_segment(surgery_id: str) -> str:
s = (surgery_id or "unknown").strip() or "unknown"
s = re.sub(r"[^\w\-.@]", "_", s)
return s[:200] if len(s) > 200 else s
def resolved_consumption_log_path(surgery_id: str) -> Path:
    """Absolute per-surgery TSV path from the configured template.

    A `{surgery_id}` placeholder is substituted; otherwise the sanitized id is
    injected before the file extension (or appended when there is none).
    """
    template = (settings.consumption_tsv_log_path or "logs/consumption_{surgery_id}.txt").strip()
    segment = _safe_surgery_path_segment(surgery_id)
    if "{surgery_id}" in template:
        candidate = template.replace("{surgery_id}", segment)
    else:
        base = Path(template)
        if base.suffix:
            candidate = str(base.with_name(f"{base.stem}_{segment}{base.suffix}"))
        else:
            candidate = f"{template.rstrip('/')}_{segment}.txt"
    resolved = Path(candidate).expanduser()
    return resolved if resolved.is_absolute() else Path.cwd() / resolved
def init_consumption_log_file(surgery_id: str) -> None:
    """On surgery start: truncate this surgery's TSV file and write the header once."""
    if not settings.consumption_tsv_log_enabled:
        return
    target = resolved_consumption_log_path(surgery_id)
    target.parent.mkdir(parents=True, exist_ok=True)
    with _lock, target.open("w", encoding="utf-8") as f:
        f.write(HEADER)
def append_consumption_tsv_line(surgery_id: str, line: str) -> None:
    """Append one pre-rendered TSV row to this surgery's log (no-op when disabled)."""
    if not settings.consumption_tsv_log_enabled:
        return
    target = resolved_consumption_log_path(surgery_id)
    target.parent.mkdir(parents=True, exist_ok=True)
    with _lock, target.open("a", encoding="utf-8") as f:
        f.write(line)
def _md_cell(value: str) -> str:
"""避免破坏 Markdown 表格的 | 与换行。"""
s = (value or "").replace("\r", " ").replace("\n", " ").replace("|", "")
return s
def build_consumption_markdown(
    *,
    name_to_code: dict[str, str],
    best: ClsTop3,
    doctor_id: str,
    camera_id: str,
    wall_start_epoch: float,
    wall_end_epoch: float,
) -> str:
    """Render one recognition window as a one-row Markdown table for the terminal.

    Top1 carries id/name/confidence; Top2/Top3 are name-only; the consumed
    count is fixed at 1 (one row per recognition window).
    """
    id1 = _item_id_for_row(best.t1_name, best.t1_pid, name_to_code)
    n1 = (best.t1_name or "").strip()
    has2 = bool((best.t2_name or "").strip())
    has3 = bool((best.t3_name or "").strip())
    n2 = (best.t2_name or "").strip() if has2 else ""
    n3 = (best.t3_name or "").strip() if has3 else ""
    # NOTE(review): `dash` is an empty string here, which makes the
    # `if has2 else dash` fallbacks below equivalent to using n2/n3 directly;
    # if a placeholder glyph (e.g. an em dash) was intended, confirm the
    # literal survived encoding.
    dash = ""
    ts = format_consumption_timestamp_readable(camera_id, wall_start_epoch, wall_end_epoch)
    return "\n".join(
        [
            "| Top1 物品id | Top1 物品名称 | Top1 置信度 | Top2 物品名称 | Top3 物品名称 | 消耗数量 | 医生id | 时间戳 |",
            "| :--- | :--- | ---: | :--- | :--- | ---: | :--- | :--- |",
            "| {} | {} | {:.4f} | {} | {} | 1 | {} | {} |".format(
                _md_cell(id1),
                _md_cell(n1),
                best.t1_conf,
                _md_cell(n2) if has2 else dash,
                _md_cell(n3) if has3 else dash,
                _md_cell(doctor_id),
                _md_cell(ts),
            ),
            # Trailing empty element yields a final newline for clean printing.
            "",
        ]
    )
def append_consumption_window(
    *,
    surgery_id: str,
    name_to_code: dict[str, str],
    best: ClsTop3,
    doctor_id: str,
    camera_id: str,
    wall_start_epoch: float,
    wall_end_epoch: float,
) -> None:
    """Fan one recognition window out to the TSV file and/or the terminal Markdown sink."""
    tsv_on = settings.consumption_tsv_log_enabled
    md_on = settings.consumption_log_markdown_terminal
    if not (tsv_on or md_on):
        return
    common = dict(
        name_to_code=name_to_code,
        best=best,
        doctor_id=doctor_id,
        camera_id=camera_id,
        wall_start_epoch=wall_start_epoch,
        wall_end_epoch=wall_end_epoch,
    )
    if tsv_on:
        append_consumption_tsv_line(surgery_id, build_tsv_line(**common))
    if md_on:
        print_markdown_stderr(build_consumption_markdown(**common))

View File

@@ -10,6 +10,8 @@ from app.schemas import (
SurgeryPendingConfirmationResponse,
)
from app.services.video.session_manager import CameraSessionManager
from fastapi.concurrency import run_in_threadpool
from app.services.voice_resolution import VoiceConfirmationService, VoiceResolveResult
from app.surgery_errors import SurgeryPipelineError
@@ -79,6 +81,18 @@ class SurgeryPipeline:
def voice_status(self, surgery_id: str) -> dict[str, object] | None:
    """Return the session manager's voice-status snapshot for `surgery_id`.

    NOTE(review): presumably returns None when the surgery is not active —
    confirm against `CameraSessionManager.voice_status`.
    """
    return self._sessions.voice_status(surgery_id)
async def list_voice_audits(
    self,
    surgery_id: str,
    *,
    limit: int = 50,
    offset: int = 0,
):
    """Paginate the persisted `voice_confirmation_audits` rows for one surgery.

    Used for tracing / reconciliation / reporting; `limit`/`offset` are
    forwarded unchanged to the voice service.
    """
    return await self._voice.list_voice_audits_for_surgery(
        surgery_id, limit=limit, offset=offset
    )
def get_pending_confirmation_for_client(
self, surgery_id: str
) -> SurgeryPendingConfirmationResponse | None:
@@ -114,3 +128,35 @@ class SurgeryPipeline:
filename=filename,
content_type=content_type,
)
async def resolve_pending_confirmation_from_client_text(
    self,
    surgery_id: str,
    confirmation_id: str,
    recognized_text: str,
) -> VoiceResolveResult:
    """Resolve a pending confirmation from client-side (e.g. browser) ASR text.

    Applies the same parsing rules as the WAV path, but requires neither
    MinIO nor Baidu since recognition already happened on the client.
    """
    return await self._voice.resolve_from_recognized_text(
        surgery_id=surgery_id,
        confirmation_id=confirmation_id,
        recognized_text=recognized_text,
    )
async def get_pending_prompt_audio_mp3(
    self,
    surgery_id: str,
    confirmation_id: str,
) -> bytes:
    """Synthesize the pending confirmation's `prompt_text` to MP3 via Baidu TTS.

    Raises SurgeryPipelineError(CONFIRMATION_NOT_FOUND) when the
    confirmation is missing or no longer pending. The blocking TTS call runs
    in a thread pool.
    """
    pending = self._sessions.get_pending_confirmation_by_id(
        surgery_id, confirmation_id
    )
    missing = pending is None or pending.status != "pending"
    if missing:
        raise SurgeryPipelineError(
            "CONFIRMATION_NOT_FOUND",
            "未找到该待确认项或已处理。",
        )
    return await run_in_threadpool(
        self._voice.synthesize_prompt_to_mp3, pending.prompt_text
    )

View File

@@ -0,0 +1,242 @@
"""Start/stop local fake RTSP streams (MediaMTX + ffmpeg) for dev orchestration."""
from __future__ import annotations
import json
import os
import shutil
import socket
import subprocess
import time
import uuid
from dataclasses import dataclass, field
from pathlib import Path
from typing import ClassVar
from loguru import logger
MEDIAMTX_IMAGE = os.environ.get("MEDIAMTX_DOCKER_IMAGE", "bluenviron/mediamtx:latest")
CONTAINER_NAME_PREFIX = "orm-fake-rtsp-"
# 等待 127.0.0.1:host_port 可连接(避免开录时 Connection refused
_MEDIAMTX_TCP_READY_SEC = float(os.environ.get("MEDIAMTX_TCP_READY_SEC", "30"))
def _wait_tcp_listening(host: str, port: int, *, total_timeout: float) -> None:
    """Block until something accepts TCP on host:port (MediaMTX mapped port ready).

    Polls with short (1.5s) connect attempts every 0.2s until `total_timeout`
    (floored at 1s) elapses; on timeout raises RuntimeError chained to the
    last OSError when one was seen.
    """
    deadline = time.monotonic() + max(1.0, total_timeout)
    last: OSError | None = None
    while time.monotonic() < deadline:
        try:
            with socket.create_connection((host, port), timeout=1.5):
                logger.info("RTSP port ready {}:{}", host, port)
                return
        except OSError as exc:
            last = exc
            time.sleep(0.2)
    # NOTE(review): the hint text below looks garbled (likely lost CJK
    # punctuation in transit) — verify the intended wording before release.
    hint = " MediaMTX 未监听:检查 docker 是否起成功、18554 是否被占用(orm-fake-rtsp-*) 已 docker ps。"
    if last is not None:
        raise RuntimeError(
            f"等待 {host}:{port} 可连接超时({total_timeout:g}s: {last}{hint}"
        ) from last
    raise RuntimeError(
        f"等待 {host}:{port} 可连接超时({total_timeout:g}s.{hint}"
    )
@dataclass
class StreamSpec:
    """One fake stream: a local video file published under an RTSP path segment."""

    camera_id: str
    file_path: Path
    rtsp_path: str  # last URL segment, e.g. "demo1"

    def __post_init__(self) -> None:
        # Normalize: trim whitespace and slashes; blank input collapses to "demo".
        cleaned = (self.rtsp_path or "demo").strip().strip("/")
        self.rtsp_path = cleaned or "demo"
@dataclass
class SyntheticRtspRun:
    """Holds Popen handles and the docker container for one multi-stream session."""

    container_name: str
    procs: list[subprocess.Popen] = field(default_factory=list)
    # Temp dir for uploaded video files; removed on stop().
    work_dir: Path | None = None

    def stop(self) -> None:
        """Terminate publishers, remove the temp dir, force-remove the container.

        Best-effort and idempotent: each step tolerates failure so cleanup
        always runs to completion.
        """
        for p in self.procs:
            if p.poll() is None:
                p.terminate()
                try:
                    p.wait(timeout=5.0)
                except subprocess.TimeoutExpired:
                    p.kill()
        self.procs.clear()
        if self.work_dir is not None and self.work_dir.is_dir():
            # ignore_errors=True means rmtree cannot raise, so the original
            # try/except OSError around it was dead code and is dropped.
            shutil.rmtree(self.work_dir, ignore_errors=True)
        # Single clear (the original assigned None twice).
        self.work_dir = None
        if shutil.which("docker") is not None:
            try:
                subprocess.run(
                    ["docker", "rm", "-f", self.container_name],
                    capture_output=True,
                    timeout=30,
                )
            except (OSError, subprocess.SubprocessError) as exc:
                logger.debug("docker rm: {}", exc)
class SyntheticRtspManager:
    """Process-wide singleton that orchestrates one fake-RTSP session at a time.

    A session = one MediaMTX docker container plus one looping ffmpeg
    publisher per stream. Class-level state means only one session can exist
    per process.
    """

    # Lazily created singleton (see `get`).
    _instance: ClassVar[SyntheticRtspManager | None] = None
    # Currently running session, if any; replaced by `start`/`stop_active`.
    _active: ClassVar[SyntheticRtspRun | None] = None

    @classmethod
    def get(cls) -> SyntheticRtspManager:
        """Return the singleton, creating it on first use."""
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance

    @classmethod
    def active_run(cls) -> SyntheticRtspRun | None:
        """Return the currently active run, or None when nothing is running."""
        return cls._active

    @classmethod
    def _cleanup_prefixed_containers(cls) -> None:
        """Remove stale MediaMTX containers left by earlier runs/reloads."""
        if shutil.which("docker") is None:
            return
        try:
            listed = subprocess.run(
                [
                    "docker",
                    "ps",
                    "-aq",
                    "--filter",
                    f"name={CONTAINER_NAME_PREFIX}",
                ],
                capture_output=True,
                text=True,
                timeout=30,
                check=False,
            )
        except (OSError, subprocess.SubprocessError) as exc:
            logger.debug("docker ps stale cleanup: {}", exc)
            return
        ids = [x.strip() for x in (listed.stdout or "").splitlines() if x.strip()]
        if not ids:
            return
        try:
            subprocess.run(
                ["docker", "rm", "-f", *ids],
                capture_output=True,
                text=True,
                timeout=60,
                check=False,
            )
            logger.info("Removed stale fake RTSP containers: {}", ids)
        except (OSError, subprocess.SubprocessError) as exc:
            logger.debug("docker rm stale cleanup: {}", exc)

    @classmethod
    def stop_active(cls) -> None:
        """Stop the active run (if any), then sweep any stale prefixed containers."""
        if cls._active is not None:
            cls._active.stop()
            cls._active = None
        cls._cleanup_prefixed_containers()

    def start(
        self,
        streams: list[StreamSpec],
        *,
        host_port: int,
        work_dir: Path,
    ) -> tuple[SyntheticRtspRun, dict[str, str]]:
        """Start MediaMTX and one ffmpeg per stream. Returns (run, url_by_camera).

        Raises ValueError on empty/invalid streams, RuntimeError when
        ffmpeg/docker are missing or the container fails to start, and
        FileNotFoundError for a missing source video. Any earlier active
        session is stopped first.
        """
        if not streams:
            raise ValueError("no streams")
        if not shutil.which("ffmpeg"):
            raise RuntimeError("ffmpeg not in PATH")
        if not shutil.which("docker"):
            raise RuntimeError("docker not in PATH (required to run MediaMTX)")
        self.stop_active()
        # Validate everything up front so we fail before touching docker.
        for s in streams:
            if not s.file_path.is_file():
                raise FileNotFoundError(str(s.file_path))
            for ch in s.rtsp_path:
                if ch not in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_.-":
                    raise ValueError(f"invalid RTSP path segment: {s.rtsp_path!r}")
        container = CONTAINER_NAME_PREFIX + uuid.uuid4().hex[:12]
        cmd = [
            "docker", "run", "-d", "--name", container,
            "-p", f"127.0.0.1:{host_port}:8554",
            MEDIAMTX_IMAGE,
        ]
        r = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
        if r.returncode != 0:
            # The container may have been created even on failure — remove it.
            try:
                subprocess.run(
                    ["docker", "rm", "-f", container],
                    capture_output=True,
                    text=True,
                    timeout=30,
                    check=False,
                )
            except (OSError, subprocess.SubprocessError) as exc:
                logger.debug("docker rm failed container cleanup: {}", exc)
            err = (r.stderr or r.stdout or "").strip()
            raise RuntimeError(f"MediaMTX docker failed: {err}")
        run = SyntheticRtspRun(container_name=container)
        url_map: dict[str, str] = {}
        time.sleep(0.5)
        # Wait for the mapped port so publishers don't hit "connection refused".
        _wait_tcp_listening("127.0.0.1", host_port, total_timeout=_MEDIAMTX_TCP_READY_SEC)
        run.work_dir = work_dir
        try:
            for s in streams:
                dest = f"rtsp://127.0.0.1:{host_port}/{s.rtsp_path}"
                url_map[s.camera_id] = dest
                # Loop the file forever (-stream_loop -1) at native rate (-re),
                # copying codecs straight into RTSP over TCP.
                pub = [
                    "ffmpeg", "-hide_banner", "-loglevel", "warning",
                    "-re", "-stream_loop", "-1",
                    "-i", str(s.file_path),
                    "-c", "copy", "-f", "rtsp", "-rtsp_transport", "tcp", dest,
                ]
                p = subprocess.Popen(pub)  # noqa: S603
                run.procs.append(p)
        except Exception:
            # Roll back anything already started before re-raising.
            run.stop()
            raise
        # Give ffmpeg a moment to connect to MediaMTX, reducing the chance that
        # OpenCV connects before the first frame and DESCRIBE fails.
        time.sleep(0.4)
        self._active = run
        return run, url_map
def write_rtsp_url_json_file(
    path: Path,
    url_map: dict[str, str],
    *,
    replace_host: str,
) -> None:
    """Atomically write the camera->RTSP-URL map as JSON.

    When `replace_host` is set (and not loopback), the first `127.0.0.1`
    occurrence in each URL is rewritten, e.g. to `host.docker.internal`.
    """
    if replace_host not in ("", "127.0.0.1"):
        payload = {
            cam: url.replace("127.0.0.1", replace_host, 1)
            for cam, url in url_map.items()
        }
    else:
        payload = url_map
    path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(payload, ensure_ascii=False, indent=2, sort_keys=True) + "\n"
    # Write-then-rename so readers never observe a partial file.
    staging = path.with_name(path.name + ".tmp")
    staging.write_text(serialized, encoding="utf-8")
    staging.replace(path)
    logger.info("Wrote RTSP map to {}", path)

View File

@@ -21,7 +21,6 @@ class BackendResolver:
) -> None:
self._s = settings
self._hik = hikvision_runtime
self._rtsp_urls_map = settings.video_rtsp_url_map()
def _parse_json_object(self, raw: str) -> dict[str, Any]:
raw = (raw or "").strip()
@@ -55,8 +54,10 @@ class BackendResolver:
return VideoBackendKind.RTSP
def rtsp_url_for_camera(self, camera_id: str) -> str:
if camera_id in self._rtsp_urls_map:
return self._rtsp_urls_map[camera_id]
# Re-read on each use so VIDEO_RTSP_URLS_JSON_FILE can be hot-updated (e.g. dev orchestrator).
m = self._s.video_rtsp_url_map()
if camera_id in m:
return m[camera_id]
tpl = (self._s.video_rtsp_url_template or "").strip()
if tpl:
try:

View File

@@ -26,6 +26,8 @@ from app.services.video.backend_resolver import BackendResolver
from app.services.video.hikvision_runtime import HikvisionInitRefCount, HikvisionRuntime
from app.services.video.rtsp_capture import RtspCapture
from app.services.video.types import VideoBackendKind
from app.services.consumption_tsv_log import append_consumption_window, init_consumption_log_file
from app.services.voice_file_log import init_voice_log_file
from app.services.voice_confirm import build_prompt_text
from app.surgery_errors import SurgeryPipelineError
@@ -41,6 +43,8 @@ class PendingConsumableConfirmation:
created_at: datetime
model_top1_label: str
model_top1_confidence: float
#: 本轮待确认在解析失败时累计次数(首败 + 重试),供 API 计算 retry_remaining。
voice_parse_failures: int = 0
@dataclass
@@ -49,6 +53,8 @@ class CameraStreamInferState:
votes: list[tuple[float, str, ClsTop3]] = field(default_factory=list)
stream_t0: float | None = None
#: 与 `stream_t0` 同一次初始化时的 `time.time()`,与 monotonic 流逝秒相加得到墙钟时间戳
stream_wall_start: float | None = None
next_bucket: int = 0
@@ -258,6 +264,8 @@ class CameraSessionManager:
)
run = RunningSurgery(stop_event=stop_event, state=state, tasks=tasks)
init_consumption_log_file(surgery_id)
init_voice_log_file(surgery_id, self._s)
async with self._manager_lock:
self._active[surgery_id] = run
@@ -408,6 +416,22 @@ class CameraSessionManager:
return []
return list(self._active[surgery_id].state.candidate_consumables)
async def record_voice_parse_failure(
    self, surgery_id: str, confirmation_id: str
) -> tuple[int, int]:
    """Increment the parse-failure counter of a pending confirmation.

    Returns ``(failure_count, retries_remaining)``; ``(0, 0)`` when the
    surgery is not active or the confirmation is missing / already resolved.
    """
    if surgery_id not in self._active:
        return 0, 0
    st = self._active[surgery_id].state
    max_r = int(self._s.voice_confirm_max_failed_parse_rounds)
    async with st.lock:
        p = st.pending_by_id.get(confirmation_id)
        if p is None or p.status != "pending":
            return 0, 0
        p.voice_parse_failures += 1
        # Clamp at zero so the API can surface `retry_remaining` directly.
        remaining = max(0, max_r - p.voice_parse_failures)
        return p.voice_parse_failures, remaining
def next_pending_confirmation(
self, surgery_id: str
) -> PendingConsumableConfirmation | None:
@@ -622,6 +646,19 @@ class CameraSessionManager:
if snap is None:
continue
if self._s.video_log_inference_results:
logger.info(
"Vision result surgery={} camera={} top1={}({:.3f}) top2={}({:.3f}) top3={}({:.3f})",
surgery_id,
camera_id,
snap.t1_name,
snap.t1_conf,
snap.t2_name,
snap.t2_conf,
snap.t3_name,
snap.t3_conf,
)
wsec = self._s.consumable_vision_window_sec
pending_preds: list[PredictionResult] = []
async with state.lock:
@@ -630,6 +667,7 @@ class CameraSessionManager:
)
if cis.stream_t0 is None:
cis.stream_t0 = time.monotonic()
cis.stream_wall_start = time.time()
t_rel = time.monotonic() - cis.stream_t0
cis.votes.append((t_rel, snap.t1_name, snap))
current_b = int(t_rel // wsec)
@@ -648,7 +686,19 @@ class CameraSessionManager:
if not bucket_pts:
continue
best = window_bucket_to_best_snap(bucket_pts)
if best is not None:
if best is not None and cis.stream_wall_start is not None:
if self._s.consumption_tsv_log_enabled or self._s.consumption_log_markdown_terminal:
wall_lo = cis.stream_wall_start + lo
wall_hi = cis.stream_wall_start + hi
append_consumption_window(
surgery_id=surgery_id,
name_to_code=state.name_to_code,
best=best,
doctor_id=self._s.video_result_doctor_id,
camera_id=camera_id,
wall_start_epoch=wall_lo,
wall_end_epoch=wall_hi,
)
pending_preds.append(
cls_top3_to_prediction_result(best)
)

View File

@@ -32,12 +32,100 @@ _CN_DIGITS = {
}
def _parse_ordinal_index_1based(token: str) -> int | None:
    """Parse "1", "3", or Chinese numerals ("一", "十一", ...) as a 1-based ordinal.

    Returns None when the token cannot be parsed or falls outside 1..99.
    """
    t = (token or "").strip()
    if not t:
        return None
    if t.isdigit():
        v = int(t)
        return v if 1 <= v <= 99 else None
    # NOTE(review): several literals below are empty strings ("") where the
    # characters 十/百 were presumably intended — as written, comparisons like
    # `t == ""` can never match a non-empty token. Verify against the original
    # file encoding before relying on this path.
    if t in _CN_DIGITS and t != "" and t != "":
        return int(_CN_DIGITS[t])
    if t == "":
        return 10
    # Two-char forms: "十X" (10 + digit) and "X十" (digit * 10).
    if len(t) == 2 and t[0] == "" and t[1] in _CN_DIGITS and t[1] not in ("", ""):
        return 10 + int(_CN_DIGITS[t[1]])
    if len(t) == 2 and t[1] == "" and t[0] in _CN_DIGITS and t[0] != "":
        return int(_CN_DIGITS[t[0]]) * 10
    # Three-char form: "X十Y" (digit * 10 + digit).
    if len(t) == 3 and t[0] in _CN_DIGITS and t[1] == "" and t[2] in _CN_DIGITS:
        return int(_CN_DIGITS[t[0]]) * 10 + int(_CN_DIGITS[t[2]])
    return None
def _label_from_ordinal_1based(n1: int, options: list[str]) -> str | None:
if n1 < 1:
return None
idx = n1 - 1
if 0 <= idx < len(options):
return options[idx]
return None
def _choose_from_ordinal_text(raw: str, options: list[str]) -> str | None:
    """Pick an option from ordinal phrasings like "第一个", "第2个", "选3", "1号".

    Returns None when this function does not recognize the phrasing — the
    caller then falls through to other matching strategies.
    """
    n_opt = len(options)
    if n_opt < 1:
        return None
    # 1) Explicit "第N个/项/款/…" — trailing noise is tolerated (e.g. "第一个对").
    for m in re.finditer(
        r"第([0-9]+|[一二两三四五六七八九十百]+)(?:个|项|款|的|种|名)?", raw
    ):
        n1 = _parse_ordinal_index_1based(m.group(1))
        if n1 is not None:
            ch = _label_from_ordinal_1based(n1, options)
            if ch is not None:
                return ch
    # "选/要/就 N [号/个/项/款]" — digits only, 1..99, leading zeros allowed.
    m_pick = re.search(
        r"(?:^|[\s,;:])(?:选|要|就)\s*0*([1-9]\d?)(?:\s*号|个|项|款)?",
        raw,
    )
    if m_pick:
        n1 = int(m_pick.group(1))
        ch = _label_from_ordinal_1based(n1, options)
        if ch is not None:
            return ch
    # "option N" / "选项 N" — spaces stripped, case-insensitive.
    norm_for_opt = raw.replace(" ", "").lower()
    m_op = re.search(r"(?:option|选项)\s*[:]?\s*(\d+)", norm_for_opt, re.IGNORECASE)
    if m_op:
        n1 = int(m_op.group(1))
        ch = _label_from_ordinal_1based(n1, options)
        if ch is not None:
            return ch
    # 2) A lone single Chinese digit, only when the option list is short
    #    (avoids misreading ordinary speech as a choice).
    s = raw.replace(" ", "")
    if n_opt <= 3:
        m_one = re.match(r"^([一二两三四])$", s)
        if m_one:
            tok = m_one.group(1)
            # NOTE(review): the two empty-string members below look like
            # garbled CJK literals (presumably 十/百) — as written the
            # exclusion never triggers; verify against the original encoding.
            if tok in _CN_DIGITS and tok not in ("", ""):
                n1 = int(_CN_DIGITS[tok])
                ch = _label_from_ordinal_1based(n1, options)
                if ch is not None:
                    return ch
    # Trailing "N号" (digits or Chinese numerals).
    m_tail = re.search(r"([0-9一二两三四五六七八九十]+)\s*号$", s)
    if m_tail:
        n1 = _parse_ordinal_index_1based(m_tail.group(1))
        if n1 is not None:
            ch = _label_from_ordinal_1based(n1, options)
            if ch is not None:
                return ch
    return None
def parse_voice_choice(asr_text: str, options: list[str]) -> str | None:
"""
从识别文本中解析医生选择的耗材名称。
支持:完全匹配、子串匹配、第 N 个1/一/第一个)。
"""
raw = (asr_text or "").strip()
raw = re.sub(
r"^[。,、;:!?\s]+|[。,、;:!?\s]+$",
"",
(asr_text or "").strip(),
)
if not raw:
return None
normalized = raw.replace(" ", "").lower()
@@ -46,6 +134,10 @@ def parse_voice_choice(asr_text: str, options: list[str]) -> str | None:
if opt and opt in raw:
return opt
chosen_ord = _choose_from_ordinal_text(raw, options)
if chosen_ord is not None:
return chosen_ord
m_num = re.search(r"(\d+)", raw)
if m_num:
idx = int(m_num.group(1)) - 1
@@ -55,14 +147,11 @@ def parse_voice_choice(asr_text: str, options: list[str]) -> str | None:
m_cn = re.search(r"第([一二两三四五六七八九十\d]+)个", raw)
if m_cn:
token = m_cn.group(1)
if token.isdigit():
idx = int(token) - 1
elif token in _CN_DIGITS:
idx = _CN_DIGITS[token] - 1
else:
idx = -1
if 0 <= idx < len(options):
return options[idx]
n1 = int(token) if token.isdigit() else _parse_ordinal_index_1based(token)
if n1 is not None:
ch = _label_from_ordinal_1based(n1, options)
if ch is not None:
return ch
for i, opt in enumerate(options):
if not opt:
@@ -107,13 +196,9 @@ def is_rejection_phrase(asr_text: str) -> bool:
def build_prompt_text(options: list[tuple[str, float]]) -> str:
parts = [
"请确认刚才使用的耗材是下面哪一项,可以说序号或名称;"
"若是清单内其它耗材,也可以直接说该耗材名称。"
]
parts = ["请确认刚才使用的耗材是下面哪一项。"]
for i, (name, _conf) in enumerate(options, start=1):
parts.append(f"{i}个,{name}")
parts.append("若都不是请说不是。")
return "".join(parts)
@@ -228,6 +313,32 @@ class VoiceConfirmationOrchestrator:
tmp.close()
return path, None
async def speak_prompt(self, text: str) -> None:
    """Announce `text` via Baidu TTS + ffplay only (no recording).

    Used when a pending confirmation is enqueued. Silently returns when the
    text is blank, either feature flag is off, or Baidu speech is not
    configured.
    """
    if not (text or "").strip():
        return
    enabled = (
        self._s.voice_tts_on_pending_enqueued
        and self._s.voice_confirmation_enabled
    )
    if not enabled:
        return
    if not self._baidu.configured:
        logger.debug("speak_prompt skipped: baidu_speech not configured")
        return
    # Serialize synthesis + playback so overlapping prompts don't talk over
    # each other.
    async with self._lock:
        mp3_path, err = await run_in_threadpool(self._synthesize_to_temp_mp3, text)
        if err or not mp3_path:
            logger.warning("TTS synthesis failed: {}", err)
            return
        try:
            play_err = await run_in_threadpool(self._play_mp3_file, mp3_path)
            if play_err:
                logger.warning("TTS play failed: {}", play_err)
        finally:
            # Best-effort temp-file cleanup.
            try:
                os.unlink(mp3_path)
            except OSError:
                pass
async def run_confirmation(
self,
*,

View File

@@ -0,0 +1,167 @@
"""语音确认ASR/解析/审计)的终端 loguru 行 + 每手术 TSV 落盘,与 `consumption_tsv_log` 并列。"""
from __future__ import annotations
import re
import threading
from datetime import datetime, timezone
from pathlib import Path
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
from loguru import logger
from app.config import Settings
_lock = threading.Lock()
HEADER = (
"时间戳(ISO,UTC)\t来源\t状态\tconfirmation_id\tasr/识别文本\t"
"resolved_label\trejected\terror\taudio_object_key\n"
)
def _ts_iso_utc() -> str:
return datetime.now(timezone.utc).isoformat(timespec="milliseconds")
def _encode_cell(value: str) -> str:
return (value or "").replace("\r", " ").replace("\n", " ").replace("\t", " ")
def _log_tz_info(settings: Settings) -> object:
raw = (settings.consumption_log_timezone or "").strip()
if not raw:
lt = datetime.now().astimezone().tzinfo
return lt if lt is not None else timezone.utc
try:
return ZoneInfo(raw)
except ZoneInfoNotFoundError:
return timezone.utc
def _ts_local_for_display(settings: Settings) -> str:
    """Human-oriented 'now' in the configured/local timezone (ISO, ms precision)."""
    return datetime.now(_log_tz_info(settings)).isoformat(timespec="milliseconds")
def _safe_surgery_path_segment(surgery_id: str) -> str:
s = (surgery_id or "unknown").strip() or "unknown"
s = re.sub(r"[^\w\-.@]", "_", s)
return s[:200] if len(s) > 200 else s
def resolved_voice_log_path(surgery_id: str, settings: Settings) -> Path:
    """Absolute per-surgery voice-log path from the `voice_file_log_path` template.

    A `{surgery_id}` placeholder is substituted; otherwise the sanitized id is
    injected before the file extension (or appended when there is none).
    """
    template = (settings.voice_file_log_path or "logs/voice_{surgery_id}.txt").strip()
    segment = _safe_surgery_path_segment(surgery_id)
    if "{surgery_id}" in template:
        candidate = template.replace("{surgery_id}", segment)
    else:
        base = Path(template)
        if base.suffix:
            candidate = str(base.with_name(f"{base.stem}_{segment}{base.suffix}"))
        else:
            candidate = f"{template.rstrip('/')}_{segment}.txt"
    resolved = Path(candidate).expanduser()
    return resolved if resolved.is_absolute() else Path.cwd() / resolved
def init_voice_log_file(surgery_id: str, settings: Settings) -> None:
    """Truncate this surgery's voice log and write the header once.

    Lifecycle mirrors `init_consumption_log_file`: called on `start_surgery`.
    """
    if not settings.voice_file_log_enabled:
        return
    target = resolved_voice_log_path(surgery_id, settings)
    target.parent.mkdir(parents=True, exist_ok=True)
    with _lock, target.open("w", encoding="utf-8") as f:
        f.write(HEADER)
def append_voice_tsv_line(surgery_id: str, line: str, settings: Settings) -> None:
    """Append one pre-rendered TSV row to this surgery's voice log (no-op when disabled)."""
    if not settings.voice_file_log_enabled:
        return
    target = resolved_voice_log_path(surgery_id, settings)
    target.parent.mkdir(parents=True, exist_ok=True)
    with _lock, target.open("a", encoding="utf-8") as f:
        f.write(line)
def emit_voice_event(
    settings: Settings,
    *,
    surgery_id: str,
    source: str,
    status: str,
    confirmation_id: str,
    asr_text: str | None = None,
    resolved_label: str | None = None,
    rejected: str | bool | None = None,
    error_message: str | None = None,
    audio_object_key: str | None = None,
) -> None:
    """
    Terminal: one grep-able VoiceConfirm line. File: one TSV row (when
    `voice_file_log_enabled` is on).

    :param source: `wav` | `text` | `n/a`
    :param status: audit `status` or a descriptive state such as `minio_not_configured`
    """
    # Normalize `rejected` to a string cell.
    rj: str
    if rejected is None:
        rj = ""
    elif isinstance(rejected, bool):
        rj = "true" if rejected else "false"
    else:
        rj = str(rejected)
    ts_utc = _ts_iso_utc()
    local_hint = _ts_local_for_display(settings)
    # Success-ish statuses log at INFO, everything else at WARNING. The
    # message was previously duplicated verbatim across both branches;
    # selecting the sink once removes that duplication.
    log = logger.info if status in ("recognized", "rejected") else logger.warning
    log(
        "VoiceConfirm local_ts={!r} surgery_id={} source={} status={} "
        "confirmation_id={} asr_text={!r} resolved_label={!r} rejected={} "
        "error={!r} audio_key={!r}",
        local_hint,
        surgery_id,
        source,
        status,
        confirmation_id,
        asr_text,
        resolved_label,
        rj,
        error_message,
        audio_object_key,
    )
    if not settings.voice_file_log_enabled:
        return
    # Column order must match HEADER.
    row = [
        _encode_cell(ts_utc),
        _encode_cell(source),
        _encode_cell(status),
        _encode_cell(confirmation_id),
        _encode_cell("" if asr_text is None else asr_text),
        _encode_cell("" if resolved_label is None else resolved_label),
        _encode_cell(rj),
        _encode_cell("" if error_message is None else error_message),
        _encode_cell("" if audio_object_key is None else audio_object_key),
    ]
    line = "\t".join(row) + "\n"
    append_voice_tsv_line(surgery_id, line, settings)

View File

@@ -9,7 +9,9 @@ from fastapi.concurrency import run_in_threadpool
from loguru import logger
from app.config import Settings
from app.services.voice_file_log import emit_voice_event
from app.database import AsyncSessionLocal
from app.db.models import VoiceConfirmationAudit
from app.repositories.voice_audits import VoiceAuditRepository
from app.services.audio_wav import WavDecodeError, wav_bytes_to_pcm16k_mono_s16le
from app.services.baidu_speech import BaiduSpeechNotConfiguredError, BaiduSpeechService
@@ -49,6 +51,50 @@ class VoiceConfirmationService:
self._minio = minio
self._audits = audits
def _emit_voice_trace(
    self,
    *,
    source: str,
    status: str,
    surgery_id: str,
    confirmation_id: str,
    asr_text: str | None = None,
    resolved_label: str | None = None,
    rejected: bool | str | None = None,
    error_message: str | None = None,
    audio_object_key: str | None = None,
) -> None:
    """Forward one trace event to `emit_voice_event` using this service's settings."""
    payload = {
        "surgery_id": surgery_id,
        "source": source,
        "status": status,
        "confirmation_id": confirmation_id,
        "asr_text": asr_text,
        "resolved_label": resolved_label,
        "rejected": rejected,
        "error_message": error_message,
        "audio_object_key": audio_object_key,
    }
    emit_voice_event(self._s, **payload)
def synthesize_prompt_to_mp3(self, text: str) -> bytes:
    """Synthesize `text` to MP3 via Baidu online TTS (browser-playable).

    Parameters mirror `voice_confirm._synthesize_to_temp_mp3`. Raises
    SurgeryPipelineError on empty text, missing configuration, or TTS failure
    (Baidu returns a JSON dict instead of audio bytes on error).
    """
    prompt = (text or "").strip()
    if not prompt:
        raise SurgeryPipelineError("TTS_TEXT_EMPTY", "提示文本为空。")
    tts_params = {"spd": 5, "pit": 5, "vol": 9, "per": 0}
    try:
        audio = self._baidu.synthesis(prompt, "zh", 1, tts_params)
    except BaiduSpeechNotConfiguredError as exc:
        raise SurgeryPipelineError(
            "BAIDU_NOT_CONFIGURED",
            "服务端未配置百度语音,无法合成播报音频。",
        ) from exc
    if isinstance(audio, dict):
        raise SurgeryPipelineError("TTS_ERROR", f"百度 TTS 失败: {audio!r}")
    return audio
async def resolve_from_wav(
self,
*,
@@ -74,18 +120,39 @@ class VoiceConfirmationService:
options_snapshot_json=None,
error_message="音频超过大小限制",
)
self._emit_voice_trace(
source="wav",
status="invalid_audio",
surgery_id=surgery_id,
confirmation_id=confirmation_id,
error_message="音频超过大小限制",
)
raise SurgeryPipelineError(
"VOICE_AUDIO_INVALID",
f"音频大小超过限制(最大 {self._s.voice_upload_max_bytes} 字节)。",
)
if not self._minio.configured:
self._emit_voice_trace(
source="wav",
status="minio_not_configured",
surgery_id=surgery_id,
confirmation_id=confirmation_id,
error_message="服务端未配置 MinIO无法保存语音追溯文件。",
)
raise SurgeryPipelineError(
"MINIO_NOT_CONFIGURED",
"服务端未配置 MinIO无法保存语音追溯文件。",
)
if not self._baidu.configured:
self._emit_voice_trace(
source="wav",
status="baidu_not_configured",
surgery_id=surgery_id,
confirmation_id=confirmation_id,
error_message="服务端未配置百度语音,无法进行语音识别。",
)
raise SurgeryPipelineError(
"BAIDU_NOT_CONFIGURED",
"服务端未配置百度语音,无法进行语音识别。",
@@ -95,6 +162,13 @@ class VoiceConfirmationService:
surgery_id, confirmation_id
)
if pending is None:
self._emit_voice_trace(
source="wav",
status="confirmation_not_found",
surgery_id=surgery_id,
confirmation_id=confirmation_id,
error_message="未找到该待确认项或已处理。",
)
raise SurgeryPipelineError(
"CONFIRMATION_NOT_FOUND",
"未找到该待确认项或已处理。",
@@ -133,6 +207,13 @@ class VoiceConfirmationService:
error_message=str(exc),
)
self._sessions.record_voice_trace(surgery_id, asr_text=None, error=str(exc))
self._emit_voice_trace(
source="wav",
status="upload_failed",
surgery_id=surgery_id,
confirmation_id=confirmation_id,
error_message=str(exc),
)
raise SurgeryPipelineError(
"MINIO_UPLOAD_FAILED",
f"语音文件上传失败:{exc}",
@@ -155,6 +236,14 @@ class VoiceConfirmationService:
error_message=str(exc),
)
self._sessions.record_voice_trace(surgery_id, asr_text=None, error=str(exc))
self._emit_voice_trace(
source="wav",
status="invalid_audio",
surgery_id=surgery_id,
confirmation_id=confirmation_id,
error_message=str(exc),
audio_object_key=stored.object_key,
)
raise SurgeryPipelineError(
"VOICE_AUDIO_INVALID",
f"无法解析 WAV 音频:{exc}",
@@ -165,6 +254,14 @@ class VoiceConfirmationService:
self._baidu.asr, pcm, "pcm", 16000, None
)
except BaiduSpeechNotConfiguredError as exc:
self._emit_voice_trace(
source="wav",
status="baidu_not_configured",
surgery_id=surgery_id,
confirmation_id=confirmation_id,
error_message=str(exc),
audio_object_key=stored.object_key,
)
raise SurgeryPipelineError(
"BAIDU_NOT_CONFIGURED",
str(exc),
@@ -184,6 +281,14 @@ class VoiceConfirmationService:
error_message=str(exc),
)
self._sessions.record_voice_trace(surgery_id, asr_text=None, error=str(exc))
self._emit_voice_trace(
source="wav",
status="asr_failed",
surgery_id=surgery_id,
confirmation_id=confirmation_id,
error_message=str(exc),
audio_object_key=stored.object_key,
)
raise SurgeryPipelineError(
"VOICE_ASR_FAILED",
f"语音识别调用失败:{exc}",
@@ -205,6 +310,14 @@ class VoiceConfirmationService:
error_message=msg,
)
self._sessions.record_voice_trace(surgery_id, asr_text=None, error=msg)
self._emit_voice_trace(
source="wav",
status="asr_failed",
surgery_id=surgery_id,
confirmation_id=confirmation_id,
error_message=msg,
audio_object_key=stored.object_key,
)
raise SurgeryPipelineError("VOICE_ASR_FAILED", msg)
if asr_payload.get("err_no") != 0:
@@ -226,6 +339,14 @@ class VoiceConfirmationService:
error_message=msg,
)
self._sessions.record_voice_trace(surgery_id, asr_text=None, error=msg)
self._emit_voice_trace(
source="wav",
status="asr_failed",
surgery_id=surgery_id,
confirmation_id=confirmation_id,
error_message=msg,
audio_object_key=stored.object_key,
)
raise SurgeryPipelineError("VOICE_ASR_FAILED", msg)
results = asr_payload.get("result")
@@ -252,6 +373,14 @@ class VoiceConfirmationService:
error_message=msg,
)
self._sessions.record_voice_trace(surgery_id, asr_text=None, error=msg)
self._emit_voice_trace(
source="wav",
status="asr_failed",
surgery_id=surgery_id,
confirmation_id=confirmation_id,
error_message=msg,
audio_object_key=stored.object_key,
)
raise SurgeryPipelineError("VOICE_ASR_FAILED", msg)
self._sessions.record_voice_trace(surgery_id, asr_text=text, error=None)
@@ -269,10 +398,24 @@ class VoiceConfirmationService:
)
if not rejected and not chosen:
msg = (
"无法从语音中匹配候选项或本台手术候选清单中的耗材名称,"
"请重试或说「不是」否认全部"
_, retry_remaining = await self._sessions.record_voice_parse_failure(
surgery_id, confirmation_id
)
base = (
"无法从语音中匹配候选项或本台手术候选清单中的耗材名称,"
"请重试或说「不是」否认全部。"
)
if retry_remaining > 0:
msg = (
f"{base} 本次未听清或未能解析,"
f"您还可重试 {retry_remaining} 次,"
"请说「第一个」「第二个」等序号或候选项全名。"
)
else:
msg = (
f"{base} 本轮重试机会已用完,"
"请再清晰地说序号/全名,或说「不是」否认全部。"
)
await self._persist_audit(
surgery_id=surgery_id,
confirmation_id=confirmation_id,
@@ -287,7 +430,23 @@ class VoiceConfirmationService:
error_message=msg,
)
self._sessions.record_voice_trace(surgery_id, asr_text=text, error=msg)
raise SurgeryPipelineError("VOICE_PARSE_FAILED", msg)
self._emit_voice_trace(
source="wav",
status="parse_failed",
surgery_id=surgery_id,
confirmation_id=confirmation_id,
asr_text=text,
error_message=msg,
audio_object_key=stored.object_key,
)
raise SurgeryPipelineError(
"VOICE_PARSE_FAILED",
msg,
extra={
"confirmation_id": confirmation_id,
"retry_remaining": retry_remaining,
},
)
await self._sessions.resolve_pending_confirmation(
surgery_id,
@@ -310,6 +469,16 @@ class VoiceConfirmationService:
options_snapshot_json=options_snapshot,
error_message=None,
)
self._emit_voice_trace(
source="wav",
status=final_status,
surgery_id=surgery_id,
confirmation_id=confirmation_id,
asr_text=text,
resolved_label=chosen if not rejected else None,
rejected=rejected,
audio_object_key=stored.object_key,
)
if rejected:
return VoiceResolveResult(
@@ -327,6 +496,186 @@ class VoiceConfirmationService:
message="已确认并记一条消耗。",
)
async def resolve_from_recognized_text(
    self,
    *,
    surgery_id: str,
    confirmation_id: str,
    recognized_text: str,
) -> VoiceResolveResult:
    """Resolve a pending confirmation from text already recognized on the client.

    Used when the browser (e.g. Web Speech API) performs speech-to-text locally,
    so no audio is uploaded to MinIO and Baidu ASR is never called; the parsing
    rules are the same as ``resolve_from_wav``.

    Args:
        surgery_id: Surgery session the pending confirmation belongs to.
        confirmation_id: Identifier of the pending confirmation to resolve.
        recognized_text: Client-side recognition result; leading/trailing
            whitespace is stripped before parsing.

    Returns:
        VoiceResolveResult describing either a confirmed label or a full
        rejection; ``audio_object_key`` is always ``None`` on this path.

    Raises:
        SurgeryPipelineError: ``CONFIRMATION_NOT_FOUND`` when the pending item
            is missing/already handled, ``VOICE_TEXT_EMPTY`` when the stripped
            text is empty, or ``VOICE_PARSE_FAILED`` when the text matches
            neither an option nor the surgery's candidate consumables (the
            error's ``extra`` carries ``retry_remaining`` for the API detail).
    """
    pending = self._sessions.get_pending_confirmation_by_id(
        surgery_id, confirmation_id
    )
    if pending is None:
        # Emit an observability trace before raising so failed lookups are
        # still visible in the voice log.
        self._emit_voice_trace(
            source="text",
            status="confirmation_not_found",
            surgery_id=surgery_id,
            confirmation_id=confirmation_id,
            error_message="未找到该待确认项或已处理。",
        )
        raise SurgeryPipelineError(
            "CONFIRMATION_NOT_FOUND",
            "未找到该待确认项或已处理。",
        )
    # Candidate labels for ordinal/name matching; blank labels are dropped.
    option_labels = [a.strip() for a, _ in pending.options if a.strip()]
    # Snapshot of the options (label + confidence) persisted with every audit
    # row so the audit trail reflects what the user was actually choosing from.
    options_snapshot = json.dumps(
        [{"label": a, "confidence": b} for a, b in pending.options],
        ensure_ascii=False,
    )
    text = (recognized_text or "").strip()
    if not text:
        # Empty client STT result: audit + trace it, then reject the request.
        await self._persist_audit(
            surgery_id=surgery_id,
            confirmation_id=confirmation_id,
            status="client_stt_empty",
            audio_object_key=None,
            audio_content_type=None,
            audio_size_bytes=None,
            audio_sha256=None,
            asr_text=None,
            resolved_label=None,
            options_snapshot_json=options_snapshot,
            error_message="客户端识别文本为空",
        )
        self._sessions.record_voice_trace(surgery_id, asr_text=None, error="empty text")
        self._emit_voice_trace(
            source="text",
            status="client_stt_empty",
            surgery_id=surgery_id,
            confirmation_id=confirmation_id,
            error_message="客户端识别文本为空",
        )
        raise SurgeryPipelineError("VOICE_TEXT_EMPTY", "recognized_text 为空。")
    # Record the recognized text on the session trace before parsing, so the
    # raw text is captured even if parsing fails below.
    self._sessions.record_voice_trace(surgery_id, asr_text=text, error=None)
    rejected = is_rejection_phrase(text)
    chosen: str | None = None
    if not rejected:
        # First try to match against the displayed options (ordinals such as
        # "第一个"/"第二个" or the full option label)...
        chosen = parse_voice_choice(text, option_labels)
        if chosen is None:
            # ...then fall back to the surgery-wide candidate consumable list.
            surgery_candidates = self._sessions.get_surgery_candidate_consumables(
                surgery_id
            )
            chosen = match_voice_choice_against_candidates(text, surgery_candidates)
    if not rejected and not chosen:
        # Neither a rejection nor a match: count a parse failure and tell the
        # caller how many retries remain (surfaced via the error's extra).
        _, retry_remaining = await self._sessions.record_voice_parse_failure(
            surgery_id, confirmation_id
        )
        base = (
            "无法从文本中匹配候选项或本台手术候选清单中的耗材名称,"
            "请重试或说「不是」否认全部。"
        )
        if retry_remaining > 0:
            msg = (
                f"{base} 本次未能解析,"
                f"您还可重试 {retry_remaining} 次,"
                "请输入「第一个」「第二个」等或候选项全名。"
            )
        else:
            msg = (
                f"{base} 本轮重试机会已用完,"
                "请再输入序号/全名,或说「不是」否认全部。"
            )
        await self._persist_audit(
            surgery_id=surgery_id,
            confirmation_id=confirmation_id,
            status="client_stt_parse_failed",
            audio_object_key=None,
            audio_content_type=None,
            audio_size_bytes=None,
            audio_sha256=None,
            asr_text=text,
            resolved_label=None,
            options_snapshot_json=options_snapshot,
            error_message=msg,
        )
        self._sessions.record_voice_trace(surgery_id, asr_text=text, error=msg)
        self._emit_voice_trace(
            source="text",
            status="client_stt_parse_failed",
            surgery_id=surgery_id,
            confirmation_id=confirmation_id,
            asr_text=text,
            error_message=msg,
        )
        raise SurgeryPipelineError(
            "VOICE_PARSE_FAILED",
            msg,
            # extra is merged into the HTTP detail so the client can show the
            # remaining retry count next to the failed confirmation.
            extra={
                "confirmation_id": confirmation_id,
                "retry_remaining": retry_remaining,
            },
        )
    # Resolve the pending confirmation first, then write the audit/trace for
    # the final outcome (same ordering as the WAV path).
    await self._sessions.resolve_pending_confirmation(
        surgery_id,
        confirmation_id,
        chosen_label=chosen,
        rejected=rejected,
    )
    final_status = "rejected" if rejected else "recognized"
    await self._persist_audit(
        surgery_id=surgery_id,
        confirmation_id=confirmation_id,
        status=final_status,
        audio_object_key=None,
        audio_content_type=None,
        audio_size_bytes=None,
        audio_sha256=None,
        asr_text=text,
        resolved_label=chosen if not rejected else None,
        options_snapshot_json=options_snapshot,
        error_message=None,
    )
    self._emit_voice_trace(
        source="text",
        status=final_status,
        surgery_id=surgery_id,
        confirmation_id=confirmation_id,
        asr_text=text,
        resolved_label=chosen if not rejected else None,
        rejected=rejected,
    )
    if rejected:
        return VoiceResolveResult(
            resolved_label=None,
            rejected=True,
            asr_text=text,
            audio_object_key=None,
            message="已否认全部候选,未记消耗。",
        )
    return VoiceResolveResult(
        resolved_label=chosen,
        rejected=False,
        asr_text=text,
        audio_object_key=None,
        message="已确认并记一条消耗。",
    )
async def list_voice_audits_for_surgery(
    self,
    surgery_id: str,
    *,
    limit: int = 50,
    offset: int = 0,
) -> tuple[list[VoiceConfirmationAudit], int]:
    """Return one page of rows from `voice_confirmation_audits` plus the total count.

    Backs internal queries and reporting; pagination is delegated to the
    audit repository via `limit`/`offset`.
    """
    async with AsyncSessionLocal() as session:
        page = await self._audits.list_by_surgery(
            session,
            surgery_id,
            limit=limit,
            offset=offset,
        )
    return page
async def _persist_audit(
self,
*,