feat: 语音确认、联调与运维增强
- 语音:序数解析(第一个/第二个等)、解析失败计数与 API detail.retry_remaining;百度 ASR 固定 dev_pid 为普通话;SurgeryPipelineError 支持 extra 并入 HTTP detail。
- Demo:demo 路由与假 RTSP、客户端 index 与 README;BackendResolver 与配置调整。
- 可观测:消耗 TSV 日志、语音文件日志、终端 Markdown 辅助;相关测试与依赖更新。
- 注意:.env 仍被 gitignore,本地密钥不会进入本提交。

Made-with: Cursor
This commit is contained in:
@@ -9,7 +9,9 @@ from fastapi.concurrency import run_in_threadpool
|
||||
from loguru import logger
|
||||
|
||||
from app.config import Settings
|
||||
from app.services.voice_file_log import emit_voice_event
|
||||
from app.database import AsyncSessionLocal
|
||||
from app.db.models import VoiceConfirmationAudit
|
||||
from app.repositories.voice_audits import VoiceAuditRepository
|
||||
from app.services.audio_wav import WavDecodeError, wav_bytes_to_pcm16k_mono_s16le
|
||||
from app.services.baidu_speech import BaiduSpeechNotConfiguredError, BaiduSpeechService
|
||||
@@ -49,6 +51,50 @@ class VoiceConfirmationService:
|
||||
self._minio = minio
|
||||
self._audits = audits
|
||||
|
||||
def _emit_voice_trace(
    self,
    *,
    source: str,
    status: str,
    surgery_id: str,
    confirmation_id: str,
    asr_text: str | None = None,
    resolved_label: str | None = None,
    rejected: bool | str | None = None,
    error_message: str | None = None,
    audio_object_key: str | None = None,
) -> None:
    """Forward one voice-confirmation event to ``emit_voice_event``.

    All arguments are keyword-only and are handed over unchanged; the only
    positional argument passed along is ``self._s`` (presumably the
    application settings object -- confirm against ``__init__``).

    Args:
        source: Origin of the request; callers in this class pass ``"wav"``
            or ``"text"``.
        status: Outcome tag for the event (e.g. ``"asr_failed"``).
        surgery_id: Surgery the confirmation belongs to.
        confirmation_id: Pending confirmation being resolved.
        asr_text: Recognized text, when available.
        resolved_label: Label that was finally chosen, when available.
        rejected: Rejection flag forwarded as-is.
        error_message: Failure description, when the event is an error.
        audio_object_key: Object key of the stored audio, when one exists.
    """
    # Keep the keyword order of the original call so the event writer sees
    # exactly the same call shape.
    event_fields = {
        "surgery_id": surgery_id,
        "source": source,
        "status": status,
        "confirmation_id": confirmation_id,
        "asr_text": asr_text,
        "resolved_label": resolved_label,
        "rejected": rejected,
        "error_message": error_message,
        "audio_object_key": audio_object_key,
    }
    emit_voice_event(self._s, **event_fields)
|
||||
|
||||
def synthesize_prompt_to_mp3(self, text: str) -> bytes:
    """Synthesize a spoken prompt through the Baidu online TTS service.

    The result is meant to be played directly by the browser. The synthesis
    parameters are intended to match ``voice_confirm._synthesize_to_temp_mp3``.

    Args:
        text: Prompt to speak; surrounding whitespace is ignored.

    Returns:
        Whatever non-dict value ``self._baidu.synthesis`` returns (the audio
        payload).

    Raises:
        SurgeryPipelineError: ``TTS_TEXT_EMPTY`` when the prompt is blank,
            ``BAIDU_NOT_CONFIGURED`` when the Baidu service is not
            configured, ``TTS_ERROR`` when the service answers with a dict
            (its error form).
    """
    prompt = (text or "").strip()
    if prompt == "":
        raise SurgeryPipelineError("TTS_TEXT_EMPTY", "提示文本为空。")

    tts_options = {"spd": 5, "pit": 5, "vol": 9, "per": 0}
    try:
        audio = self._baidu.synthesis(prompt, "zh", 1, tts_options)
    except BaiduSpeechNotConfiguredError as exc:
        raise SurgeryPipelineError(
            "BAIDU_NOT_CONFIGURED",
            "服务端未配置百度语音,无法合成播报音频。",
        ) from exc

    # A dict result is treated as a failure report; anything else is the audio.
    if not isinstance(audio, dict):
        return audio
    raise SurgeryPipelineError("TTS_ERROR", f"百度 TTS 失败: {audio!r}")
|
||||
|
||||
async def resolve_from_wav(
|
||||
self,
|
||||
*,
|
||||
@@ -74,18 +120,39 @@ class VoiceConfirmationService:
|
||||
options_snapshot_json=None,
|
||||
error_message="音频超过大小限制",
|
||||
)
|
||||
self._emit_voice_trace(
|
||||
source="wav",
|
||||
status="invalid_audio",
|
||||
surgery_id=surgery_id,
|
||||
confirmation_id=confirmation_id,
|
||||
error_message="音频超过大小限制",
|
||||
)
|
||||
raise SurgeryPipelineError(
|
||||
"VOICE_AUDIO_INVALID",
|
||||
f"音频大小超过限制(最大 {self._s.voice_upload_max_bytes} 字节)。",
|
||||
)
|
||||
|
||||
if not self._minio.configured:
|
||||
self._emit_voice_trace(
|
||||
source="wav",
|
||||
status="minio_not_configured",
|
||||
surgery_id=surgery_id,
|
||||
confirmation_id=confirmation_id,
|
||||
error_message="服务端未配置 MinIO,无法保存语音追溯文件。",
|
||||
)
|
||||
raise SurgeryPipelineError(
|
||||
"MINIO_NOT_CONFIGURED",
|
||||
"服务端未配置 MinIO,无法保存语音追溯文件。",
|
||||
)
|
||||
|
||||
if not self._baidu.configured:
|
||||
self._emit_voice_trace(
|
||||
source="wav",
|
||||
status="baidu_not_configured",
|
||||
surgery_id=surgery_id,
|
||||
confirmation_id=confirmation_id,
|
||||
error_message="服务端未配置百度语音,无法进行语音识别。",
|
||||
)
|
||||
raise SurgeryPipelineError(
|
||||
"BAIDU_NOT_CONFIGURED",
|
||||
"服务端未配置百度语音,无法进行语音识别。",
|
||||
@@ -95,6 +162,13 @@ class VoiceConfirmationService:
|
||||
surgery_id, confirmation_id
|
||||
)
|
||||
if pending is None:
|
||||
self._emit_voice_trace(
|
||||
source="wav",
|
||||
status="confirmation_not_found",
|
||||
surgery_id=surgery_id,
|
||||
confirmation_id=confirmation_id,
|
||||
error_message="未找到该待确认项或已处理。",
|
||||
)
|
||||
raise SurgeryPipelineError(
|
||||
"CONFIRMATION_NOT_FOUND",
|
||||
"未找到该待确认项或已处理。",
|
||||
@@ -133,6 +207,13 @@ class VoiceConfirmationService:
|
||||
error_message=str(exc),
|
||||
)
|
||||
self._sessions.record_voice_trace(surgery_id, asr_text=None, error=str(exc))
|
||||
self._emit_voice_trace(
|
||||
source="wav",
|
||||
status="upload_failed",
|
||||
surgery_id=surgery_id,
|
||||
confirmation_id=confirmation_id,
|
||||
error_message=str(exc),
|
||||
)
|
||||
raise SurgeryPipelineError(
|
||||
"MINIO_UPLOAD_FAILED",
|
||||
f"语音文件上传失败:{exc}",
|
||||
@@ -155,6 +236,14 @@ class VoiceConfirmationService:
|
||||
error_message=str(exc),
|
||||
)
|
||||
self._sessions.record_voice_trace(surgery_id, asr_text=None, error=str(exc))
|
||||
self._emit_voice_trace(
|
||||
source="wav",
|
||||
status="invalid_audio",
|
||||
surgery_id=surgery_id,
|
||||
confirmation_id=confirmation_id,
|
||||
error_message=str(exc),
|
||||
audio_object_key=stored.object_key,
|
||||
)
|
||||
raise SurgeryPipelineError(
|
||||
"VOICE_AUDIO_INVALID",
|
||||
f"无法解析 WAV 音频:{exc}",
|
||||
@@ -165,6 +254,14 @@ class VoiceConfirmationService:
|
||||
self._baidu.asr, pcm, "pcm", 16000, None
|
||||
)
|
||||
except BaiduSpeechNotConfiguredError as exc:
|
||||
self._emit_voice_trace(
|
||||
source="wav",
|
||||
status="baidu_not_configured",
|
||||
surgery_id=surgery_id,
|
||||
confirmation_id=confirmation_id,
|
||||
error_message=str(exc),
|
||||
audio_object_key=stored.object_key,
|
||||
)
|
||||
raise SurgeryPipelineError(
|
||||
"BAIDU_NOT_CONFIGURED",
|
||||
str(exc),
|
||||
@@ -184,6 +281,14 @@ class VoiceConfirmationService:
|
||||
error_message=str(exc),
|
||||
)
|
||||
self._sessions.record_voice_trace(surgery_id, asr_text=None, error=str(exc))
|
||||
self._emit_voice_trace(
|
||||
source="wav",
|
||||
status="asr_failed",
|
||||
surgery_id=surgery_id,
|
||||
confirmation_id=confirmation_id,
|
||||
error_message=str(exc),
|
||||
audio_object_key=stored.object_key,
|
||||
)
|
||||
raise SurgeryPipelineError(
|
||||
"VOICE_ASR_FAILED",
|
||||
f"语音识别调用失败:{exc}",
|
||||
@@ -205,6 +310,14 @@ class VoiceConfirmationService:
|
||||
error_message=msg,
|
||||
)
|
||||
self._sessions.record_voice_trace(surgery_id, asr_text=None, error=msg)
|
||||
self._emit_voice_trace(
|
||||
source="wav",
|
||||
status="asr_failed",
|
||||
surgery_id=surgery_id,
|
||||
confirmation_id=confirmation_id,
|
||||
error_message=msg,
|
||||
audio_object_key=stored.object_key,
|
||||
)
|
||||
raise SurgeryPipelineError("VOICE_ASR_FAILED", msg)
|
||||
|
||||
if asr_payload.get("err_no") != 0:
|
||||
@@ -226,6 +339,14 @@ class VoiceConfirmationService:
|
||||
error_message=msg,
|
||||
)
|
||||
self._sessions.record_voice_trace(surgery_id, asr_text=None, error=msg)
|
||||
self._emit_voice_trace(
|
||||
source="wav",
|
||||
status="asr_failed",
|
||||
surgery_id=surgery_id,
|
||||
confirmation_id=confirmation_id,
|
||||
error_message=msg,
|
||||
audio_object_key=stored.object_key,
|
||||
)
|
||||
raise SurgeryPipelineError("VOICE_ASR_FAILED", msg)
|
||||
|
||||
results = asr_payload.get("result")
|
||||
@@ -252,6 +373,14 @@ class VoiceConfirmationService:
|
||||
error_message=msg,
|
||||
)
|
||||
self._sessions.record_voice_trace(surgery_id, asr_text=None, error=msg)
|
||||
self._emit_voice_trace(
|
||||
source="wav",
|
||||
status="asr_failed",
|
||||
surgery_id=surgery_id,
|
||||
confirmation_id=confirmation_id,
|
||||
error_message=msg,
|
||||
audio_object_key=stored.object_key,
|
||||
)
|
||||
raise SurgeryPipelineError("VOICE_ASR_FAILED", msg)
|
||||
|
||||
self._sessions.record_voice_trace(surgery_id, asr_text=text, error=None)
|
||||
@@ -269,10 +398,24 @@ class VoiceConfirmationService:
|
||||
)
|
||||
|
||||
if not rejected and not chosen:
|
||||
msg = (
|
||||
"无法从语音中匹配候选项或本台手术候选清单中的耗材名称,"
|
||||
"请重试或说「不是」否认全部"
|
||||
_, retry_remaining = await self._sessions.record_voice_parse_failure(
|
||||
surgery_id, confirmation_id
|
||||
)
|
||||
base = (
|
||||
"无法从语音中匹配候选项或本台手术候选清单中的耗材名称,"
|
||||
"请重试或说「不是」否认全部。"
|
||||
)
|
||||
if retry_remaining > 0:
|
||||
msg = (
|
||||
f"{base} 本次未听清或未能解析,"
|
||||
f"您还可重试 {retry_remaining} 次,"
|
||||
"请说「第一个」「第二个」等序号或候选项全名。"
|
||||
)
|
||||
else:
|
||||
msg = (
|
||||
f"{base} 本轮重试机会已用完,"
|
||||
"请再清晰地说序号/全名,或说「不是」否认全部。"
|
||||
)
|
||||
await self._persist_audit(
|
||||
surgery_id=surgery_id,
|
||||
confirmation_id=confirmation_id,
|
||||
@@ -287,7 +430,23 @@ class VoiceConfirmationService:
|
||||
error_message=msg,
|
||||
)
|
||||
self._sessions.record_voice_trace(surgery_id, asr_text=text, error=msg)
|
||||
raise SurgeryPipelineError("VOICE_PARSE_FAILED", msg)
|
||||
self._emit_voice_trace(
|
||||
source="wav",
|
||||
status="parse_failed",
|
||||
surgery_id=surgery_id,
|
||||
confirmation_id=confirmation_id,
|
||||
asr_text=text,
|
||||
error_message=msg,
|
||||
audio_object_key=stored.object_key,
|
||||
)
|
||||
raise SurgeryPipelineError(
|
||||
"VOICE_PARSE_FAILED",
|
||||
msg,
|
||||
extra={
|
||||
"confirmation_id": confirmation_id,
|
||||
"retry_remaining": retry_remaining,
|
||||
},
|
||||
)
|
||||
|
||||
await self._sessions.resolve_pending_confirmation(
|
||||
surgery_id,
|
||||
@@ -310,6 +469,16 @@ class VoiceConfirmationService:
|
||||
options_snapshot_json=options_snapshot,
|
||||
error_message=None,
|
||||
)
|
||||
self._emit_voice_trace(
|
||||
source="wav",
|
||||
status=final_status,
|
||||
surgery_id=surgery_id,
|
||||
confirmation_id=confirmation_id,
|
||||
asr_text=text,
|
||||
resolved_label=chosen if not rejected else None,
|
||||
rejected=rejected,
|
||||
audio_object_key=stored.object_key,
|
||||
)
|
||||
|
||||
if rejected:
|
||||
return VoiceResolveResult(
|
||||
@@ -327,6 +496,186 @@ class VoiceConfirmationService:
|
||||
message="已确认并记一条消耗。",
|
||||
)
|
||||
|
||||
async def resolve_from_recognized_text(
    self,
    *,
    surgery_id: str,
    confirmation_id: str,
    recognized_text: str,
) -> VoiceResolveResult:
    """Resolve a pending confirmation from text recognized on the client.

    Intended for text produced by client-side recognition (e.g. the browser
    Web Speech API). No audio is stored and no Baidu ASR call is made; the
    parsing rules are meant to match ``resolve_from_wav``.

    Args:
        surgery_id: Surgery the confirmation belongs to.
        confirmation_id: Pending confirmation to resolve.
        recognized_text: Text recognized by the client.

    Returns:
        A ``VoiceResolveResult`` describing either a rejection of all
        candidates or the chosen label; ``audio_object_key`` is always
        ``None`` on this path.

    Raises:
        SurgeryPipelineError: ``CONFIRMATION_NOT_FOUND`` when no pending
            confirmation matches, ``VOICE_TEXT_EMPTY`` when the text is
            blank, ``VOICE_PARSE_FAILED`` (with ``confirmation_id`` and
            ``retry_remaining`` in ``extra``) when nothing can be matched.
    """
    # Fields shared by every file-trace event emitted on this path.
    trace_base = {
        "source": "text",
        "surgery_id": surgery_id,
        "confirmation_id": confirmation_id,
    }

    pending = self._sessions.get_pending_confirmation_by_id(
        surgery_id, confirmation_id
    )
    if pending is None:
        not_found = "未找到该待确认项或已处理。"
        self._emit_voice_trace(
            **trace_base,
            status="confirmation_not_found",
            error_message=not_found,
        )
        raise SurgeryPipelineError("CONFIRMATION_NOT_FOUND", not_found)

    option_labels = []
    for raw_label, _confidence in pending.options:
        cleaned = raw_label.strip()
        if cleaned:
            option_labels.append(cleaned)
    options_snapshot = json.dumps(
        [
            {"label": label, "confidence": confidence}
            for label, confidence in pending.options
        ],
        ensure_ascii=False,
    )

    # Audit fields that never vary here: this path has no audio artefact.
    audit_base = {
        "surgery_id": surgery_id,
        "confirmation_id": confirmation_id,
        "audio_object_key": None,
        "audio_content_type": None,
        "audio_size_bytes": None,
        "audio_sha256": None,
        "options_snapshot_json": options_snapshot,
    }

    text = (recognized_text or "").strip()
    if not text:
        empty_msg = "客户端识别文本为空"
        await self._persist_audit(
            **audit_base,
            status="client_stt_empty",
            asr_text=None,
            resolved_label=None,
            error_message=empty_msg,
        )
        self._sessions.record_voice_trace(
            surgery_id, asr_text=None, error="empty text"
        )
        self._emit_voice_trace(
            **trace_base,
            status="client_stt_empty",
            error_message=empty_msg,
        )
        raise SurgeryPipelineError("VOICE_TEXT_EMPTY", "recognized_text 为空。")

    self._sessions.record_voice_trace(surgery_id, asr_text=text, error=None)

    rejected = is_rejection_phrase(text)
    chosen: str | None = None
    # NOTE(review): the fallback lookup is nested under "not rejected"; this
    # nesting was reconstructed from a whitespace-stripped view -- confirm.
    if not rejected:
        chosen = parse_voice_choice(text, option_labels)
        if chosen is None:
            surgery_candidates = self._sessions.get_surgery_candidate_consumables(
                surgery_id
            )
            chosen = match_voice_choice_against_candidates(text, surgery_candidates)

    if not (rejected or chosen):
        _, retry_remaining = await self._sessions.record_voice_parse_failure(
            surgery_id, confirmation_id
        )
        base = (
            "无法从文本中匹配候选项或本台手术候选清单中的耗材名称,"
            "请重试或说「不是」否认全部。"
        )
        msg = (
            (
                f"{base} 本次未能解析,"
                f"您还可重试 {retry_remaining} 次,"
                "请输入「第一个」「第二个」等或候选项全名。"
            )
            if retry_remaining > 0
            else (
                f"{base} 本轮重试机会已用完,"
                "请再输入序号/全名,或说「不是」否认全部。"
            )
        )
        await self._persist_audit(
            **audit_base,
            status="client_stt_parse_failed",
            asr_text=text,
            resolved_label=None,
            error_message=msg,
        )
        self._sessions.record_voice_trace(surgery_id, asr_text=text, error=msg)
        self._emit_voice_trace(
            **trace_base,
            status="client_stt_parse_failed",
            asr_text=text,
            error_message=msg,
        )
        raise SurgeryPipelineError(
            "VOICE_PARSE_FAILED",
            msg,
            extra={
                "confirmation_id": confirmation_id,
                "retry_remaining": retry_remaining,
            },
        )

    await self._sessions.resolve_pending_confirmation(
        surgery_id,
        confirmation_id,
        chosen_label=chosen,
        rejected=rejected,
    )

    final_status = "rejected" if rejected else "recognized"
    resolved = None if rejected else chosen
    await self._persist_audit(
        **audit_base,
        status=final_status,
        asr_text=text,
        resolved_label=resolved,
        error_message=None,
    )
    self._emit_voice_trace(
        **trace_base,
        status=final_status,
        asr_text=text,
        resolved_label=resolved,
        rejected=rejected,
    )

    if not rejected:
        return VoiceResolveResult(
            resolved_label=chosen,
            rejected=False,
            asr_text=text,
            audio_object_key=None,
            message="已确认并记一条消耗。",
        )
    return VoiceResolveResult(
        resolved_label=None,
        rejected=True,
        asr_text=text,
        audio_object_key=None,
        message="已否认全部候选,未记消耗。",
    )
|
||||
|
||||
async def list_voice_audits_for_surgery(
    self,
    surgery_id: str,
    *,
    limit: int = 50,
    offset: int = 0,
) -> tuple[list[VoiceConfirmationAudit], int]:
    """Read one page of voice-confirmation audit rows for a surgery.

    Reads from the ``voice_confirmation_audits`` table; intended for
    internal queries and reports.

    Args:
        surgery_id: Surgery whose audit rows are requested.
        limit: Page size forwarded to the repository.
        offset: Page offset forwarded to the repository.

    Returns:
        The repository's result unchanged: a list of audit rows and an
        integer (presumably the total row count -- confirm against
        ``VoiceAuditRepository.list_by_surgery``).
    """
    async with AsyncSessionLocal() as db_session:
        page = await self._audits.list_by_surgery(
            db_session,
            surgery_id,
            limit=limit,
            offset=offset,
        )
        return page
|
||||
|
||||
async def _persist_audit(
|
||||
self,
|
||||
*,
|
||||
|
||||
Reference in New Issue
Block a user