feat: 语音确认、联调与运维增强

- 语音：序数解析（第一个/第二个等）、解析失败计数与 API detail.retry_remaining；百度 ASR 固定 dev_pid 为普通话；SurgeryPipelineError 支持 extra 并入 HTTP detail。
- Demo：demo 路由与假 RTSP、客户端 index 与 README；BackendResolver 与配置调整。
- 可观测：消耗 TSV 日志、语音文件日志、终端 Markdown 辅助；相关测试与依赖更新。
- 注意：.env 仍被 gitignore，本地密钥不会进入本提交。

Made-with: Cursor
2026-04-23 14:24:20 +08:00
parent 42720f81cf
commit 0c05463617
39 changed files with 3030 additions and 143 deletions
--- a/app/services/voice_resolution.py
+++ b/app/services/voice_resolution.py
@@ -9,7 +9,9 @@ from fastapi.concurrency import run_in_threadpool
 from loguru import logger

 from app.config import Settings
+from app.services.voice_file_log import emit_voice_event
 from app.database import AsyncSessionLocal
+from app.db.models import VoiceConfirmationAudit
 from app.repositories.voice_audits import VoiceAuditRepository
 from app.services.audio_wav import WavDecodeError, wav_bytes_to_pcm16k_mono_s16le
 from app.services.baidu_speech import BaiduSpeechNotConfiguredError, BaiduSpeechService
@@ -49,6 +51,50 @@ class VoiceConfirmationService:
        self._minio = minio
        self._audits = audits

+    def _emit_voice_trace(  # Forward one voice-confirmation event to emit_voice_event.
+        self,
+        *,  # every argument below is keyword-only
+        source: str,  # callers in this file pass "wav" or "text"
+        status: str,  # outcome tag, e.g. "asr_failed", "parse_failed", "recognized"
+        surgery_id: str,
+        confirmation_id: str,
+        asr_text: str | None = None,  # recognized text, when one is available
+        resolved_label: str | None = None,  # label chosen on success
+        rejected: bool | str | None = None,  # passed through unchanged
+        error_message: str | None = None,
+        audio_object_key: str | None = None,  # object key of the stored audio, if any
+    ) -> None:
+        emit_voice_event(  # imported from app.services.voice_file_log
+            self._s,  # presumably the Settings instance held by the service; confirm in __init__
+            surgery_id=surgery_id,
+            source=source,
+            status=status,
+            confirmation_id=confirmation_id,
+            asr_text=asr_text,
+            resolved_label=resolved_label,
+            rejected=rejected,
+            error_message=error_message,
+            audio_object_key=audio_object_key,
+        )
+
+    def synthesize_prompt_to_mp3(self, text: str) -> bytes:
+        """Baidu online speech synthesis for direct playback in the browser; same parameters as `voice_confirm._synthesize_to_temp_mp3`."""
+        t = (text or "").strip()  # None is handled like an empty string
+        if not t:  # refuse to synthesize blank prompts
+            raise SurgeryPipelineError("TTS_TEXT_EMPTY", "提示文本为空。")
+        try:
+            r = self._baidu.synthesis(
+                t, "zh", 1, {"spd": 5, "pit": 5, "vol": 9, "per": 0}  # presumably speed/pitch/volume/voice options; confirm against the Baidu TTS docs
+            )
+        except BaiduSpeechNotConfiguredError as exc:  # map the missing-config error to a pipeline error
+            raise SurgeryPipelineError(
+                "BAIDU_NOT_CONFIGURED",
+                "服务端未配置百度语音，无法合成播报音频。",
+            ) from exc
+        if isinstance(r, dict):  # a dict result is treated as a failure payload
+            raise SurgeryPipelineError("TTS_ERROR", f"百度 TTS 失败: {r!r}")
+        return r  # any non-dict result is returned to the caller as the audio
+
    async def resolve_from_wav(
        self,
        *,
@@ -74,18 +120,39 @@ class VoiceConfirmationService:
                options_snapshot_json=None,
                error_message="音频超过大小限制",
            )
+            self._emit_voice_trace(
+                source="wav",
+                status="invalid_audio",
+                surgery_id=surgery_id,
+                confirmation_id=confirmation_id,
+                error_message="音频超过大小限制",
+            )
            raise SurgeryPipelineError(
                "VOICE_AUDIO_INVALID",
                f"音频大小超过限制（最大 {self._s.voice_upload_max_bytes} 字节）。",
            )

        if not self._minio.configured:
+            self._emit_voice_trace(
+                source="wav",
+                status="minio_not_configured",
+                surgery_id=surgery_id,
+                confirmation_id=confirmation_id,
+                error_message="服务端未配置 MinIO，无法保存语音追溯文件。",
+            )
            raise SurgeryPipelineError(
                "MINIO_NOT_CONFIGURED",
                "服务端未配置 MinIO，无法保存语音追溯文件。",
            )

        if not self._baidu.configured:
+            self._emit_voice_trace(
+                source="wav",
+                status="baidu_not_configured",
+                surgery_id=surgery_id,
+                confirmation_id=confirmation_id,
+                error_message="服务端未配置百度语音，无法进行语音识别。",
+            )
            raise SurgeryPipelineError(
                "BAIDU_NOT_CONFIGURED",
                "服务端未配置百度语音，无法进行语音识别。",
@@ -95,6 +162,13 @@ class VoiceConfirmationService:
            surgery_id, confirmation_id
        )
        if pending is None:
+            self._emit_voice_trace(
+                source="wav",
+                status="confirmation_not_found",
+                surgery_id=surgery_id,
+                confirmation_id=confirmation_id,
+                error_message="未找到该待确认项或已处理。",
+            )
            raise SurgeryPipelineError(
                "CONFIRMATION_NOT_FOUND",
                "未找到该待确认项或已处理。",
@@ -133,6 +207,13 @@ class VoiceConfirmationService:
                error_message=str(exc),
            )
            self._sessions.record_voice_trace(surgery_id, asr_text=None, error=str(exc))
+            self._emit_voice_trace(
+                source="wav",
+                status="upload_failed",
+                surgery_id=surgery_id,
+                confirmation_id=confirmation_id,
+                error_message=str(exc),
+            )
            raise SurgeryPipelineError(
                "MINIO_UPLOAD_FAILED",
                f"语音文件上传失败：{exc}",
@@ -155,6 +236,14 @@ class VoiceConfirmationService:
                error_message=str(exc),
            )
            self._sessions.record_voice_trace(surgery_id, asr_text=None, error=str(exc))
+            self._emit_voice_trace(
+                source="wav",
+                status="invalid_audio",
+                surgery_id=surgery_id,
+                confirmation_id=confirmation_id,
+                error_message=str(exc),
+                audio_object_key=stored.object_key,
+            )
            raise SurgeryPipelineError(
                "VOICE_AUDIO_INVALID",
                f"无法解析 WAV 音频：{exc}",
@@ -165,6 +254,14 @@ class VoiceConfirmationService:
                self._baidu.asr, pcm, "pcm", 16000, None
            )
        except BaiduSpeechNotConfiguredError as exc:
+            self._emit_voice_trace(
+                source="wav",
+                status="baidu_not_configured",
+                surgery_id=surgery_id,
+                confirmation_id=confirmation_id,
+                error_message=str(exc),
+                audio_object_key=stored.object_key,
+            )
            raise SurgeryPipelineError(
                "BAIDU_NOT_CONFIGURED",
                str(exc),
@@ -184,6 +281,14 @@ class VoiceConfirmationService:
                error_message=str(exc),
            )
            self._sessions.record_voice_trace(surgery_id, asr_text=None, error=str(exc))
+            self._emit_voice_trace(
+                source="wav",
+                status="asr_failed",
+                surgery_id=surgery_id,
+                confirmation_id=confirmation_id,
+                error_message=str(exc),
+                audio_object_key=stored.object_key,
+            )
            raise SurgeryPipelineError(
                "VOICE_ASR_FAILED",
                f"语音识别调用失败：{exc}",
@@ -205,6 +310,14 @@ class VoiceConfirmationService:
                error_message=msg,
            )
            self._sessions.record_voice_trace(surgery_id, asr_text=None, error=msg)
+            self._emit_voice_trace(
+                source="wav",
+                status="asr_failed",
+                surgery_id=surgery_id,
+                confirmation_id=confirmation_id,
+                error_message=msg,
+                audio_object_key=stored.object_key,
+            )
            raise SurgeryPipelineError("VOICE_ASR_FAILED", msg)

        if asr_payload.get("err_no") != 0:
@@ -226,6 +339,14 @@ class VoiceConfirmationService:
                error_message=msg,
            )
            self._sessions.record_voice_trace(surgery_id, asr_text=None, error=msg)
+            self._emit_voice_trace(
+                source="wav",
+                status="asr_failed",
+                surgery_id=surgery_id,
+                confirmation_id=confirmation_id,
+                error_message=msg,
+                audio_object_key=stored.object_key,
+            )
            raise SurgeryPipelineError("VOICE_ASR_FAILED", msg)

        results = asr_payload.get("result")
@@ -252,6 +373,14 @@ class VoiceConfirmationService:
                error_message=msg,
            )
            self._sessions.record_voice_trace(surgery_id, asr_text=None, error=msg)
+            self._emit_voice_trace(
+                source="wav",
+                status="asr_failed",
+                surgery_id=surgery_id,
+                confirmation_id=confirmation_id,
+                error_message=msg,
+                audio_object_key=stored.object_key,
+            )
            raise SurgeryPipelineError("VOICE_ASR_FAILED", msg)

        self._sessions.record_voice_trace(surgery_id, asr_text=text, error=None)
@@ -269,10 +398,24 @@ class VoiceConfirmationService:
                )

        if not rejected and not chosen:
-            msg = (
-                "无法从语音中匹配候选项或本台手术候选清单中的耗材名称，"
-                "请重试或说「不是」否认全部"
+            _, retry_remaining = await self._sessions.record_voice_parse_failure(
+                surgery_id, confirmation_id
            )
+            base = (
+                "无法从语音中匹配候选项或本台手术候选清单中的耗材名称，"
+                "请重试或说「不是」否认全部。"
+            )
+            if retry_remaining > 0:
+                msg = (
+                    f"{base} 本次未听清或未能解析，"
+                    f"您还可重试 {retry_remaining} 次，"
+                    "请说「第一个」「第二个」等序号或候选项全名。"
+                )
+            else:
+                msg = (
+                    f"{base} 本轮重试机会已用完，"
+                    "请再清晰地说序号/全名，或说「不是」否认全部。"
+                )
            await self._persist_audit(
                surgery_id=surgery_id,
                confirmation_id=confirmation_id,
@@ -287,7 +430,23 @@ class VoiceConfirmationService:
                error_message=msg,
            )
            self._sessions.record_voice_trace(surgery_id, asr_text=text, error=msg)
-            raise SurgeryPipelineError("VOICE_PARSE_FAILED", msg)
+            self._emit_voice_trace(
+                source="wav",
+                status="parse_failed",
+                surgery_id=surgery_id,
+                confirmation_id=confirmation_id,
+                asr_text=text,
+                error_message=msg,
+                audio_object_key=stored.object_key,
+            )
+            raise SurgeryPipelineError(
+                "VOICE_PARSE_FAILED",
+                msg,
+                extra={
+                    "confirmation_id": confirmation_id,
+                    "retry_remaining": retry_remaining,
+                },
+            )

        await self._sessions.resolve_pending_confirmation(
            surgery_id,
@@ -310,6 +469,16 @@ class VoiceConfirmationService:
            options_snapshot_json=options_snapshot,
            error_message=None,
        )
+        self._emit_voice_trace(
+            source="wav",
+            status=final_status,
+            surgery_id=surgery_id,
+            confirmation_id=confirmation_id,
+            asr_text=text,
+            resolved_label=chosen if not rejected else None,
+            rejected=rejected,
+            audio_object_key=stored.object_key,
+        )

        if rejected:
            return VoiceResolveResult(
@@ -327,6 +496,186 @@ class VoiceConfirmationService:
            message="已确认并记一条消耗。",
        )

+    async def resolve_from_recognized_text(
+        self,
+        *,
+        surgery_id: str,
+        confirmation_id: str,
+        recognized_text: str,
+    ) -> VoiceResolveResult:
+        """Resolve a pending confirmation from text recognized on the client (e.g. browser Web Speech); skips MinIO and Baidu ASR and uses the same parsing rules as `resolve_from_wav`."""
+        pending = self._sessions.get_pending_confirmation_by_id(
+            surgery_id, confirmation_id
+        )
+        if pending is None:  # nothing to resolve: trace it and raise CONFIRMATION_NOT_FOUND
+            self._emit_voice_trace(
+                source="text",
+                status="confirmation_not_found",
+                surgery_id=surgery_id,
+                confirmation_id=confirmation_id,
+                error_message="未找到该待确认项或已处理。",
+            )
+            raise SurgeryPipelineError(
+                "CONFIRMATION_NOT_FOUND",
+                "未找到该待确认项或已处理。",
+            )
+
+        option_labels = [a.strip() for a, _ in pending.options if a.strip()]  # stripped, non-blank labels only
+        options_snapshot = json.dumps(  # JSON copy of the offered options, stored with every audit row below
+            [{"label": a, "confidence": b} for a, b in pending.options],
+            ensure_ascii=False,
+        )
+
+        text = (recognized_text or "").strip()  # None is handled like an empty string
+        if not text:  # blank input: audit, trace, then raise VOICE_TEXT_EMPTY
+            await self._persist_audit(
+                surgery_id=surgery_id,
+                confirmation_id=confirmation_id,
+                status="client_stt_empty",
+                audio_object_key=None,  # no audio exists on this path, so all audio fields are None
+                audio_content_type=None,
+                audio_size_bytes=None,
+                audio_sha256=None,
+                asr_text=None,
+                resolved_label=None,
+                options_snapshot_json=options_snapshot,
+                error_message="客户端识别文本为空",
+            )
+            self._sessions.record_voice_trace(surgery_id, asr_text=None, error="empty text")
+            self._emit_voice_trace(
+                source="text",
+                status="client_stt_empty",
+                surgery_id=surgery_id,
+                confirmation_id=confirmation_id,
+                error_message="客户端识别文本为空",
+            )
+            raise SurgeryPipelineError("VOICE_TEXT_EMPTY", "recognized_text 为空。")
+
+        self._sessions.record_voice_trace(surgery_id, asr_text=text, error=None)  # record the text before parsing it
+
+        rejected = is_rejection_phrase(text)  # a rejection phrase short-circuits label matching
+        chosen: str | None = None
+        if not rejected:
+            chosen = parse_voice_choice(text, option_labels)  # first try the options offered for this confirmation
+            if chosen is None:  # no match: fall back to the surgery's candidate consumables
+                surgery_candidates = self._sessions.get_surgery_candidate_consumables(
+                    surgery_id
+                )
+                chosen = match_voice_choice_against_candidates(text, surgery_candidates)
+
+        if not rejected and not chosen:  # neither a rejection nor a match: count the failure and raise
+            _, retry_remaining = await self._sessions.record_voice_parse_failure(  # first tuple element is unused here
+                surgery_id, confirmation_id
+            )
+            base = (
+                "无法从文本中匹配候选项或本台手术候选清单中的耗材名称，"
+                "请重试或说「不是」否认全部。"
+            )
+            if retry_remaining > 0:  # the message tells the user how many retries are left
+                msg = (
+                    f"{base} 本次未能解析，"
+                    f"您还可重试 {retry_remaining} 次，"
+                    "请输入「第一个」「第二个」等或候选项全名。"
+                )
+            else:  # no retries left
+                msg = (
+                    f"{base} 本轮重试机会已用完，"
+                    "请再输入序号/全名，或说「不是」否认全部。"
+                )
+            await self._persist_audit(
+                surgery_id=surgery_id,
+                confirmation_id=confirmation_id,
+                status="client_stt_parse_failed",
+                audio_object_key=None,
+                audio_content_type=None,
+                audio_size_bytes=None,
+                audio_sha256=None,
+                asr_text=text,
+                resolved_label=None,
+                options_snapshot_json=options_snapshot,
+                error_message=msg,
+            )
+            self._sessions.record_voice_trace(surgery_id, asr_text=text, error=msg)
+            self._emit_voice_trace(
+                source="text",
+                status="client_stt_parse_failed",
+                surgery_id=surgery_id,
+                confirmation_id=confirmation_id,
+                asr_text=text,
+                error_message=msg,
+            )
+            raise SurgeryPipelineError(
+                "VOICE_PARSE_FAILED",
+                msg,
+                extra={  # per the commit message, extra is merged into the HTTP error detail
+                    "confirmation_id": confirmation_id,
+                    "retry_remaining": retry_remaining,
+                },
+            )
+
+        await self._sessions.resolve_pending_confirmation(  # apply the decision before auditing it
+            surgery_id,
+            confirmation_id,
+            chosen_label=chosen,
+            rejected=rejected,
+        )
+
+        final_status = "rejected" if rejected else "recognized"  # shared by the audit row and the trace event
+        await self._persist_audit(
+            surgery_id=surgery_id,
+            confirmation_id=confirmation_id,
+            status=final_status,
+            audio_object_key=None,
+            audio_content_type=None,
+            audio_size_bytes=None,
+            audio_sha256=None,
+            asr_text=text,
+            resolved_label=chosen if not rejected else None,  # no label is stored for a rejection
+            options_snapshot_json=options_snapshot,
+            error_message=None,
+        )
+        self._emit_voice_trace(
+            source="text",
+            status=final_status,
+            surgery_id=surgery_id,
+            confirmation_id=confirmation_id,
+            asr_text=text,
+            resolved_label=chosen if not rejected else None,
+            rejected=rejected,
+        )
+
+        if rejected:  # the user denied every candidate
+            return VoiceResolveResult(
+                resolved_label=None,
+                rejected=True,
+                asr_text=text,
+                audio_object_key=None,
+                message="已否认全部候选，未记消耗。",
+            )
+        return VoiceResolveResult(  # a label was matched and confirmed
+            resolved_label=chosen,
+            rejected=False,
+            asr_text=text,
+            audio_object_key=None,
+            message="已确认并记一条消耗。",
+        )
+
+    async def list_voice_audits_for_surgery(
+        self,
+        surgery_id: str,
+        *,
+        limit: int = 50,  # page size, forwarded unchanged
+        offset: int = 0,  # page start, forwarded unchanged
+    ) -> tuple[list[VoiceConfirmationAudit], int]:  # NOTE(review): the int is presumably a total row count; confirm in the repository
+        """Read a page of rows from the `voice_confirmation_audits` table, for internal queries and reports."""
+        async with AsyncSessionLocal() as session:  # session is opened for this single query
+            return await self._audits.list_by_surgery(
+                session,
+                surgery_id,
+                limit=limit,
+                offset=offset,
+            )
+
    async def _persist_audit(
        self,
        *,