feat: 语音确认、联调与运维增强

- 语音：序数解析（第一个/第二个等）、解析失败计数与 API detail.retry_remaining；百度 ASR 固定 dev_pid 为普通话；SurgeryPipelineError 支持 extra 并入 HTTP detail。
- Demo：demo 路由与假 RTSP、客户端 index 与 README；BackendResolver 与配置调整。
- 可观测：消耗 TSV 日志、语音文件日志、终端 Markdown 辅助；相关测试与依赖更新。
- 注意：.env 仍被 gitignore，本地密钥不会进入本提交。

Made-with: Cursor
2026-04-23 14:24:20 +08:00
parent 42720f81cf
commit 0c05463617
39 changed files with 3030 additions and 143 deletions
--- a/app/services/video/backend_resolver.py
+++ b/app/services/video/backend_resolver.py
@@ -21,7 +21,6 @@ class BackendResolver:
    ) -> None:
        self._s = settings
        self._hik = hikvision_runtime
-        self._rtsp_urls_map = settings.video_rtsp_url_map()

    def _parse_json_object(self, raw: str) -> dict[str, Any]:
        raw = (raw or "").strip()
@@ -55,8 +54,10 @@ class BackendResolver:
        return VideoBackendKind.RTSP

    def rtsp_url_for_camera(self, camera_id: str) -> str:
-        if camera_id in self._rtsp_urls_map:
-            return self._rtsp_urls_map[camera_id]
+        # Re-read on each use so VIDEO_RTSP_URLS_JSON_FILE can be hot-updated (e.g. dev orchestrator).
+        m = self._s.video_rtsp_url_map()
+        if camera_id in m:
+            return m[camera_id]
        tpl = (self._s.video_rtsp_url_template or "").strip()
        if tpl:
            try:
--- a/app/services/video/session_manager.py
+++ b/app/services/video/session_manager.py
@@ -26,6 +26,8 @@ from app.services.video.backend_resolver import BackendResolver
 from app.services.video.hikvision_runtime import HikvisionInitRefCount, HikvisionRuntime
 from app.services.video.rtsp_capture import RtspCapture
 from app.services.video.types import VideoBackendKind
+from app.services.consumption_tsv_log import append_consumption_window, init_consumption_log_file
+from app.services.voice_file_log import init_voice_log_file
 from app.services.voice_confirm import build_prompt_text
 from app.surgery_errors import SurgeryPipelineError

@@ -41,6 +43,8 @@ class PendingConsumableConfirmation:
    created_at: datetime
    model_top1_label: str
    model_top1_confidence: float
+    #: 本轮待确认在解析失败时累计次数（首败 + 重试），供 API 计算 retry_remaining。
+    voice_parse_failures: int = 0


@dataclass
@@ -49,6 +53,8 @@ class CameraStreamInferState:

    votes: list[tuple[float, str, ClsTop3]] = field(default_factory=list)
    stream_t0: float | None = None
+    #: 与 `stream_t0` 同一次初始化时的 `time.time()`，与 monotonic 流逝秒相加得到墙钟时间戳
+    stream_wall_start: float | None = None
    next_bucket: int = 0


@@ -258,6 +264,8 @@ class CameraSessionManager:
            )

        run = RunningSurgery(stop_event=stop_event, state=state, tasks=tasks)
+        init_consumption_log_file(surgery_id)
+        init_voice_log_file(surgery_id, self._s)
        async with self._manager_lock:
            self._active[surgery_id] = run

@@ -408,6 +416,22 @@ class CameraSessionManager:
            return []
        return list(self._active[surgery_id].state.candidate_consumables)

+    async def record_voice_parse_failure(
+        self, surgery_id: str, confirmation_id: str
+    ) -> tuple[int, int]:
+        """Increment the pending confirmation's voice-parse failure count; return (failures so far, retries remaining), or (0, 0) if the surgery is not active or the confirmation is missing or not pending."""
+        if surgery_id not in self._active:
+            return 0, 0
+        st = self._active[surgery_id].state
+        max_r = int(self._s.voice_confirm_max_failed_parse_rounds)  # setting used as the failure limit below
+        async with st.lock:  # read and update the confirmation while holding this surgery state's lock
+            p = st.pending_by_id.get(confirmation_id)
+            if p is None or p.status != "pending":
+                return 0, 0
+            p.voice_parse_failures += 1
+            remaining = max(0, max_r - p.voice_parse_failures)  # clamp so the remaining count never goes negative
+            return p.voice_parse_failures, remaining
+
    def next_pending_confirmation(
        self, surgery_id: str
    ) -> PendingConsumableConfirmation | None:
@@ -622,6 +646,19 @@ class CameraSessionManager:
                if snap is None:
                    continue

+                if self._s.video_log_inference_results:
+                    logger.info(
+                        "Vision result surgery={} camera={} top1={}({:.3f}) top2={}({:.3f}) top3={}({:.3f})",
+                        surgery_id,
+                        camera_id,
+                        snap.t1_name,
+                        snap.t1_conf,
+                        snap.t2_name,
+                        snap.t2_conf,
+                        snap.t3_name,
+                        snap.t3_conf,
+                    )
+
                wsec = self._s.consumable_vision_window_sec
                pending_preds: list[PredictionResult] = []
                async with state.lock:
@@ -630,6 +667,7 @@ class CameraSessionManager:
                    )
                    if cis.stream_t0 is None:
                        cis.stream_t0 = time.monotonic()
+                        cis.stream_wall_start = time.time()
                    t_rel = time.monotonic() - cis.stream_t0
                    cis.votes.append((t_rel, snap.t1_name, snap))
                    current_b = int(t_rel // wsec)
@@ -648,7 +686,19 @@ class CameraSessionManager:
                        if not bucket_pts:
                            continue
                        best = window_bucket_to_best_snap(bucket_pts)
-                        if best is not None:
+                        if best is not None and cis.stream_wall_start is not None:
+                            if self._s.consumption_tsv_log_enabled or self._s.consumption_log_markdown_terminal:
+                                wall_lo = cis.stream_wall_start + lo
+                                wall_hi = cis.stream_wall_start + hi
+                                append_consumption_window(
+                                    surgery_id=surgery_id,
+                                    name_to_code=state.name_to_code,
+                                    best=best,
+                                    doctor_id=self._s.video_result_doctor_id,
+                                    camera_id=camera_id,
+                                    wall_start_epoch=wall_lo,
+                                    wall_end_epoch=wall_hi,
+                                )
                            pending_preds.append(
                                cls_top3_to_prediction_result(best)
                            )