feat: 语音确认、联调与运维增强

- 语音：序数解析（第一个/第二个等）、解析失败计数与 API detail.retry_remaining；百度 ASR 固定 dev_pid 为普通话；SurgeryPipelineError 支持 extra 并入 HTTP detail。
- Demo：demo 路由与假 RTSP、客户端 index 与 README；BackendResolver 与配置调整。
- 可观测：消耗 TSV 日志、语音文件日志、终端 Markdown 辅助；相关测试与依赖更新。
- 注意：.env 仍被 gitignore，本地密钥不会进入本提交。

Made-with: Cursor
2026-04-23 14:24:20 +08:00
parent 42720f81cf
commit 0c05463617
39 changed files with 3030 additions and 143 deletions
--- a/tests/test_api_contract.py
+++ b/tests/test_api_contract.py
@@ -193,6 +193,60 @@ def test_resolve_non_wav_422(api_app: FastAPI) -> None:
    assert r.status_code == 422


+def test_prompt_audio_200(api_app: FastAPI) -> None:
+    pipeline = MagicMock()
+    pipeline.get_pending_prompt_audio_mp3 = AsyncMock(return_value=b"\xff\xfb\x90")
+    api_app.dependency_overrides[get_surgery_pipeline] = lambda: pipeline
+    client = TestClient(api_app)
+    r = client.get("/client/surgeries/123456/pending-confirmation/cid1/prompt-audio")
+    assert r.status_code == 200
+    assert r.content == b"\xff\xfb\x90"
+    assert "mpeg" in (r.headers.get("content-type") or "")
+    pipeline.get_pending_prompt_audio_mp3.assert_awaited_once_with(
+        surgery_id="123456",
+        confirmation_id="cid1",
+    )
+
+
+def test_resolve_text_200(api_app: FastAPI) -> None:
+    pipeline = MagicMock()
+    pipeline.resolve_pending_confirmation_from_client_text = AsyncMock(
+        return_value=VoiceResolveResult(
+            resolved_label="纱布",
+            rejected=False,
+            asr_text="第一个",
+            audio_object_key=None,
+            message="ok",
+        )
+    )
+    api_app.dependency_overrides[get_surgery_pipeline] = lambda: pipeline
+    client = TestClient(api_app)
+    r = client.post(
+        "/client/surgeries/123456/pending-confirmation/cid/resolve-text",
+        json={"recognized_text": "第一个"},
+    )
+    assert r.status_code == 200
+    body = r.json()
+    assert body["resolved_label"] == "纱布"
+    assert body["asr_text"] == "第一个"
+    pipeline.resolve_pending_confirmation_from_client_text.assert_awaited_once()
+
+
+def test_resolve_text_maps_surgery_error(api_app: FastAPI) -> None:
+    pipeline = MagicMock()
+    pipeline.resolve_pending_confirmation_from_client_text = AsyncMock(
+        side_effect=SurgeryPipelineError("VOICE_PARSE_FAILED", "无法匹配")
+    )
+    api_app.dependency_overrides[get_surgery_pipeline] = lambda: pipeline
+    client = TestClient(api_app)
+    r = client.post(
+        "/client/surgeries/123456/pending-confirmation/cid/resolve-text",
+        json={"recognized_text": "随便说说"},
+    )
+    assert r.status_code == 422
+    assert r.json()["detail"]["code"] == "VOICE_PARSE_FAILED"
+
+
 def test_resolve_200(api_app: FastAPI) -> None:
    pipeline = MagicMock()
    pipeline.resolve_pending_confirmation_from_audio = AsyncMock(
@@ -255,3 +309,22 @@ def test_internal_voice_status_404_and_200(api_app: FastAPI) -> None:
    r2 = client2.get("/internal/surgeries/123456/voice-status")
    assert r2.status_code == 200
    assert r2.json()["pending_queue_approx"] == 2
+
+
+def test_internal_voice_audits_200_empty(api_app: FastAPI) -> None:
+    pipeline = MagicMock()
+    pipeline.list_voice_audits = AsyncMock(return_value=([], 0))
+    api_app.dependency_overrides[get_surgery_pipeline] = lambda: pipeline
+    client = TestClient(api_app)
+    r = client.get(
+        "/internal/surgeries/123456/voice-audits",
+        params={"limit": 1, "offset": 0},
+    )
+    assert r.status_code == 200
+    j = r.json()
+    assert j["surgery_id"] == "123456"
+    assert j["total"] == 0
+    assert j["limit"] == 1
+    assert j["offset"] == 0
+    assert j["items"] == []
+    pipeline.list_voice_audits.assert_awaited_once_with("123456", limit=1, offset=0)
--- a/tests/test_consumption_tsv_log.py
+++ b/tests/test_consumption_tsv_log.py
@@ -0,0 +1,123 @@
+"""consumption_log.txt 兼容 TSV 格式。"""
+
+import pytest
+
+from app.config import settings
+from app.services.consumable_vision_algorithm import ClsTop3
+from app.services.consumption_tsv_log import (
+    HEADER,
+    _RANGE_SEP,
+    append_consumption_tsv_line,
+    build_consumption_markdown,
+    build_tsv_line,
+    init_consumption_log_file,
+    short_camera_label,
+)
+
+
+def test_short_camera_label() -> None:
+    assert short_camera_label("or-cam-01") == "cam01"
+    assert short_camera_label("or-cam-2") == "cam02"
+
+
+def test_build_tsv_line_matches_sample_shape(monkeypatch: pytest.MonkeyPatch) -> None:
+    monkeypatch.setattr(settings, "consumption_log_timezone", "UTC")
+    best = ClsTop3(
+        t1_name="一次性医用灭菌棉签",
+        t1_conf=0.9997,
+        t2_name="cls2",
+        t2_conf=0.0003,
+        t3_name="cls3",
+        t3_conf=0.0002,
+        t1_pid="2237844",
+        t2_pid="11765-1-101",
+        t3_pid="21504-1-1",
+    )
+    # 墙钟：拉流起点对齐到 2024-01-01T00:00:00Z，时间窗 +0s…+45s
+    w0 = 1704067200.0
+    line = build_tsv_line(
+        name_to_code={},
+        best=best,
+        doctor_id="DOCTOR_PLACEHOLDER",
+        camera_id="or-cam-01",
+        wall_start_epoch=w0,
+        wall_end_epoch=w0 + 45.0,
+    )
+    parts = line.rstrip("\n").split("\t")
+    assert len(parts) == 7
+    assert parts[0] == "2237844"
+    assert parts[1] == "一次性医用灭菌棉签 0.9997"
+    assert parts[2] == "cls2"
+    assert parts[3] == "cls3"
+    assert parts[4] == "1"
+    assert parts[5] == "DOCTOR_PLACEHOLDER"
+    assert (
+        parts[6]
+        == "cam01@2024-01-01T00:00:00.000+00:00"
+        + _RANGE_SEP
+        + "2024-01-01T00:00:45.000+00:00"
+    )
+
+
+def test_header_columns() -> None:
+    cols = HEADER.strip().split("\t")
+    assert cols[0] == "物品id"
+    assert cols[-1] == "时间戳"
+
+
+def test_per_surgery_file_init_and_append(
+    tmp_path,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    monkeypatch.setattr(settings, "consumption_tsv_log_enabled", True)
+    monkeypatch.setattr(
+        settings,
+        "consumption_tsv_log_path",
+        str(tmp_path / "{surgery_id}.txt"),
+    )
+    init_consumption_log_file("or-001")
+    append_consumption_tsv_line("or-001", "row1\n")
+    append_consumption_tsv_line("or-001", "row2\n")
+    p = tmp_path / "or-001.txt"
+    assert p.read_text(encoding="utf-8") == HEADER + "row1\n" + "row2\n"
+    init_consumption_log_file("or-001")
+    assert p.read_text(encoding="utf-8") == HEADER
+
+
+def test_build_consumption_markdown_top123_columns(monkeypatch: pytest.MonkeyPatch) -> None:
+    monkeypatch.setattr(settings, "consumption_log_timezone", "UTC")
+    best = ClsTop3(
+        t1_name="一次性医用灭菌棉签",
+        t1_conf=0.9997,
+        t2_name="cls2",
+        t2_conf=0.0003,
+        t3_name="cls3",
+        t3_conf=0.0002,
+        t1_pid="2237844",
+        t2_pid="11765-1-101",
+        t3_pid="21504-1-1",
+    )
+    w0 = 1704067200.0
+    md = build_consumption_markdown(
+        name_to_code={},
+        best=best,
+        doctor_id="DOCTOR_PLACEHOLDER",
+        camera_id="or-cam-01",
+        wall_start_epoch=w0,
+        wall_end_epoch=w0 + 45.0,
+    )
+    assert "Top1 物品id" in md and "Top1 物品名称" in md and "Top1 置信度" in md
+    assert "Top2 物品名称" in md and "Top3 物品名称" in md
+    assert "Top2 物品id" not in md
+    assert "2237844" in md
+    assert "一次性医用灭菌棉签" in md
+    assert "0.9997" in md
+    assert "cls2" in md and "cls3" in md
+    assert "11765-1-101" not in md and "21504-1-1" not in md
+    assert "0.0003" not in md and "0.0002" not in md
+    assert "DOCTOR_PLACEHOLDER" in md
+    assert "| 1 |" in md
+    # 终端为可读时间戳，非落盘用 ISO@cam
+    assert "2024-01-01 00:00:00.000" in md and "2024-01-01 00:00:45.000" in md
+    assert "cam01" in md and " · " in md and _RANGE_SEP in md
+    assert "cam01@2024-01" not in md
--- a/tests/test_session_manager_unit.py
+++ b/tests/test_session_manager_unit.py
@@ -156,9 +156,7 @@ async def test_handle_skips_when_candidate_list_empty() -> None:
        confidence=0.99,
        topk=[PredictionCandidate(label="纱布", confidence=0.99)],
    )
-    await mgr._handle_classification_result(
-        state=state, cls_res=res
-    )
+    await mgr._handle_classification_result(state=state, cls_res=res)
    assert state.details == []
    assert state.pending_fifo == []

@@ -199,9 +197,7 @@ async def test_handle_skips_below_voice_floor() -> None:
        confidence=0.4,
        topk=[PredictionCandidate(label="纱布", confidence=0.4)],
    )
-    await mgr._handle_classification_result(
-        state=state, cls_res=res
-    )
+    await mgr._handle_classification_result(state=state, cls_res=res)
    assert state.details == []
    assert state.pending_fifo == []

@@ -221,9 +217,7 @@ async def test_handle_auto_vision_confirm() -> None:
        confidence=0.99,
        topk=[PredictionCandidate(label="纱布", confidence=0.99)],
    )
-    await mgr._handle_classification_result(
-        state=state, cls_res=res
-    )
+    await mgr._handle_classification_result(state=state, cls_res=res)
    assert len(state.details) == 1
    assert state.details[0].source == "vision"
    assert state.details[0].item_id == "纱布"
@@ -247,9 +241,7 @@ async def test_handle_high_conf_top1_not_in_candidates_enqueues_pending() -> Non
            PredictionCandidate(label="缝线", confidence=0.2),
        ],
    )
-    await mgr._handle_classification_result(
-        state=state, cls_res=res
-    )
+    await mgr._handle_classification_result(state=state, cls_res=res)
    assert state.details == []
    assert len(state.pending_fifo) == 1
    pid = state.pending_fifo[0]
@@ -276,9 +268,7 @@ async def test_handle_mid_confidence_enqueues_pending() -> None:
            PredictionCandidate(label="缝线", confidence=0.3),
        ],
    )
-    await mgr._handle_classification_result(
-        state=state, cls_res=res
-    )
+    await mgr._handle_classification_result(state=state, cls_res=res)
    assert len(state.pending_fifo) == 1


@@ -299,9 +289,7 @@ async def test_handle_voice_disabled_no_pending_for_mid_conf() -> None:
        confidence=0.5,
        topk=[PredictionCandidate(label="纱布", confidence=0.5)],
    )
-    await mgr._handle_classification_result(
-        state=state, cls_res=res
-    )
+    await mgr._handle_classification_result(state=state, cls_res=res)
    assert state.pending_fifo == []
    assert state.details == []

@@ -322,12 +310,8 @@ async def test_handle_vision_cooldown_skips_duplicate() -> None:
        confidence=0.99,
        topk=[PredictionCandidate(label="纱布", confidence=0.99)],
    )
-    await mgr._handle_classification_result(
-        state=state, cls_res=res
-    )
-    await mgr._handle_classification_result(
-        state=state, cls_res=res
-    )
+    await mgr._handle_classification_result(state=state, cls_res=res)
+    await mgr._handle_classification_result(state=state, cls_res=res)
    assert len(state.details) == 1


@@ -350,12 +334,8 @@ async def test_handle_pending_dedupe_cooldown() -> None:
            PredictionCandidate(label="缝线", confidence=0.2),
        ],
    )
-    await mgr._handle_classification_result(
-        state=state, cls_res=res
-    )
-    await mgr._handle_classification_result(
-        state=state, cls_res=res
-    )
+    await mgr._handle_classification_result(state=state, cls_res=res)
+    await mgr._handle_classification_result(state=state, cls_res=res)
    assert len(state.pending_fifo) == 1


--- a/tests/test_voice_audit_repository.py
+++ b/tests/test_voice_audit_repository.py
@@ -1,5 +1,6 @@
 from __future__ import annotations

+import asyncio
 import json

 import pytest
@@ -55,3 +56,50 @@ async def test_save_audit_persists_fields(db_session: AsyncSession) -> None:
    assert r.resolved_label == "纱布"
    assert r.options_snapshot_json == opts
    assert r.error_message is None
+
+
+@pytest.mark.asyncio
+async def test_list_by_surgery_order_and_total(db_session: AsyncSession) -> None:
+    repo = VoiceAuditRepository()
+    async with db_session.begin():
+        await repo.save_audit(
+            db_session,
+            surgery_id="111111",
+            confirmation_id="a",
+            status="parse_failed",
+            audio_object_key=None,
+            audio_content_type=None,
+            audio_size_bytes=None,
+            audio_sha256=None,
+            asr_text="糊",
+            resolved_label=None,
+            options_snapshot_json="[]",
+            error_message="x",
+        )
+    await asyncio.sleep(0.02)
+    async with db_session.begin():
+        await repo.save_audit(
+            db_session,
+            surgery_id="111111",
+            confirmation_id="b",
+            status="recognized",
+            audio_object_key="k.wav",
+            audio_content_type="audio/wav",
+            audio_size_bytes=10,
+            audio_sha256="b" * 64,
+            asr_text="纱布",
+            resolved_label="纱布",
+            options_snapshot_json="[]",
+            error_message=None,
+        )
+    async with db_session.begin():
+        rows, total = await repo.list_by_surgery(db_session, "111111", limit=10, offset=0)
+    assert total == 2
+    assert [r.confirmation_id for r in rows] == ["b", "a"]
+    async with db_session.begin():
+        page2, total2 = await repo.list_by_surgery(
+            db_session, "111111", limit=1, offset=1
+        )
+    assert total2 == 2
+    assert len(page2) == 1
+    assert page2[0].confirmation_id == "a"
--- a/tests/test_voice_confirm.py
+++ b/tests/test_voice_confirm.py
@@ -13,6 +13,21 @@ def test_parse_voice_choice_numeric() -> None:
    assert parse_voice_choice("第2个", ["纱布", "缝线", "钳子"]) == "缝线"


+def test_parse_voice_choice_ordinal_chinese() -> None:
+    opts = ["纱布", "缝线", "钳子"]
+    assert parse_voice_choice("第一个", opts) == "纱布"
+    assert parse_voice_choice("第一个。", opts) == "纱布"
+    assert parse_voice_choice("第2个", opts) == "缝线"
+    assert parse_voice_choice("第二", opts) == "缝线"
+    assert parse_voice_choice("选3", opts) == "钳子"
+    assert parse_voice_choice("选项2", ["纱布", "缝线"]) == "缝线"
+
+
+def test_parse_voice_choice_single_chinese_digit_with_few_options() -> None:
+    assert parse_voice_choice("一", ["纱布", "缝线"]) == "纱布"
+    assert parse_voice_choice("两", ["纱布", "缝线"]) == "缝线"
+
+
 def test_parse_voice_choice_negative() -> None:
    assert parse_voice_choice("不是", ["纱布", "缝线"]) is None

--- a/tests/test_voice_file_log.py
+++ b/tests/test_voice_file_log.py
@@ -0,0 +1,62 @@
+"""Tests for voice TSV + emit_voice_event (path + TSV line)."""
+
+from __future__ import annotations
+
+import tempfile
+from pathlib import Path
+
+from app.config import Settings
+from app.services.voice_file_log import (
+    append_voice_tsv_line,
+    emit_voice_event,
+    init_voice_log_file,
+    resolved_voice_log_path,
+)
+
+
+def test_resolved_voice_log_path_replaces_surgery_id() -> None:
+    s = Settings()
+    s.voice_file_log_path = "logs/voice_{surgery_id}.txt"
+    p = resolved_voice_log_path("123456", s)
+    assert p.name == "voice_123456.txt"
+    assert "logs" in str(p)
+
+
+def test_init_and_append_tsv() -> None:
+    with tempfile.TemporaryDirectory() as d:
+        base = Path(d)
+        s = Settings()
+        s.voice_file_log_enabled = True
+        s.voice_file_log_path = str((base / "v_{surgery_id}.txt").resolve())
+        init_voice_log_file("999999", s)
+        p = resolved_voice_log_path("999999", s)
+        assert p.exists()
+        h = p.read_text(encoding="utf-8")
+        assert "来源" in h and "confirmation_id" in h
+        line = "ts\ttest\trecognized\tcid1\t同\t品\tfalse\t\tk.wav\n"
+        append_voice_tsv_line("999999", line, s)
+        assert p.read_text(encoding="utf-8").endswith(line)
+
+
+def test_emit_voice_event_writes_when_enabled() -> None:
+    s = Settings()
+    s.voice_file_log_enabled = True
+    with tempfile.TemporaryDirectory() as d:
+        s.voice_file_log_path = str((Path(d) / "v_{surgery_id}.txt").resolve())
+        init_voice_log_file("111111", s)
+        emit_voice_event(
+            s,
+            surgery_id="111111",
+            source="wav",
+            status="recognized",
+            confirmation_id="c1",
+            asr_text="纱布",
+            resolved_label="纱布",
+            rejected=False,
+            audio_object_key="k.wav",
+        )
+        p = resolved_voice_log_path("111111", s)
+        body = p.read_text(encoding="utf-8")
+        assert "纱布" in body
+        assert "recognized" in body
+        assert "k.wav" in body