refactor: 统一耗材视觉算法并扩展语音确认至全量候选清单

- 以 ConsumableVisionAlgorithmService 替代 consumable_classifier 与 tear_action；手部检测权重为可选项，未配置时对全帧分类；支持时间窗众数与 Excel 白名单配置。
- 语音待确认：ASR 结果先匹配 pending topk，再匹配本台手术的 candidate_consumables；记账 item_id 与 vision 一致，均使用 name_to_code。
- 更新 config、Compose、.env.example、依赖（pandas/openpyxl）与测试。

Made-with: Cursor
2026-04-22 16:31:12 +08:00
parent 4c4550d58b
commit 132702aea9
18 changed files with 791 additions and 476 deletions
--- a/tests/test_session_manager_unit.py
+++ b/tests/test_session_manager_unit.py
@@ -7,7 +7,10 @@ from unittest.mock import MagicMock
 import pytest

 from app.config import Settings
-from app.services.consumable_classifier import PredictionCandidate, PredictionResult
+from app.services.consumable_vision_algorithm import (
+    PredictionCandidate,
+    PredictionResult,
+)
 from app.surgery_errors import SurgeryPipelineError
 from app.services.video.session_manager import (
    CameraSessionManager,
@@ -21,8 +24,7 @@ def test_live_consumption_requires_non_empty_details() -> None:
    settings = Settings()
    mgr = CameraSessionManager(
        settings=settings,
-        consumable_classifier=MagicMock(),
-        tear_action=MagicMock(),
+        vision_algorithm=MagicMock(),
        hikvision_runtime=None,
        result_repository=None,
    )
@@ -33,13 +35,50 @@ def test_live_consumption_requires_non_empty_details() -> None:
    assert mgr.live_consumption_if_active("123456") is None


+@pytest.mark.asyncio
+async def test_resolve_voice_accepts_label_on_surgery_list_not_in_topk_options() -> None:
+    settings = Settings()
+    mgr = CameraSessionManager(
+        settings=settings,
+        vision_algorithm=MagicMock(),
+        hikvision_runtime=None,
+        result_repository=None,
+    )
+    st = SurgerySessionState(
+        candidate_consumables=["纱布", "止血钳"],
+        name_to_code={"纱布": "P1", "止血钳": "P2"},
+    )
+    pid = "test-confirm-id"
+    st.pending_by_id[pid] = PendingConsumableConfirmation(
+        id=pid,
+        status="pending",
+        options=[("纱布", 0.4)],
+        prompt_text="请确认",
+        created_at=datetime.now(timezone.utc),
+        model_top1_label="unknown",
+        model_top1_confidence=0.41,
+    )
+    st.pending_fifo.append(pid)
+    mgr._active["123456"] = RunningSurgery(
+        stop_event=asyncio.Event(), state=st, tasks=[]
+    )
+
+    await mgr.resolve_pending_confirmation(
+        "123456", pid, chosen_label="止血钳", rejected=False
+    )
+
+    assert len(st.details) == 1
+    assert st.details[0].item_id == "P2"
+    assert st.details[0].item_name == "止血钳"
+    assert st.details[0].source == "voice"
+
+
@pytest.mark.asyncio
 async def test_resolve_pending_appends_voice_detail() -> None:
    settings = Settings()
    mgr = CameraSessionManager(
        settings=settings,
-        consumable_classifier=MagicMock(),
-        tear_action=MagicMock(),
+        vision_algorithm=MagicMock(),
        hikvision_runtime=None,
        result_repository=None,
    )
@@ -74,8 +113,7 @@ async def test_resolve_reject_closes_without_detail() -> None:
    settings = Settings()
    mgr = CameraSessionManager(
        settings=settings,
-        consumable_classifier=MagicMock(),
-        tear_action=MagicMock(),
+        vision_algorithm=MagicMock(),
        hikvision_runtime=None,
        result_repository=None,
    )
@@ -108,8 +146,7 @@ async def test_handle_skips_when_candidate_list_empty() -> None:
    settings = Settings()
    mgr = CameraSessionManager(
        settings=settings,
-        consumable_classifier=MagicMock(),
-        tear_action=MagicMock(),
+        vision_algorithm=MagicMock(),
        hikvision_runtime=None,
        result_repository=None,
    )
@@ -120,7 +157,7 @@ async def test_handle_skips_when_candidate_list_empty() -> None:
        topk=[PredictionCandidate(label="纱布", confidence=0.99)],
    )
    await mgr._handle_classification_result(
-        state=state, cls_res=res, tear_label=""
+        state=state, cls_res=res
    )
    assert state.details == []
    assert state.pending_fifo == []
@@ -131,8 +168,7 @@ async def test_archive_retry_loop_starts() -> None:
    settings = Settings()
    mgr = CameraSessionManager(
        settings=settings,
-        consumable_classifier=MagicMock(),
-        tear_action=MagicMock(),
+        vision_algorithm=MagicMock(),
        hikvision_runtime=None,
        result_repository=None,
    )
@@ -153,8 +189,7 @@ async def test_handle_skips_below_voice_floor() -> None:
    settings.video_voice_confirm_min_confidence = 0.5
    mgr = CameraSessionManager(
        settings=settings,
-        consumable_classifier=MagicMock(),
-        tear_action=MagicMock(),
+        vision_algorithm=MagicMock(),
        hikvision_runtime=None,
        result_repository=None,
    )
@@ -165,7 +200,7 @@ async def test_handle_skips_below_voice_floor() -> None:
        topk=[PredictionCandidate(label="纱布", confidence=0.4)],
    )
    await mgr._handle_classification_result(
-        state=state, cls_res=res, tear_label=""
+        state=state, cls_res=res
    )
    assert state.details == []
    assert state.pending_fifo == []
@@ -176,8 +211,7 @@ async def test_handle_auto_vision_confirm() -> None:
    settings = Settings()
    mgr = CameraSessionManager(
        settings=settings,
-        consumable_classifier=MagicMock(),
-        tear_action=MagicMock(),
+        vision_algorithm=MagicMock(),
        hikvision_runtime=None,
        result_repository=None,
    )
@@ -188,7 +222,7 @@ async def test_handle_auto_vision_confirm() -> None:
        topk=[PredictionCandidate(label="纱布", confidence=0.99)],
    )
    await mgr._handle_classification_result(
-        state=state, cls_res=res, tear_label=""
+        state=state, cls_res=res
    )
    assert len(state.details) == 1
    assert state.details[0].source == "vision"
@@ -200,8 +234,7 @@ async def test_handle_high_conf_top1_not_in_candidates_enqueues_pending() -> Non
    settings = Settings()
    mgr = CameraSessionManager(
        settings=settings,
-        consumable_classifier=MagicMock(),
-        tear_action=MagicMock(),
+        vision_algorithm=MagicMock(),
        hikvision_runtime=None,
        result_repository=None,
    )
@@ -215,7 +248,7 @@ async def test_handle_high_conf_top1_not_in_candidates_enqueues_pending() -> Non
        ],
    )
    await mgr._handle_classification_result(
-        state=state, cls_res=res, tear_label=""
+        state=state, cls_res=res
    )
    assert state.details == []
    assert len(state.pending_fifo) == 1
@@ -230,8 +263,7 @@ async def test_handle_mid_confidence_enqueues_pending() -> None:
    settings.video_voice_confirm_min_confidence = 0.3
    mgr = CameraSessionManager(
        settings=settings,
-        consumable_classifier=MagicMock(),
-        tear_action=MagicMock(),
+        vision_algorithm=MagicMock(),
        hikvision_runtime=None,
        result_repository=None,
    )
@@ -245,7 +277,7 @@ async def test_handle_mid_confidence_enqueues_pending() -> None:
        ],
    )
    await mgr._handle_classification_result(
-        state=state, cls_res=res, tear_label=""
+        state=state, cls_res=res
    )
    assert len(state.pending_fifo) == 1

@@ -257,8 +289,7 @@ async def test_handle_voice_disabled_no_pending_for_mid_conf() -> None:
    settings.video_auto_confirm_confidence = 0.8
    mgr = CameraSessionManager(
        settings=settings,
-        consumable_classifier=MagicMock(),
-        tear_action=MagicMock(),
+        vision_algorithm=MagicMock(),
        hikvision_runtime=None,
        result_repository=None,
    )
@@ -269,7 +300,7 @@ async def test_handle_voice_disabled_no_pending_for_mid_conf() -> None:
        topk=[PredictionCandidate(label="纱布", confidence=0.5)],
    )
    await mgr._handle_classification_result(
-        state=state, cls_res=res, tear_label=""
+        state=state, cls_res=res
    )
    assert state.pending_fifo == []
    assert state.details == []
@@ -281,8 +312,7 @@ async def test_handle_vision_cooldown_skips_duplicate() -> None:
    settings.video_detail_cooldown_sec = 3600.0
    mgr = CameraSessionManager(
        settings=settings,
-        consumable_classifier=MagicMock(),
-        tear_action=MagicMock(),
+        vision_algorithm=MagicMock(),
        hikvision_runtime=None,
        result_repository=None,
    )
@@ -293,10 +323,10 @@ async def test_handle_vision_cooldown_skips_duplicate() -> None:
        topk=[PredictionCandidate(label="纱布", confidence=0.99)],
    )
    await mgr._handle_classification_result(
-        state=state, cls_res=res, tear_label=""
+        state=state, cls_res=res
    )
    await mgr._handle_classification_result(
-        state=state, cls_res=res, tear_label=""
+        state=state, cls_res=res
    )
    assert len(state.details) == 1

@@ -307,8 +337,7 @@ async def test_handle_pending_dedupe_cooldown() -> None:
    settings.video_detail_cooldown_sec = 3600.0
    mgr = CameraSessionManager(
        settings=settings,
-        consumable_classifier=MagicMock(),
-        tear_action=MagicMock(),
+        vision_algorithm=MagicMock(),
        hikvision_runtime=None,
        result_repository=None,
    )
@@ -322,10 +351,10 @@ async def test_handle_pending_dedupe_cooldown() -> None:
        ],
    )
    await mgr._handle_classification_result(
-        state=state, cls_res=res, tear_label=""
+        state=state, cls_res=res
    )
    await mgr._handle_classification_result(
-        state=state, cls_res=res, tear_label=""
+        state=state, cls_res=res
    )
    assert len(state.pending_fifo) == 1

@@ -335,8 +364,7 @@ async def test_resolve_invalid_chosen_label() -> None:
    settings = Settings()
    mgr = CameraSessionManager(
        settings=settings,
-        consumable_classifier=MagicMock(),
-        tear_action=MagicMock(),
+        vision_algorithm=MagicMock(),
        hikvision_runtime=None,
        result_repository=None,
    )
@@ -367,8 +395,7 @@ async def test_resolve_not_active() -> None:
    settings = Settings()
    mgr = CameraSessionManager(
        settings=settings,
-        consumable_classifier=MagicMock(),
-        tear_action=MagicMock(),
+        vision_algorithm=MagicMock(),
        hikvision_runtime=None,
        result_repository=None,
    )
@@ -384,8 +411,7 @@ async def test_resolve_second_time_not_found() -> None:
    settings = Settings()
    mgr = CameraSessionManager(
        settings=settings,
-        consumable_classifier=MagicMock(),
-        tear_action=MagicMock(),
+        vision_algorithm=MagicMock(),
        hikvision_runtime=None,
        result_repository=None,
    )
@@ -419,8 +445,7 @@ async def test_resolve_already_resolved_status() -> None:
    settings = Settings()
    mgr = CameraSessionManager(
        settings=settings,
-        consumable_classifier=MagicMock(),
-        tear_action=MagicMock(),
+        vision_algorithm=MagicMock(),
        hikvision_runtime=None,
        result_repository=None,
    )
--- a/tests/test_session_rank.py
+++ b/tests/test_session_rank.py
@@ -1,4 +1,4 @@
-from app.services.consumable_classifier import PredictionCandidate
+from app.services.consumable_vision_algorithm import PredictionCandidate
 from app.services.video.session_manager import _rank_topk_for_candidates


--- a/tests/test_surgery_pipeline_persistence.py
+++ b/tests/test_surgery_pipeline_persistence.py
@@ -46,8 +46,7 @@ async def test_stop_surgery_persists_final_result(
    settings = Settings()
    mgr = CameraSessionManager(
        settings=settings,
-        consumable_classifier=MagicMock(),
-        tear_action=MagicMock(),
+        vision_algorithm=MagicMock(),
        hikvision_runtime=None,
        result_repository=repo,
    )
@@ -101,8 +100,7 @@ async def test_stop_surgery_failed_persist_goes_to_archive_then_retry_persists(
    settings = Settings()
    mgr = CameraSessionManager(
        settings=settings,
-        consumable_classifier=MagicMock(),
-        tear_action=MagicMock(),
+        vision_algorithm=MagicMock(),
        hikvision_runtime=None,
        result_repository=repo,
    )
@@ -149,8 +147,7 @@ async def test_pipeline_prefers_live_then_db_then_archive(
    settings = Settings()
    mgr = CameraSessionManager(
        settings=settings,
-        consumable_classifier=MagicMock(),
-        tear_action=MagicMock(),
+        vision_algorithm=MagicMock(),
        hikvision_runtime=None,
        result_repository=repo,
    )
--- a/tests/test_voice_confirm.py
+++ b/tests/test_voice_confirm.py
@@ -1,4 +1,8 @@
-from app.services.voice_confirm import build_prompt_text, parse_voice_choice
+from app.services.voice_confirm import (
+    build_prompt_text,
+    match_voice_choice_against_candidates,
+    parse_voice_choice,
+)


 def test_parse_voice_choice_substring() -> None:
@@ -17,3 +21,23 @@ def test_build_prompt_contains_options() -> None:
    text = build_prompt_text([("纱布", 0.4), ("缝线", 0.3)])
    assert "纱布" in text
    assert "缝线" in text
+
+
+def test_match_voice_against_full_candidate_list() -> None:
+    assert (
+        match_voice_choice_against_candidates(
+            "刚才用的是止血钳",
+            ["纱布", "缝线", "止血钳"],
+        )
+        == "止血钳"
+    )
+
+
+def test_match_voice_longest_candidate_first() -> None:
+    assert (
+        match_voice_choice_against_candidates(
+            "拿的一次性止血钳",
+            ["止血钳", "一次性止血钳"],
+        )
+        == "一次性止血钳"
+    )
--- a/tests/test_voice_resolution_service.py
+++ b/tests/test_voice_resolution_service.py
@@ -61,20 +61,24 @@ def _make_service(
 def _active_session_with_pending(
    surgery_id: str = "123456",
    confirmation_id: str = "cid-a",
+    *,
+    candidate_consumables: list[str] | None = None,
+    pending_options: list[tuple[str, float]] | None = None,
 ) -> tuple[CameraSessionManager, str]:
    settings = Settings()
    mgr = CameraSessionManager(
        settings=settings,
-        consumable_classifier=MagicMock(),
-        tear_action=MagicMock(),
+        vision_algorithm=MagicMock(),
        hikvision_runtime=None,
        result_repository=None,
    )
-    st = SurgerySessionState(candidate_consumables=["纱布", "缝线"])
+    cands = candidate_consumables or ["纱布", "缝线"]
+    opts = pending_options or [("纱布", 0.4), ("缝线", 0.3)]
+    st = SurgerySessionState(candidate_consumables=cands)
    st.pending_by_id[confirmation_id] = PendingConsumableConfirmation(
        id=confirmation_id,
        status="pending",
-        options=[("纱布", 0.4), ("缝线", 0.3)],
+        options=opts,
        prompt_text="请确认",
        created_at=datetime.now(timezone.utc),
        model_top1_label="x",
@@ -193,6 +197,55 @@ async def test_resolve_rejected_audit(
        assert row.status == "rejected"


+@pytest.mark.asyncio
+async def test_resolve_recognizes_label_not_in_topk_but_in_surgery_candidates(
+    sqlite_session_factory,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """医生说出候选清单中的耗材（未出现在本次 pending 的模型 topk 里）也应记账。"""
+    settings = Settings()
+    sessions, cid = _active_session_with_pending(
+        candidate_consumables=["纱布", "缝线", "止血钳"],
+        pending_options=[("纱布", 0.4), ("缝线", 0.3)],
+    )
+    minio = MagicMock()
+    minio.configured = True
+    minio.ensure_bucket = MagicMock()
+    minio.upload_voice_wav = MagicMock(
+        return_value=StoredAudio(
+            object_key="k2.wav",
+            sha256_hex="d" * 64,
+            size_bytes=10,
+        )
+    )
+    baidu = MagicMock()
+    baidu.configured = True
+    baidu.asr = MagicMock(
+        return_value={"err_no": 0, "result": ["刚才用的是止血钳"]}
+    )
+    svc = _make_service(
+        settings=settings,
+        sessions=sessions,
+        minio=minio,
+        baidu=baidu,
+        sqlite_factory=sqlite_session_factory,
+        monkeypatch=monkeypatch,
+    )
+    result = await svc.resolve_from_wav(
+        surgery_id="123456",
+        confirmation_id=cid,
+        wav_bytes=_minimal_wav_16k_mono(),
+        filename="a.wav",
+        content_type="audio/wav",
+    )
+    assert result.rejected is False
+    assert result.resolved_label == "止血钳"
+    st = sessions._active["123456"].state
+    assert len(st.details) == 1
+    assert st.details[0].item_name == "止血钳"
+    assert st.details[0].source == "voice"
+
+
@pytest.mark.asyncio
 async def test_audio_too_large_audit(
    sqlite_session_factory,