refactor: 统一耗材视觉算法并扩展语音确认至全量候选清单

- 以 ConsumableVisionAlgorithmService 替代 consumable_classifier 与 tear_action;
  可选手部检测权重,未配置时全帧分类;时间窗众数与 Excel 白名单配置。
- 语音待确认:ASR 先匹配 pending topk,再匹配本台 candidate_consumables;
  记账 item_id 与 vision 一致使用 name_to_code。
- 更新 config、Compose、.env.example、依赖(pandas/openpyxl)与测试。

Made-with: Cursor
This commit is contained in:
Kevin
2026-04-22 16:31:12 +08:00
parent 4c4550d58b
commit 132702aea9
18 changed files with 791 additions and 476 deletions

View File

@@ -7,7 +7,10 @@ from unittest.mock import MagicMock
import pytest
from app.config import Settings
from app.services.consumable_classifier import PredictionCandidate, PredictionResult
from app.services.consumable_vision_algorithm import (
PredictionCandidate,
PredictionResult,
)
from app.surgery_errors import SurgeryPipelineError
from app.services.video.session_manager import (
CameraSessionManager,
@@ -21,8 +24,7 @@ def test_live_consumption_requires_non_empty_details() -> None:
settings = Settings()
mgr = CameraSessionManager(
settings=settings,
consumable_classifier=MagicMock(),
tear_action=MagicMock(),
vision_algorithm=MagicMock(),
hikvision_runtime=None,
result_repository=None,
)
@@ -33,13 +35,50 @@ def test_live_consumption_requires_non_empty_details() -> None:
assert mgr.live_consumption_if_active("123456") is None
@pytest.mark.asyncio
async def test_resolve_voice_accepts_label_on_surgery_list_not_in_topk_options() -> None:
    """Resolve a pending confirmation with a label outside its top-k options.

    The pending entry offers only "纱布", while the session's candidate list
    also contains "止血钳". Resolving with "止血钳" is expected to append one
    voice-sourced detail whose item_id is the name_to_code entry ("P2").
    """
    surgery_id = "123456"
    confirmation_id = "test-confirm-id"

    manager = CameraSessionManager(
        settings=Settings(),
        vision_algorithm=MagicMock(),
        hikvision_runtime=None,
        result_repository=None,
    )

    state = SurgerySessionState(
        candidate_consumables=["纱布", "止血钳"],
        name_to_code={"纱布": "P1", "止血钳": "P2"},
    )
    # The pending entry deliberately lists only one of the two candidates.
    pending = PendingConsumableConfirmation(
        id=confirmation_id,
        status="pending",
        options=[("纱布", 0.4)],
        prompt_text="请确认",
        created_at=datetime.now(timezone.utc),
        model_top1_label="unknown",
        model_top1_confidence=0.41,
    )
    state.pending_by_id[confirmation_id] = pending
    state.pending_fifo.append(confirmation_id)

    # Register the session directly so no real capture tasks are started.
    manager._active[surgery_id] = RunningSurgery(
        stop_event=asyncio.Event(), state=state, tasks=[]
    )

    await manager.resolve_pending_confirmation(
        surgery_id, confirmation_id, chosen_label="止血钳", rejected=False
    )

    assert len(state.details) == 1
    recorded = state.details[0]
    assert recorded.item_id == "P2"
    assert recorded.item_name == "止血钳"
    assert recorded.source == "voice"
@pytest.mark.asyncio
async def test_resolve_pending_appends_voice_detail() -> None:
settings = Settings()
mgr = CameraSessionManager(
settings=settings,
consumable_classifier=MagicMock(),
tear_action=MagicMock(),
vision_algorithm=MagicMock(),
hikvision_runtime=None,
result_repository=None,
)
@@ -74,8 +113,7 @@ async def test_resolve_reject_closes_without_detail() -> None:
settings = Settings()
mgr = CameraSessionManager(
settings=settings,
consumable_classifier=MagicMock(),
tear_action=MagicMock(),
vision_algorithm=MagicMock(),
hikvision_runtime=None,
result_repository=None,
)
@@ -108,8 +146,7 @@ async def test_handle_skips_when_candidate_list_empty() -> None:
settings = Settings()
mgr = CameraSessionManager(
settings=settings,
consumable_classifier=MagicMock(),
tear_action=MagicMock(),
vision_algorithm=MagicMock(),
hikvision_runtime=None,
result_repository=None,
)
@@ -120,7 +157,7 @@ async def test_handle_skips_when_candidate_list_empty() -> None:
topk=[PredictionCandidate(label="纱布", confidence=0.99)],
)
await mgr._handle_classification_result(
state=state, cls_res=res, tear_label=""
state=state, cls_res=res
)
assert state.details == []
assert state.pending_fifo == []
@@ -131,8 +168,7 @@ async def test_archive_retry_loop_starts() -> None:
settings = Settings()
mgr = CameraSessionManager(
settings=settings,
consumable_classifier=MagicMock(),
tear_action=MagicMock(),
vision_algorithm=MagicMock(),
hikvision_runtime=None,
result_repository=None,
)
@@ -153,8 +189,7 @@ async def test_handle_skips_below_voice_floor() -> None:
settings.video_voice_confirm_min_confidence = 0.5
mgr = CameraSessionManager(
settings=settings,
consumable_classifier=MagicMock(),
tear_action=MagicMock(),
vision_algorithm=MagicMock(),
hikvision_runtime=None,
result_repository=None,
)
@@ -165,7 +200,7 @@ async def test_handle_skips_below_voice_floor() -> None:
topk=[PredictionCandidate(label="纱布", confidence=0.4)],
)
await mgr._handle_classification_result(
state=state, cls_res=res, tear_label=""
state=state, cls_res=res
)
assert state.details == []
assert state.pending_fifo == []
@@ -176,8 +211,7 @@ async def test_handle_auto_vision_confirm() -> None:
settings = Settings()
mgr = CameraSessionManager(
settings=settings,
consumable_classifier=MagicMock(),
tear_action=MagicMock(),
vision_algorithm=MagicMock(),
hikvision_runtime=None,
result_repository=None,
)
@@ -188,7 +222,7 @@ async def test_handle_auto_vision_confirm() -> None:
topk=[PredictionCandidate(label="纱布", confidence=0.99)],
)
await mgr._handle_classification_result(
state=state, cls_res=res, tear_label=""
state=state, cls_res=res
)
assert len(state.details) == 1
assert state.details[0].source == "vision"
@@ -200,8 +234,7 @@ async def test_handle_high_conf_top1_not_in_candidates_enqueues_pending() -> Non
settings = Settings()
mgr = CameraSessionManager(
settings=settings,
consumable_classifier=MagicMock(),
tear_action=MagicMock(),
vision_algorithm=MagicMock(),
hikvision_runtime=None,
result_repository=None,
)
@@ -215,7 +248,7 @@ async def test_handle_high_conf_top1_not_in_candidates_enqueues_pending() -> Non
],
)
await mgr._handle_classification_result(
state=state, cls_res=res, tear_label=""
state=state, cls_res=res
)
assert state.details == []
assert len(state.pending_fifo) == 1
@@ -230,8 +263,7 @@ async def test_handle_mid_confidence_enqueues_pending() -> None:
settings.video_voice_confirm_min_confidence = 0.3
mgr = CameraSessionManager(
settings=settings,
consumable_classifier=MagicMock(),
tear_action=MagicMock(),
vision_algorithm=MagicMock(),
hikvision_runtime=None,
result_repository=None,
)
@@ -245,7 +277,7 @@ async def test_handle_mid_confidence_enqueues_pending() -> None:
],
)
await mgr._handle_classification_result(
state=state, cls_res=res, tear_label=""
state=state, cls_res=res
)
assert len(state.pending_fifo) == 1
@@ -257,8 +289,7 @@ async def test_handle_voice_disabled_no_pending_for_mid_conf() -> None:
settings.video_auto_confirm_confidence = 0.8
mgr = CameraSessionManager(
settings=settings,
consumable_classifier=MagicMock(),
tear_action=MagicMock(),
vision_algorithm=MagicMock(),
hikvision_runtime=None,
result_repository=None,
)
@@ -269,7 +300,7 @@ async def test_handle_voice_disabled_no_pending_for_mid_conf() -> None:
topk=[PredictionCandidate(label="纱布", confidence=0.5)],
)
await mgr._handle_classification_result(
state=state, cls_res=res, tear_label=""
state=state, cls_res=res
)
assert state.pending_fifo == []
assert state.details == []
@@ -281,8 +312,7 @@ async def test_handle_vision_cooldown_skips_duplicate() -> None:
settings.video_detail_cooldown_sec = 3600.0
mgr = CameraSessionManager(
settings=settings,
consumable_classifier=MagicMock(),
tear_action=MagicMock(),
vision_algorithm=MagicMock(),
hikvision_runtime=None,
result_repository=None,
)
@@ -293,10 +323,10 @@ async def test_handle_vision_cooldown_skips_duplicate() -> None:
topk=[PredictionCandidate(label="纱布", confidence=0.99)],
)
await mgr._handle_classification_result(
state=state, cls_res=res, tear_label=""
state=state, cls_res=res
)
await mgr._handle_classification_result(
state=state, cls_res=res, tear_label=""
state=state, cls_res=res
)
assert len(state.details) == 1
@@ -307,8 +337,7 @@ async def test_handle_pending_dedupe_cooldown() -> None:
settings.video_detail_cooldown_sec = 3600.0
mgr = CameraSessionManager(
settings=settings,
consumable_classifier=MagicMock(),
tear_action=MagicMock(),
vision_algorithm=MagicMock(),
hikvision_runtime=None,
result_repository=None,
)
@@ -322,10 +351,10 @@ async def test_handle_pending_dedupe_cooldown() -> None:
],
)
await mgr._handle_classification_result(
state=state, cls_res=res, tear_label=""
state=state, cls_res=res
)
await mgr._handle_classification_result(
state=state, cls_res=res, tear_label=""
state=state, cls_res=res
)
assert len(state.pending_fifo) == 1
@@ -335,8 +364,7 @@ async def test_resolve_invalid_chosen_label() -> None:
settings = Settings()
mgr = CameraSessionManager(
settings=settings,
consumable_classifier=MagicMock(),
tear_action=MagicMock(),
vision_algorithm=MagicMock(),
hikvision_runtime=None,
result_repository=None,
)
@@ -367,8 +395,7 @@ async def test_resolve_not_active() -> None:
settings = Settings()
mgr = CameraSessionManager(
settings=settings,
consumable_classifier=MagicMock(),
tear_action=MagicMock(),
vision_algorithm=MagicMock(),
hikvision_runtime=None,
result_repository=None,
)
@@ -384,8 +411,7 @@ async def test_resolve_second_time_not_found() -> None:
settings = Settings()
mgr = CameraSessionManager(
settings=settings,
consumable_classifier=MagicMock(),
tear_action=MagicMock(),
vision_algorithm=MagicMock(),
hikvision_runtime=None,
result_repository=None,
)
@@ -419,8 +445,7 @@ async def test_resolve_already_resolved_status() -> None:
settings = Settings()
mgr = CameraSessionManager(
settings=settings,
consumable_classifier=MagicMock(),
tear_action=MagicMock(),
vision_algorithm=MagicMock(),
hikvision_runtime=None,
result_repository=None,
)

View File

@@ -1,4 +1,4 @@
from app.services.consumable_classifier import PredictionCandidate
from app.services.consumable_vision_algorithm import PredictionCandidate
from app.services.video.session_manager import _rank_topk_for_candidates

View File

@@ -46,8 +46,7 @@ async def test_stop_surgery_persists_final_result(
settings = Settings()
mgr = CameraSessionManager(
settings=settings,
consumable_classifier=MagicMock(),
tear_action=MagicMock(),
vision_algorithm=MagicMock(),
hikvision_runtime=None,
result_repository=repo,
)
@@ -101,8 +100,7 @@ async def test_stop_surgery_failed_persist_goes_to_archive_then_retry_persists(
settings = Settings()
mgr = CameraSessionManager(
settings=settings,
consumable_classifier=MagicMock(),
tear_action=MagicMock(),
vision_algorithm=MagicMock(),
hikvision_runtime=None,
result_repository=repo,
)
@@ -149,8 +147,7 @@ async def test_pipeline_prefers_live_then_db_then_archive(
settings = Settings()
mgr = CameraSessionManager(
settings=settings,
consumable_classifier=MagicMock(),
tear_action=MagicMock(),
vision_algorithm=MagicMock(),
hikvision_runtime=None,
result_repository=repo,
)

View File

@@ -1,4 +1,8 @@
from app.services.voice_confirm import build_prompt_text, parse_voice_choice
from app.services.voice_confirm import (
build_prompt_text,
match_voice_choice_against_candidates,
parse_voice_choice,
)
def test_parse_voice_choice_substring() -> None:
@@ -17,3 +21,23 @@ def test_build_prompt_contains_options() -> None:
text = build_prompt_text([("纱布", 0.4), ("缝线", 0.3)])
assert "纱布" in text
assert "缝线" in text
def test_match_voice_against_full_candidate_list() -> None:
    """Check that a candidate name embedded in a longer utterance is returned."""
    utterance = "刚才用的是止血钳"
    surgery_candidates = ["纱布", "缝线", "止血钳"]

    matched = match_voice_choice_against_candidates(utterance, surgery_candidates)

    assert matched == "止血钳"
def test_match_voice_longest_candidate_first() -> None:
    """Check that the longer of two overlapping candidate names is returned.

    Both "止血钳" and "一次性止血钳" occur in the utterance; the expected
    result is the longer name even though the shorter one is listed first.
    """
    utterance = "拿的一次性止血钳"
    overlapping_candidates = ["止血钳", "一次性止血钳"]

    matched = match_voice_choice_against_candidates(
        utterance, overlapping_candidates
    )

    assert matched == "一次性止血钳"

View File

@@ -61,20 +61,24 @@ def _make_service(
def _active_session_with_pending(
surgery_id: str = "123456",
confirmation_id: str = "cid-a",
*,
candidate_consumables: list[str] | None = None,
pending_options: list[tuple[str, float]] | None = None,
) -> tuple[CameraSessionManager, str]:
settings = Settings()
mgr = CameraSessionManager(
settings=settings,
consumable_classifier=MagicMock(),
tear_action=MagicMock(),
vision_algorithm=MagicMock(),
hikvision_runtime=None,
result_repository=None,
)
st = SurgerySessionState(candidate_consumables=["纱布", "缝线"])
cands = candidate_consumables or ["纱布", "缝线"]
opts = pending_options or [("纱布", 0.4), ("缝线", 0.3)]
st = SurgerySessionState(candidate_consumables=cands)
st.pending_by_id[confirmation_id] = PendingConsumableConfirmation(
id=confirmation_id,
status="pending",
options=[("纱布", 0.4), ("缝线", 0.3)],
options=opts,
prompt_text="请确认",
created_at=datetime.now(timezone.utc),
model_top1_label="x",
@@ -193,6 +197,55 @@ async def test_resolve_rejected_audit(
assert row.status == "rejected"
@pytest.mark.asyncio
async def test_resolve_recognizes_label_not_in_topk_but_in_surgery_candidates(
    sqlite_session_factory,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A consumable named by the doctor from the surgery candidate list should
    be recorded even when it is absent from this pending item's model top-k."""
    settings = Settings()
    # "止血钳" is on the surgery candidate list but not among the pending options.
    sessions, cid = _active_session_with_pending(
        candidate_consumables=["纱布", "缝线", "止血钳"],
        pending_options=[("纱布", 0.4), ("缝线", 0.3)],
    )

    # Object storage stub: reports itself configured and returns a fixed upload record.
    stored_audio = StoredAudio(
        object_key="k2.wav",
        sha256_hex="d" * 64,
        size_bytes=10,
    )
    minio = MagicMock()
    minio.configured = True
    minio.ensure_bucket = MagicMock()
    minio.upload_voice_wav = MagicMock(return_value=stored_audio)

    # ASR stub: a successful reply whose transcript mentions "止血钳".
    asr_reply = {"err_no": 0, "result": ["刚才用的是止血钳"]}
    baidu = MagicMock()
    baidu.configured = True
    baidu.asr = MagicMock(return_value=asr_reply)

    svc = _make_service(
        settings=settings,
        sessions=sessions,
        minio=minio,
        baidu=baidu,
        sqlite_factory=sqlite_session_factory,
        monkeypatch=monkeypatch,
    )

    result = await svc.resolve_from_wav(
        surgery_id="123456",
        confirmation_id=cid,
        wav_bytes=_minimal_wav_16k_mono(),
        filename="a.wav",
        content_type="audio/wav",
    )

    assert result.rejected is False
    assert result.resolved_label == "止血钳"

    session_state = sessions._active["123456"].state
    assert len(session_state.details) == 1
    recorded = session_state.details[0]
    assert recorded.item_name == "止血钳"
    assert recorded.source == "voice"
@pytest.mark.asyncio
async def test_audio_too_large_audit(
sqlite_session_factory,