feat: 语音确认、联调与运维增强

- 语音:序数解析(第一个/第二个等)、解析失败计数与 API detail.retry_remaining;
  百度 ASR 固定 dev_pid 为普通话;SurgeryPipelineError 支持 extra 并入 HTTP detail。
- Demo:demo 路由与假 RTSP、客户端 index 与 README;BackendResolver 与配置调整。
- 可观测:消耗 TSV 日志、语音文件日志、终端 Markdown 辅助;相关测试与依赖更新。
- 注意:.env 仍被 gitignore,本地密钥不会进入本提交。

Made-with: Cursor
This commit is contained in:
Kevin
2026-04-23 14:24:20 +08:00
parent 42720f81cf
commit 0c05463617
39 changed files with 3030 additions and 143 deletions

View File

@@ -193,6 +193,60 @@ def test_resolve_non_wav_422(api_app: FastAPI) -> None:
assert r.status_code == 422
def test_prompt_audio_200(api_app: FastAPI) -> None:
    """GET prompt-audio returns the pipeline's MP3 bytes with an MPEG content type."""
    mp3_bytes = b"\xff\xfb\x90"
    fake_pipeline = MagicMock()
    fake_pipeline.get_pending_prompt_audio_mp3 = AsyncMock(return_value=mp3_bytes)
    # Swap the real pipeline dependency for the mock for this app instance.
    api_app.dependency_overrides[get_surgery_pipeline] = lambda: fake_pipeline

    response = TestClient(api_app).get(
        "/client/surgeries/123456/pending-confirmation/cid1/prompt-audio"
    )

    assert response.status_code == 200
    assert response.content == mp3_bytes
    content_type = response.headers.get("content-type") or ""
    assert "mpeg" in content_type
    # The path parameters must reach the pipeline as keyword arguments.
    fake_pipeline.get_pending_prompt_audio_mp3.assert_awaited_once_with(
        surgery_id="123456",
        confirmation_id="cid1",
    )
def test_resolve_text_200(api_app: FastAPI) -> None:
    """POST resolve-text returns 200 and echoes the pipeline's label and recognized text."""
    spoken_text = "第一个"
    pipeline_result = VoiceResolveResult(
        resolved_label="纱布",
        rejected=False,
        asr_text=spoken_text,
        audio_object_key=None,
        message="ok",
    )
    fake_pipeline = MagicMock()
    fake_pipeline.resolve_pending_confirmation_from_client_text = AsyncMock(
        return_value=pipeline_result
    )
    api_app.dependency_overrides[get_surgery_pipeline] = lambda: fake_pipeline

    response = TestClient(api_app).post(
        "/client/surgeries/123456/pending-confirmation/cid/resolve-text",
        json={"recognized_text": spoken_text},
    )

    assert response.status_code == 200
    payload = response.json()
    assert payload["resolved_label"] == "纱布"
    assert payload["asr_text"] == spoken_text
    fake_pipeline.resolve_pending_confirmation_from_client_text.assert_awaited_once()
def test_resolve_text_maps_surgery_error(api_app: FastAPI) -> None:
    """A VOICE_PARSE_FAILED pipeline error surfaces as HTTP 422 with the code in detail."""
    parse_failure = SurgeryPipelineError("VOICE_PARSE_FAILED", "无法匹配")
    fake_pipeline = MagicMock()
    fake_pipeline.resolve_pending_confirmation_from_client_text = AsyncMock(
        side_effect=parse_failure
    )
    api_app.dependency_overrides[get_surgery_pipeline] = lambda: fake_pipeline

    response = TestClient(api_app).post(
        "/client/surgeries/123456/pending-confirmation/cid/resolve-text",
        json={"recognized_text": "随便说说"},
    )

    assert response.status_code == 422
    detail = response.json()["detail"]
    assert detail["code"] == "VOICE_PARSE_FAILED"
def test_resolve_200(api_app: FastAPI) -> None:
pipeline = MagicMock()
pipeline.resolve_pending_confirmation_from_audio = AsyncMock(
@@ -255,3 +309,22 @@ def test_internal_voice_status_404_and_200(api_app: FastAPI) -> None:
r2 = client2.get("/internal/surgeries/123456/voice-status")
assert r2.status_code == 200
assert r2.json()["pending_queue_approx"] == 2
def test_internal_voice_audits_200_empty(api_app: FastAPI) -> None:
    """The voice-audits endpoint returns an empty page and forwards limit/offset."""
    fake_pipeline = MagicMock()
    # The mocked pipeline returns a (rows, total) pair; here: no rows, total of zero.
    fake_pipeline.list_voice_audits = AsyncMock(return_value=([], 0))
    api_app.dependency_overrides[get_surgery_pipeline] = lambda: fake_pipeline

    response = TestClient(api_app).get(
        "/internal/surgeries/123456/voice-audits",
        params={"limit": 1, "offset": 0},
    )

    assert response.status_code == 200
    payload = response.json()
    expected_fields = {
        "surgery_id": "123456",
        "total": 0,
        "limit": 1,
        "offset": 0,
        "items": [],
    }
    for field, expected in expected_fields.items():
        assert payload[field] == expected
    fake_pipeline.list_voice_audits.assert_awaited_once_with("123456", limit=1, offset=0)

View File

@@ -0,0 +1,123 @@
"""consumption_log.txt 兼容 TSV 格式。"""
import pytest
from app.config import settings
from app.services.consumable_vision_algorithm import ClsTop3
from app.services.consumption_tsv_log import (
HEADER,
_RANGE_SEP,
append_consumption_tsv_line,
build_consumption_markdown,
build_tsv_line,
init_consumption_log_file,
short_camera_label,
)
def test_short_camera_label() -> None:
    """Sample camera ids shorten to ``cam`` followed by a zero-padded two-digit number."""
    expected_by_camera_id = {
        "or-cam-01": "cam01",
        "or-cam-2": "cam02",
    }
    for camera_id, expected in expected_by_camera_id.items():
        assert short_camera_label(camera_id) == expected
def test_build_tsv_line_matches_sample_shape(monkeypatch: pytest.MonkeyPatch) -> None:
    """A built TSV line has seven tab-separated fields matching the sample layout."""
    monkeypatch.setattr(settings, "consumption_log_timezone", "UTC")
    top3 = ClsTop3(
        t1_name="一次性医用灭菌棉签",
        t1_conf=0.9997,
        t2_name="cls2",
        t2_conf=0.0003,
        t3_name="cls3",
        t3_conf=0.0002,
        t1_pid="2237844",
        t2_pid="11765-1-101",
        t3_pid="21504-1-1",
    )
    # Wall clock: stream start aligned to 2024-01-01T00:00:00Z; window spans +0s to +45s.
    window_start = 1704067200.0
    window_end = window_start + 45.0

    tsv_line = build_tsv_line(
        name_to_code={},
        best=top3,
        doctor_id="DOCTOR_PLACEHOLDER",
        camera_id="or-cam-01",
        wall_start_epoch=window_start,
        wall_end_epoch=window_end,
    )

    expected_time_range = (
        "cam01@2024-01-01T00:00:00.000+00:00"
        + _RANGE_SEP
        + "2024-01-01T00:00:45.000+00:00"
    )
    fields = tsv_line.rstrip("\n").split("\t")
    # Comparing the whole list checks both the field count (7) and each value.
    assert fields == [
        "2237844",
        "一次性医用灭菌棉签 0.9997",
        "cls2",
        "cls3",
        "1",
        "DOCTOR_PLACEHOLDER",
        expected_time_range,
    ]
def test_header_columns() -> None:
    """The header's first and last tab-separated columns are the item id and timestamp."""
    columns = HEADER.strip().split("\t")
    assert (columns[0], columns[-1]) == ("物品id", "时间戳")
def test_per_surgery_file_init_and_append(
    tmp_path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Init writes the header, appends add rows, and a second init resets to the header."""
    surgery_id = "or-001"
    path_template = str(tmp_path / "{surgery_id}.txt")
    monkeypatch.setattr(settings, "consumption_tsv_log_enabled", True)
    monkeypatch.setattr(settings, "consumption_tsv_log_path", path_template)

    init_consumption_log_file(surgery_id)
    for row in ("row1\n", "row2\n"):
        append_consumption_tsv_line(surgery_id, row)

    log_file = tmp_path / "or-001.txt"
    assert log_file.read_text(encoding="utf-8") == HEADER + "row1\n" + "row2\n"

    # Re-initialising the same surgery must discard the previously appended rows.
    init_consumption_log_file(surgery_id)
    assert log_file.read_text(encoding="utf-8") == HEADER
def test_build_consumption_markdown_top123_columns(monkeypatch: pytest.MonkeyPatch) -> None:
    """The Markdown shows full Top1 details but only names for Top2 and Top3."""
    monkeypatch.setattr(settings, "consumption_log_timezone", "UTC")
    top3 = ClsTop3(
        t1_name="一次性医用灭菌棉签",
        t1_conf=0.9997,
        t2_name="cls2",
        t2_conf=0.0003,
        t3_name="cls3",
        t3_conf=0.0002,
        t1_pid="2237844",
        t2_pid="11765-1-101",
        t3_pid="21504-1-1",
    )
    window_start = 1704067200.0

    markdown = build_consumption_markdown(
        name_to_code={},
        best=top3,
        doctor_id="DOCTOR_PLACEHOLDER",
        camera_id="or-cam-01",
        wall_start_epoch=window_start,
        wall_end_epoch=window_start + 45.0,
    )

    must_contain = (
        # Column headers.
        "Top1 物品id",
        "Top1 物品名称",
        "Top1 置信度",
        "Top2 物品名称",
        "Top3 物品名称",
        # Top1 values and Top2/Top3 names.
        "2237844",
        "一次性医用灭菌棉签",
        "0.9997",
        "cls2",
        "cls3",
        "DOCTOR_PLACEHOLDER",
        "| 1 |",
        # Terminal output uses a human-readable timestamp, not the on-disk ISO@cam form.
        "2024-01-01 00:00:00.000",
        "2024-01-01 00:00:45.000",
        "cam01",
        " · ",
        _RANGE_SEP,
    )
    must_not_contain = (
        "Top2 物品id",
        # Top2/Top3 ids and confidences are deliberately left out.
        "11765-1-101",
        "21504-1-1",
        "0.0003",
        "0.0002",
        "cam01@2024-01",
    )
    for needle in must_contain:
        assert needle in markdown
    for needle in must_not_contain:
        assert needle not in markdown

View File

@@ -156,9 +156,7 @@ async def test_handle_skips_when_candidate_list_empty() -> None:
confidence=0.99,
topk=[PredictionCandidate(label="纱布", confidence=0.99)],
)
await mgr._handle_classification_result(
state=state, cls_res=res
)
await mgr._handle_classification_result(state=state, cls_res=res)
assert state.details == []
assert state.pending_fifo == []
@@ -199,9 +197,7 @@ async def test_handle_skips_below_voice_floor() -> None:
confidence=0.4,
topk=[PredictionCandidate(label="纱布", confidence=0.4)],
)
await mgr._handle_classification_result(
state=state, cls_res=res
)
await mgr._handle_classification_result(state=state, cls_res=res)
assert state.details == []
assert state.pending_fifo == []
@@ -221,9 +217,7 @@ async def test_handle_auto_vision_confirm() -> None:
confidence=0.99,
topk=[PredictionCandidate(label="纱布", confidence=0.99)],
)
await mgr._handle_classification_result(
state=state, cls_res=res
)
await mgr._handle_classification_result(state=state, cls_res=res)
assert len(state.details) == 1
assert state.details[0].source == "vision"
assert state.details[0].item_id == "纱布"
@@ -247,9 +241,7 @@ async def test_handle_high_conf_top1_not_in_candidates_enqueues_pending() -> Non
PredictionCandidate(label="缝线", confidence=0.2),
],
)
await mgr._handle_classification_result(
state=state, cls_res=res
)
await mgr._handle_classification_result(state=state, cls_res=res)
assert state.details == []
assert len(state.pending_fifo) == 1
pid = state.pending_fifo[0]
@@ -276,9 +268,7 @@ async def test_handle_mid_confidence_enqueues_pending() -> None:
PredictionCandidate(label="缝线", confidence=0.3),
],
)
await mgr._handle_classification_result(
state=state, cls_res=res
)
await mgr._handle_classification_result(state=state, cls_res=res)
assert len(state.pending_fifo) == 1
@@ -299,9 +289,7 @@ async def test_handle_voice_disabled_no_pending_for_mid_conf() -> None:
confidence=0.5,
topk=[PredictionCandidate(label="纱布", confidence=0.5)],
)
await mgr._handle_classification_result(
state=state, cls_res=res
)
await mgr._handle_classification_result(state=state, cls_res=res)
assert state.pending_fifo == []
assert state.details == []
@@ -322,12 +310,8 @@ async def test_handle_vision_cooldown_skips_duplicate() -> None:
confidence=0.99,
topk=[PredictionCandidate(label="纱布", confidence=0.99)],
)
await mgr._handle_classification_result(
state=state, cls_res=res
)
await mgr._handle_classification_result(
state=state, cls_res=res
)
await mgr._handle_classification_result(state=state, cls_res=res)
await mgr._handle_classification_result(state=state, cls_res=res)
assert len(state.details) == 1
@@ -350,12 +334,8 @@ async def test_handle_pending_dedupe_cooldown() -> None:
PredictionCandidate(label="缝线", confidence=0.2),
],
)
await mgr._handle_classification_result(
state=state, cls_res=res
)
await mgr._handle_classification_result(
state=state, cls_res=res
)
await mgr._handle_classification_result(state=state, cls_res=res)
await mgr._handle_classification_result(state=state, cls_res=res)
assert len(state.pending_fifo) == 1

View File

@@ -1,5 +1,6 @@
from __future__ import annotations
import asyncio
import json
import pytest
@@ -55,3 +56,50 @@ async def test_save_audit_persists_fields(db_session: AsyncSession) -> None:
assert r.resolved_label == "纱布"
assert r.options_snapshot_json == opts
assert r.error_message is None
@pytest.mark.asyncio
async def test_list_by_surgery_order_and_total(db_session: AsyncSession) -> None:
    """list_by_surgery returns the newest audit first and a total independent of paging."""
    surgery_id = "111111"
    audits = VoiceAuditRepository()

    async with db_session.begin():
        await audits.save_audit(
            db_session,
            surgery_id=surgery_id,
            confirmation_id="a",
            status="parse_failed",
            audio_object_key=None,
            audio_content_type=None,
            audio_size_bytes=None,
            audio_sha256=None,
            asr_text="",
            resolved_label=None,
            options_snapshot_json="[]",
            error_message="x",
        )

    # Short pause between the two saves; presumably so the rows get distinct
    # timestamps for ordering (verify against the repository's sort key).
    await asyncio.sleep(0.02)

    async with db_session.begin():
        await audits.save_audit(
            db_session,
            surgery_id=surgery_id,
            confirmation_id="b",
            status="recognized",
            audio_object_key="k.wav",
            audio_content_type="audio/wav",
            audio_size_bytes=10,
            audio_sha256="b" * 64,
            asr_text="纱布",
            resolved_label="纱布",
            options_snapshot_json="[]",
            error_message=None,
        )

    async with db_session.begin():
        first_page, first_total = await audits.list_by_surgery(
            db_session, surgery_id, limit=10, offset=0
        )
        assert first_total == 2
        assert [row.confirmation_id for row in first_page] == ["b", "a"]

    async with db_session.begin():
        second_page, second_total = await audits.list_by_surgery(
            db_session, surgery_id, limit=1, offset=1
        )
        # The total counts every row for the surgery, not just the returned page.
        assert second_total == 2
        assert len(second_page) == 1
        assert second_page[0].confirmation_id == "a"

View File

@@ -13,6 +13,21 @@ def test_parse_voice_choice_numeric() -> None:
assert parse_voice_choice("第2个", ["纱布", "缝线", "钳子"]) == "缝线"
def test_parse_voice_choice_ordinal_chinese() -> None:
    """Ordinal phrases (Chinese or Arabic numerals, with prefixes) select by position."""
    three_options = ["纱布", "缝线", "钳子"]
    cases = [
        ("第一个", three_options, "纱布"),
        ("第一个。", three_options, "纱布"),  # trailing punctuation is tolerated
        ("第2个", three_options, "缝线"),
        ("第二", three_options, "缝线"),
        ("选3", three_options, "钳子"),
        ("选项2", ["纱布", "缝线"], "缝线"),
    ]
    for utterance, options, expected in cases:
        assert parse_voice_choice(utterance, options) == expected
def test_parse_voice_choice_single_chinese_digit_with_few_options() -> None:
    """A lone Chinese digit selects the option at that position when options are few."""
    options = ["纱布", "缝线"]
    # NOTE(review): the inputs were empty strings, so both asserts passed the
    # same argument yet expected different labels and could never both hold.
    # The digits below are reconstructed from the test name; confirm them
    # against the spoken forms the parser is meant to accept.
    assert parse_voice_choice("一", options) == "纱布"
    assert parse_voice_choice("二", options) == "缝线"
def test_parse_voice_choice_negative() -> None:
    """An utterance matching no option yields ``None``."""
    choice = parse_voice_choice("不是", ["纱布", "缝线"])
    assert choice is None

View File

@@ -0,0 +1,62 @@
"""Tests for voice TSV + emit_voice_event (path + TSV line)."""
from __future__ import annotations
import tempfile
from pathlib import Path
from app.config import Settings
from app.services.voice_file_log import (
append_voice_tsv_line,
emit_voice_event,
init_voice_log_file,
resolved_voice_log_path,
)
def test_resolved_voice_log_path_replaces_surgery_id() -> None:
    """The ``{surgery_id}`` placeholder in the configured path is filled in."""
    cfg = Settings()
    cfg.voice_file_log_path = "logs/voice_{surgery_id}.txt"

    log_path = resolved_voice_log_path("123456", cfg)

    assert log_path.name == "voice_123456.txt"
    assert "logs" in str(log_path)
def test_init_and_append_tsv() -> None:
    """Init creates the log with a header; an appended line ends up at the file's tail."""
    surgery_id = "999999"
    with tempfile.TemporaryDirectory() as tmp_dir:
        cfg = Settings()
        cfg.voice_file_log_enabled = True
        cfg.voice_file_log_path = str((Path(tmp_dir) / "v_{surgery_id}.txt").resolve())

        init_voice_log_file(surgery_id, cfg)
        log_path = resolved_voice_log_path(surgery_id, cfg)
        assert log_path.exists()

        header_text = log_path.read_text(encoding="utf-8")
        for column in ("来源", "confirmation_id"):
            assert column in header_text

        tsv_row = "ts\ttest\trecognized\tcid1\t\t\tfalse\t\tk.wav\n"
        append_voice_tsv_line(surgery_id, tsv_row, cfg)
        assert log_path.read_text(encoding="utf-8").endswith(tsv_row)
def test_emit_voice_event_writes_when_enabled() -> None:
    """With file logging enabled, an emitted event's fields appear in the log file."""
    surgery_id = "111111"
    cfg = Settings()
    cfg.voice_file_log_enabled = True
    with tempfile.TemporaryDirectory() as tmp_dir:
        cfg.voice_file_log_path = str((Path(tmp_dir) / "v_{surgery_id}.txt").resolve())
        init_voice_log_file(surgery_id, cfg)

        emit_voice_event(
            cfg,
            surgery_id=surgery_id,
            source="wav",
            status="recognized",
            confirmation_id="c1",
            asr_text="纱布",
            resolved_label="纱布",
            rejected=False,
            audio_object_key="k.wav",
        )

        # Read back while the temporary directory still exists.
        log_text = resolved_voice_log_path(surgery_id, cfg).read_text(encoding="utf-8")
        for fragment in ("纱布", "recognized", "k.wav"):
            assert fragment in log_text