feat: align surgery API with schemas and extend client tooling

- Refactor app API and schemas; adjust surgery pipeline, repository, and session manager.

- Improve consumption TSV logging and consumable vision integration; trim voice resolution.

- Add Baidu Face 1:N search script, .env.example entries, and client API integration doc.

- Update demo client, staging checklist, surgery interface doc, and related tests; add sample face image.

Made-with: Cursor
This commit is contained in:
Kevin
2026-04-23 16:09:20 +08:00
parent 0c05463617
commit 69980d8073
20 changed files with 994 additions and 610 deletions

View File

@@ -219,6 +219,13 @@ def cls_top3_from_result(
n3 = str(cls.names.get(int(t5i[2]), "")).strip()
c3 = _ci(2)
def _pid(label: str) -> str:
    """Return the product code mapped to a classifier label, or "" when unknown.

    The normalized form of the label is tried first, then the stripped label
    itself; a blank or missing label short-circuits to the empty string.
    """
    cleaned = (label or "").strip()
    if cleaned:
        by_normalized = name_to_code.get(_norm_product_name(cleaned))
        # Fall back to the raw (stripped) label only when the normalized key misses.
        matched = by_normalized or name_to_code.get(cleaned) or ""
        return matched.strip()
    return ""
return ClsTop3(
t1_name=n1,
t1_conf=c1,
@@ -226,9 +233,9 @@ def cls_top3_from_result(
t2_conf=c2,
t3_name=n3,
t3_conf=c3,
t1_pid=name_to_code.get(n1, ""),
t2_pid=name_to_code.get(n2, ""),
t3_pid=name_to_code.get(n3, ""),
t1_pid=_pid(n1),
t2_pid=_pid(n2),
t3_pid=_pid(n3),
)

View File

@@ -1,4 +1,6 @@
"""每例手术一个文本文件(制表符列):`start_surgery` 时截断并写表头,每次时间窗识别**追加**一行。终端 Markdown 时间戳为可读形式;落盘行内仍为 ISO 便于程序解析。
"""每例手术一个文本文件(制表符列):`start_surgery` 时截断并写表头,每次时间窗识别**追加**一行(仅 item_id, item_name, qty, doctor_id, timestamp。终端 Markdown 时间戳为可读形式;落盘时间戳为 ISO 区间便于程序解析。
手术结束时再追加一节汇总行(item_id, item_name, qty,无其它列)。
时间戳:在拉流起点记录 `time.time()`,与 `time.monotonic()` 时间窗对齐。直播 RTSP 经 OpenCV 一般无可靠绝对时码,以本机接收时刻为准。
"""
@@ -14,11 +16,12 @@ from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
from loguru import logger
from app.config import settings
from app.services.consumable_vision_algorithm import ClsTop3
from app.services.consumable_vision_algorithm import ClsTop3, _norm_product_name
from app.terminal_markdown import print_markdown_stderr
# 制表符分隔;时间范围用 U+2013 连接;Top2/3 仅名称;本窗消耗数量恒为 1
HEADER = "物品id\t物品名称\tTop2物品名称\tTop3物品名称\t消耗数量\t医生id\t时间戳\n"
# 制表符分隔;时间范围用 U+2013 连接;本窗消耗数量恒为 1
HEADER = "item_id\titem_name\tqty\tdoctor_id\ttimestamp\n"
SUMMARY_HEADER = "item_id\titem_name\tqty\n"
_RANGE_SEP = "\u2013"  # en dash (U+2013) joining range endpoints, matching the sample `00:00:00.000–00:00:45.000`
_lock = threading.Lock()
@@ -83,13 +86,20 @@ def _encode_cell(value: str) -> str:
return s
def _item_id_for_row(name: str, pid: str, name_to_code: dict[str, str]) -> str:
p = (pid or "").strip()
def resolve_consumption_item_id(
t1_name: str,
t1_pid: str,
name_to_code: dict[str, str],
) -> str:
"""业务物品 id`name_to_code` 的键为归一化名称,须与分类输出一同参与查找。"""
n = (t1_name or "").strip()
norm = _norm_product_name(n)
code = (name_to_code.get(norm) or name_to_code.get(n) or "").strip()
if code:
return code
p = (t1_pid or "").strip()
if p:
return p
n = (name or "").strip()
if n in name_to_code:
return (name_to_code.get(n) or n).strip()
return n
@@ -102,17 +112,12 @@ def build_tsv_line(
wall_start_epoch: float,
wall_end_epoch: float,
) -> str:
id1 = _item_id_for_row(best.t1_name, best.t1_pid, name_to_code)
# 与历史样例Top1 为「名称 置信度」四位小数
name1 = f"{(best.t1_name or '').strip()} {best.t1_conf:.4f}".strip()
n2 = (best.t2_name or "").strip()
n3 = (best.t3_name or "").strip()
id1 = resolve_consumption_item_id(best.t1_name, best.t1_pid, name_to_code)
name1 = (best.t1_name or "").strip()
ts = format_consumption_timestamp(camera_id, wall_start_epoch, wall_end_epoch)
row = [
_encode_cell(id1),
_encode_cell(name1),
_encode_cell(n2),
_encode_cell(n3),
"1",
_encode_cell(doctor_id),
_encode_cell(ts),
@@ -179,25 +184,17 @@ def build_consumption_markdown(
wall_start_epoch: float,
wall_end_epoch: float,
) -> str:
"""终端用:Top1 含 id/名称/置信度Top2/3 仅名称;消耗数量恒为 1。"""
id1 = _item_id_for_row(best.t1_name, best.t1_pid, name_to_code)
"""终端用:与落盘列一致;本窗 qty 恒为 1。"""
id1 = resolve_consumption_item_id(best.t1_name, best.t1_pid, name_to_code)
n1 = (best.t1_name or "").strip()
has2 = bool((best.t2_name or "").strip())
has3 = bool((best.t3_name or "").strip())
n2 = (best.t2_name or "").strip() if has2 else ""
n3 = (best.t3_name or "").strip() if has3 else ""
dash = ""
ts = format_consumption_timestamp_readable(camera_id, wall_start_epoch, wall_end_epoch)
return "\n".join(
[
"| Top1 物品id | Top1 物品名称 | Top1 置信度 | Top2 物品名称 | Top3 物品名称 | 消耗数量 | 医生id | 时间戳 |",
"| :--- | :--- | ---: | :--- | :--- | ---: | :--- | :--- |",
"| {} | {} | {:.4f} | {} | {} | 1 | {} | {} |".format(
"| item_id | item_name | qty | doctor_id | timestamp |",
"| :--- | :--- | ---: | :--- | :--- |",
"| {} | {} | 1 | {} | {} |".format(
_md_cell(id1),
_md_cell(n1),
best.t1_conf,
_md_cell(n2) if has2 else dash,
_md_cell(n3) if has3 else dash,
_md_cell(doctor_id),
_md_cell(ts),
),
@@ -206,6 +203,47 @@ def build_consumption_markdown(
)
def append_consumption_log_summary(
    surgery_id: str,
    totals: dict[str, tuple[str, int]],
) -> None:
    """Append the summary section (header plus one row per item) after the detail rows.

    The section is a blank line, ``SUMMARY_HEADER`` and one tab-separated
    ``item_id / item_name / qty`` row per entry of ``totals``, ordered by item id.
    Nothing is written when TSV logging is disabled, ``totals`` is empty, or the
    resolved log path is not an existing file.
    """
    if not (settings.consumption_tsv_log_enabled and totals):
        return
    log_path = resolved_consumption_log_path(surgery_id)
    if not log_path.is_file():
        return

    chunks = ["\n", SUMMARY_HEADER]
    for item_id in sorted(totals):
        item_name, count = totals[item_id]
        chunks.append(
            f"{_encode_cell(item_id)}\t{_encode_cell(item_name)}\t{count}\n"
        )

    # The whole section is written in one call while holding the module lock.
    with _lock, log_path.open("a", encoding="utf-8") as fh:
        fh.write("".join(chunks))
def print_consumption_summary_markdown(
    totals: dict[str, tuple[str, int]],
) -> None:
    """Render the per-item totals as a Markdown table and pass it to ``print_markdown_stderr``.

    Rows are ordered by item id. Does nothing when terminal Markdown output is
    disabled or ``totals`` is empty.
    """
    if not (settings.consumption_log_markdown_terminal and totals):
        return
    heading = [
        "## 消耗汇总",
        "",
        "| item_id | item_name | qty |",
        "| :--- | :--- | ---: |",
    ]
    rows = [
        f"| {_md_cell(item_id)} | {_md_cell(totals[item_id][0])} | {totals[item_id][1]} |"
        for item_id in sorted(totals)
    ]
    # The trailing empty element keeps the final newline after the table.
    print_markdown_stderr("\n".join([*heading, *rows, ""]))
def append_consumption_window(
*,
surgery_id: str,
@@ -215,9 +253,17 @@ def append_consumption_window(
camera_id: str,
wall_start_epoch: float,
wall_end_epoch: float,
running_totals: dict[str, tuple[str, int]] | None = None,
) -> None:
if not settings.consumption_tsv_log_enabled and not settings.consumption_log_markdown_terminal:
return
iid = resolve_consumption_item_id(best.t1_name, best.t1_pid, name_to_code)
iname = (best.t1_name or "").strip()
if running_totals is not None:
if iid not in running_totals:
running_totals[iid] = (iname, 0)
prev_name, q = running_totals[iid]
running_totals[iid] = (prev_name, q + 1)
if settings.consumption_tsv_log_enabled:
line = build_tsv_line(
name_to_code=name_to_code,

View File

@@ -2,6 +2,8 @@
from __future__ import annotations
import base64
from app.database import AsyncSessionLocal
from app.repositories.surgery_results import SurgeryResultRepository
from app.schemas import (
@@ -78,31 +80,22 @@ class SurgeryPipeline:
return persisted
return self._sessions.archived_consumption_fallback(surgery_id)
def voice_status(self, surgery_id: str) -> dict[str, object] | None:
    """Return whatever the session manager's ``voice_status`` reports for this surgery."""
    status = self._sessions.voice_status(surgery_id)
    return status
async def list_voice_audits(
    self,
    surgery_id: str,
    *,
    limit: int = 50,
    offset: int = 0,
):
    """Page through the persisted `voice_confirmation_audits` table for one surgery.

    Intended for tracing, reconciliation and reporting. Delegates to the voice
    service and returns its result unchanged.
    """
    page = await self._voice.list_voice_audits_for_surgery(
        surgery_id,
        limit=limit,
        offset=offset,
    )
    return page
def get_pending_confirmation_for_client(
async def get_pending_confirmation_for_client(
self, surgery_id: str
) -> SurgeryPendingConfirmationResponse | None:
pending = self._sessions.next_pending_confirmation(surgery_id)
if pending is None:
return None
mp3 = await run_in_threadpool(
self._voice.synthesize_prompt_to_mp3,
pending.prompt_text,
)
b64 = base64.b64encode(mp3).decode("ascii")
return SurgeryPendingConfirmationResponse(
surgery_id=surgery_id,
confirmation_id=pending.id,
prompt_text=pending.prompt_text,
prompt_audio_mp3_base64=b64,
options=[
PendingConfirmationOption(label=a, confidence=b)
for a, b in pending.options
@@ -129,34 +122,3 @@ class SurgeryPipeline:
content_type=content_type,
)
async def resolve_pending_confirmation_from_client_text(
    self,
    surgery_id: str,
    confirmation_id: str,
    recognized_text: str,
) -> VoiceResolveResult:
    """Resolve a pending confirmation from text recognized locally by a client (e.g. a browser).

    Per the original note, parsing rules match the WAV path and neither MinIO
    nor Baidu is needed. Delegates to the voice service.
    """
    outcome = await self._voice.resolve_from_recognized_text(
        surgery_id=surgery_id,
        confirmation_id=confirmation_id,
        recognized_text=recognized_text,
    )
    return outcome
async def get_pending_prompt_audio_mp3(
    self,
    surgery_id: str,
    confirmation_id: str,
) -> bytes:
    """Return the TTS MP3 for a pending confirmation's `prompt_text`.

    Per the original note, this is Baidu TTS output for the simulated client to
    play directly. Synthesis runs through ``run_in_threadpool``.

    Raises:
        SurgeryPipelineError: code ``CONFIRMATION_NOT_FOUND`` when no such
            confirmation exists or its status is not ``"pending"``.
    """
    entry = self._sessions.get_pending_confirmation_by_id(
        surgery_id, confirmation_id
    )
    if entry is not None and entry.status == "pending":
        return await run_in_threadpool(
            self._voice.synthesize_prompt_to_mp3,
            entry.prompt_text,
        )
    raise SurgeryPipelineError(
        "CONFIRMATION_NOT_FOUND",
        "未找到该待确认项或已处理。",
    )

View File

@@ -12,7 +12,7 @@ from loguru import logger
from app.config import Settings
from app.database import AsyncSessionLocal
from app.repositories.surgery_results import SurgeryResultRepository
from app.schemas import SurgeryConsumptionDetail
from app.schemas import SurgeryConsumptionDetail, SurgeryConsumptionStored
from app.services.consumable_vision_algorithm import (
ClsTop3,
ConsumableVisionAlgorithmService,
@@ -26,7 +26,12 @@ from app.services.video.backend_resolver import BackendResolver
from app.services.video.hikvision_runtime import HikvisionInitRefCount, HikvisionRuntime
from app.services.video.rtsp_capture import RtspCapture
from app.services.video.types import VideoBackendKind
from app.services.consumption_tsv_log import append_consumption_window, init_consumption_log_file
from app.services.consumption_tsv_log import (
append_consumption_log_summary,
append_consumption_window,
init_consumption_log_file,
print_consumption_summary_markdown,
)
from app.services.voice_file_log import init_voice_log_file
from app.services.voice_confirm import build_prompt_text
from app.surgery_errors import SurgeryPipelineError
@@ -64,7 +69,7 @@ class SurgerySessionState:
#: 分类类名(归一化) -> 业务物品 idExcel 产品编码或名称)。
name_to_code: dict[str, str] = field(default_factory=dict)
camera_infer: dict[str, CameraStreamInferState] = field(default_factory=dict)
details: list[SurgeryConsumptionDetail] = field(default_factory=list)
details: list[SurgeryConsumptionStored] = field(default_factory=list)
lock: asyncio.Lock = field(default_factory=asyncio.Lock)
ready: asyncio.Event = field(default_factory=asyncio.Event)
last_detail_monotonic: dict[str, float] = field(default_factory=dict)
@@ -76,6 +81,8 @@ class SurgerySessionState:
last_asr_text: str | None = None
#: 最近一次语音确认错误说明ASR/解析失败等)。
last_voice_error: str | None = None
#: 视觉时间窗落盘用量累计供停录时写汇总item_id -> 首次名称, 次数)。
consumption_log_totals: dict[str, tuple[str, int]] = field(default_factory=dict)
@dataclass
@@ -87,7 +94,7 @@ class RunningSurgery:
@dataclass
class ArchivedSurgery:
details: list[SurgeryConsumptionDetail]
details: list[SurgeryConsumptionStored]
def _rank_topk_for_candidates(
@@ -293,7 +300,7 @@ class CameraSessionManager:
async def _persist_archived_details(
self,
surgery_id: str,
details: list[SurgeryConsumptionDetail],
details: list[SurgeryConsumptionStored],
) -> bool:
if self._repo is None:
return True
@@ -331,6 +338,10 @@ class CameraSessionManager:
if isinstance(res, BaseException):
logger.warning("surgery task finished with error: {}", res)
totals = dict(run.state.consumption_log_totals)
append_consumption_log_summary(surgery_id, totals)
print_consumption_summary_markdown(totals)
details = list(run.state.details)
persisted = False
@@ -364,26 +375,13 @@ class CameraSessionManager:
rows = list(self._active[surgery_id].state.details)
if not rows:
return None
return rows
return [r.as_response() for r in rows]
def archived_consumption_fallback(self, surgery_id: str) -> list[SurgeryConsumptionDetail] | None:
arch = self._archive.get(surgery_id)
if arch is None:
return None
return list(arch.details)
def voice_status(self, surgery_id: str) -> dict[str, object] | None:
    """Return a snapshot of voice-confirmation state for an active surgery.

    Returns ``None`` when the surgery is not among the active sessions.
    """
    if surgery_id in self._active:
        session_state = self._active[surgery_id].state
        return dict(
            surgery_id=surgery_id,
            voice_enabled=bool(self._s.voice_confirmation_enabled),
            # Length of the pending FIFO at the time of the call.
            pending_queue_approx=len(session_state.pending_fifo),
            last_prompt_snippet=session_state.last_pending_prompt_snippet,
            last_asr_text=session_state.last_asr_text,
            last_error=session_state.last_voice_error,
        )
    return None
return [r.as_response() for r in arch.details]
def record_voice_trace(
self,
@@ -525,10 +523,10 @@ class CameraSessionManager:
return
state.last_detail_monotonic[item_id] = now_m
state.details.append(
SurgeryConsumptionDetail(
SurgeryConsumptionStored(
item_id=item_id,
item_name=item_name,
quantity=1,
qty=1,
doctor_id=doctor_id,
timestamp=datetime.now(timezone.utc),
source=source,
@@ -698,6 +696,7 @@ class CameraSessionManager:
camera_id=camera_id,
wall_start_epoch=wall_lo,
wall_end_epoch=wall_hi,
running_totals=state.consumption_log_totals,
)
pending_preds.append(
cls_top3_to_prediction_result(best)

View File

@@ -11,7 +11,6 @@ from loguru import logger
from app.config import Settings
from app.services.voice_file_log import emit_voice_event
from app.database import AsyncSessionLocal
from app.db.models import VoiceConfirmationAudit
from app.repositories.voice_audits import VoiceAuditRepository
from app.services.audio_wav import WavDecodeError, wav_bytes_to_pcm16k_mono_s16le
from app.services.baidu_speech import BaiduSpeechNotConfiguredError, BaiduSpeechService
@@ -660,22 +659,6 @@ class VoiceConfirmationService:
message="已确认并记一条消耗。",
)
async def list_voice_audits_for_surgery(
    self,
    surgery_id: str,
    *,
    limit: int = 50,
    offset: int = 0,
) -> tuple[list[VoiceConfirmationAudit], int]:
    """Read one page of rows from the `voice_confirmation_audits` table for a surgery.

    Intended for internal queries and reports. Opens a fresh
    ``AsyncSessionLocal`` session for the call and returns whatever the audit
    repository's ``list_by_surgery`` yields.
    """
    paging = {"limit": limit, "offset": offset}
    async with AsyncSessionLocal() as db:
        return await self._audits.list_by_surgery(db, surgery_id, **paging)
async def _persist_audit(
self,
*,