diff --git a/.env.example b/.env.example index ac52e4f..e96e24f 100644 --- a/.env.example +++ b/.env.example @@ -123,6 +123,20 @@ CONSUMABLE_CLASSIFIER_TOPK=5 # 短语音识别模型:固定普通话(默认 1537;勿用 1737 英语等)。代码会始终带上此 dev_pid。 # BAIDU_SPEECH_ASR_DEV_PID=1537 +# --- Baidu Face(可选;仅 `scripts/baidu_face_1n_search.py` 批量人脸 1:N 搜索;需在控制台创建应用并开通人脸识别)--- +# BAIDU_FACE_APP_ID= +# BAIDU_FACE_API_KEY= +# BAIDU_FACE_SECRET_KEY= +# 搜索的人脸组 id,逗号分隔,最多 10 个;未传命令行 --groups 时使用此项 +# 仅允许英文/数字/下划线(与控制台「用户组 id」一致),不能中文;否则 API 会报 222005 +# BAIDU_FACE_GROUP_ID_LIST=my_group_1 +# BAIDU_FACE_MAX_USER_NUM=1 +# BAIDU_FACE_MATCH_THRESHOLD=80 +# BAIDU_FACE_QUALITY_CONTROL=NONE +# BAIDU_FACE_LIVENESS_CONTROL=NONE +# BAIDU_FACE_CONNECTION_TIMEOUT_MS= +# BAIDU_FACE_SOCKET_TIMEOUT_MS= + # --- MinIO(语音 WAV 存桶;`docker-compose.dev.yml` 内已含 `minio` 服务;本机只跑 API 时填 127.0.0.1:9000)--- # docker compose -f docker-compose.dev.yml up -d minio # MINIO_ENDPOINT=127.0.0.1:9000 diff --git a/app/api.py b/app/api.py index 5ce7a64..1bf7fb5 100644 --- a/app/api.py +++ b/app/api.py @@ -2,8 +2,7 @@ import asyncio from collections.abc import Awaitable, Callable from typing import Annotated -from fastapi import APIRouter, Depends, File, HTTPException, Path, Query, UploadFile, status -from fastapi.responses import Response +from fastapi import APIRouter, Depends, File, HTTPException, Path, UploadFile, status from fastapi.responses import JSONResponse from loguru import logger from sqlalchemy.exc import SQLAlchemyError @@ -18,12 +17,8 @@ from app.schemas import ( SurgeryEndRequest, SurgeryPendingConfirmationResolveResponse, SurgeryPendingConfirmationResponse, - SurgeryPendingResolveTextRequest, SurgeryResultResponse, SurgeryStartRequest, - SurgeryVoiceAuditItem, - SurgeryVoiceAuditsListResponse, - SurgeryVoiceStatusResponse, build_consumption_summary, ) from app.services.surgery_pipeline import SurgeryPipeline @@ -251,7 +246,11 @@ async def end_surgery( "根据手术 6 位号查询该台手术的耗材消耗明细(多行)及按物品汇总。" "手术进行中返回当前内存已记账结果;结束后返回数据库持久化结果。" "若手术从未开始或尚无可查的最终归档,返回 503。" - "使用 GET:只读、幂等。" + "使用 GET:只读、幂等。\n\n" + "响应体 `details` 与 `summary` 的字段定义见模式 SurgeryConsumptionDetail / SurgeryConsumptionSummary;" + "若服务端启用耗材 TSV 文本日志,文件明细列为 tab 分隔的 " + "item_id、item_name、qty、doctor_id、timestamp(文末另有仅三列的汇总块 item_id、item_name、qty)," + "与 HTTP JSON 字段一致。" ), ) async def get_surgery_result( @@ -296,13 +295,22 @@ async def get_surgery_result( "description": "当前无待确认项或手术未在进行。", "model": SurgeryClientErrorResponse, }, + status.HTTP_422_UNPROCESSABLE_CONTENT: { + "description": "提示文本为空等导致无法合成播报。", + "model": SurgeryClientErrorResponse, + }, + status.HTTP_503_SERVICE_UNAVAILABLE: { + "description": "百度语音未配置或 TTS 调用失败。", + "model": SurgeryClientErrorResponse, + }, }, tags=["client"], - summary="拉取待确认耗材", + summary="拉取待确认耗材(含 TTS 音频)", description=( - "返回当前 FIFO 队首的一条低置信度识别。" - "客户端应播报 prompt_text 并由医生确认后调用 resolve 接口。" - "无待确认项时返回 404。" + "返回当前 FIFO 队首的一条低置信度识别;" + "响应内 `prompt_audio_mp3_base64` 为与 `prompt_text` 一致的 MP3(Base64),客户端可直接解码播放。" + "无待确认项时返回 404;合成失败或未配置语音服务时返回 422/503(见错误码)。" + "医生确认后请使用 `POST .../resolve` 上传 WAV。" ), ) async def get_pending_consumable_confirmation( @@ -317,7 +325,10 @@ async def get_pending_consumable_confirmation( ], pipeline: Annotated[SurgeryPipeline, Depends(get_surgery_pipeline)], ) -> SurgeryPendingConfirmationResponse: - payload = pipeline.get_pending_confirmation_for_client(surgery_id) + try: + payload = await pipeline.get_pending_confirmation_for_client(surgery_id) + except SurgeryPipelineError as exc: + _raise_confirmation_http(exc, surgery_id) if payload is None: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, @@ -344,7 +355,7 @@ async def get_pending_consumable_confirmation( description=( "multipart/form-data 上传单个 WAV 文件(字段名 `audio`)。" "服务端将音频存入 MinIO、调用百度 ASR 识别、解析候选项并完成确认。" - "记一条 source=voice 的消耗;若语音表示否认全部候选则不记消耗。" + "解析并确认后记一条消耗明细;若语音表示否认全部候选则不记消耗。" ), ) async def resolve_pending_consumable_confirmation( @@ -407,167 +418,3 @@ async def resolve_pending_consumable_confirmation( asr_text=result.asr_text, audio_object_key=result.audio_object_key, ) - - -@router.post( - "/client/surgeries/{surgery_id}/pending-confirmation/{confirmation_id}/resolve-text", - response_model=SurgeryPendingConfirmationResolveResponse, - responses={ - status.HTTP_404_NOT_FOUND: {"model": SurgeryClientErrorResponse}, - status.HTTP_409_CONFLICT: {"model": SurgeryClientErrorResponse}, - status.HTTP_422_UNPROCESSABLE_CONTENT: {"model": SurgeryClientErrorResponse}, - }, - tags=["client"], - summary="提交客户端语音识别文本以确认耗材", - description=( - "由浏览器 Web Speech 等本机 STT 得到的文本,不做 MinIO/百度 ASR;" - "候选项解析与上传 WAV 接口一致。" - ), -) -async def resolve_pending_consumable_confirmation_text( - surgery_id: Annotated[ - str, - Path( - min_length=6, - max_length=6, - pattern=r"^\d{6}$", - description="手术 6 位号,仅允许 6 位数字。", - ), - ], - confirmation_id: Annotated[str, Path(min_length=1, max_length=128)], - body: SurgeryPendingResolveTextRequest, - pipeline: Annotated[SurgeryPipeline, Depends(get_surgery_pipeline)], -) -> SurgeryPendingConfirmationResolveResponse: - try: - result = await pipeline.resolve_pending_confirmation_from_client_text( - surgery_id=surgery_id, - confirmation_id=confirmation_id, - recognized_text=body.recognized_text, - ) - except SurgeryPipelineError as exc: - _raise_confirmation_http(exc, surgery_id) - return SurgeryPendingConfirmationResolveResponse( - surgery_id=surgery_id, - confirmation_id=confirmation_id, - status="accepted", - message=result.message, - resolved_label=result.resolved_label, - rejected=result.rejected, - asr_text=result.asr_text, - audio_object_key=result.audio_object_key, - ) - - -@router.get( - "/client/surgeries/{surgery_id}/pending-confirmation/{confirmation_id}/prompt-audio", - responses={ - status.HTTP_404_NOT_FOUND: {"model": SurgeryClientErrorResponse}, - status.HTTP_422_UNPROCESSABLE_CONTENT: {"model": SurgeryClientErrorResponse}, - status.HTTP_503_SERVICE_UNAVAILABLE: {"model": SurgeryClientErrorResponse}, - }, - tags=["client"], - summary="待确认话术的 TTS 音频(MP3)", - description="使用百度在线合成,与 prompt_text 一致;供浏览器 MediaElement 直放。未配置百度语音时返回 503。", - response_class=Response, -) -async def get_pending_prompt_audio_mpeg( - surgery_id: Annotated[ - str, - Path( - min_length=6, - max_length=6, - pattern=r"^\d{6}$", - description="手术 6 位号,仅允许 6 位数字。", - ), - ], - confirmation_id: Annotated[str, Path(min_length=1, max_length=128)], - pipeline: Annotated[SurgeryPipeline, Depends(get_surgery_pipeline)], -) -> Response: - try: - data = await pipeline.get_pending_prompt_audio_mp3( - surgery_id=surgery_id, - confirmation_id=confirmation_id, - ) - except SurgeryPipelineError as exc: - _raise_confirmation_http(exc, surgery_id) - return Response( - content=data, - media_type="audio/mpeg", - headers={"Cache-Control": "no-store"}, - ) - - -@router.get( - "/internal/surgeries/{surgery_id}/voice-status", - response_model=SurgeryVoiceStatusResponse, - tags=["internal"], - summary="人工确认队列状态(联调)", - description="查询指定进行中手术的待确认队列长度与最近话术摘要。手术未在进行返回 404。", -) -async def get_surgery_voice_status( - surgery_id: Annotated[ - str, - Path( - min_length=6, - max_length=6, - pattern=r"^\d{6}$", - description="手术 6 位号,仅允许 6 位数字。", - ), - ], - pipeline: Annotated[SurgeryPipeline, Depends(get_surgery_pipeline)], -) -> SurgeryVoiceStatusResponse: - payload = pipeline.voice_status(surgery_id) - if payload is None: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail={ - "code": "SURGERY_NOT_ACTIVE", - "message": "该手术当前不在进行中,无实时语音状态。", - "surgery_id": surgery_id, - }, - ) - return SurgeryVoiceStatusResponse( - surgery_id=surgery_id, - voice_enabled=bool(payload["voice_enabled"]), - pending_queue_approx=int(payload["pending_queue_approx"]), - last_prompt_snippet=payload.get("last_prompt_snippet"), - last_asr_text=payload.get("last_asr_text"), - last_error=payload.get("last_error"), - ) - - -@router.get( - "/internal/surgeries/{surgery_id}/voice-audits", - response_model=SurgeryVoiceAuditsListResponse, - tags=["internal"], - summary="语音确认审计记录(按手术号分页)", - description=( - "查询持久化表 `voice_confirmation_audits`:ASR 文本、解析结果、" - "候选项快照、MinIO 对象键、失败原因等。用于追溯、对账与报表;" - "不区分手术是否仍进行中,只要库里有记录即返回。" - ), -) -async def get_surgery_voice_audits( - surgery_id: Annotated[ - str, - Path( - min_length=6, - max_length=6, - pattern=r"^\d{6}$", - description="手术 6 位号,仅允许 6 位数字。", - ), - ], - pipeline: Annotated[SurgeryPipeline, Depends(get_surgery_pipeline)], - limit: Annotated[int, Query(ge=1, le=200, description="每页条数。")] = 50, - offset: Annotated[int, Query(ge=0, description="跳过前若干条,供分页。")] = 0, -) -> SurgeryVoiceAuditsListResponse: - rows, total = await pipeline.list_voice_audits( - surgery_id, limit=limit, offset=offset - ) - return SurgeryVoiceAuditsListResponse( - surgery_id=surgery_id, - total=total, - limit=limit, - offset=offset, - items=[SurgeryVoiceAuditItem.model_validate(r) for r in rows], - ) diff --git a/app/config.py b/app/config.py index 7f44d87..158d5cc 100644 --- a/app/config.py +++ b/app/config.py @@ -101,7 +101,7 @@ class Settings(BaseSettings): video_result_doctor_id: str = "vision" #: 为 true 时,每次单帧分类得到 top1 等结果会打一条 INFO 日志(联调用;高流量时建议关)。 video_log_inference_results: bool = False - #: 为 true 时,将时间窗级识别写入文本日志(`start_surgery` 时按手术截断/初始化,窗内结果追加;Top2/3 仅名称;数量恒 1)。 + #: 为 true 时,将时间窗级识别写入文本日志(`start_surgery` 时按手术截断/初始化;每行 tab:item_id、item_name、qty、doctor_id、timestamp;停录后追加汇总块 item_id、item_name、qty)。 consumption_tsv_log_enabled: bool = True #: 路径模板,须含 `{surgery_id}`(每例手术独立文件)。不含占位时自动在扩展名前追加 `_`。 consumption_tsv_log_path: str = "logs/consumption_{surgery_id}.txt" diff --git a/app/repositories/surgery_results.py b/app/repositories/surgery_results.py index bc90b8e..d4ab07a 100644 --- a/app/repositories/surgery_results.py +++ b/app/repositories/surgery_results.py @@ -6,7 +6,7 @@ from sqlalchemy import delete, select from sqlalchemy.ext.asyncio import AsyncSession from app.db.models import SurgeryFinalResult, SurgeryResultDetailRow -from app.schemas import SurgeryConsumptionDetail +from app.schemas import SurgeryConsumptionDetail, SurgeryConsumptionStored class SurgeryResultRepository: @@ -17,7 +17,7 @@ class SurgeryResultRepository: session: AsyncSession, *, surgery_id: str, - details: list[SurgeryConsumptionDetail], + details: list[SurgeryConsumptionStored], completed_at: datetime | None = None, ) -> None: when = completed_at or datetime.now(timezone.utc) @@ -37,7 +37,7 @@ class SurgeryResultRepository: surgery_id=surgery_id, item_id=d.item_id, item_name=d.item_name, - quantity=d.quantity, + quantity=d.qty, doctor_id=d.doctor_id, recorded_at=d.timestamp, source=d.source, @@ -64,10 +64,9 @@ class SurgeryResultRepository: SurgeryConsumptionDetail( item_id=r.item_id, item_name=r.item_name, - quantity=r.quantity, + qty=r.quantity, doctor_id=r.doctor_id, timestamp=r.recorded_at, - source=r.source, ) for r in rows ] diff --git a/app/schemas.py b/app/schemas.py index de78a35..18c17e5 100644 --- a/app/schemas.py +++ b/app/schemas.py @@ -1,5 +1,6 @@ from __future__ import annotations +from dataclasses import dataclass from datetime import datetime from pydantic import BaseModel, ConfigDict, Field @@ -75,24 +76,72 @@ class SurgeryClientErrorResponse(BaseModel): class SurgeryConsumptionDetail(BaseModel): - """单条消耗明细(按事件发生,可能多行)。""" + """单条消耗明细(HTTP 与 OpenAPI;按事件发生,可能多行)。 - item_id: str = Field(description="物品 ID。") - item_name: str = Field(description="物品名称。") - quantity: int = Field(ge=0, description="本条记录对应的消耗数量。") - doctor_id: str = Field(description="医生 ID。") - timestamp: datetime = Field(description="记录时间(ISO 8601)。") - source: str = Field( - default="vision", - description="记录来源:vision 自动识别;voice 语音确认。", + JSON 字段顺序:item_id → item_name → qty → doctor_id → timestamp。 + (可选落盘耗材 TSV 明细列与此一致:item_id、item_name、qty、doctor_id、timestamp。) + """ + + model_config = ConfigDict( + json_schema_extra={ + "example": { + "item_id": "HC001", + "item_name": "纱布", + "qty": 1, + "doctor_id": "D1001", + "timestamp": "2026-04-21T10:30:00+08:00", + } + } ) + item_id: str = Field( + description=( + "业务物品标识:优先为耗材目录中的产品编码;" + "目录键经名称归一化后与分类类名匹配,未命中目录时与模型输出类名一致。" + ), + ) + item_name: str = Field(description="物品名称(分类或确认后的展示名)。") + qty: int = Field(ge=0, description="本条记录对应的消耗数量。") + doctor_id: str = Field(description="医生 ID。") + timestamp: datetime = Field(description="记录时间(ISO 8601,date-time)。") + + +@dataclass +class SurgeryConsumptionStored: + """内存 / 数据库持久化用的明细行(含 source,仅服务端内部使用,不随 HTTP 返回)。""" + + item_id: str + item_name: str + qty: int + doctor_id: str + timestamp: datetime + source: str = "vision" + + def as_response(self) -> SurgeryConsumptionDetail: + return SurgeryConsumptionDetail( + item_id=self.item_id, + item_name=self.item_name, + qty=self.qty, + doctor_id=self.doctor_id, + timestamp=self.timestamp, + ) + class SurgeryConsumptionSummary(BaseModel): - """按物品汇总:该手术下该物品消耗数量合计。""" + """按物品汇总:该手术下该物品消耗数量合计(item_id、item_name、total_quantity)。""" - item_id: str = Field(description="物品 ID。") - item_name: str = Field(description="物品名称。") + model_config = ConfigDict( + json_schema_extra={ + "example": { + "item_id": "HC001", + "item_name": "纱布", + "total_quantity": 3, + } + } + ) + + item_id: str = Field(description="物品 ID,与明细中 item_id 一致。") + item_name: str = Field(description="物品名称,取该物品首条明细中的名称。") total_quantity: int = Field(ge=0, description="该物品在本台手术中的消耗数量合计。") @@ -105,7 +154,7 @@ def build_consumption_summary( if row.item_id not in totals: totals[row.item_id] = (row.item_name, 0) name, acc = totals[row.item_id] - totals[row.item_id] = (name, acc + row.quantity) + totals[row.item_id] = (name, acc + row.qty) return [ SurgeryConsumptionSummary( item_id=iid, @@ -116,73 +165,6 @@ def build_consumption_summary( ] -class SurgeryVoiceStatusResponse(BaseModel): - """手术进行中人工确认(客户端播报)联调状态。""" - - surgery_id: str = Field(description="手术 6 位号。") - voice_enabled: bool = Field( - description="是否启用了低置信度人工确认(客户端拉取待确认项)。", - ) - pending_queue_approx: int = Field( - ge=0, - description="待医生确认的追问任务数量(FIFO 队列长度)。", - ) - last_prompt_snippet: str | None = Field( - default=None, - description="最近一次生成的待确认话术摘要。", - ) - last_asr_text: str | None = Field( - default=None, - description="最近一次语音确认接口产生的 ASR 文本。", - ) - last_error: str | None = Field( - default=None, - description="最近一次语音确认错误说明(如 ASR/解析失败)。", - ) - - -class SurgeryVoiceAuditItem(BaseModel): - """单条 `voice_confirmation_audits` 行(追溯对账用)。""" - - id: int - confirmation_id: str - status: str = Field( - description=( - "recognized / rejected / parse_failed / asr_failed / invalid_audio / " - "upload_failed / client_stt_empty / client_stt_parse_failed 等" - ), - ) - audio_object_key: str | None = None - audio_content_type: str | None = None - audio_size_bytes: int | None = None - audio_sha256: str | None = None - asr_text: str | None = None - resolved_label: str | None = None - options_snapshot_json: str | None = Field( - default=None, - description="当次候选项与置信度 JSON 快照。", - ) - error_message: str | None = None - created_at: datetime = Field( - description="记录写入时间(UTC)。", - ) - - model_config = ConfigDict(from_attributes=True) - - -class SurgeryVoiceAuditsListResponse(BaseModel): - """按手术号分页的语音确认审计列表。""" - - surgery_id: str - total: int = Field(ge=0, description="该手术在表中的总条数(不受本页 limit 截断)。") - limit: int = Field(ge=1, le=200) - offset: int = Field(ge=0) - items: list[SurgeryVoiceAuditItem] = Field( - default_factory=list, - description="按 `created_at` 降序。", - ) - - class PendingConfirmationOption(BaseModel): label: str confidence: float @@ -193,23 +175,19 @@ class SurgeryPendingConfirmationResponse(BaseModel): surgery_id: str confirmation_id: str - prompt_text: str = Field(description="可直接用于 TTS 播报的话术。") + prompt_text: str = Field(description="可直接用于展示或无障碍朗读的话术(与 MP3 内容一致)。") + prompt_audio_mp3_base64: str = Field( + description=( + "与 prompt_text 对应的百度 TTS 音频(MP3)的标准 Base64 字符串(无换行);" + "客户端解码为二进制后以 audio/mpeg 播放。" + ), + ) options: list[PendingConfirmationOption] model_top1_label: str = Field(description="模型原始 Top1 标签(可能不在候选清单内)。") model_top1_confidence: float created_at: datetime -class SurgeryPendingResolveTextRequest(BaseModel): - """由浏览器 Web Speech 等客户端本地识别后提交的文本,语义与经百度 ASR 得到的文本相同。""" - - recognized_text: str = Field( - min_length=1, - max_length=2000, - description="识别文本;服务端用与语音接口相同的规则解析候选项。", - ) - - class SurgeryPendingConfirmationResolveResponse(BaseModel): surgery_id: str confirmation_id: str @@ -244,21 +222,21 @@ class SurgeryResultResponse(BaseModel): { "item_id": "HC001", "item_name": "纱布", - "quantity": 2, + "qty": 2, "doctor_id": "D1001", "timestamp": "2026-04-21T10:30:00+08:00", }, { "item_id": "HC001", "item_name": "纱布", - "quantity": 1, + "qty": 1, "doctor_id": "D1002", "timestamp": "2026-04-21T11:05:00+08:00", }, { "item_id": "HC002", "item_name": "缝线", - "quantity": 1, + "qty": 1, "doctor_id": "D1001", "timestamp": "2026-04-21T10:45:00+08:00", }, @@ -276,9 +254,15 @@ class SurgeryResultResponse(BaseModel): message: str = Field(description="返回说明。") details: list[SurgeryConsumptionDetail] = Field( default_factory=list, - description="消耗明细行:每条含物品、数量、医生与时间;同一物品可多次出现。", + description=( + "消耗明细(多行)。每行字段顺序:item_id、item_name、qty、doctor_id、timestamp;" + "同一 item_id 可多次出现。" + ), ) summary: list[SurgeryConsumptionSummary] = Field( default_factory=list, - description="按物品汇总的消耗合计,应与 details 按 item_id 汇总一致。", + description=( + "按 item_id 汇总的合计表(仅 item_id、item_name、total_quantity)," + "应与 details 按 item_id 汇总 qty 一致。" + ), ) diff --git a/app/services/consumable_vision_algorithm.py b/app/services/consumable_vision_algorithm.py index 42676b5..52997d8 100644 --- a/app/services/consumable_vision_algorithm.py +++ b/app/services/consumable_vision_algorithm.py @@ -219,6 +219,13 @@ def cls_top3_from_result( n3 = str(cls.names.get(int(t5i[2]), "")).strip() c3 = _ci(2) + def _pid(label: str) -> str: + lb = (label or "").strip() + if not lb: + return "" + norm = _norm_product_name(lb) + return (name_to_code.get(norm) or name_to_code.get(lb) or "").strip() + return ClsTop3( t1_name=n1, t1_conf=c1, @@ -226,9 +233,9 @@ def cls_top3_from_result( t2_conf=c2, t3_name=n3, t3_conf=c3, - t1_pid=name_to_code.get(n1, ""), - t2_pid=name_to_code.get(n2, ""), - t3_pid=name_to_code.get(n3, ""), + t1_pid=_pid(n1), + t2_pid=_pid(n2), + t3_pid=_pid(n3), ) diff --git a/app/services/consumption_tsv_log.py b/app/services/consumption_tsv_log.py index 8de4ca0..6546abf 100644 --- a/app/services/consumption_tsv_log.py +++ b/app/services/consumption_tsv_log.py @@ -1,4 +1,6 @@ -"""每例手术一个文本文件(制表符列):`start_surgery` 时截断并写表头,每次时间窗识别**追加**一行。终端 Markdown 中时间戳为可读形式;落盘行内仍为 ISO 便于程序解析。 +"""每例手术一个文本文件(制表符列):`start_surgery` 时截断并写表头,每次时间窗识别**追加**一行(仅 item_id, item_name, qty, doctor_id, timestamp)。终端 Markdown 时间戳为可读形式;落盘时间戳为 ISO 区间便于程序解析。 + +手术结束时再追加一节汇总行:item_id, item_name, qty(无其它列)。 时间戳:在拉流起点记录 `time.time()`,与 `time.monotonic()` 时间窗对齐。直播 RTSP 经 OpenCV 一般无可靠绝对时码,以本机接收时刻为准。 """ @@ -14,11 +16,12 @@ from zoneinfo import ZoneInfo, ZoneInfoNotFoundError from loguru import logger from app.config import settings -from app.services.consumable_vision_algorithm import ClsTop3 +from app.services.consumable_vision_algorithm import ClsTop3, _norm_product_name from app.terminal_markdown import print_markdown_stderr -# 制表符分隔;时间范围用 U+2013 连接;Top2/3 仅名称;本窗消耗数量恒为 1 -HEADER = "物品id\t物品名称\tTop2物品名称\tTop3物品名称\t消耗数量\t医生id\t时间戳\n" +# 制表符分隔;时间范围用 U+2013 连接;本窗消耗数量恒为 1 +HEADER = "item_id\titem_name\tqty\tdoctor_id\ttimestamp\n" +SUMMARY_HEADER = "item_id\titem_name\tqty\n" _RANGE_SEP = "\u2013" # en dash,与样例 `00:00:00.000–00:00:45.000` 一致 _lock = threading.Lock() @@ -83,13 +86,20 @@ def _encode_cell(value: str) -> str: return s -def _item_id_for_row(name: str, pid: str, name_to_code: dict[str, str]) -> str: - p = (pid or "").strip() +def resolve_consumption_item_id( + t1_name: str, + t1_pid: str, + name_to_code: dict[str, str], +) -> str: + """业务物品 id:`name_to_code` 的键为归一化名称,须与分类输出一同参与查找。""" + n = (t1_name or "").strip() + norm = _norm_product_name(n) + code = (name_to_code.get(norm) or name_to_code.get(n) or "").strip() + if code: + return code + p = (t1_pid or "").strip() if p: return p - n = (name or "").strip() - if n in name_to_code: - return (name_to_code.get(n) or n).strip() return n @@ -102,17 +112,12 @@ def build_tsv_line( wall_start_epoch: float, wall_end_epoch: float, ) -> str: - id1 = _item_id_for_row(best.t1_name, best.t1_pid, name_to_code) - # 与历史样例:Top1 为「名称 置信度」四位小数 - name1 = f"{(best.t1_name or '').strip()} {best.t1_conf:.4f}".strip() - n2 = (best.t2_name or "").strip() - n3 = (best.t3_name or "").strip() + id1 = resolve_consumption_item_id(best.t1_name, best.t1_pid, name_to_code) + name1 = (best.t1_name or "").strip() ts = format_consumption_timestamp(camera_id, wall_start_epoch, wall_end_epoch) row = [ _encode_cell(id1), _encode_cell(name1), - _encode_cell(n2), - _encode_cell(n3), "1", _encode_cell(doctor_id), _encode_cell(ts), @@ -179,25 +184,17 @@ def build_consumption_markdown( wall_start_epoch: float, wall_end_epoch: float, ) -> str: - """终端用:Top1 含 id/名称/置信度;Top2/3 仅名称;消耗数量恒为 1。""" - id1 = _item_id_for_row(best.t1_name, best.t1_pid, name_to_code) + """终端用:与落盘列一致;本窗 qty 恒为 1。""" + id1 = resolve_consumption_item_id(best.t1_name, best.t1_pid, name_to_code) n1 = (best.t1_name or "").strip() - has2 = bool((best.t2_name or "").strip()) - has3 = bool((best.t3_name or "").strip()) - n2 = (best.t2_name or "").strip() if has2 else "" - n3 = (best.t3_name or "").strip() if has3 else "" - dash = "—" ts = format_consumption_timestamp_readable(camera_id, wall_start_epoch, wall_end_epoch) return "\n".join( [ - "| Top1 物品id | Top1 物品名称 | Top1 置信度 | Top2 物品名称 | Top3 物品名称 | 消耗数量 | 医生id | 时间戳 |", - "| :--- | :--- | ---: | :--- | :--- | ---: | :--- | :--- |", - "| {} | {} | {:.4f} | {} | {} | 1 | {} | {} |".format( + "| item_id | item_name | qty | doctor_id | timestamp |", + "| :--- | :--- | ---: | :--- | :--- |", + "| {} | {} | 1 | {} | {} |".format( _md_cell(id1), _md_cell(n1), - best.t1_conf, - _md_cell(n2) if has2 else dash, - _md_cell(n3) if has3 else dash, _md_cell(doctor_id), _md_cell(ts), ), @@ -206,6 +203,47 @@ def build_consumption_markdown( ) +def append_consumption_log_summary( + surgery_id: str, + totals: dict[str, tuple[str, int]], +) -> None: + """在明细行之后追加汇总块(表头 + 每物品一行)。""" + if not settings.consumption_tsv_log_enabled or not totals: + return + path = resolved_consumption_log_path(surgery_id) + if not path.is_file(): + return + body = "".join( + ["\n", SUMMARY_HEADER] + + [ + "\t".join([_encode_cell(iid), _encode_cell(name), str(qty)]) + "\n" + for iid, (name, qty) in sorted(totals.items(), key=lambda x: x[0]) + ] + ) + with _lock: + with path.open("a", encoding="utf-8") as f: + f.write(body) + + +def print_consumption_summary_markdown( + totals: dict[str, tuple[str, int]], +) -> None: + if not settings.consumption_log_markdown_terminal or not totals: + return + lines = [ + "## 消耗汇总", + "", + "| item_id | item_name | qty |", + "| :--- | :--- | ---: |", + ] + for iid, (name, qty) in sorted(totals.items(), key=lambda x: x[0]): + lines.append( + "| {} | {} | {} |".format(_md_cell(iid), _md_cell(name), qty) + ) + lines.append("") + print_markdown_stderr("\n".join(lines)) + + def append_consumption_window( *, surgery_id: str, @@ -215,9 +253,17 @@ def append_consumption_window( camera_id: str, wall_start_epoch: float, wall_end_epoch: float, + running_totals: dict[str, tuple[str, int]] | None = None, ) -> None: if not settings.consumption_tsv_log_enabled and not settings.consumption_log_markdown_terminal: return + iid = resolve_consumption_item_id(best.t1_name, best.t1_pid, name_to_code) + iname = (best.t1_name or "").strip() + if running_totals is not None: + if iid not in running_totals: + running_totals[iid] = (iname, 0) + prev_name, q = running_totals[iid] + running_totals[iid] = (prev_name, q + 1) if settings.consumption_tsv_log_enabled: line = build_tsv_line( name_to_code=name_to_code, diff --git a/app/services/surgery_pipeline.py b/app/services/surgery_pipeline.py index e37e4f4..f45ebf7 100644 --- a/app/services/surgery_pipeline.py +++ b/app/services/surgery_pipeline.py @@ -2,6 +2,8 @@ from __future__ import annotations +import base64 + from app.database import AsyncSessionLocal from app.repositories.surgery_results import SurgeryResultRepository from app.schemas import ( @@ -78,31 +80,22 @@ class SurgeryPipeline: return persisted return self._sessions.archived_consumption_fallback(surgery_id) - def voice_status(self, surgery_id: str) -> dict[str, object] | None: - return self._sessions.voice_status(surgery_id) - - async def list_voice_audits( - self, - surgery_id: str, - *, - limit: int = 50, - offset: int = 0, - ): - """持久化表 `voice_confirmation_audits` 分页,用于追溯/对账/报表。""" - return await self._voice.list_voice_audits_for_surgery( - surgery_id, limit=limit, offset=offset - ) - - def get_pending_confirmation_for_client( + async def get_pending_confirmation_for_client( self, surgery_id: str ) -> SurgeryPendingConfirmationResponse | None: pending = self._sessions.next_pending_confirmation(surgery_id) if pending is None: return None + mp3 = await run_in_threadpool( + self._voice.synthesize_prompt_to_mp3, + pending.prompt_text, + ) + b64 = base64.b64encode(mp3).decode("ascii") return SurgeryPendingConfirmationResponse( surgery_id=surgery_id, confirmation_id=pending.id, prompt_text=pending.prompt_text, + prompt_audio_mp3_base64=b64, options=[ PendingConfirmationOption(label=a, confidence=b) for a, b in pending.options @@ -129,34 +122,3 @@ class SurgeryPipeline: content_type=content_type, ) - async def resolve_pending_confirmation_from_client_text( - self, - surgery_id: str, - confirmation_id: str, - recognized_text: str, - ) -> VoiceResolveResult: - """浏览器等客户端本机识别后的文本,解析规则与 WAV 路径一致(无需 MinIO/百度)。""" - return await self._voice.resolve_from_recognized_text( - surgery_id=surgery_id, - confirmation_id=confirmation_id, - recognized_text=recognized_text, - ) - - async def get_pending_prompt_audio_mp3( - self, - surgery_id: str, - confirmation_id: str, - ) -> bytes: - """待确认 `prompt_text` 的百度 TTS MP3,供模拟客户端用 Audio 直放。""" - pending = self._sessions.get_pending_confirmation_by_id( - surgery_id, confirmation_id - ) - if pending is None or pending.status != "pending": - raise SurgeryPipelineError( - "CONFIRMATION_NOT_FOUND", - "未找到该待确认项或已处理。", - ) - return await run_in_threadpool( - self._voice.synthesize_prompt_to_mp3, - pending.prompt_text, - ) diff --git a/app/services/video/session_manager.py b/app/services/video/session_manager.py index e2d5ae4..50ca0d3 100644 --- a/app/services/video/session_manager.py +++ b/app/services/video/session_manager.py @@ -12,7 +12,7 @@ from loguru import logger from app.config import Settings from app.database import AsyncSessionLocal from app.repositories.surgery_results import SurgeryResultRepository -from app.schemas import SurgeryConsumptionDetail +from app.schemas import SurgeryConsumptionDetail, SurgeryConsumptionStored from app.services.consumable_vision_algorithm import ( ClsTop3, ConsumableVisionAlgorithmService, @@ -26,7 +26,12 @@ from app.services.video.backend_resolver import BackendResolver from app.services.video.hikvision_runtime import HikvisionInitRefCount, HikvisionRuntime from app.services.video.rtsp_capture import RtspCapture from app.services.video.types import VideoBackendKind -from app.services.consumption_tsv_log import append_consumption_window, init_consumption_log_file +from app.services.consumption_tsv_log import ( + append_consumption_log_summary, + append_consumption_window, + init_consumption_log_file, + print_consumption_summary_markdown, +) from app.services.voice_file_log import init_voice_log_file from app.services.voice_confirm import build_prompt_text from app.surgery_errors import SurgeryPipelineError @@ -64,7 +69,7 @@ class SurgerySessionState: #: 分类类名(归一化) -> 业务物品 id(Excel 产品编码或名称)。 name_to_code: dict[str, str] = field(default_factory=dict) camera_infer: dict[str, CameraStreamInferState] = field(default_factory=dict) - details: list[SurgeryConsumptionDetail] = field(default_factory=list) + details: list[SurgeryConsumptionStored] = field(default_factory=list) lock: asyncio.Lock = field(default_factory=asyncio.Lock) ready: asyncio.Event = field(default_factory=asyncio.Event) last_detail_monotonic: dict[str, float] = field(default_factory=dict) @@ -76,6 +81,8 @@ class SurgerySessionState: last_asr_text: str | None = None #: 最近一次语音确认错误说明(ASR/解析失败等)。 last_voice_error: str | None = None + #: 视觉时间窗落盘用量累计,供停录时写汇总(item_id -> 首次名称, 次数)。 + consumption_log_totals: dict[str, tuple[str, int]] = field(default_factory=dict) @dataclass @@ -87,7 +94,7 @@ class RunningSurgery: @dataclass class ArchivedSurgery: - details: list[SurgeryConsumptionDetail] + details: list[SurgeryConsumptionStored] def _rank_topk_for_candidates( @@ -293,7 +300,7 @@ class CameraSessionManager: async def _persist_archived_details( self, surgery_id: str, - details: list[SurgeryConsumptionDetail], + details: list[SurgeryConsumptionStored], ) -> bool: if self._repo is None: return True @@ -331,6 +338,10 @@ class CameraSessionManager: if isinstance(res, BaseException): logger.warning("surgery task finished with error: {}", res) + totals = dict(run.state.consumption_log_totals) + append_consumption_log_summary(surgery_id, totals) + print_consumption_summary_markdown(totals) + details = list(run.state.details) persisted = False @@ -364,26 +375,13 @@ class CameraSessionManager: rows = list(self._active[surgery_id].state.details) if not rows: return None - return rows + return [r.as_response() for r in rows] def archived_consumption_fallback(self, surgery_id: str) -> list[SurgeryConsumptionDetail] | None: arch = self._archive.get(surgery_id) if arch is None: return None - return list(arch.details) - - def voice_status(self, surgery_id: str) -> dict[str, object] | None: - if surgery_id not in self._active: - return None - st = self._active[surgery_id].state - return { - "surgery_id": surgery_id, - "voice_enabled": bool(self._s.voice_confirmation_enabled), - "pending_queue_approx": len(st.pending_fifo), - "last_prompt_snippet": st.last_pending_prompt_snippet, - "last_asr_text": st.last_asr_text, - "last_error": st.last_voice_error, - } + return [r.as_response() for r in arch.details] def record_voice_trace( self, @@ -525,10 +523,10 @@ class CameraSessionManager: return state.last_detail_monotonic[item_id] = now_m state.details.append( - SurgeryConsumptionDetail( + SurgeryConsumptionStored( item_id=item_id, item_name=item_name, - quantity=1, + qty=1, doctor_id=doctor_id, timestamp=datetime.now(timezone.utc), source=source, @@ -698,6 +696,7 @@ class CameraSessionManager: camera_id=camera_id, wall_start_epoch=wall_lo, wall_end_epoch=wall_hi, + running_totals=state.consumption_log_totals, ) pending_preds.append( cls_top3_to_prediction_result(best) diff --git a/app/services/voice_resolution.py b/app/services/voice_resolution.py index 9391d81..261b183 100644 --- a/app/services/voice_resolution.py +++ b/app/services/voice_resolution.py @@ -11,7 +11,6 @@ from loguru import logger from app.config import Settings from app.services.voice_file_log import emit_voice_event from app.database import AsyncSessionLocal -from app.db.models import VoiceConfirmationAudit from app.repositories.voice_audits import VoiceAuditRepository from app.services.audio_wav import WavDecodeError, wav_bytes_to_pcm16k_mono_s16le from app.services.baidu_speech import BaiduSpeechNotConfiguredError, BaiduSpeechService @@ -660,22 +659,6 @@ class VoiceConfirmationService: message="已确认并记一条消耗。", ) - async def list_voice_audits_for_surgery( - self, - surgery_id: str, - *, - limit: int = 50, - offset: int = 0, - ) -> tuple[list[VoiceConfirmationAudit], int]: - """从 `voice_confirmation_audits` 表分页读取,供内部查询与报表。""" - async with AsyncSessionLocal() as session: - return await self._audits.list_by_surgery( - session, - surgery_id, - limit=limit, - offset=offset, - ) - async def _persist_audit( self, *, diff --git a/docs/client-api-integration.md b/docs/client-api-integration.md new file mode 100644 index 0000000..0fbd903 --- /dev/null +++ b/docs/client-api-integration.md @@ -0,0 +1,213 @@ +# 手术室监控服务 — 客户端 HTTP API 对接说明 + +本文档面向**集成我方 FastAPI 后端的客户系统**(HIS、手麻、护理工作站、自研终端等),说明如何通过 HTTP 调用完成「手术开始 / 结束」「耗材结果查询」「低置信度耗材语音确认」等能力。 + +> **说明**:仓库内 `scripts/demo_client/` 等演示页面仅用于**内部联调与测试**,不代表生产对接规范;生产环境请按本文档与 OpenAPI 契约自行实现客户端。 + +--- + +## 1. 服务与发现 + + +| 项目 | 说明 | +| ------- | ------------------------------------------------------------- | +| 默认监听 | 应用默认 `0.0.0.0:38080`(以实际部署为准,可能经反向代理改写路径或端口) | +| 基础路径 | 路由**无全局前缀**;下文路径均为相对服务根路径 | +| OpenAPI | 服务启动后可访问 `**/docs`(Swagger UI)**、`**/redoc`** 获取实时 Schema 与试调 | +| 健康检查 | `GET /health`:探活与数据库连通性(降级时可能返回 503) | +| 跨域 CORS | 仅当服务端开启演示用 CORS 配置时对浏览器页面生效;**服务端对接通常不受 CORS 限制** | + + +认证方式以部署约定为准;当前公开路由未在文档层强制 API Key,若贵方环境增加了网关鉴权,请在请求头中按网关要求携带。 + +--- + +## 2. 客户端 API 一览(`/client/...`) + +所有「客户集成」接口均位于 `**/client`** 命名空间下。 + + +| 方法 | 路径 | 摘要 | +| ------ | ------------------------------------------------------------------------------- | ------------------------- | +| `POST` | `/client/surgeries/start` | 开始手术:确认摄像头开录成功后返回 | +| `POST` | `/client/surgeries/end` | 结束手术:确认停录成功后返回 | +| `GET` | `/client/surgeries/{surgery_id}/result` | 查询该台手术的耗材明细与汇总 | +| `GET` | `/client/surgeries/{surgery_id}/pending-confirmation` | 拉取一条待确认项(含 TTS 音频 Base64) | +| `POST` | `/client/surgeries/{surgery_id}/pending-confirmation/{confirmation_id}/resolve` | 上传医生语音 WAV,完成确认或否认 | + + +路径参数 `**surgery_id`**:固定 **6 位数字**(正则 `^\d{6}$`)。 + +--- + +## 3. 端到端业务流程(推荐时序) + +以下为客户系统与手术室监控服务之间的**推荐调用顺序**与并行关系。 + +```mermaid +sequenceDiagram + participant Client as 客户系统 + participant API as 监控服务 API + + Client->>API: POST /client/surgeries/start + Note over Client,API: body: surgery_id, camera_ids, candidate_consumables + API-->>Client: 200 accepted(开录已确认) + + par 术中轮询 + loop 按需轮询 + Client->>API: GET .../result + API-->>Client: 200 明细+汇总 或 503 RESULT_NOT_READY + end + loop 语音确认闭环(若启用) + Client->>API: GET .../pending-confirmation + API-->>Client: 200 待确认+MP3 或 404 无待确认 + opt 有待确认 + Client->>Client: 播放 prompt_audio_mp3_base64 + Client->>API: POST .../resolve(multipart audio) + API-->>Client: 200 accepted + end + end + end + + Client->>API: POST /client/surgeries/end + API-->>Client: 200 accepted(停录已确认) + + Client->>API: GET .../result + API-->>Client: 200 归档后持久化结果(若可查) +``` + + + +### 3.1 手术生命周期(状态视角) + +```mermaid +flowchart LR + A[未开始] -->|POST start 成功| B[录制中 / 推理中] + B -->|GET result 可 200| C[可查消耗] + B -->|GET pending 可 200| D[有待确认] + D -->|POST resolve| B + B -->|POST end 成功| E[已结束] + E -->|GET result| C +``` + + + +--- + +## 4. 接口说明与请求/响应要点 + +### 4.1 `POST /client/surgeries/start` + +**Content-Type**:`application/json` + +**请求体(摘要)** + + +| 字段 | 类型 | 说明 | +| ----------------------- | -------- | ------------------------------------------------------------- | +| `surgery_id` | string | 6 位数字手术号 | +| `camera_ids` | string[] | 至少 1 个;需与贵方 RTSP/SDK 映射中的 **摄像头 ID** 一致 | +| `candidate_consumables` | string[] | 可选;**本台手术可能用到的耗材名称清单**。服务端仅对该清单内耗材做自动记账与待确认追问;为空则只做拉流推理、不写入消耗 | + + +**成功(200)**:`SurgeryApiResponse` — `status` 一般为 `accepted`,表示**服务端已确认开录完成**。 + +**失败**:常见 `**503 Service Unavailable`**,`detail` 内含业务 `code`(如录制子系统未就绪、开录未确认等)。开录/停录类错误会按服务端配置**自动重试**;仍失败则返回最后一次错误信息。 + +### 4.2 `POST /client/surgeries/end` + +**Content-Type**:`application/json` + +**请求体**:`{ "surgery_id": "123456" }` + +**成功(200)**:停录已确认。 + +**失败**:`503` 同 start,表示未能在确认摄像头全部停录后完成请求。 + +### 4.3 `GET /client/surgeries/{surgery_id}/result` + +**幂等只读**。术中返回当前内存已记账结果;结束后返回数据库持久化结果(以服务端实现为准)。 + +**成功(200)**:`SurgeryResultResponse` + +- `details`:多行消耗明细,字段顺序为 `**item_id` → `item_name` → `qty` → `doctor_id` → `timestamp`** +- `summary`:按 `item_id` 汇总的 `total_quantity` + +`**503`**:`detail.code === "RESULT_NOT_READY"` — 尚无该手术可查询结果(未开始、未成功开录或暂无可返回数据)。 + +### 4.4 `GET /client/surgeries/{surgery_id}/pending-confirmation` + +用于**低置信度识别**的人工确认闭环(需服务端启用语音确认及相关配置)。 + +**成功(200)**:包含 `confirmation_id`、`prompt_text`、候选项 `options`、`prompt_audio_mp3_base64`(与话术一致的 **MP3 标准 Base64**,无换行)、模型 Top1 等。 + +`**404`**:当前无待确认或手术未在进行(`NO_PENDING_CONFIRMATION`)。 + +`**422` / `503`**:话术为空、音频无效、ASR/TTS/MinIO/百度语音等异常时返回,详见响应 `detail.code`。 + +### 4.5 `POST /client/surgeries/{surgery_id}/pending-confirmation/{confirmation_id}/resolve` + +**Content-Type**:`multipart/form-data` + +**表单字段** + + +| 字段 | 说明 | +| ------- | -------------------------------------------------------- | +| `audio` | 单个 `**.wav`** 文件;建议 16 kHz 单声道 PCM;其他格式服务端可能尝试 ffmpeg 转码 | + + +**成功(200)**:`SurgeryPendingConfirmationResolveResponse` — 含 `resolved_label`、`rejected`、`asr_text`、`audio_object_key` 等。 + +**常见 HTTP 状态**:`404`(确认项不存在或已失效)、`409`(已处理)、`422`(音频/解析问题)、`503`(外部依赖故障)。 + +--- + +## 5. 错误约定 + +FastAPI 对 `HTTPException` 的 JSON 外形一般为: + +```json +{ + "detail": { + "code": "业务错误码", + "message": "人类可读说明", + "surgery_id": "123456" + } +} +``` + +部分错误可能在 `detail` 中附带额外字段(如重试剩余次数),请以实际响应与 OpenAPI 为准。 + +**校验错误(422)**:请求体或路径不符合 Pydantic 校验时,FastAPI 可能返回标准 `422` 验证错误结构(与上表「对象型 detail」不同),请客户端分别解析。 + +--- + +## 6. 与内部演示能力的关系(非客户必选) + +以下路由用于**内部一键联调**,客户生产系统**无需依赖**: + + +| 路径 | 说明 | +| ------------------------------------------- | -------------------------------------------------- | +| `GET /internal/demo/orchestrator-status` | 探测演示编排是否开启、RTSP 配置文件是否设置等 | +| `POST /internal/demo/orchestrate-and-start` | 仅在服务端开启 `DEMO_ORCHESTRATOR_ENABLED` 时注册;用于演示环境串联开录 | + + +客户正式对接应直接调用 `**/client/...`**,并在贵方环境中配置真实的 `camera_ids` 与视频后端映射。 + +--- + +## 7. 联调建议 + +1. 先调用 `**GET /health`** 确认服务与数据库可用。 +2. 用 `**POST /client/surgeries/start`** 验证 `camera_ids` 与现场 RTSP/SDK 配置一致,避免 503。 +3. `**candidate_consumables**` 与实际手术耗材名称尽量与院内目录或模型标签对齐,减少待确认次数。 +4. 结果查询 `**503**` 时建议**退避重试**(术中数据尚未就绪属正常现象)。 +5. 以 `**/docs`** 导出或对照契约测试,与贵方 CI 中的契约测试对齐。 + +--- + +## 8. 文档修订 + +接口行为以部署实例的 **OpenAPI(`/docs`)** 与代码为准;字段含义补充见仓库内 `app/schemas.py` 中各模型的 `description`。 \ No newline at end of file diff --git a/docs/staging-regression-checklist.md b/docs/staging-regression-checklist.md index c7918dc..9378576 100644 --- a/docs/staging-regression-checklist.md +++ b/docs/staging-regression-checklist.md @@ -23,7 +23,6 @@ - [ ] 客户端对 `prompt_text` **TTS 播报**,采集医生回答为 **WAV** - [ ] `POST .../pending-confirmation/{confirmation_id}/resolve`,`multipart` 字段名 `audio` - [ ] 确认后明细中出现 `source=voice`;否认不增加明细 - - [ ] (可选)`GET /internal/surgeries/{id}/voice-status` 查看队列与最近 ASR 摘要 4. **结束手术** `POST /client/surgeries/end` - [ ] 返回 `200`,摄像头任务停止 diff --git a/docs/客户端手术通信接口说明.md b/docs/客户端手术通信接口说明.md index 5dad5ae..660861b 100644 --- a/docs/客户端手术通信接口说明.md +++ b/docs/客户端手术通信接口说明.md @@ -7,11 +7,11 @@ ## 1. 概述 -| 能力 | 说明 | -| ---- | ---------------------------------------------------- | -| 开始手术 | 请求开始手术;服务端启动摄像头录制,**仅在确认开录完成后**返回 HTTP 200。 | -| 结束手术 | 请求结束手术;服务端停止摄像头录制,**仅在确认停录完成后**返回 HTTP 200。 | -| 查询结果 | 根据手术 6 位号查询消耗明细与汇总;**仅在已开录且至少已有一条消耗明细后**返回 HTTP 200。 | +| 能力 | 说明 | +| ----- | ------------------------------------------------------ | +| 开始手术 | 请求开始手术;服务端启动摄像头录制,**仅在确认开录完成后**返回 HTTP 200。 | +| 结束手术 | 请求结束手术;服务端停止摄像头录制,**仅在确认停录完成后**返回 HTTP 200。 | +| 查询结果 | 根据手术 6 位号查询消耗明细与汇总;**仅在已开录且至少已有一条消耗明细后**返回 HTTP 200。 | | 待确认耗材 | 低置信度时服务端排队一条待确认任务;客户端拉取话术(TTS)并在医生确认后回传,**不阻塞**后续视频推理。 | @@ -42,13 +42,13 @@ ## 3. 接口列表 -| 序号 | 方法 | 路径 | 说明 | -| --- | ------ | --------------------------------------- | ------ | -| 1 | `POST` | `/client/surgeries/start` | 开始手术 | -| 2 | `POST` | `/client/surgeries/end` | 结束手术 | -| 3 | `GET` | `/client/surgeries/{surgery_id}/result` | 查询手术结果 | -| 4 | `GET` | `/client/surgeries/{surgery_id}/pending-confirmation` | 拉取一条待确认耗材 | -| 5 | `POST` | `/client/surgeries/{surgery_id}/pending-confirmation/{confirmation_id}/resolve` | 提交医生确认结果 | +| 序号 | 方法 | 路径 | 说明 | +| --- | ------ | ------------------------------------------------------------------------------- | --------- | +| 1 | `POST` | `/client/surgeries/start` | 开始手术 | +| 2 | `POST` | `/client/surgeries/end` | 结束手术 | +| 3 | `GET` | `/client/surgeries/{surgery_id}/result` | 查询手术结果 | +| 4 | `GET` | `/client/surgeries/{surgery_id}/pending-confirmation` | 拉取一条待确认耗材 | +| 5 | `POST` | `/client/surgeries/{surgery_id}/pending-confirmation/{confirmation_id}/resolve` | 提交医生确认结果 | --- @@ -71,15 +71,15 @@ **请求体字段:** -| 字段名 | 类型 | 必填 | 说明 | -| ----------------------- | ---------- | --- | ---------------------- | -| `surgery_id` | `string` | 是 | 手术 6 位号,必须为 6 位数字。 | -| `camera_ids` | `string[]` | 是 | 摄像头 ID 列表,至少 1 个元素;须与服务端配置的 RTSP 映射键一致(示例见 `app/resources/camera_rtsp_urls.sample.json`)。 | -| `candidate_consumables` | `string[]` | 否 | 本台手术允许记账的耗材名称清单。**为空或未传则不会写入任何消耗**(仅拉流推理);非空时自动记账与待确认仅针对清单内名称。 | +| 字段名 | 类型 | 必填 | 说明 | +| ----------------------- | ---------- | --- | ----------------------------------------------------------------------------------------- | +| `surgery_id` | `string` | 是 | 手术 6 位号,必须为 6 位数字。 | +| `camera_ids` | `string[]` | 是 | 摄像头 ID 列表,至少 1 个元素;须与服务端配置的 RTSP 映射键一致(示例见 `app/resources/camera_rtsp_urls.sample.json`)。 | +| `candidate_consumables` | `string[]` | 否 | 本台手术允许记账的耗材名称清单。**为空或未传则不会写入任何消耗**(仅拉流推理);非空时自动记账与待确认仅针对清单内名称。 | + **说明:** 若该 `surgery_id` 在服务端仍存在**尚未写入数据库**的上一台手术内存归档,开始新会话前会先尝试落库;落库失败则返回 **503**(`RECORDING_CANNOT_START`),避免静默丢失数据。 - **请求示例:** ```json @@ -223,7 +223,7 @@ Host: <主机>:<端口> | `quantity` | `integer` | 是 | 本条记录对应的消耗数量(非负整数)。 | | `doctor_id` | `string` | 是 | 医生 ID。 | | `timestamp` | `string` | 是 | 记录时间,**ISO 8601**(JSON 中为 ISO 格式字符串,与 OpenAPI 中 `date-time` 一致)。 | -| `source` | `string` | 否 | `vision` 自动识别;`voice` 医生通过待确认接口确认。 | +| `source` | `string` | 否 | `vision` 自动识别;`voice` 医生通过待确认接口确认。 | `**summary[]` 中每一项(汇总行):** @@ -285,10 +285,12 @@ Host: <主机>:<端口> **失败(HTTP 404):** 无待确认项或手术未在进行。`detail.code` 示例:`NO_PENDING_CONFIRMATION`。 -| 项目 | 内容 | -| --- | --- | -| 方法 | `GET` | -| 路径 | `/client/surgeries/{surgery_id}/pending-confirmation` | + +| 项目 | 内容 | +| --- | ----------------------------------------------------- | +| 方法 | `GET` | +| 路径 | `/client/surgeries/{surgery_id}/pending-confirmation` | + **响应字段(节选):** `confirmation_id`、`prompt_text`、`options[]`(`label` + `confidence`)、`model_top1_label`、`model_top1_confidence`、`created_at`。 @@ -298,17 +300,21 @@ Host: <主机>:<端口> **用途:** 客户端采集医生回答的 **WAV 音频**并上传;服务端将音频存入 MinIO、调用百度 ASR 识别、解析 4.4 返回的候选项;**确认**则记一条 `source=voice` 的消耗明细,**否认**则关闭该待确认项且不记账。 -| 项目 | 内容 | -| --- | --- | -| 方法 | `POST` | -| 路径 | `/client/surgeries/{surgery_id}/pending-confirmation/{confirmation_id}/resolve` | -| `Content-Type` | `multipart/form-data` | + +| 项目 | 内容 | +| -------------- | ------------------------------------------------------------------------------- | +| 方法 | `POST` | +| 路径 | `/client/surgeries/{surgery_id}/pending-confirmation/{confirmation_id}/resolve` | +| `Content-Type` | `multipart/form-data` | + **请求体(multipart):** -| 字段 | 类型 | 必填 | 说明 | -| --- | --- | --- | --- | -| `audio` | 文件 | 是 | 医生语音 **`.wav`**;建议 16 kHz 单声道 PCM,其他格式服务端可尝试用 ffmpeg 转码。 | + +| 字段 | 类型 | 必填 | 说明 | +| ------- | --- | --- | -------------------------------------------------------- | +| `audio` | 文件 | 是 | 医生语音 `**.wav`**;建议 16 kHz 单声道 PCM,其他格式服务端可尝试用 ffmpeg 转码。 | + **成功响应(HTTP 200):** `SurgeryPendingConfirmationResolveResponse`:`resolved_label`、`rejected`、`asr_text`、`audio_object_key` 等(与 OpenAPI 一致)。 @@ -367,7 +373,7 @@ Host: <主机>:<端口> | 版本 | 日期 | 说明 | | --- | ---------- | ---------------------------------------------------------------- | -| 1.6 | 2026-04-21 | 待确认耗材接口;候选清单硬约束;查询结果需至少一条明细;客户端侧人工确认。 | +| 1.6 | 2026-04-21 | 待确认耗材接口;候选清单硬约束;查询结果需至少一条明细;客户端侧人工确认。 | | 1.5 | 2026-04-21 | 开始/结束手术:录制流水线失败时重试,仍失败再 503;可配置 `SURGERY_RECORDING_`*。 | | 1.4 | 2026-04-21 | 与 OpenAPI 对齐:开始/结束/查询的 200/503 条件及 `SurgeryClientErrorResponse`。 | | 1.3 | 2026-04-21 | 结束手术:仅在实际停录确认后返回 HTTP 200;否则 503。 | diff --git a/scripts/baidu_face_1n_search.py b/scripts/baidu_face_1n_search.py new file mode 100644 index 0000000..ad31c8d --- /dev/null +++ b/scripts/baidu_face_1n_search.py @@ -0,0 +1,291 @@ +#!/usr/bin/env python3 +"""百度智能云人脸 1:N 搜索(独立脚本,不接入本仓库 FastAPI)。 + +对应官方文档:人脸 1:N 搜索 — https://cloud.baidu.com/doc/FACE/s/Gk37c1uzc +接口:POST https://aip.baidubce.com/rest/2.0/face/v3/search + +前置条件(本脚本不负责「建库 / 注册人脸」): + - 在控制台创建应用并开通「人脸识别」相关接口权限; + - 已使用人脸库管理 API 或控制台建立用户组,并向库中注册用户与人脸照片; + - 否则搜索会失败(例如未找到匹配用户、人脸库为空等)。人脸库管理说明见产品文档「人脸库管理」章节。 + +配置从环境变量读取;启动时会从**仓库根目录**下的 `.env` 与**当前工作目录**下的 `.env` 加载(需已安装 `python-dotenv`,随 pydantic-settings 提供)。 + +主要环境变量(详见仓库 `.env.example` 中 Baidu Face 节): + BAIDU_FACE_APP_ID、BAIDU_FACE_API_KEY、BAIDU_FACE_SECRET_KEY(必填) + BAIDU_FACE_GROUP_ID_LIST(与命令行 --groups 二选一;格式以百度人脸库文档为准,非法值由接口返回错误码) + +用法示例(输入为**文件夹**,遍历其下所有支持的图片并打印识别日志): + + uv run python scripts/baidu_face_1n_search.py /path/to/photos + +支持格式:PNG、JPG、JPEG、BMP(单张 base64 建议 <2M,分辨率 <1920x1080,以官方文档为准)。 +""" + +from __future__ import annotations + +import argparse +import base64 +import json +import os +import sys +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + +from aip import AipFace +from dotenv import load_dotenv + +_PROJECT_ROOT = Path(__file__).resolve().parent.parent +_IMAGE_SUFFIXES = {".png", ".jpg", ".jpeg", ".bmp"} + + +def _validate_group_id_list(s: str) -> None: + """仅校验列表非空与数量上限;group_id 字符集等由百度接口校验。""" + parts = [p.strip() for p in s.split(",") if p.strip()] + if not parts: + print("错误:group_id_list 解析后为空。", file=sys.stderr) + sys.exit(2) + if len(parts) > 10: + print("错误:group_id 最多 10 个(逗号分隔)。", file=sys.stderr) + sys.exit(2) + + +def _load_dotenv_files() -> None: + load_dotenv(_PROJECT_ROOT / ".env") + load_dotenv() + + +def _env(name: str) -> str: + return (os.environ.get(name) or "").strip() + + +def _env_int(name: str, default: int) -> int: + v = _env(name) + if v.isdigit() or (v.startswith("-") and v[1:].isdigit()): + return int(v) + return default + + +def _face_client() -> AipFace: + app_id = _env("BAIDU_FACE_APP_ID") + api_key = _env("BAIDU_FACE_API_KEY") + secret = _env("BAIDU_FACE_SECRET_KEY") + if not app_id or not api_key or not secret: + print( + "错误:未配置百度人脸凭据。\n" + "请在 `.env` 或环境中设置:BAIDU_FACE_APP_ID、BAIDU_FACE_API_KEY、" + "BAIDU_FACE_SECRET_KEY\n" + "(参考仓库 `.env.example` 中 Baidu Face 节;与语音 BAIDU_SPEECH_* 可为不同应用。)", + file=sys.stderr, + ) + sys.exit(2) + client = AipFace(app_id, api_key, secret) + conn_ms = _env("BAIDU_FACE_CONNECTION_TIMEOUT_MS") + sock_ms = _env("BAIDU_FACE_SOCKET_TIMEOUT_MS") + if conn_ms.isdigit(): + client.setConnectionTimeoutInMillis(int(conn_ms)) + if sock_ms.isdigit(): + client.setSocketTimeoutInMillis(int(sock_ms)) + return client + + +def _read_image_base64(path: Path) -> str: + if not path.is_file(): + raise FileNotFoundError(str(path)) + raw = path.read_bytes() + if not raw: + raise ValueError("empty file") + return base64.b64encode(raw).decode("ascii") + + +def _ts() -> str: + return datetime.now(timezone.utc).astimezone().isoformat(timespec="seconds") + + +def _list_image_files(folder: Path, *, recursive: bool) -> list[Path]: + if not folder.is_dir(): + print(f"错误:不是有效文件夹:{folder}", file=sys.stderr) + sys.exit(2) + if recursive: + out: list[Path] = [] + for p in folder.rglob("*"): + if p.is_file() and p.suffix.lower() in _IMAGE_SUFFIXES: + out.append(p) + else: + out = [ + p + for p in folder.iterdir() + if p.is_file() and p.suffix.lower() in _IMAGE_SUFFIXES + ] + return sorted(out, key=lambda p: p.name.lower()) + + +def _search_options_from_env_and_args(args: argparse.Namespace) -> dict[str, Any]: + qc = _env("BAIDU_FACE_QUALITY_CONTROL") or "NONE" + lc = _env("BAIDU_FACE_LIVENESS_CONTROL") or "NONE" + if args.quality_control is not None: + qc = args.quality_control + if args.liveness_control is not None: + lc = args.liveness_control + max_n = _env_int("BAIDU_FACE_MAX_USER_NUM", 1) if args.max_user_num is None else args.max_user_num + match_th = _env_int("BAIDU_FACE_MATCH_THRESHOLD", 80) if args.match_threshold is None else args.match_threshold + return { + "max_user_num": max(1, min(50, max_n)), + "match_threshold": max(0, min(100, match_th)), + "quality_control": qc, + "liveness_control": lc, + } + + +def _resolve_group_id_list(args: argparse.Namespace) -> str: + g = (args.group_id_list or "").strip() or _env("BAIDU_FACE_GROUP_ID_LIST") + if not g: + print( + "错误:未指定人脸组。\n" + "请设置环境变量 BAIDU_FACE_GROUP_ID_LIST,或传入命令行:--groups a,b", + file=sys.stderr, + ) + sys.exit(2) + return g + + +def _parse_args() -> argparse.Namespace: + p = argparse.ArgumentParser( + description="百度人脸 1:N 搜索:在指定人脸库组中,对文件夹内每张照片做相似度检索并打印识别日志。" + ) + p.add_argument( + "folder", + type=Path, + help="含照片的文件夹路径(仅处理 PNG/JPG/JPEG/BMP)", + ) + p.add_argument( + "--groups", + dest="group_id_list", + default=None, + help="人脸组 id,逗号分隔,最多 10 个;未传时使用环境变量 BAIDU_FACE_GROUP_ID_LIST", + ) + p.add_argument( + "--max-user-num", + type=int, + default=None, + help="覆盖环境变量 BAIDU_FACE_MAX_USER_NUM;返回前 N 个最相似用户(1–50)", + ) + p.add_argument( + "--match-threshold", + type=int, + default=None, + help="覆盖环境变量 BAIDU_FACE_MATCH_THRESHOLD;0–100,默认 80", + ) + p.add_argument( + "--quality-control", + choices=("NONE", "LOW", "NORMAL", "HIGH"), + default=None, + help="覆盖环境变量 BAIDU_FACE_QUALITY_CONTROL;默认 NONE", + ) + p.add_argument( + "--liveness-control", + choices=("NONE", "LOW", "NORMAL", "HIGH"), + default=None, + help="覆盖环境变量 BAIDU_FACE_LIVENESS_CONTROL;默认 NONE", + ) + p.add_argument( + "--recursive", + action="store_true", + help="递归包含子目录中的图片", + ) + p.add_argument( + "--json", + action="store_true", + help="每张照片输出一行 JSON(file + API 原样响应),便于脚本解析", + ) + return p.parse_args() + + +def main() -> None: + _load_dotenv_files() + args = _parse_args() + folder = args.folder.resolve() + group_id_list = _resolve_group_id_list(args) + _validate_group_id_list(group_id_list) + options = _search_options_from_env_and_args(args) + + _group_log = f"[{_ts()}] 使用 group_id_list={group_id_list!r}" + if args.json: + print(_group_log, file=sys.stderr) + else: + print(_group_log) + + files = _list_image_files(folder, recursive=args.recursive) + if not files: + print( + f"[{_ts()}] 文件夹内未找到支持格式的图片:{folder}({', '.join(sorted(_IMAGE_SUFFIXES))};可加 --recursive)", + file=sys.stderr, + ) + sys.exit(2) + + client = _face_client() + n = len(files) + any_error = False + + for i, path in enumerate(files, start=1): + rel = path.name + try: + b64 = _read_image_base64(path) + except (OSError, ValueError) as e: + any_error = True + print( + f"[{_ts()}] [{i}/{n}] 文件 {rel!r} 读取失败:{e}", + file=sys.stderr, + ) + continue + + resp = client.search(b64, "BASE64", group_id_list, options) + + if args.json: + line = { + "file": str(path), + "relpath": rel, + "index": i, + "total": n, + "response": resp, + } + print(json.dumps(line, ensure_ascii=False)) + if resp.get("error_code", -1) != 0: + any_error = True + continue + + err = resp.get("error_code") + if err != 0: + any_error = True + msg = resp.get("error_msg", "") + print( + f"[{_ts()}] [{i}/{n}] {rel!r} 识别失败 error_code={err} error_msg={msg!r}" + ) + continue + + result = resp.get("result") or {} + users = result.get("user_list") or [] + if not users: + print( + f"[{_ts()}] [{i}/{n}] {rel!r} 无匹配用户 user_list 为空(可检查人脸库或调低匹配阈值)" + ) + continue + + for r, u in enumerate(users, start=1): + gid = u.get("group_id", "") + uid = u.get("user_id", "") + info = u.get("user_info", "") + score = u.get("score", "") + tag = f" [{r}]" if len(users) > 1 else "" + print( + f"[{_ts()}] [{i}/{n}] {rel!r} 识别成功{tag} group_id={gid!r} " + f"user_id={uid!r} user_info={info!r} score={score}" + ) + + if any_error: + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/scripts/demo_client/index.html b/scripts/demo_client/index.html index 2e3d9b2..13e3650 100644 --- a/scripts/demo_client/index.html +++ b/scripts/demo_client/index.html @@ -698,9 +698,8 @@ { key: "timestamp", label: "time" }, { key: "item_id", label: "item_id" }, { key: "item_name", label: "item_name" }, - { key: "quantity", label: "qty" }, + { key: "qty", label: "qty" }, { key: "doctor_id", label: "doctor" }, - { key: "source", label: "source" }, ]); renderTable("汇总 summary[]", summary, [ { key: "item_id", label: "item_id" }, @@ -710,10 +709,50 @@ }; // ============================================================ - // §4.4 pending-confirmation + 可选 TTS + // §4.4 pending-confirmation(响应内带 Base64 MP3)+ 可选自动播报 // ============================================================ let pollTimer = null; - let lastTtsConfirmationId = null; + /** 仅在一次成功播出音频/TTS 后更新,避免未播成功却跳过 */ + let lastSpokenConfirmationId = null; + let lastPendingPayload = null; + + /** 方案1:首次用户手势内播放极短静音,解锁自动播放;之后待确认 MP3 复用同一 Audio */ + const SILENT_UNLOCK_DATA_URL = + "data:audio/wav;base64,UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAE="; + let sharedPromptAudio = null; + let audioPlaybackUnlocked = false; + let lastPromptBlobUrl = null; + + function getSharedPromptAudio() { + if (!sharedPromptAudio) { + sharedPromptAudio = new Audio(); + sharedPromptAudio.preload = "auto"; + sharedPromptAudio.volume = 1; + } + return sharedPromptAudio; + } + + document.addEventListener( + "pointerdown", + async () => { + if (audioPlaybackUnlocked) return; + try { + const a = getSharedPromptAudio(); + if (lastPromptBlobUrl) { + URL.revokeObjectURL(lastPromptBlobUrl); + lastPromptBlobUrl = null; + } + a.src = SILENT_UNLOCK_DATA_URL; + await a.play(); + a.pause(); + a.currentTime = 0; + audioPlaybackUnlocked = true; + } catch (e) { + console.warn("[demo-client] 音频自动播放未解锁(可点「播放话术」)", e); + } + }, + { once: true, capture: true, passive: true }, + ); function pickZhTtsVoice() { if (!window.speechSynthesis) return null; @@ -747,40 +786,63 @@ }); } - /** 优先 GET /prompt-audio 播放百度 MP3,失败时 speechSynthesis */ - async function playPromptTts(surgeryId, confirmationId, textFallback) { - const path = `/client/surgeries/${surgeryId}/pending-confirmation/${encodeURIComponent(confirmationId)}/prompt-audio`; - const u = baseUrl() + path; - try { - const res = await fetch(u); - if (res.ok) { - const blob = await res.blob(); + /** 解码 GET pending 的 prompt_audio_mp3_base64;优先用解锁后的单例 Audio;失败则回退 speechSynthesis */ + async function playPromptAudioBase64(b64, textFallback) { + const t = (textFallback || "").trim(); + const raw = typeof b64 === "string" ? b64.replace(/\s+/g, "") : ""; + if (raw) { + try { + const bin = atob(raw); + const bytes = new Uint8Array(bin.length); + for (let i = 0; i < bin.length; i++) bytes[i] = bin.charCodeAt(i); + const blob = new Blob([bytes], { type: "audio/mpeg" }); const o = URL.createObjectURL(blob); - return new Promise((resolve, reject) => { - const a = new Audio(); - a.preload = "auto"; - a.src = o; - a.onended = () => { + const a = getSharedPromptAudio(); + if (lastPromptBlobUrl) { + URL.revokeObjectURL(lastPromptBlobUrl); + lastPromptBlobUrl = null; + } + lastPromptBlobUrl = o; + a.pause(); + a.currentTime = 0; + a.src = o; + try { + await new Promise((resolve, reject) => { + const cleanupBlob = () => { + if (lastPromptBlobUrl === o) { + URL.revokeObjectURL(o); + lastPromptBlobUrl = null; + } + }; + a.onended = () => { + cleanupBlob(); + resolve(); + }; + a.onerror = () => { + cleanupBlob(); + reject(new Error("Audio 元素解码/播放失败")); + }; + const p = a.play(); + if (p && typeof p.catch === "function") { + p.catch((err) => { + cleanupBlob(); + reject(err); + }); + } + }); + return; + } catch (playErr) { + if (lastPromptBlobUrl === o) { URL.revokeObjectURL(o); - resolve(); - }; - a.onerror = () => { - URL.revokeObjectURL(o); - reject(new Error("Audio 元素播放失败")); - }; - const p = a.play(); - if (p && typeof p.catch === "function") { - p.catch((err) => { - URL.revokeObjectURL(o); - reject(err); - }); + lastPromptBlobUrl = null; } - }); + console.warn("[demo-client] MP3 play() 被拒或失败,尝试浏览器朗读", playErr); + } + } catch (e) { + console.warn("[demo-client] Base64 MP3 解码失败,尝试浏览器朗读", e); } - } catch (e) { - console.warn("[demo-client] prompt-audio 不可用,回退浏览器 TTS", e); } - return speakTextPromise((textFallback || "").trim()); + if (t) await speakTextPromise(t); } if (window.speechSynthesis) { @@ -788,9 +850,22 @@ } $("surgery-id").addEventListener("input", () => { - lastTtsConfirmationId = null; + lastSpokenConfirmationId = null; + lastPendingPayload = null; }); + async function playLastPendingManually() { + const p = lastPendingPayload; + if (!p || !p.confirmation_id) return; + const pt = (p.prompt_text || "").trim(); + try { + await playPromptAudioBase64(p.prompt_audio_mp3_base64, pt); + lastSpokenConfirmationId = p.confirmation_id; + } catch (e) { + console.warn("[demo-client] 手动播放失败", e); + } + } + async function fetchPendingOnce() { const sid = surgeryId(); if (!/^\d{6}$/.test(sid)) return; @@ -818,6 +893,7 @@ const box = $("pending-render"); if (res.status === 200 && body && body.confirmation_id) { box.hidden = false; + lastPendingPayload = body; $("confirmation-id").value = body.confirmation_id; const opts = (body.options || []) .map(o => `
${o.label}${(o.confidence * 100).toFixed(1)}%
`) @@ -826,14 +902,27 @@
confirmation_id: ${body.confirmation_id}
prompt_text: ${body.prompt_text || ""}
Top1: ${body.model_top1_label} (${(body.model_top1_confidence * 100).toFixed(1)}%)
-
options:${opts || '
(无)
'}
`; +
options:${opts || '
(无)
'}
+
+ + 首次在页面任意处点按可解锁自动播报;仍失败时点此处 +
`; + const btnPlay = $("btn-play-pending"); + if (btnPlay) btnPlay.onclick = () => void playLastPendingManually(); const pt = (body.prompt_text || "").trim(); const ttsOn = $("tts-pending") && $("tts-pending").checked; - if (ttsOn && pt && body.confirmation_id !== lastTtsConfirmationId) { - lastTtsConfirmationId = body.confirmation_id; - void playPromptTts(sid, body.confirmation_id, pt).catch((e) => console.warn(e)); + if (ttsOn && pt && body.confirmation_id !== lastSpokenConfirmationId) { + void (async () => { + try { + await playPromptAudioBase64(body.prompt_audio_mp3_base64, pt); + lastSpokenConfirmationId = body.confirmation_id; + } catch (e) { + console.warn("[demo-client] 自动播报未完成(可点「播放话术」)", e); + } + })(); } } else if (res.status === 404) { + lastPendingPayload = null; box.hidden = false; box.innerHTML = '暂无待确认项。'; } else { @@ -1020,7 +1109,7 @@ $("btn-resolve").disabled = true; $("audio-preview").hidden = true; $("btn-download").style.display = "none"; - lastTtsConfirmationId = null; + lastSpokenConfirmationId = null; $("rec-info").textContent = "已提交,正在拉取下一条待确认…"; $("rec-info").className = "ok small"; await fetchPendingOnce(); diff --git a/tests/faces/图片_20260423151100_350_42.png b/tests/faces/图片_20260423151100_350_42.png new file mode 100644 index 0000000..0cf5128 Binary files /dev/null and b/tests/faces/图片_20260423151100_350_42.png differ diff --git a/tests/test_api_contract.py b/tests/test_api_contract.py index f1ef8c3..00388b8 100644 --- a/tests/test_api_contract.py +++ b/tests/test_api_contract.py @@ -115,10 +115,9 @@ def test_get_result_200(api_app: FastAPI) -> None: SurgeryConsumptionDetail( item_id="纱布", item_name="纱布", - quantity=1, + qty=1, doctor_id="vision", timestamp=ts, - source="vision", ), ] ) @@ -129,6 +128,13 @@ def test_get_result_200(api_app: FastAPI) -> None: body = r.json() assert body["surgery_id"] == "123456" assert len(body["details"]) == 1 + assert list(body["details"][0].keys()) == [ + "item_id", + "item_name", + "qty", + "doctor_id", + "timestamp", + ] assert body["summary"][0]["total_quantity"] == 1 @@ -148,21 +154,24 @@ def test_pending_confirmation_200_and_404(api_app: FastAPI) -> None: surgery_id="123456", confirmation_id="cid", prompt_text="请确认", + prompt_audio_mp3_base64="//uQ", options=[], model_top1_label="x", model_top1_confidence=0.4, created_at=ts, ) pipeline_ok = MagicMock() - pipeline_ok.get_pending_confirmation_for_client = MagicMock(return_value=payload) + pipeline_ok.get_pending_confirmation_for_client = AsyncMock(return_value=payload) api_app.dependency_overrides[get_surgery_pipeline] = lambda: pipeline_ok client = TestClient(api_app) r = client.get("/client/surgeries/123456/pending-confirmation") assert r.status_code == 200 - assert r.json()["confirmation_id"] == "cid" + body_ok = r.json() + assert body_ok["confirmation_id"] == "cid" + assert body_ok["prompt_audio_mp3_base64"] == "//uQ" pipeline_none = MagicMock() - pipeline_none.get_pending_confirmation_for_client = MagicMock(return_value=None) + pipeline_none.get_pending_confirmation_for_client = AsyncMock(return_value=None) api_app.dependency_overrides[get_surgery_pipeline] = lambda: pipeline_none client2 = TestClient(api_app) r2 = client2.get("/client/surgeries/123456/pending-confirmation") @@ -193,60 +202,6 @@ def test_resolve_non_wav_422(api_app: FastAPI) -> None: assert r.status_code == 422 -def test_prompt_audio_200(api_app: FastAPI) -> None: - pipeline = MagicMock() - pipeline.get_pending_prompt_audio_mp3 = AsyncMock(return_value=b"\xff\xfb\x90") - api_app.dependency_overrides[get_surgery_pipeline] = lambda: pipeline - client = TestClient(api_app) - r = client.get("/client/surgeries/123456/pending-confirmation/cid1/prompt-audio") - assert r.status_code == 200 - assert r.content == b"\xff\xfb\x90" - assert "mpeg" in (r.headers.get("content-type") or "") - pipeline.get_pending_prompt_audio_mp3.assert_awaited_once_with( - surgery_id="123456", - confirmation_id="cid1", - ) - - -def test_resolve_text_200(api_app: FastAPI) -> None: - pipeline = MagicMock() - pipeline.resolve_pending_confirmation_from_client_text = AsyncMock( - return_value=VoiceResolveResult( - resolved_label="纱布", - rejected=False, - asr_text="第一个", - audio_object_key=None, - message="ok", - ) - ) - api_app.dependency_overrides[get_surgery_pipeline] = lambda: pipeline - client = TestClient(api_app) - r = client.post( - "/client/surgeries/123456/pending-confirmation/cid/resolve-text", - json={"recognized_text": "第一个"}, - ) - assert r.status_code == 200 - body = r.json() - assert body["resolved_label"] == "纱布" - assert body["asr_text"] == "第一个" - pipeline.resolve_pending_confirmation_from_client_text.assert_awaited_once() - - -def test_resolve_text_maps_surgery_error(api_app: FastAPI) -> None: - pipeline = MagicMock() - pipeline.resolve_pending_confirmation_from_client_text = AsyncMock( - side_effect=SurgeryPipelineError("VOICE_PARSE_FAILED", "无法匹配") - ) - api_app.dependency_overrides[get_surgery_pipeline] = lambda: pipeline - client = TestClient(api_app) - r = client.post( - "/client/surgeries/123456/pending-confirmation/cid/resolve-text", - json={"recognized_text": "随便说说"}, - ) - assert r.status_code == 422 - assert r.json()["detail"]["code"] == "VOICE_PARSE_FAILED" - - def test_resolve_200(api_app: FastAPI) -> None: pipeline = MagicMock() pipeline.resolve_pending_confirmation_from_audio = AsyncMock( @@ -284,47 +239,3 @@ def test_resolve_maps_surgery_pipeline_error_to_http(api_app: FastAPI) -> None: ) assert r.status_code == 404 assert r.json()["detail"]["code"] == "CONFIRMATION_NOT_FOUND" - - -def test_internal_voice_status_404_and_200(api_app: FastAPI) -> None: - p_none = MagicMock() - p_none.voice_status = MagicMock(return_value=None) - api_app.dependency_overrides[get_surgery_pipeline] = lambda: p_none - client = TestClient(api_app) - r = client.get("/internal/surgeries/123456/voice-status") - assert r.status_code == 404 - - p_ok = MagicMock() - p_ok.voice_status = MagicMock( - return_value={ - "voice_enabled": True, - "pending_queue_approx": 2, - "last_prompt_snippet": "hi", - "last_asr_text": "纱布", - "last_error": None, - } - ) - api_app.dependency_overrides[get_surgery_pipeline] = lambda: p_ok - client2 = TestClient(api_app) - r2 = client2.get("/internal/surgeries/123456/voice-status") - assert r2.status_code == 200 - assert r2.json()["pending_queue_approx"] == 2 - - -def test_internal_voice_audits_200_empty(api_app: FastAPI) -> None: - pipeline = MagicMock() - pipeline.list_voice_audits = AsyncMock(return_value=([], 0)) - api_app.dependency_overrides[get_surgery_pipeline] = lambda: pipeline - client = TestClient(api_app) - r = client.get( - "/internal/surgeries/123456/voice-audits", - params={"limit": 1, "offset": 0}, - ) - assert r.status_code == 200 - j = r.json() - assert j["surgery_id"] == "123456" - assert j["total"] == 0 - assert j["limit"] == 1 - assert j["offset"] == 0 - assert j["items"] == [] - pipeline.list_voice_audits.assert_awaited_once_with("123456", limit=1, offset=0) diff --git a/tests/test_consumption_tsv_log.py b/tests/test_consumption_tsv_log.py index 8a8ce2d..25b0988 100644 --- a/tests/test_consumption_tsv_log.py +++ b/tests/test_consumption_tsv_log.py @@ -6,11 +6,14 @@ from app.config import settings from app.services.consumable_vision_algorithm import ClsTop3 from app.services.consumption_tsv_log import ( HEADER, + SUMMARY_HEADER, _RANGE_SEP, + append_consumption_log_summary, append_consumption_tsv_line, build_consumption_markdown, build_tsv_line, init_consumption_log_file, + resolve_consumption_item_id, short_camera_label, ) @@ -44,25 +47,27 @@ def test_build_tsv_line_matches_sample_shape(monkeypatch: pytest.MonkeyPatch) -> wall_end_epoch=w0 + 45.0, ) parts = line.rstrip("\n").split("\t") - assert len(parts) == 7 + assert len(parts) == 5 assert parts[0] == "2237844" - assert parts[1] == "一次性医用灭菌棉签 0.9997" - assert parts[2] == "cls2" - assert parts[3] == "cls3" - assert parts[4] == "1" - assert parts[5] == "DOCTOR_PLACEHOLDER" + assert parts[1] == "一次性医用灭菌棉签" + assert parts[2] == "1" + assert parts[3] == "DOCTOR_PLACEHOLDER" assert ( - parts[6] + parts[4] == "cam01@2024-01-01T00:00:00.000+00:00" + _RANGE_SEP + "2024-01-01T00:00:45.000+00:00" ) +def test_resolve_consumption_item_id_uses_normalized_catalog_key() -> None: + name_to_code = {"一次性使用手术单(一次性医用垫单)": "PID-900"} + assert resolve_consumption_item_id("一次性医用垫单", "", name_to_code) == "PID-900" + + def test_header_columns() -> None: cols = HEADER.strip().split("\t") - assert cols[0] == "物品id" - assert cols[-1] == "时间戳" + assert cols == ["item_id", "item_name", "qty", "doctor_id", "timestamp"] def test_per_surgery_file_init_and_append( @@ -84,6 +89,31 @@ def test_per_surgery_file_init_and_append( assert p.read_text(encoding="utf-8") == HEADER +def test_append_consumption_log_summary_appends_three_column_block( + tmp_path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr(settings, "consumption_tsv_log_enabled", True) + monkeypatch.setattr( + settings, + "consumption_tsv_log_path", + str(tmp_path / "{surgery_id}.txt"), + ) + init_consumption_log_file("s1") + append_consumption_tsv_line("s1", "x\n") + append_consumption_log_summary( + "s1", + {"A": ("nA", 2), "B": ("nB", 1)}, + ) + text = (tmp_path / "s1.txt").read_text(encoding="utf-8") + assert text.endswith( + "\n" + + SUMMARY_HEADER + + "A\tnA\t2\n" + + "B\tnB\t1\n" + ) + + def test_build_consumption_markdown_top123_columns(monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.setattr(settings, "consumption_log_timezone", "UTC") best = ClsTop3( @@ -106,15 +136,9 @@ def test_build_consumption_markdown_top123_columns(monkeypatch: pytest.MonkeyPat wall_start_epoch=w0, wall_end_epoch=w0 + 45.0, ) - assert "Top1 物品id" in md and "Top1 物品名称" in md and "Top1 置信度" in md - assert "Top2 物品名称" in md and "Top3 物品名称" in md - assert "Top2 物品id" not in md + assert "| item_id |" in md and "| item_name |" in md and "| qty |" in md assert "2237844" in md assert "一次性医用灭菌棉签" in md - assert "0.9997" in md - assert "cls2" in md and "cls3" in md - assert "11765-1-101" not in md and "21504-1-1" not in md - assert "0.0003" not in md and "0.0002" not in md assert "DOCTOR_PLACEHOLDER" in md assert "| 1 |" in md # 终端为可读时间戳,非落盘用 ISO@cam diff --git a/tests/test_surgery_pipeline_persistence.py b/tests/test_surgery_pipeline_persistence.py index e6f6aa7..c70dc30 100644 --- a/tests/test_surgery_pipeline_persistence.py +++ b/tests/test_surgery_pipeline_persistence.py @@ -11,7 +11,7 @@ from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker from app.config import Settings from app.repositories.surgery_results import SurgeryResultRepository -from app.schemas import SurgeryConsumptionDetail +from app.schemas import SurgeryConsumptionStored from app.services.surgery_pipeline import SurgeryPipeline from app.services.video.session_manager import ( ArchivedSurgery, @@ -53,10 +53,10 @@ async def test_stop_surgery_persists_final_result( ts = datetime(2026, 4, 21, 12, 0, tzinfo=timezone.utc) st = SurgerySessionState(candidate_consumables=["纱布"]) st.details.append( - SurgeryConsumptionDetail( + SurgeryConsumptionStored( item_id="纱布", item_name="纱布", - quantity=1, + qty=1, doctor_id="vision", timestamp=ts, source="vision", @@ -107,10 +107,10 @@ async def test_stop_surgery_failed_persist_goes_to_archive_then_retry_persists( ts = datetime(2026, 4, 21, 12, 0, tzinfo=timezone.utc) st = SurgerySessionState(candidate_consumables=[]) st.details.append( - SurgeryConsumptionDetail( + SurgeryConsumptionStored( item_id="缝线", item_name="缝线", - quantity=1, + qty=1, doctor_id="vision", timestamp=ts, source="vision", @@ -161,10 +161,10 @@ async def test_pipeline_prefers_live_then_db_then_archive( ts = datetime(2026, 4, 21, 12, 0, tzinfo=timezone.utc) st = SurgerySessionState(candidate_consumables=["纱布"]) st.details.append( - SurgeryConsumptionDetail( + SurgeryConsumptionStored( item_id="纱布", item_name="纱布", - quantity=1, + qty=1, doctor_id="vision", timestamp=ts, source="vision", @@ -188,10 +188,10 @@ async def test_pipeline_prefers_live_then_db_then_archive( mgr._archive["333333"] = ArchivedSurgery( details=[ - SurgeryConsumptionDetail( + SurgeryConsumptionStored( item_id="归档项", item_name="归档项", - quantity=1, + qty=1, doctor_id="vision", timestamp=ts, source="vision", diff --git a/tests/test_surgery_repository.py b/tests/test_surgery_repository.py index 8c65952..f34dac1 100644 --- a/tests/test_surgery_repository.py +++ b/tests/test_surgery_repository.py @@ -3,12 +3,14 @@ from __future__ import annotations from datetime import datetime, timezone import pytest +from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine import app.db.models # noqa: F401 from app.db.base import Base +from app.db.models import SurgeryResultDetailRow from app.repositories.surgery_results import SurgeryResultRepository -from app.schemas import SurgeryConsumptionDetail +from app.schemas import SurgeryConsumptionStored @pytest.fixture @@ -38,18 +40,18 @@ async def test_save_roundtrip(db_session: AsyncSession) -> None: repo = SurgeryResultRepository() ts = datetime(2026, 4, 21, 10, 0, tzinfo=timezone.utc) details = [ - SurgeryConsumptionDetail( + SurgeryConsumptionStored( item_id="纱布", item_name="纱布", - quantity=1, + qty=1, doctor_id="D1", timestamp=ts, source="vision", ), - SurgeryConsumptionDetail( + SurgeryConsumptionStored( item_id="纱布", item_name="纱布", - quantity=1, + qty=1, doctor_id="voice", timestamp=ts, source="voice", @@ -61,8 +63,17 @@ async def test_save_roundtrip(db_session: AsyncSession) -> None: loaded = await repo.load_final_details(db_session, "654321") assert loaded is not None assert len(loaded) == 2 - assert loaded[0].source == "vision" - assert loaded[1].source == "voice" + assert loaded[0].qty == 1 and loaded[0].item_id == "纱布" + assert loaded[1].qty == 1 + async with db_session.begin(): + res = await db_session.execute( + select(SurgeryResultDetailRow) + .where(SurgeryResultDetailRow.surgery_id == "654321") + .order_by(SurgeryResultDetailRow.id) + ) + orm_rows = res.scalars().all() + assert orm_rows[0].source == "vision" + assert orm_rows[1].source == "voice" @pytest.mark.asyncio @@ -83,10 +94,10 @@ async def test_save_overwrites_previous_final_result(db_session: AsyncSession) - db_session, surgery_id="888888", details=[ - SurgeryConsumptionDetail( + SurgeryConsumptionStored( item_id="旧", item_name="旧", - quantity=1, + qty=1, doctor_id="D1", timestamp=ts1, source="vision", @@ -98,10 +109,10 @@ async def test_save_overwrites_previous_final_result(db_session: AsyncSession) - db_session, surgery_id="888888", details=[ - SurgeryConsumptionDetail( + SurgeryConsumptionStored( item_id="新", item_name="新", - quantity=2, + qty=2, doctor_id="D2", timestamp=ts2, source="voice", @@ -113,5 +124,4 @@ async def test_save_overwrites_previous_final_result(db_session: AsyncSession) - assert loaded is not None assert len(loaded) == 1 assert loaded[0].item_id == "新" - assert loaded[0].quantity == 2 - assert loaded[0].source == "voice" + assert loaded[0].qty == 2