diff --git a/.env.example b/.env.example index 578e730..ac52e4f 100644 --- a/.env.example +++ b/.env.example @@ -63,13 +63,16 @@ CONSUMABLE_CLASSIFIER_TOPK=5 # VIDEO_INFERENCE_INTERVAL_SEC=2 # VIDEO_INFERENCE_CONFIDENCE_THRESHOLD=0.35 # 置信度 >= 此值且命中候选清单时自动 vision 记账。提高到 0.9 可减少自动记账、更多走待确认。 -# VIDEO_AUTO_CONFIRM_CONFIDENCE=0.55 -# 置信度处于 [VIDEO_VOICE_CONFIRM_MIN_CONFIDENCE, VIDEO_AUTO_CONFIRM_CONFIDENCE) 时入队待确认(客户端拉取 pending-confirmation)。 +# 默认 0.9:Top1 置信度不足该值时入队待确认;达到且标签在 candidate_consumables 内则直接记 vision。 +# VIDEO_AUTO_CONFIRM_CONFIDENCE=0.9 +# 与 VIDEO_VOICE_CONFIRM_MIN_CONFIDENCE 共同决定何时自动 / 待确认(见 app/config 注释)。 # VIDEO_VOICE_CONFIRM_MIN_CONFIDENCE=0.35 # 待确认话术由服务端生成(prompt_text),TTS 一般在客户端播放;医生 WAV 上传后服务端 ASR 解析。 # 解析顺序:① pending 里展示的 topk(序号/名称);② 仍不匹配时,对「开始手术」请求体中的 candidate_consumables 全文做名称子串匹配——医生报清单内其它耗材也以医生为准入账。 # 是否启用低置信度人工确认(客户端播报 + resolve 回传;服务端无麦克风/扬声器要求)。 # VOICE_CONFIRMATION_ENABLED=true +# 同一条待确认在语音/文本「解析失败」时累计的允许失败轮次(默认 2:首败后再给 1 次重试提示;见 422 的 detail.retry_remaining)。 +# VOICE_CONFIRM_MAX_FAILED_PARSE_ROUNDS=2 # VIDEO_VOICE_CONFIRM_DOCTOR_ID=voice # (已弃用)服务端本机录音 / ffmpeg 音频输入;当前闭环不依赖。 # VOICE_RECORD_SECONDS=5 @@ -79,6 +82,21 @@ CONSUMABLE_CLASSIFIER_TOPK=5 # VIDEO_DETAIL_COOLDOWN_SEC=15 # VIDEO_JPEG_QUALITY=85 # VIDEO_RESULT_DOCTOR_ID=vision +# 每次单帧分类得到 top1~3 时打一条 INFO(联调开;生产建议 false) +# VIDEO_LOG_INFERENCE_RESULTS=true +# 时间窗级消耗文本日志(制表符列;每例手术 start 时截断+表头,窗内结果追加;终端 Rich 为可读时间戳,文件内为 ISO 时间戳列) +# CONSUMPTION_TSV_LOG_ENABLED=true +# 须含 {surgery_id},如 logs/consumption_{surgery_id}.txt +# CONSUMPTION_TSV_LOG_PATH=logs/consumption_{surgery_id}.txt +# 同一时间窗结果在终端以 Markdown 表格打印(Top1~3 分列 id / 名称 / 置信度) +# CONSUMPTION_LOG_MARKDOWN_TERMINAL=true +# 消耗日志时间戳列的时区(IANA,如 Asia/Shanghai);不设置则用运行环境的系统时区 +# CONSUMPTION_LOG_TIMEZONE=Asia/Shanghai +# +# 语音确认:stderr 中可 grep 的 `VoiceConfirm ...` 行 + 每例手术 TSV(与 `start_surgery` 同次截断写表头;成功/ASR/解析失败均追加一行) +# VOICE_FILE_LOG_ENABLED=true +# 须含 {surgery_id},如 logs/voice_{surgery_id}.txt +# 
VOICE_FILE_LOG_PATH=logs/voice_{surgery_id}.txt # --- Hikvision: mount vendor Linux x86_64 .so at runtime (do not commit proprietary binaries) --- # HIKVISION_LIB_DIR=/opt/hikvision/lib @@ -102,3 +120,28 @@ CONSUMABLE_CLASSIFIER_TOPK=5 # BAIDU_SPEECH_SECRET_KEY= # BAIDU_SPEECH_CONNECTION_TIMEOUT_MS= # BAIDU_SPEECH_SOCKET_TIMEOUT_MS= +# 短语音识别模型:固定普通话(默认 1537;勿用 1737 英语等)。代码会始终带上此 dev_pid。 +# BAIDU_SPEECH_ASR_DEV_PID=1537 + +# --- MinIO(语音 WAV 存桶;`docker-compose.dev.yml` 内已含 `minio` 服务;本机只跑 API 时填 127.0.0.1:9000)--- +# docker compose -f docker-compose.dev.yml up -d minio +# MINIO_ENDPOINT=127.0.0.1:9000 +# MINIO_ACCESS_KEY=minioadmin +# MINIO_SECRET_KEY=minioadmin +# MINIO_BUCKET=operation-room-voice +# MINIO_SECURE=false +# optional: MINIO_REGION= + +# --- Demo 浏览器客户端 / 一键联调假 RTSP(仅开发;生产关)--- +# demo_client/index.html 跨源访问本服务 / 一键开录 +# DEMO_CORS_ENABLED=true +# DEMO_CORS_ORIGINS=* +# 为 true 时提供 POST /internal/demo/orchestrate-and-start;需能执行 docker+ffmpeg 的**同一进程**内起 MediaMTX(通常=在宿主机直接跑 main.py,或容器挂载 /var/run/docker.sock) +# DEMO_ORCHESTRATOR_ENABLED=false +# VIDEO_RTSP_URLS_JSON_FILE 必须设成**可写**的 JSON 文件;Docker 中请 bind-mount 宿主机文件,与一键覆盖写入的映射一致 +# DEMO_ORCHESTRATOR_RTSP_PORT=18554 +# 手配假流、只改 JSON 给「另一进程」用时:可把 127.0.0.1 换成 host.docker.internal 等。 +# 一键联调 orchestrate-and-start 在本进程起流+拉流,固定写 127.0.0.1,不读此项。 +# DEMO_ORCHESTRATOR_RTSP_JSON_HOST=host.docker.internal +# 一键起 MediaMTX 后,等待本机 RTSP 端口可连接的最长时间(秒) +# MEDIAMTX_TCP_READY_SEC=30 diff --git a/.gitignore b/.gitignore index 01e1070..77ca04e 100644 --- a/.gitignore +++ b/.gitignore @@ -12,11 +12,17 @@ wheels/ # Local reference / scratch content refs/ +# Runtime consumption TSV (开发联调) +logs/ + # Environment .env .env.* !.env.example +# Demo 一键联调写入的 RTSP 映射(可被覆盖) +scripts/demo_client/.runtime/ + # IDE / OS .idea/ .vscode/ diff --git a/Dockerfile b/Dockerfile index 8dce626..9efc32c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,6 +2,7 @@ FROM python:3.13-slim-bookworm # OpenCV (pulled in by ultralytics) links against X11 
client libs; slim images omit them. RUN apt-get update && apt-get install -y --no-install-recommends \ + docker.io \ ffmpeg \ libgl1 \ libglib2.0-0 \ diff --git a/app/api.py b/app/api.py index 28b6887..5ce7a64 100644 --- a/app/api.py +++ b/app/api.py @@ -2,7 +2,8 @@ import asyncio from collections.abc import Awaitable, Callable from typing import Annotated -from fastapi import APIRouter, Depends, File, HTTPException, Path, UploadFile, status +from fastapi import APIRouter, Depends, File, HTTPException, Path, Query, UploadFile, status +from fastapi.responses import Response from fastapi.responses import JSONResponse from loguru import logger from sqlalchemy.exc import SQLAlchemyError @@ -17,8 +18,11 @@ from app.schemas import ( SurgeryEndRequest, SurgeryPendingConfirmationResolveResponse, SurgeryPendingConfirmationResponse, + SurgeryPendingResolveTextRequest, SurgeryResultResponse, SurgeryStartRequest, + SurgeryVoiceAuditItem, + SurgeryVoiceAuditsListResponse, SurgeryVoiceStatusResponse, build_consumption_summary, ) @@ -28,14 +32,21 @@ from app.surgery_errors import SurgeryPipelineError router = APIRouter() +def _pipeline_error_detail(exc: SurgeryPipelineError, surgery_id: str) -> dict: + d: dict = { + "code": exc.code, + "message": exc.message, + "surgery_id": surgery_id, + } + if exc.extra: + d.update(exc.extra) + return d + + def _raise_surgery_pipeline_http(exc: SurgeryPipelineError, surgery_id: str) -> None: raise HTTPException( status_code=status.HTTP_503_SERVICE_UNAVAILABLE, - detail={ - "code": exc.code, - "message": exc.message, - "surgery_id": surgery_id, - }, + detail=_pipeline_error_detail(exc, surgery_id), ) from exc @@ -47,6 +58,9 @@ def _raise_confirmation_http(exc: SurgeryPipelineError, surgery_id: str) -> None "CONFIRMATION_INVALID": status.HTTP_422_UNPROCESSABLE_CONTENT, "VOICE_ASR_FAILED": status.HTTP_422_UNPROCESSABLE_CONTENT, "VOICE_PARSE_FAILED": status.HTTP_422_UNPROCESSABLE_CONTENT, + "VOICE_TEXT_EMPTY": status.HTTP_422_UNPROCESSABLE_CONTENT, 
+ "TTS_TEXT_EMPTY": status.HTTP_422_UNPROCESSABLE_CONTENT, + "TTS_ERROR": status.HTTP_503_SERVICE_UNAVAILABLE, "VOICE_AUDIO_INVALID": status.HTTP_422_UNPROCESSABLE_CONTENT, "MINIO_NOT_CONFIGURED": status.HTTP_503_SERVICE_UNAVAILABLE, "MINIO_UPLOAD_FAILED": status.HTTP_503_SERVICE_UNAVAILABLE, @@ -55,11 +69,7 @@ def _raise_confirmation_http(exc: SurgeryPipelineError, surgery_id: str) -> None st = status_map.get(exc.code, status.HTTP_500_INTERNAL_SERVER_ERROR) raise HTTPException( status_code=st, - detail={ - "code": exc.code, - "message": exc.message, - "surgery_id": surgery_id, - }, + detail=_pipeline_error_detail(exc, surgery_id), ) from exc @@ -110,6 +120,25 @@ async def health() -> HealthResponse | JSONResponse: return HealthResponse(status="ok", database="connected") +@router.get( + "/internal/demo/orchestrator-status", + tags=["demo"], + summary="一键联调接口是否可用", + description="供 demo 页探测:是否启用 orchestrator、RTSP 文件配置等;此路由始终存在,不依赖 DEMO_ORCHESTRATOR_ENABLED。", +) +async def demo_orchestrator_status() -> dict: + f = (settings.video_rtsp_urls_json_file or "").strip() + return { + "orchestrator_enabled": bool(settings.demo_orchestrator_enabled), + "orchestrate_method": "POST", + "orchestrate_path": "/internal/demo/orchestrate-and-start", + "video_rtsp_urls_json_file_set": bool(f), + "video_rtsp_urls_json_file": f or None, + "orchestrator_rtsp_port": settings.demo_orchestrator_rtsp_port, + "orchestrator_rtsp_json_host": settings.demo_orchestrator_rtsp_json_host, + } + + @router.post( "/client/surgeries/start", response_model=SurgeryApiResponse, @@ -380,6 +409,94 @@ async def resolve_pending_consumable_confirmation( ) +@router.post( + "/client/surgeries/{surgery_id}/pending-confirmation/{confirmation_id}/resolve-text", + response_model=SurgeryPendingConfirmationResolveResponse, + responses={ + status.HTTP_404_NOT_FOUND: {"model": SurgeryClientErrorResponse}, + status.HTTP_409_CONFLICT: {"model": SurgeryClientErrorResponse}, + status.HTTP_422_UNPROCESSABLE_CONTENT: 
{"model": SurgeryClientErrorResponse}, + }, + tags=["client"], + summary="提交客户端语音识别文本以确认耗材", + description=( + "由浏览器 Web Speech 等本机 STT 得到的文本,不做 MinIO/百度 ASR;" + "候选项解析与上传 WAV 接口一致。" + ), +) +async def resolve_pending_consumable_confirmation_text( + surgery_id: Annotated[ + str, + Path( + min_length=6, + max_length=6, + pattern=r"^\d{6}$", + description="手术 6 位号,仅允许 6 位数字。", + ), + ], + confirmation_id: Annotated[str, Path(min_length=1, max_length=128)], + body: SurgeryPendingResolveTextRequest, + pipeline: Annotated[SurgeryPipeline, Depends(get_surgery_pipeline)], +) -> SurgeryPendingConfirmationResolveResponse: + try: + result = await pipeline.resolve_pending_confirmation_from_client_text( + surgery_id=surgery_id, + confirmation_id=confirmation_id, + recognized_text=body.recognized_text, + ) + except SurgeryPipelineError as exc: + _raise_confirmation_http(exc, surgery_id) + return SurgeryPendingConfirmationResolveResponse( + surgery_id=surgery_id, + confirmation_id=confirmation_id, + status="accepted", + message=result.message, + resolved_label=result.resolved_label, + rejected=result.rejected, + asr_text=result.asr_text, + audio_object_key=result.audio_object_key, + ) + + +@router.get( + "/client/surgeries/{surgery_id}/pending-confirmation/{confirmation_id}/prompt-audio", + responses={ + status.HTTP_404_NOT_FOUND: {"model": SurgeryClientErrorResponse}, + status.HTTP_422_UNPROCESSABLE_CONTENT: {"model": SurgeryClientErrorResponse}, + status.HTTP_503_SERVICE_UNAVAILABLE: {"model": SurgeryClientErrorResponse}, + }, + tags=["client"], + summary="待确认话术的 TTS 音频(MP3)", + description="使用百度在线合成,与 prompt_text 一致;供浏览器 MediaElement 直放。未配置百度语音时返回 503。", + response_class=Response, +) +async def get_pending_prompt_audio_mpeg( + surgery_id: Annotated[ + str, + Path( + min_length=6, + max_length=6, + pattern=r"^\d{6}$", + description="手术 6 位号,仅允许 6 位数字。", + ), + ], + confirmation_id: Annotated[str, Path(min_length=1, max_length=128)], + pipeline: Annotated[SurgeryPipeline, 
Depends(get_surgery_pipeline)], +) -> Response: + try: + data = await pipeline.get_pending_prompt_audio_mp3( + surgery_id=surgery_id, + confirmation_id=confirmation_id, + ) + except SurgeryPipelineError as exc: + _raise_confirmation_http(exc, surgery_id) + return Response( + content=data, + media_type="audio/mpeg", + headers={"Cache-Control": "no-store"}, + ) + + @router.get( "/internal/surgeries/{surgery_id}/voice-status", response_model=SurgeryVoiceStatusResponse, @@ -417,3 +534,40 @@ async def get_surgery_voice_status( last_asr_text=payload.get("last_asr_text"), last_error=payload.get("last_error"), ) + + +@router.get( + "/internal/surgeries/{surgery_id}/voice-audits", + response_model=SurgeryVoiceAuditsListResponse, + tags=["internal"], + summary="语音确认审计记录(按手术号分页)", + description=( + "查询持久化表 `voice_confirmation_audits`:ASR 文本、解析结果、" + "候选项快照、MinIO 对象键、失败原因等。用于追溯、对账与报表;" + "不区分手术是否仍进行中,只要库里有记录即返回。" + ), +) +async def get_surgery_voice_audits( + surgery_id: Annotated[ + str, + Path( + min_length=6, + max_length=6, + pattern=r"^\d{6}$", + description="手术 6 位号,仅允许 6 位数字。", + ), + ], + pipeline: Annotated[SurgeryPipeline, Depends(get_surgery_pipeline)], + limit: Annotated[int, Query(ge=1, le=200, description="每页条数。")] = 50, + offset: Annotated[int, Query(ge=0, description="跳过前若干条,供分页。")] = 0, +) -> SurgeryVoiceAuditsListResponse: + rows, total = await pipeline.list_voice_audits( + surgery_id, limit=limit, offset=offset + ) + return SurgeryVoiceAuditsListResponse( + surgery_id=surgery_id, + total=total, + limit=limit, + offset=offset, + items=[SurgeryVoiceAuditItem.model_validate(r) for r in rows], + ) diff --git a/app/config.py b/app/config.py index cd1b5be..7f44d87 100644 --- a/app/config.py +++ b/app/config.py @@ -7,6 +7,9 @@ from pydantic import Field, field_validator from pydantic_settings import BaseSettings, SettingsConfigDict _PACKAGE_DIR = Path(__file__).resolve().parent +# 仓库根目录(含 .env)。用绝对路径读 .env,避免从子目录/IDE 启动时 cwd 不同导致联调项未生效。 +_REPO_ROOT = 
_PACKAGE_DIR.parent +_DEFAULT_ENV_FILE = _REPO_ROOT / ".env" def _default_consumable_classifier_weights() -> str: @@ -74,9 +77,9 @@ class Settings(BaseSettings): video_inference_confidence_threshold: float = Field( default=0.35, ge=0.0, le=1.0 ) - #: 达到或超过该置信度时,自动记一条耗材消耗(需通过候选清单校验)。 - video_auto_confirm_confidence: float = Field(default=0.55, ge=0.0, le=1.0) - #: 置信度处于 [本值, video_auto_confirm_confidence) 时尝试语音追问(需有可播报的 top 候选)。 + #: 达到或超过该置信度且 Top1 在候选内时,自动记一条 vision 消耗;低于该值时走待确认(不低于 video_voice_confirm_min 且可展示候选项时)。默认 0.9:不足 0.9 的需人工确认。 + video_auto_confirm_confidence: float = Field(default=0.9, ge=0.0, le=1.0) + #: 低于本值的帧不进入自动/待确认逻辑(与 `video_auto_confirm_confidence` 下沿之间的区间可入队待确认)。 video_voice_confirm_min_confidence: float = Field(default=0.35, ge=0.0, le=1.0) #: 是否启用低置信度时的人工确认(客户端拉取待确认项并回传结果;不依赖服务端麦克风/扬声器)。 voice_confirmation_enabled: bool = True @@ -96,6 +99,21 @@ class Settings(BaseSettings): video_jpeg_quality: int = Field(default=85, ge=40, le=100) #: 写入消耗明细时的 doctor_id(无外部医生 ID 来源时的占位)。 video_result_doctor_id: str = "vision" + #: 为 true 时,每次单帧分类得到 top1 等结果会打一条 INFO 日志(联调用;高流量时建议关)。 + video_log_inference_results: bool = False + #: 为 true 时,将时间窗级识别写入文本日志(`start_surgery` 时按手术截断/初始化,窗内结果追加;Top2/3 仅名称;数量恒 1)。 + consumption_tsv_log_enabled: bool = True + #: 路径模板,须含 `{surgery_id}`(每例手术独立文件)。不含占位时自动在扩展名前追加 `_{surgery_id}`。 + consumption_tsv_log_path: str = "logs/consumption_{surgery_id}.txt" + #: 为 true 时,同一时间窗结果在终端以 Markdown 表格打印(Top1~3 分列 id / 名称 / 置信度)。 + consumption_log_markdown_terminal: bool = True + #: 消耗日志「时间戳」列的时区,IANA 名如 `Asia/Shanghai`;空串则使用「当前系统时区」。 + consumption_log_timezone: str = "" + + #: 为 true 时,语音确认(WAV/文本)的 ASR/解析结果写 TSV 文件,并在终端打 `VoiceConfirm` 行;`start_surgery` 时与消耗日志同寿命截断初始化。 + voice_file_log_enabled: bool = True + #: 路径模板,须含 `{surgery_id}`,与 `consumption_tsv_log_path` 规则相同。 + voice_file_log_path: str = "logs/voice_{surgery_id}.txt" #: 海康 SDK `.so` 所在目录(容器内可挂载 `/opt/hikvision/lib`)。 hikvision_lib_dir: str = "/opt/hikvision/lib" @@ -121,6 +139,8 @@ class 
Settings(BaseSettings): baidu_speech_connection_timeout_ms: int | None = None #: 传输数据超时(毫秒)。未设置则使用 SDK 默认。 baidu_speech_socket_timeout_ms: int | None = None + #: 百度短语音识别 `dev_pid`,**始终**用于 ASR(调用方传入的 options 不会覆盖)。1537=普通话通用(与百度控制台一致;勿用 1737 英语、1837 粤语等)。 + baidu_speech_asr_dev_pid: int = Field(default=1537, ge=1000, le=99999) # --- MinIO:语音确认原始 WAV 追溯存储 --- #: 为空则视为未配置 MinIO,语音确认接口将返回业务错误(联调需配置)。 @@ -134,6 +154,8 @@ class Settings(BaseSettings): minio_region: str = "" #: 上传医生语音 WAV 的最大字节数(默认 10MB)。 voice_upload_max_bytes: int = Field(default=10 * 1024 * 1024, ge=64, le=50 * 1024 * 1024) + #: 同一条待确认在 ASR/文本解析为选项或耗材名失败时,计数的最大失败轮数。默认 2 表示首败后再允许 1 次「显式重试」语义(API 的 retry_remaining 首轮为 1、再败为 0);不阻止后续继续上传直至成功或否认。 + voice_confirm_max_failed_parse_rounds: int = Field(default=2, ge=1, le=20) # --- Demo 客户端跨源(仅用于 scripts/demo_client 联调;生产置 false) --- #: 为 true 时挂载 CORSMiddleware,便于浏览器 demo 从另一个端口访问本服务。 @@ -141,6 +163,15 @@ class Settings(BaseSettings): #: 逗号分隔的允许来源;`*` 表示允许全部来源(demo/联调用,生产应显式指定)。 demo_cors_origins: str = "*" + # --- 一键联调:上传视频 → 起假 RTSP → 写 VIDEO_RTSP_URLS_JSON_FILE → 开始手术(仅开发;生产必须 false) --- + #: 为 true 时注册 `POST /internal/demo/orchestrate-and-start`。 + demo_orchestrator_enabled: bool = False + #: 假 RTSP(MediaMTX)在宿主机上映射的端口(与 scripts/demo_client 默认一致)。 + demo_orchestrator_rtsp_port: int = Field(default=18554, ge=1, le=65535) + #: 手配假流时:写入 JSON 可把 `rtsp://127.0.0.1` 换成此主机,便于**另一进程**(如仅容器内的监控)访问宿主机推流。 + #: `POST /internal/demo/orchestrate-and-start` 在本进程起流+拉流,始终写 `127.0.0.1`,**不读**此字段。 + demo_orchestrator_rtsp_json_host: str = "host.docker.internal" + def parsed_demo_cors_origins(self) -> list[str]: raw = (self.demo_cors_origins or "").strip() if not raw: @@ -157,7 +188,7 @@ class Settings(BaseSettings): return str(value) model_config = SettingsConfigDict( - env_file=".env", + env_file=(str(_DEFAULT_ENV_FILE),), env_file_encoding="utf-8", extra="ignore", ) diff --git a/app/db/models.py b/app/db/models.py index 9cf2143..de7c6e2 100644 --- a/app/db/models.py +++ 
b/app/db/models.py @@ -59,6 +59,7 @@ class VoiceConfirmationAudit(Base): surgery_id: Mapped[str] = mapped_column(String(6), index=True, nullable=False) confirmation_id: Mapped[str] = mapped_column(String(128), index=True, nullable=False) #: recognized | rejected | asr_failed | parse_failed | invalid_audio | upload_failed + #: | client_stt_empty | client_stt_parse_failed status: Mapped[str] = mapped_column(String(32), nullable=False) audio_object_key: Mapped[str | None] = mapped_column(String(512), nullable=True) audio_content_type: Mapped[str | None] = mapped_column(String(128), nullable=True) diff --git a/app/dependencies.py b/app/dependencies.py index 513a069..322a2a0 100644 --- a/app/dependencies.py +++ b/app/dependencies.py @@ -24,7 +24,6 @@ surgery_result_repository = SurgeryResultRepository() voice_audit_repository = VoiceAuditRepository() baidu_speech_service = BaiduSpeechService() minio_audio_storage_service = MinioAudioStorageService(settings) - camera_session_manager = CameraSessionManager( settings=settings, vision_algorithm=consumable_vision_algorithm_service, diff --git a/app/repositories/voice_audits.py b/app/repositories/voice_audits.py index 494e459..3f7cbe7 100644 --- a/app/repositories/voice_audits.py +++ b/app/repositories/voice_audits.py @@ -2,6 +2,7 @@ from __future__ import annotations from datetime import datetime, timezone +from sqlalchemy import func, select from sqlalchemy.ext.asyncio import AsyncSession from app.db.models import VoiceConfirmationAudit @@ -10,6 +11,29 @@ from app.db.models import VoiceConfirmationAudit class VoiceAuditRepository: """Persist voice confirmation audit rows.""" + async def list_by_surgery( + self, + session: AsyncSession, + surgery_id: str, + *, + limit: int = 50, + offset: int = 0, + ) -> tuple[list[VoiceConfirmationAudit], int]: + """按手术号分页列出审计行,按 `created_at` 降序(新在前)。""" + c = select(func.count()).select_from(VoiceConfirmationAudit).where( + VoiceConfirmationAudit.surgery_id == surgery_id + ) + total = 
int((await session.execute(c)).scalar_one()) + q = ( + select(VoiceConfirmationAudit) + .where(VoiceConfirmationAudit.surgery_id == surgery_id) + .order_by(VoiceConfirmationAudit.created_at.desc()) + .offset(offset) + .limit(limit) + ) + rows = list((await session.execute(q)).scalars().all()) + return rows, total + async def save_audit( self, session: AsyncSession, diff --git a/app/routers/__init__.py b/app/routers/__init__.py new file mode 100644 index 0000000..0efa394 --- /dev/null +++ b/app/routers/__init__.py @@ -0,0 +1 @@ +"""Optional API routers.""" diff --git a/app/routers/demo_orch.py b/app/routers/demo_orch.py new file mode 100644 index 0000000..2997dd7 --- /dev/null +++ b/app/routers/demo_orch.py @@ -0,0 +1,189 @@ +"""Dev-only: upload two videos, start synthetic RTSP, write RTSP URL file, then start surgery.""" + +from __future__ import annotations + +import json +import shutil +import tempfile +from pathlib import Path +from typing import Annotated + +import anyio +from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile, status +from loguru import logger + +from app.config import settings +from app.dependencies import get_surgery_pipeline +from app.schemas import SurgeryApiResponse, SurgeryStartRequest +from app.services.synthetic_rtsp import StreamSpec, SyntheticRtspManager, write_rtsp_url_json_file +from app.services.surgery_pipeline import SurgeryPipeline +from app.surgery_errors import SurgeryPipelineError + +router = APIRouter(prefix="/internal/demo", tags=["demo"]) + + +def _orchestrate_write_rtsp_host() -> str: + """Write JSON 里用于 RTSP 的主机名。 + + 一键在本进程起 MediaMTX(端口映射在**本机网络命名空间**的 127.0.0.1)并拉流,OpenCV + 必须连 ``rtsp://127.0.0.1:port/...``。若改写成 ``host.docker.internal``,会指到 + 宿主机上的同端口,通常没有这路流,故 DESCRIBE 返回 404。 + `DEMO_ORCHESTRATOR_RTSP_JSON_HOST` 对此路由无效;手填假流+仅改 JSON 的拓扑仍可用该配置。 + """ + return "127.0.0.1" + + +@router.post( + "/orchestrate-and-start", + response_model=SurgeryApiResponse, + summary="一键联调:上传两路视频并开录", + description=( 
+ "仅当 DEMO_ORCHESTRATOR_ENABLED=true。保存两路视频、启动 MediaMTX+ffmpeg、" + "将 RTSP 映射写入 VIDEO_RTSP_URLS_JSON_FILE,再执行与 /client/surgeries/start 相同的开录逻辑。" + ), +) +async def orchestrate_and_start( + video1: Annotated[UploadFile, File(description="第一路视频")], + video2: Annotated[UploadFile, File(description="第二路视频")], + surgery_id: Annotated[str, Form()], + camera_1: Annotated[str, Form()] = "or-cam-01", + camera_2: Annotated[str, Form()] = "or-cam-02", + rtsp_path_1: Annotated[str, Form()] = "demo1", + rtsp_path_2: Annotated[str, Form()] = "demo2", + candidate_consumables_json: Annotated[str, Form()] = "[]", + pipeline: SurgeryPipeline = Depends(get_surgery_pipeline), +) -> SurgeryApiResponse: + logger.info( + "demo orchestrate-and-start: surgery_id={} cameras={} {}", + surgery_id, + (camera_1, camera_2), + f"rpaths=({rtsp_path_1},{rtsp_path_2})", + ) + if not settings.demo_orchestrator_enabled: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="Demo orchestrator disabled (set DEMO_ORCHESTRATOR_ENABLED=true).", + ) + path_raw = (settings.video_rtsp_urls_json_file or "").strip() + if not path_raw: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=( + "VIDEO_RTSP_URLS_JSON_FILE must be set to a writable path; " + "in Docker, bind-mount a host file to this path." 
+ ), + ) + json_path = Path(path_raw).expanduser() + + try: + candidates = json.loads(candidate_consumables_json) + except json.JSONDecodeError as exc: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_CONTENT, + detail=f"invalid candidate_consumables_json: {exc}", + ) from exc + if not isinstance(candidates, list) or not all(isinstance(x, str) for x in candidates): + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_CONTENT, + detail="candidate_consumables_json must be a JSON array of strings", + ) + + try: + body = SurgeryStartRequest( + surgery_id=surgery_id, + camera_ids=[camera_1.strip(), camera_2.strip()], + candidate_consumables=[str(x) for x in candidates], + ) + except Exception as exc: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_CONTENT, + detail=str(exc), + ) from exc + + ext1 = Path(video1.filename or "a.mp4").suffix or ".mp4" + ext2 = Path(video2.filename or "b.mp4").suffix or ".mp4" + v1_bytes = await video1.read() + v2_bytes = await video2.read() + if not v1_bytes or not v2_bytes: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_CONTENT, + detail="Video files must be non-empty", + ) + + work_root = Path(tempfile.mkdtemp(prefix="orm-orch-")) + try: + fp1 = work_root / f"v1{ext1}" + fp2 = work_root / f"v2{ext2}" + + def _save_files() -> None: + fp1.write_bytes(v1_bytes) + fp2.write_bytes(v2_bytes) + + await anyio.to_thread.run_sync(_save_files) + except OSError as exc: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"failed to save uploads: {exc}", + ) from exc + + streams = [ + StreamSpec(camera_id=body.camera_ids[0], file_path=fp1, rtsp_path=rtsp_path_1.strip() or "demo1"), + StreamSpec(camera_id=body.camera_ids[1], file_path=fp2, rtsp_path=rtsp_path_2.strip() or "demo2"), + ] + port = int(settings.demo_orchestrator_rtsp_port) + + try: + + def _start_synth() -> dict[str, str]: + mgr = SyntheticRtspManager.get() + _run, url_map = 
mgr.start(streams, host_port=port, work_dir=work_root) + return url_map + + url_map_host = await anyio.to_thread.run_sync(_start_synth) + except (FileNotFoundError, OSError, ValueError, RuntimeError) as exc: + logger.exception("synthetic RTSP start failed: {}", exc) + await anyio.to_thread.run_sync(SyntheticRtspManager.stop_active) + shutil.rmtree(work_root, ignore_errors=True) + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + detail=f"synthetic RTSP failed: {exc}", + ) from exc + + host_for_json = _orchestrate_write_rtsp_host() + try: + + def _write() -> None: + write_rtsp_url_json_file( + json_path, + url_map_host, + replace_host=host_for_json, + ) + + await anyio.to_thread.run_sync(_write) + except OSError as exc: + await anyio.to_thread.run_sync(SyntheticRtspManager.stop_active) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"failed to write RTSP JSON file: {exc}", + ) from exc + + await anyio.sleep(0.2) + + try: + await pipeline.start_recording( + body.surgery_id, + list(body.camera_ids), + list(body.candidate_consumables), + ) + except SurgeryPipelineError as exc: + await anyio.to_thread.run_sync(SyntheticRtspManager.stop_active) + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + detail={"code": exc.code, "message": exc.message, "surgery_id": body.surgery_id}, + ) from exc + + return SurgeryApiResponse( + surgery_id=body.surgery_id, + status="accepted", + message="假 RTSP 已起;映射已写入;摄像头录制已开始。", + ) diff --git a/app/schemas.py b/app/schemas.py index 2274bf2..de78a35 100644 --- a/app/schemas.py +++ b/app/schemas.py @@ -141,6 +141,48 @@ class SurgeryVoiceStatusResponse(BaseModel): ) +class SurgeryVoiceAuditItem(BaseModel): + """单条 `voice_confirmation_audits` 行(追溯对账用)。""" + + id: int + confirmation_id: str + status: str = Field( + description=( + "recognized / rejected / parse_failed / asr_failed / invalid_audio / " + "upload_failed / client_stt_empty / client_stt_parse_failed 
等" + ), + ) + audio_object_key: str | None = None + audio_content_type: str | None = None + audio_size_bytes: int | None = None + audio_sha256: str | None = None + asr_text: str | None = None + resolved_label: str | None = None + options_snapshot_json: str | None = Field( + default=None, + description="当次候选项与置信度 JSON 快照。", + ) + error_message: str | None = None + created_at: datetime = Field( + description="记录写入时间(UTC)。", + ) + + model_config = ConfigDict(from_attributes=True) + + +class SurgeryVoiceAuditsListResponse(BaseModel): + """按手术号分页的语音确认审计列表。""" + + surgery_id: str + total: int = Field(ge=0, description="该手术在表中的总条数(不受本页 limit 截断)。") + limit: int = Field(ge=1, le=200) + offset: int = Field(ge=0) + items: list[SurgeryVoiceAuditItem] = Field( + default_factory=list, + description="按 `created_at` 降序。", + ) + + class PendingConfirmationOption(BaseModel): label: str confidence: float @@ -158,6 +200,16 @@ class SurgeryPendingConfirmationResponse(BaseModel): created_at: datetime +class SurgeryPendingResolveTextRequest(BaseModel): + """由浏览器 Web Speech 等客户端本地识别后提交的文本,语义与经百度 ASR 得到的文本相同。""" + + recognized_text: str = Field( + min_length=1, + max_length=2000, + description="识别文本;服务端用与语音接口相同的规则解析候选项。", + ) + + class SurgeryPendingConfirmationResolveResponse(BaseModel): surgery_id: str confirmation_id: str diff --git a/app/services/baidu_speech.py b/app/services/baidu_speech.py index 8062615..daf2b92 100644 --- a/app/services/baidu_speech.py +++ b/app/services/baidu_speech.py @@ -52,8 +52,13 @@ class BaiduSpeechService: rate: int = 16000, options: dict[str, Any] | None = None, ) -> dict[str, Any]: - """短语音识别。返回百度 JSON(含 `err_no`、`result` 等)。""" - return self._client_or_raise().asr(speech, format, rate, options) + """短语音识别。返回百度 JSON(含 `err_no`、`result` 等)。 + + 固定使用普通话模型(`dev_pid` 来自配置),避免未传参时误用服务端默认导致偏英语等结果。 + """ + merged: dict[str, Any] = dict(options or {}) + merged["dev_pid"] = int(settings.baidu_speech_asr_dev_pid) + return self._client_or_raise().asr(speech, 
format, rate, merged) def synthesis( self, diff --git a/app/services/consumption_tsv_log.py b/app/services/consumption_tsv_log.py new file mode 100644 index 0000000..8de4ca0 --- /dev/null +++ b/app/services/consumption_tsv_log.py @@ -0,0 +1,241 @@ +"""每例手术一个文本文件(制表符列):`start_surgery` 时截断并写表头,每次时间窗识别**追加**一行。终端 Markdown 中时间戳为可读形式;落盘行内仍为 ISO 便于程序解析。 + +时间戳:在拉流起点记录 `time.time()`,与 `time.monotonic()` 时间窗对齐。直播 RTSP 经 OpenCV 一般无可靠绝对时码,以本机接收时刻为准。 +""" + +from __future__ import annotations + +import re +import threading +from datetime import datetime, timezone +from pathlib import Path +from zoneinfo import ZoneInfo, ZoneInfoNotFoundError + +from loguru import logger + +from app.config import settings +from app.services.consumable_vision_algorithm import ClsTop3 +from app.terminal_markdown import print_markdown_stderr + +# 制表符分隔;时间范围用 U+2013 连接;Top2/3 仅名称;本窗消耗数量恒为 1 +HEADER = "物品id\t物品名称\tTop2物品名称\tTop3物品名称\t消耗数量\t医生id\t时间戳\n" +_RANGE_SEP = "\u2013" # en dash,与样例 `00:00:00.000–00:00:45.000` 一致 + +_lock = threading.Lock() + + +def _consumption_tzinfo(): + raw = (settings.consumption_log_timezone or "").strip() + if not raw: + lt = datetime.now().astimezone().tzinfo + return lt if lt is not None else timezone.utc + try: + return ZoneInfo(raw) + except ZoneInfoNotFoundError: + logger.warning("无效的 consumption_log_timezone={!r},回退为 UTC", raw) + return timezone.utc + + +def format_consumption_timestamp( + camera_id: str, + wall_start_epoch: float, + wall_end_epoch: float, +) -> str: + """落盘用:墙钟 + 配置时区 → `camXX@ISO8601–ISO8601`。""" + tz = _consumption_tzinfo() + a = datetime.fromtimestamp(wall_start_epoch, tz=tz) + b = datetime.fromtimestamp(wall_end_epoch, tz=tz) + cam = short_camera_label(camera_id) + return f"{cam}@{a.isoformat(timespec='milliseconds')}{_RANGE_SEP}{b.isoformat(timespec='milliseconds')}" + + +def format_consumption_timestamp_readable( + camera_id: str, + wall_start_epoch: float, + wall_end_epoch: float, +) -> str: + """仅终端 Rich:不含 `T` 的本地可读区间 + 摄像头简名,便于人眼对时。""" 
+ tz = _consumption_tzinfo() + a = datetime.fromtimestamp(wall_start_epoch, tz=tz) + b = datetime.fromtimestamp(wall_end_epoch, tz=tz) + cam = short_camera_label(camera_id) + + def _fmt(d: datetime) -> str: + return d.strftime("%Y-%m-%d %H:%M:%S") + f".{d.microsecond // 1000:03d}" + + return f"{_fmt(a)} {_RANGE_SEP} {_fmt(b)} · {cam}" + + +def short_camera_label(camera_id: str) -> str: + s = (camera_id or "").strip() + m = re.match(r"^or-cam-(\d+)$", s, re.IGNORECASE) + if m: + return f"cam{int(m.group(1)):02d}" + m2 = re.match(r"^cam-?0*(\d+)$", s, re.IGNORECASE) + if m2: + return f"cam{int(m2.group(1)):02d}" + alnum = re.sub(r"[^\w-]", "", s)[:12] + return alnum or "cam" + + +def _encode_cell(value: str) -> str: + s = (value or "").replace("\r", " ").replace("\n", " ").replace("\t", " ") + return s + + +def _item_id_for_row(name: str, pid: str, name_to_code: dict[str, str]) -> str: + p = (pid or "").strip() + if p: + return p + n = (name or "").strip() + if n in name_to_code: + return (name_to_code.get(n) or n).strip() + return n + + +def build_tsv_line( + *, + name_to_code: dict[str, str], + best: ClsTop3, + doctor_id: str, + camera_id: str, + wall_start_epoch: float, + wall_end_epoch: float, +) -> str: + id1 = _item_id_for_row(best.t1_name, best.t1_pid, name_to_code) + # 与历史样例:Top1 为「名称 置信度」四位小数 + name1 = f"{(best.t1_name or '').strip()} {best.t1_conf:.4f}".strip() + n2 = (best.t2_name or "").strip() + n3 = (best.t3_name or "").strip() + ts = format_consumption_timestamp(camera_id, wall_start_epoch, wall_end_epoch) + row = [ + _encode_cell(id1), + _encode_cell(name1), + _encode_cell(n2), + _encode_cell(n3), + "1", + _encode_cell(doctor_id), + _encode_cell(ts), + ] + return "\t".join(row) + "\n" + + +def _safe_surgery_path_segment(surgery_id: str) -> str: + s = (surgery_id or "unknown").strip() or "unknown" + s = re.sub(r"[^\w\-.@]", "_", s) + return s[:200] if len(s) > 200 else s + + +def resolved_consumption_log_path(surgery_id: str) -> Path: + raw = 
(settings.consumption_tsv_log_path or "logs/consumption_{surgery_id}.txt").strip() + safe = _safe_surgery_path_segment(surgery_id) + if "{surgery_id}" in raw: + raw = raw.replace("{surgery_id}", safe) + else: + p0 = Path(raw) + if p0.suffix: + raw = str(p0.with_name(f"{p0.stem}_{safe}{p0.suffix}")) + else: + raw = f"{raw.rstrip('/')}_{safe}.txt" + p = Path(raw).expanduser() + if not p.is_absolute(): + p = Path.cwd() / p + return p + + +def init_consumption_log_file(surgery_id: str) -> None: + """新手术开始:截断该手术对应文件并写入表头(一次)。""" + if not settings.consumption_tsv_log_enabled: + return + path = resolved_consumption_log_path(surgery_id) + path.parent.mkdir(parents=True, exist_ok=True) + with _lock: + with path.open("w", encoding="utf-8") as f: + f.write(HEADER) + + +def append_consumption_tsv_line(surgery_id: str, line: str) -> None: + if not settings.consumption_tsv_log_enabled: + return + path = resolved_consumption_log_path(surgery_id) + path.parent.mkdir(parents=True, exist_ok=True) + with _lock: + with path.open("a", encoding="utf-8") as f: + f.write(line) + + +def _md_cell(value: str) -> str: + """Flatten CR/LF to spaces and escape `|` as `\\|` so the value cannot break a Markdown table row.""" + s = (value or "").replace("\r", " ").replace("\n", " ").replace("|", "\\|") + return s + + +def build_consumption_markdown( + *, + name_to_code: dict[str, str], + best: ClsTop3, + doctor_id: str, + camera_id: str, + wall_start_epoch: float, + wall_end_epoch: float, +) -> str: + """终端用:Top1 含 id/名称/置信度;Top2/3 仅名称;消耗数量恒为 1。""" + id1 = _item_id_for_row(best.t1_name, best.t1_pid, name_to_code) + n1 = (best.t1_name or "").strip() + has2 = bool((best.t2_name or "").strip()) + has3 = bool((best.t3_name or "").strip()) + n2 = (best.t2_name or "").strip() if has2 else "" + n3 = (best.t3_name or "").strip() if has3 else "" + dash = "—" + ts = format_consumption_timestamp_readable(camera_id, wall_start_epoch, wall_end_epoch) + return "\n".join( + [ + "| Top1 物品id | Top1 物品名称 | Top1 置信度 | Top2 物品名称 | Top3 物品名称 | 消耗数量 | 医生id | 时间戳 |", + "| :--- | :--- | ---: 
def append_consumption_window(
    *,
    surgery_id: str,
    name_to_code: dict[str, str],
    best: ClsTop3,
    doctor_id: str,
    camera_id: str,
    wall_start_epoch: float,
    wall_end_epoch: float,
) -> None:
    """Publish one time-window result to the enabled consumption sinks.

    When ``settings.consumption_tsv_log_enabled`` is set, a TSV line is built
    and appended to the per-surgery file.  When
    ``settings.consumption_log_markdown_terminal`` is set, the same window is
    rendered as a Markdown table and printed to stderr.  With both switches
    off the call does nothing.
    """
    to_file = settings.consumption_tsv_log_enabled
    to_terminal = settings.consumption_log_markdown_terminal
    if not (to_file or to_terminal):
        return

    # Both renderers take exactly the same keyword arguments.
    window = {
        "name_to_code": name_to_code,
        "best": best,
        "doctor_id": doctor_id,
        "camera_id": camera_id,
        "wall_start_epoch": wall_start_epoch,
        "wall_end_epoch": wall_end_epoch,
    }
    if to_file:
        append_consumption_tsv_line(surgery_id, build_tsv_line(**window))
    if to_terminal:
        print_markdown_stderr(build_consumption_markdown(**window))
async def get_pending_prompt_audio_mp3(
    self,
    surgery_id: str,
    confirmation_id: str,
) -> bytes:
    """Return synthesized MP3 bytes for a pending confirmation's prompt text.

    Looks the confirmation up through the session manager and hands its
    ``prompt_text`` to the voice service's synchronous synthesizer, which is
    run in the thread pool.

    Raises:
        SurgeryPipelineError: ``CONFIRMATION_NOT_FOUND`` when no such
            confirmation exists or its status is no longer ``"pending"``.
    """
    item = self._sessions.get_pending_confirmation_by_id(
        surgery_id, confirmation_id
    )
    still_pending = item is not None and item.status == "pending"
    if not still_pending:
        raise SurgeryPipelineError(
            "CONFIRMATION_NOT_FOUND",
            "未找到该待确认项或已处理。",
        )
    synthesize = self._voice.synthesize_prompt_to_mp3
    audio = await run_in_threadpool(synthesize, item.prompt_text)
    return audio
class SyntheticRtspManager:
    """Process-wide controller for the single active fake-RTSP session.

    All state is kept on the class (``_instance`` / ``_active``) so that the
    classmethods ``active_run`` and ``stop_active`` always observe the run
    registered by ``start``, whichever instance ``start`` was called on.
    """

    _instance: ClassVar[SyntheticRtspManager | None] = None
    _active: ClassVar[SyntheticRtspRun | None] = None

    @classmethod
    def get(cls) -> SyntheticRtspManager:
        """Return the lazily created singleton manager."""
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance

    @classmethod
    def active_run(cls) -> SyntheticRtspRun | None:
        """Return the currently registered run, or ``None`` when idle."""
        return cls._active

    @classmethod
    def _cleanup_prefixed_containers(cls) -> None:
        """Remove stale MediaMTX containers left by earlier runs/reloads.

        Best effort: a missing ``docker`` binary or a failing docker command
        is logged at debug level and otherwise ignored.
        """
        if shutil.which("docker") is None:
            return
        try:
            listed = subprocess.run(
                [
                    "docker",
                    "ps",
                    "-aq",
                    "--filter",
                    f"name={CONTAINER_NAME_PREFIX}",
                ],
                capture_output=True,
                text=True,
                timeout=30,
                check=False,
            )
        except (OSError, subprocess.SubprocessError) as exc:
            logger.debug("docker ps stale cleanup: {}", exc)
            return
        ids = [x.strip() for x in (listed.stdout or "").splitlines() if x.strip()]
        if not ids:
            return
        try:
            subprocess.run(
                ["docker", "rm", "-f", *ids],
                capture_output=True,
                text=True,
                timeout=60,
                check=False,
            )
            logger.info("Removed stale fake RTSP containers: {}", ids)
        except (OSError, subprocess.SubprocessError) as exc:
            logger.debug("docker rm stale cleanup: {}", exc)

    @classmethod
    def stop_active(cls) -> None:
        """Stop the registered run (if any), then sweep leftover containers."""
        if cls._active is not None:
            cls._active.stop()
            cls._active = None
        cls._cleanup_prefixed_containers()

    def start(
        self,
        streams: list[StreamSpec],
        *,
        host_port: int,
        work_dir: Path,
    ) -> tuple[SyntheticRtspRun, dict[str, str]]:
        """Start MediaMTX and one ffmpeg publisher per stream.

        Any previously active run is stopped first.

        Args:
            streams: Streams to publish; must not be empty.
            host_port: Port on 127.0.0.1 mapped to the container's 8554.
            work_dir: Directory handed to the run, which removes it on stop.

        Returns:
            ``(run, url_by_camera)`` where the dict maps each ``camera_id``
            to its ``rtsp://127.0.0.1:<host_port>/<rtsp_path>`` URL.

        Raises:
            ValueError: ``streams`` is empty or an RTSP path segment contains
                a character outside ``[A-Za-z0-9_.-]``.
            RuntimeError: ``ffmpeg``/``docker`` is not on PATH, the container
                fails to start, or the RTSP port never becomes reachable.
            FileNotFoundError: A stream's ``file_path`` is not a file.
        """
        if not streams:
            raise ValueError("no streams")
        if not shutil.which("ffmpeg"):
            raise RuntimeError("ffmpeg not in PATH")
        if not shutil.which("docker"):
            raise RuntimeError("docker not in PATH (required to run MediaMTX)")

        self.stop_active()

        allowed = frozenset(
            "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_.-"
        )
        for s in streams:
            if not s.file_path.is_file():
                raise FileNotFoundError(str(s.file_path))
            if any(ch not in allowed for ch in s.rtsp_path):
                raise ValueError(f"invalid RTSP path segment: {s.rtsp_path!r}")

        container = CONTAINER_NAME_PREFIX + uuid.uuid4().hex[:12]
        cmd = [
            "docker", "run", "-d", "--name", container,
            "-p", f"127.0.0.1:{host_port}:8554",
            MEDIAMTX_IMAGE,
        ]
        r = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
        if r.returncode != 0:
            # A failed `docker run` can still leave a created container behind.
            try:
                subprocess.run(
                    ["docker", "rm", "-f", container],
                    capture_output=True,
                    text=True,
                    timeout=30,
                    check=False,
                )
            except (OSError, subprocess.SubprocessError) as exc:
                logger.debug("docker rm failed container cleanup: {}", exc)
            err = (r.stderr or r.stdout or "").strip()
            raise RuntimeError(f"MediaMTX docker failed: {err}")

        run = SyntheticRtspRun(container_name=container)
        url_map: dict[str, str] = {}
        try:
            time.sleep(0.5)
            # If the port never opens, the except below removes the container
            # instead of leaving it running unregistered.
            _wait_tcp_listening(
                "127.0.0.1", host_port, total_timeout=_MEDIAMTX_TCP_READY_SEC
            )

            run.work_dir = work_dir
            for s in streams:
                dest = f"rtsp://127.0.0.1:{host_port}/{s.rtsp_path}"
                url_map[s.camera_id] = dest
                pub = [
                    "ffmpeg", "-hide_banner", "-loglevel", "warning",
                    "-re", "-stream_loop", "-1",
                    "-i", str(s.file_path),
                    "-c", "copy", "-f", "rtsp", "-rtsp_transport", "tcp", dest,
                ]
                p = subprocess.Popen(pub)  # noqa: S603
                run.procs.append(p)
        except Exception:
            run.stop()
            raise

        # Give ffmpeg a moment to connect to MediaMTX before readers attach.
        time.sleep(0.4)

        # Register on the class, not the instance: `active_run` and
        # `stop_active` are classmethods that read `cls._active`.
        type(self)._active = run
        return run, url_map
*, + replace_host: str, +) -> None: + """Write JSON map; replace 127.0.0.1 in values with `replace_host` (e.g. host.docker.internal).""" + if replace_host in ("", "127.0.0.1"): + out = url_map + else: + out = { + k: v.replace("127.0.0.1", replace_host, 1) + for k, v in url_map.items() + } + path.parent.mkdir(parents=True, exist_ok=True) + text = json.dumps(out, ensure_ascii=False, indent=2, sort_keys=True) + "\n" + temp = path.with_name(path.name + ".tmp") + temp.write_text(text, encoding="utf-8") + temp.replace(path) + logger.info("Wrote RTSP map to {}", path) diff --git a/app/services/video/backend_resolver.py b/app/services/video/backend_resolver.py index 6780e9b..c796904 100644 --- a/app/services/video/backend_resolver.py +++ b/app/services/video/backend_resolver.py @@ -21,7 +21,6 @@ class BackendResolver: ) -> None: self._s = settings self._hik = hikvision_runtime - self._rtsp_urls_map = settings.video_rtsp_url_map() def _parse_json_object(self, raw: str) -> dict[str, Any]: raw = (raw or "").strip() @@ -55,8 +54,10 @@ class BackendResolver: return VideoBackendKind.RTSP def rtsp_url_for_camera(self, camera_id: str) -> str: - if camera_id in self._rtsp_urls_map: - return self._rtsp_urls_map[camera_id] + # Re-read on each use so VIDEO_RTSP_URLS_JSON_FILE can be hot-updated (e.g. dev orchestrator). 
async def record_voice_parse_failure(
    self, surgery_id: str, confirmation_id: str
) -> tuple[int, int]:
    """Count one failed voice/text parse against a pending confirmation.

    Returns ``(failures_so_far, retries_left)``, where ``retries_left`` is
    ``voice_confirm_max_failed_parse_rounds`` minus the failure count, floored
    at 0.  ``(0, 0)`` is returned, and nothing is counted, when the surgery is
    not active or the confirmation is missing or no longer ``"pending"``.
    """
    running = self._active.get(surgery_id)
    if running is None:
        return 0, 0
    state = running.state
    allowed_rounds = int(self._s.voice_confirm_max_failed_parse_rounds)
    # The counter is updated under the surgery state's lock.
    async with state.lock:
        entry = state.pending_by_id.get(confirmation_id)
        if entry is not None and entry.status == "pending":
            entry.voice_parse_failures += 1
            failures = entry.voice_parse_failures
            return failures, max(0, allowed_rounds - failures)
    return 0, 0
b/app/services/voice_confirm.py @@ -32,12 +32,100 @@ _CN_DIGITS = { } +def _parse_ordinal_index_1based(token: str) -> int | None: + """将「1」「3」「一」「三」「十一」等解析为 1-based 序数,失败返回 None。""" + t = (token or "").strip() + if not t: + return None + if t.isdigit(): + v = int(t) + return v if 1 <= v <= 99 else None + if t in _CN_DIGITS and t != "零" and t != "十": + return int(_CN_DIGITS[t]) + if t == "十": + return 10 + if len(t) == 2 and t[0] == "十" and t[1] in _CN_DIGITS and t[1] not in ("零", "十"): + return 10 + int(_CN_DIGITS[t[1]]) + if len(t) == 2 and t[1] == "十" and t[0] in _CN_DIGITS and t[0] != "零": + return int(_CN_DIGITS[t[0]]) * 10 + if len(t) == 3 and t[0] in _CN_DIGITS and t[1] == "十" and t[2] in _CN_DIGITS: + return int(_CN_DIGITS[t[0]]) * 10 + int(_CN_DIGITS[t[2]]) + return None + + +def _label_from_ordinal_1based(n1: int, options: list[str]) -> str | None: + if n1 < 1: + return None + idx = n1 - 1 + if 0 <= idx < len(options): + return options[idx] + return None + + +def _choose_from_ordinal_text(raw: str, options: list[str]) -> str | None: + """从「第一个」「第2个」「选3」「1号」等表述解析选项。返回 None 表示本函数未识别。""" + n_opt = len(options) + if n_opt < 1: + return None + + # 1) 显式「第N个/项/款/…」,允许夹带后噪声,如「第一个对」 + for m in re.finditer( + r"第([0-9]+|[一二两三四五六七八九十百]+)(?:个|项|款|的|种|名)?", raw + ): + n1 = _parse_ordinal_index_1based(m.group(1)) + if n1 is not None: + ch = _label_from_ordinal_1based(n1, options) + if ch is not None: + return ch + m_pick = re.search( + r"(?:^|[\s,,;;::])(?:选|要|就)\s*0*([1-9]\d?)(?:\s*号|个|项|款)?", + raw, + ) + if m_pick: + n1 = int(m_pick.group(1)) + ch = _label_from_ordinal_1based(n1, options) + if ch is not None: + return ch + norm_for_opt = raw.replace(" ", "").lower() + m_op = re.search(r"(?:option|选项)\s*[::]?\s*(\d+)", norm_for_opt, re.IGNORECASE) + if m_op: + n1 = int(m_op.group(1)) + ch = _label_from_ordinal_1based(n1, options) + if ch is not None: + return ch + + # 2) 行首/句末「一」「二」单字,仅当候选项数较少时 + s = raw.replace(" ", "") + if n_opt <= 3: + m_one = 
re.match(r"^([一二两三四])$", s) + if m_one: + tok = m_one.group(1) + if tok in _CN_DIGITS and tok not in ("零", "十"): + n1 = int(_CN_DIGITS[tok]) + ch = _label_from_ordinal_1based(n1, options) + if ch is not None: + return ch + m_tail = re.search(r"([0-9一二两三四五六七八九十]+)\s*号$", s) + if m_tail: + n1 = _parse_ordinal_index_1based(m_tail.group(1)) + if n1 is not None: + ch = _label_from_ordinal_1based(n1, options) + if ch is not None: + return ch + + return None + + def parse_voice_choice(asr_text: str, options: list[str]) -> str | None: """ 从识别文本中解析医生选择的耗材名称。 支持:完全匹配、子串匹配、第 N 个(1/一/第一个)。 """ - raw = (asr_text or "").strip() + raw = re.sub( + r"^[。,、;:!?\s]+|[。,、;:!?\s]+$", + "", + (asr_text or "").strip(), + ) if not raw: return None normalized = raw.replace(" ", "").lower() @@ -46,6 +134,10 @@ def parse_voice_choice(asr_text: str, options: list[str]) -> str | None: if opt and opt in raw: return opt + chosen_ord = _choose_from_ordinal_text(raw, options) + if chosen_ord is not None: + return chosen_ord + m_num = re.search(r"(\d+)", raw) if m_num: idx = int(m_num.group(1)) - 1 @@ -55,14 +147,11 @@ def parse_voice_choice(asr_text: str, options: list[str]) -> str | None: m_cn = re.search(r"第([一二两三四五六七八九十\d]+)个", raw) if m_cn: token = m_cn.group(1) - if token.isdigit(): - idx = int(token) - 1 - elif token in _CN_DIGITS: - idx = _CN_DIGITS[token] - 1 - else: - idx = -1 - if 0 <= idx < len(options): - return options[idx] + n1 = int(token) if token.isdigit() else _parse_ordinal_index_1based(token) + if n1 is not None: + ch = _label_from_ordinal_1based(n1, options) + if ch is not None: + return ch for i, opt in enumerate(options): if not opt: @@ -107,13 +196,9 @@ def is_rejection_phrase(asr_text: str) -> bool: def build_prompt_text(options: list[tuple[str, float]]) -> str: - parts = [ - "请确认刚才使用的耗材是下面哪一项,可以说序号或名称;" - "若是清单内其它耗材,也可以直接说该耗材名称。" - ] + parts = ["请确认刚才使用的耗材是下面哪一项。"] for i, (name, _conf) in enumerate(options, start=1): parts.append(f"第{i}个,{name}。") - 
parts.append("若都不是请说不是。") return "".join(parts) @@ -228,6 +313,32 @@ class VoiceConfirmationOrchestrator: tmp.close() return path, None + async def speak_prompt(self, text: str) -> None: + """仅百度 TTS + ffplay 播报,不录音。供待确认入队时提示手术室。""" + if not (text or "").strip(): + return + if not self._s.voice_tts_on_pending_enqueued: + return + if not self._s.voice_confirmation_enabled: + return + if not self._baidu.configured: + logger.debug("speak_prompt skipped: baidu_speech not configured") + return + async with self._lock: + mp3_path, err = await run_in_threadpool(self._synthesize_to_temp_mp3, text) + if err or not mp3_path: + logger.warning("TTS synthesis failed: {}", err) + return + try: + play_err = await run_in_threadpool(self._play_mp3_file, mp3_path) + if play_err: + logger.warning("TTS play failed: {}", play_err) + finally: + try: + os.unlink(mp3_path) + except OSError: + pass + async def run_confirmation( self, *, diff --git a/app/services/voice_file_log.py b/app/services/voice_file_log.py new file mode 100644 index 0000000..86992d6 --- /dev/null +++ b/app/services/voice_file_log.py @@ -0,0 +1,167 @@ +"""语音确认(ASR/解析/审计)的终端 loguru 行 + 每手术 TSV 落盘,与 `consumption_tsv_log` 并列。""" + +from __future__ import annotations + +import re +import threading +from datetime import datetime, timezone +from pathlib import Path +from zoneinfo import ZoneInfo, ZoneInfoNotFoundError + +from loguru import logger + +from app.config import Settings + +_lock = threading.Lock() + +HEADER = ( + "时间戳(ISO,UTC)\t来源\t状态\tconfirmation_id\tasr/识别文本\t" + "resolved_label\trejected\terror\taudio_object_key\n" +) + + +def _ts_iso_utc() -> str: + return datetime.now(timezone.utc).isoformat(timespec="milliseconds") + + +def _encode_cell(value: str) -> str: + return (value or "").replace("\r", " ").replace("\n", " ").replace("\t", " ") + + +def _log_tz_info(settings: Settings) -> object: + raw = (settings.consumption_log_timezone or "").strip() + if not raw: + lt = datetime.now().astimezone().tzinfo + 
return lt if lt is not None else timezone.utc + try: + return ZoneInfo(raw) + except ZoneInfoNotFoundError: + return timezone.utc + + +def _ts_local_for_display(settings: Settings) -> str: + tz = _log_tz_info(settings) + return datetime.now(tz).isoformat(timespec="milliseconds") + + +def _safe_surgery_path_segment(surgery_id: str) -> str: + s = (surgery_id or "unknown").strip() or "unknown" + s = re.sub(r"[^\w\-.@]", "_", s) + return s[:200] if len(s) > 200 else s + + +def resolved_voice_log_path(surgery_id: str, settings: Settings) -> Path: + raw = (settings.voice_file_log_path or "logs/voice_{surgery_id}.txt").strip() + safe = _safe_surgery_path_segment(surgery_id) + if "{surgery_id}" in raw: + raw = raw.replace("{surgery_id}", safe) + else: + p0 = Path(raw) + if p0.suffix: + raw = str(p0.with_name(f"{p0.stem}_{safe}{p0.suffix}")) + else: + raw = f"{raw.rstrip('/')}_{safe}.txt" + p = Path(raw).expanduser() + if not p.is_absolute(): + p = Path.cwd() / p + return p + + +def init_voice_log_file(surgery_id: str, settings: Settings) -> None: + """与 `init_consumption_log_file` 同生命周期:`start_surgery` 时截断并写表头。""" + if not settings.voice_file_log_enabled: + return + path = resolved_voice_log_path(surgery_id, settings) + path.parent.mkdir(parents=True, exist_ok=True) + with _lock: + with path.open("w", encoding="utf-8") as f: + f.write(HEADER) + + +def append_voice_tsv_line(surgery_id: str, line: str, settings: Settings) -> None: + if not settings.voice_file_log_enabled: + return + path = resolved_voice_log_path(surgery_id, settings) + path.parent.mkdir(parents=True, exist_ok=True) + with _lock: + with path.open("a", encoding="utf-8") as f: + f.write(line) + + +def emit_voice_event( + settings: Settings, + *, + surgery_id: str, + source: str, + status: str, + confirmation_id: str, + asr_text: str | None = None, + resolved_label: str | None = None, + rejected: str | bool | None = None, + error_message: str | None = None, + audio_object_key: str | None = None, +) -> None: + 
""" + 终端:单条可 grep 的 VoiceConfirm 行;文件:TSV 一行(与启用的 `voice_file_log_enabled` 一致)。 + + :param source: `wav` | `text` | `n/a` + :param status: 与审计 `status` 或 `minio_not_configured` 等说明型状态一致 + """ + rj: str + if rejected is None: + rj = "" + elif isinstance(rejected, bool): + rj = "true" if rejected else "false" + else: + rj = str(rejected) + + ts_utc = _ts_iso_utc() + local_hint = _ts_local_for_display(settings) + if status in ("recognized", "rejected"): + logger.info( + "VoiceConfirm local_ts={!r} surgery_id={} source={} status={} " + "confirmation_id={} asr_text={!r} resolved_label={!r} rejected={} " + "error={!r} audio_key={!r}", + local_hint, + surgery_id, + source, + status, + confirmation_id, + asr_text, + resolved_label, + rj, + error_message, + audio_object_key, + ) + else: + logger.warning( + "VoiceConfirm local_ts={!r} surgery_id={} source={} status={} " + "confirmation_id={} asr_text={!r} resolved_label={!r} rejected={} " + "error={!r} audio_key={!r}", + local_hint, + surgery_id, + source, + status, + confirmation_id, + asr_text, + resolved_label, + rj, + error_message, + audio_object_key, + ) + + if not settings.voice_file_log_enabled: + return + row = [ + _encode_cell(ts_utc), + _encode_cell(source), + _encode_cell(status), + _encode_cell(confirmation_id), + _encode_cell("" if asr_text is None else asr_text), + _encode_cell("" if resolved_label is None else resolved_label), + _encode_cell(rj), + _encode_cell("" if error_message is None else error_message), + _encode_cell("" if audio_object_key is None else audio_object_key), + ] + line = "\t".join(row) + "\n" + append_voice_tsv_line(surgery_id, line, settings) diff --git a/app/services/voice_resolution.py b/app/services/voice_resolution.py index 262562d..9391d81 100644 --- a/app/services/voice_resolution.py +++ b/app/services/voice_resolution.py @@ -9,7 +9,9 @@ from fastapi.concurrency import run_in_threadpool from loguru import logger from app.config import Settings +from app.services.voice_file_log 
import emit_voice_event from app.database import AsyncSessionLocal +from app.db.models import VoiceConfirmationAudit from app.repositories.voice_audits import VoiceAuditRepository from app.services.audio_wav import WavDecodeError, wav_bytes_to_pcm16k_mono_s16le from app.services.baidu_speech import BaiduSpeechNotConfiguredError, BaiduSpeechService @@ -49,6 +51,50 @@ class VoiceConfirmationService: self._minio = minio self._audits = audits + def _emit_voice_trace( + self, + *, + source: str, + status: str, + surgery_id: str, + confirmation_id: str, + asr_text: str | None = None, + resolved_label: str | None = None, + rejected: bool | str | None = None, + error_message: str | None = None, + audio_object_key: str | None = None, + ) -> None: + emit_voice_event( + self._s, + surgery_id=surgery_id, + source=source, + status=status, + confirmation_id=confirmation_id, + asr_text=asr_text, + resolved_label=resolved_label, + rejected=rejected, + error_message=error_message, + audio_object_key=audio_object_key, + ) + + def synthesize_prompt_to_mp3(self, text: str) -> bytes: + """百度在线语音合成,供浏览器直接播放,与 `voice_confirm._synthesize_to_temp_mp3` 同参。""" + t = (text or "").strip() + if not t: + raise SurgeryPipelineError("TTS_TEXT_EMPTY", "提示文本为空。") + try: + r = self._baidu.synthesis( + t, "zh", 1, {"spd": 5, "pit": 5, "vol": 9, "per": 0} + ) + except BaiduSpeechNotConfiguredError as exc: + raise SurgeryPipelineError( + "BAIDU_NOT_CONFIGURED", + "服务端未配置百度语音,无法合成播报音频。", + ) from exc + if isinstance(r, dict): + raise SurgeryPipelineError("TTS_ERROR", f"百度 TTS 失败: {r!r}") + return r + async def resolve_from_wav( self, *, @@ -74,18 +120,39 @@ class VoiceConfirmationService: options_snapshot_json=None, error_message="音频超过大小限制", ) + self._emit_voice_trace( + source="wav", + status="invalid_audio", + surgery_id=surgery_id, + confirmation_id=confirmation_id, + error_message="音频超过大小限制", + ) raise SurgeryPipelineError( "VOICE_AUDIO_INVALID", f"音频大小超过限制(最大 {self._s.voice_upload_max_bytes} 字节)。", 
) if not self._minio.configured: + self._emit_voice_trace( + source="wav", + status="minio_not_configured", + surgery_id=surgery_id, + confirmation_id=confirmation_id, + error_message="服务端未配置 MinIO,无法保存语音追溯文件。", + ) raise SurgeryPipelineError( "MINIO_NOT_CONFIGURED", "服务端未配置 MinIO,无法保存语音追溯文件。", ) if not self._baidu.configured: + self._emit_voice_trace( + source="wav", + status="baidu_not_configured", + surgery_id=surgery_id, + confirmation_id=confirmation_id, + error_message="服务端未配置百度语音,无法进行语音识别。", + ) raise SurgeryPipelineError( "BAIDU_NOT_CONFIGURED", "服务端未配置百度语音,无法进行语音识别。", @@ -95,6 +162,13 @@ class VoiceConfirmationService: surgery_id, confirmation_id ) if pending is None: + self._emit_voice_trace( + source="wav", + status="confirmation_not_found", + surgery_id=surgery_id, + confirmation_id=confirmation_id, + error_message="未找到该待确认项或已处理。", + ) raise SurgeryPipelineError( "CONFIRMATION_NOT_FOUND", "未找到该待确认项或已处理。", @@ -133,6 +207,13 @@ class VoiceConfirmationService: error_message=str(exc), ) self._sessions.record_voice_trace(surgery_id, asr_text=None, error=str(exc)) + self._emit_voice_trace( + source="wav", + status="upload_failed", + surgery_id=surgery_id, + confirmation_id=confirmation_id, + error_message=str(exc), + ) raise SurgeryPipelineError( "MINIO_UPLOAD_FAILED", f"语音文件上传失败:{exc}", @@ -155,6 +236,14 @@ class VoiceConfirmationService: error_message=str(exc), ) self._sessions.record_voice_trace(surgery_id, asr_text=None, error=str(exc)) + self._emit_voice_trace( + source="wav", + status="invalid_audio", + surgery_id=surgery_id, + confirmation_id=confirmation_id, + error_message=str(exc), + audio_object_key=stored.object_key, + ) raise SurgeryPipelineError( "VOICE_AUDIO_INVALID", f"无法解析 WAV 音频:{exc}", @@ -165,6 +254,14 @@ class VoiceConfirmationService: self._baidu.asr, pcm, "pcm", 16000, None ) except BaiduSpeechNotConfiguredError as exc: + self._emit_voice_trace( + source="wav", + status="baidu_not_configured", + surgery_id=surgery_id, + 
confirmation_id=confirmation_id, + error_message=str(exc), + audio_object_key=stored.object_key, + ) raise SurgeryPipelineError( "BAIDU_NOT_CONFIGURED", str(exc), @@ -184,6 +281,14 @@ class VoiceConfirmationService: error_message=str(exc), ) self._sessions.record_voice_trace(surgery_id, asr_text=None, error=str(exc)) + self._emit_voice_trace( + source="wav", + status="asr_failed", + surgery_id=surgery_id, + confirmation_id=confirmation_id, + error_message=str(exc), + audio_object_key=stored.object_key, + ) raise SurgeryPipelineError( "VOICE_ASR_FAILED", f"语音识别调用失败:{exc}", @@ -205,6 +310,14 @@ class VoiceConfirmationService: error_message=msg, ) self._sessions.record_voice_trace(surgery_id, asr_text=None, error=msg) + self._emit_voice_trace( + source="wav", + status="asr_failed", + surgery_id=surgery_id, + confirmation_id=confirmation_id, + error_message=msg, + audio_object_key=stored.object_key, + ) raise SurgeryPipelineError("VOICE_ASR_FAILED", msg) if asr_payload.get("err_no") != 0: @@ -226,6 +339,14 @@ class VoiceConfirmationService: error_message=msg, ) self._sessions.record_voice_trace(surgery_id, asr_text=None, error=msg) + self._emit_voice_trace( + source="wav", + status="asr_failed", + surgery_id=surgery_id, + confirmation_id=confirmation_id, + error_message=msg, + audio_object_key=stored.object_key, + ) raise SurgeryPipelineError("VOICE_ASR_FAILED", msg) results = asr_payload.get("result") @@ -252,6 +373,14 @@ class VoiceConfirmationService: error_message=msg, ) self._sessions.record_voice_trace(surgery_id, asr_text=None, error=msg) + self._emit_voice_trace( + source="wav", + status="asr_failed", + surgery_id=surgery_id, + confirmation_id=confirmation_id, + error_message=msg, + audio_object_key=stored.object_key, + ) raise SurgeryPipelineError("VOICE_ASR_FAILED", msg) self._sessions.record_voice_trace(surgery_id, asr_text=text, error=None) @@ -269,10 +398,24 @@ class VoiceConfirmationService: ) if not rejected and not chosen: - msg = ( - 
"无法从语音中匹配候选项或本台手术候选清单中的耗材名称," - "请重试或说「不是」否认全部" + _, retry_remaining = await self._sessions.record_voice_parse_failure( + surgery_id, confirmation_id ) + base = ( + "无法从语音中匹配候选项或本台手术候选清单中的耗材名称," + "请重试或说「不是」否认全部。" + ) + if retry_remaining > 0: + msg = ( + f"{base} 本次未听清或未能解析," + f"您还可重试 {retry_remaining} 次," + "请说「第一个」「第二个」等序号或候选项全名。" + ) + else: + msg = ( + f"{base} 本轮重试机会已用完," + "请再清晰地说序号/全名,或说「不是」否认全部。" + ) await self._persist_audit( surgery_id=surgery_id, confirmation_id=confirmation_id, @@ -287,7 +430,23 @@ class VoiceConfirmationService: error_message=msg, ) self._sessions.record_voice_trace(surgery_id, asr_text=text, error=msg) - raise SurgeryPipelineError("VOICE_PARSE_FAILED", msg) + self._emit_voice_trace( + source="wav", + status="parse_failed", + surgery_id=surgery_id, + confirmation_id=confirmation_id, + asr_text=text, + error_message=msg, + audio_object_key=stored.object_key, + ) + raise SurgeryPipelineError( + "VOICE_PARSE_FAILED", + msg, + extra={ + "confirmation_id": confirmation_id, + "retry_remaining": retry_remaining, + }, + ) await self._sessions.resolve_pending_confirmation( surgery_id, @@ -310,6 +469,16 @@ class VoiceConfirmationService: options_snapshot_json=options_snapshot, error_message=None, ) + self._emit_voice_trace( + source="wav", + status=final_status, + surgery_id=surgery_id, + confirmation_id=confirmation_id, + asr_text=text, + resolved_label=chosen if not rejected else None, + rejected=rejected, + audio_object_key=stored.object_key, + ) if rejected: return VoiceResolveResult( @@ -327,6 +496,186 @@ class VoiceConfirmationService: message="已确认并记一条消耗。", ) + async def resolve_from_recognized_text( + self, + *, + surgery_id: str, + confirmation_id: str, + recognized_text: str, + ) -> VoiceResolveResult: + """浏览器 Web Speech 等客户端本机识别后的文本,不经 MinIO/百度 ASR,解析规则与 `resolve_from_wav` 一致。""" + pending = self._sessions.get_pending_confirmation_by_id( + surgery_id, confirmation_id + ) + if pending is None: + self._emit_voice_trace( + 
source="text", + status="confirmation_not_found", + surgery_id=surgery_id, + confirmation_id=confirmation_id, + error_message="未找到该待确认项或已处理。", + ) + raise SurgeryPipelineError( + "CONFIRMATION_NOT_FOUND", + "未找到该待确认项或已处理。", + ) + + option_labels = [a.strip() for a, _ in pending.options if a.strip()] + options_snapshot = json.dumps( + [{"label": a, "confidence": b} for a, b in pending.options], + ensure_ascii=False, + ) + + text = (recognized_text or "").strip() + if not text: + await self._persist_audit( + surgery_id=surgery_id, + confirmation_id=confirmation_id, + status="client_stt_empty", + audio_object_key=None, + audio_content_type=None, + audio_size_bytes=None, + audio_sha256=None, + asr_text=None, + resolved_label=None, + options_snapshot_json=options_snapshot, + error_message="客户端识别文本为空", + ) + self._sessions.record_voice_trace(surgery_id, asr_text=None, error="empty text") + self._emit_voice_trace( + source="text", + status="client_stt_empty", + surgery_id=surgery_id, + confirmation_id=confirmation_id, + error_message="客户端识别文本为空", + ) + raise SurgeryPipelineError("VOICE_TEXT_EMPTY", "recognized_text 为空。") + + self._sessions.record_voice_trace(surgery_id, asr_text=text, error=None) + + rejected = is_rejection_phrase(text) + chosen: str | None = None + if not rejected: + chosen = parse_voice_choice(text, option_labels) + if chosen is None: + surgery_candidates = self._sessions.get_surgery_candidate_consumables( + surgery_id + ) + chosen = match_voice_choice_against_candidates(text, surgery_candidates) + + if not rejected and not chosen: + _, retry_remaining = await self._sessions.record_voice_parse_failure( + surgery_id, confirmation_id + ) + base = ( + "无法从文本中匹配候选项或本台手术候选清单中的耗材名称," + "请重试或说「不是」否认全部。" + ) + if retry_remaining > 0: + msg = ( + f"{base} 本次未能解析," + f"您还可重试 {retry_remaining} 次," + "请输入「第一个」「第二个」等或候选项全名。" + ) + else: + msg = ( + f"{base} 本轮重试机会已用完," + "请再输入序号/全名,或说「不是」否认全部。" + ) + await self._persist_audit( + surgery_id=surgery_id, + 
confirmation_id=confirmation_id, + status="client_stt_parse_failed", + audio_object_key=None, + audio_content_type=None, + audio_size_bytes=None, + audio_sha256=None, + asr_text=text, + resolved_label=None, + options_snapshot_json=options_snapshot, + error_message=msg, + ) + self._sessions.record_voice_trace(surgery_id, asr_text=text, error=msg) + self._emit_voice_trace( + source="text", + status="client_stt_parse_failed", + surgery_id=surgery_id, + confirmation_id=confirmation_id, + asr_text=text, + error_message=msg, + ) + raise SurgeryPipelineError( + "VOICE_PARSE_FAILED", + msg, + extra={ + "confirmation_id": confirmation_id, + "retry_remaining": retry_remaining, + }, + ) + + await self._sessions.resolve_pending_confirmation( + surgery_id, + confirmation_id, + chosen_label=chosen, + rejected=rejected, + ) + + final_status = "rejected" if rejected else "recognized" + await self._persist_audit( + surgery_id=surgery_id, + confirmation_id=confirmation_id, + status=final_status, + audio_object_key=None, + audio_content_type=None, + audio_size_bytes=None, + audio_sha256=None, + asr_text=text, + resolved_label=chosen if not rejected else None, + options_snapshot_json=options_snapshot, + error_message=None, + ) + self._emit_voice_trace( + source="text", + status=final_status, + surgery_id=surgery_id, + confirmation_id=confirmation_id, + asr_text=text, + resolved_label=chosen if not rejected else None, + rejected=rejected, + ) + + if rejected: + return VoiceResolveResult( + resolved_label=None, + rejected=True, + asr_text=text, + audio_object_key=None, + message="已否认全部候选,未记消耗。", + ) + return VoiceResolveResult( + resolved_label=chosen, + rejected=False, + asr_text=text, + audio_object_key=None, + message="已确认并记一条消耗。", + ) + + async def list_voice_audits_for_surgery( + self, + surgery_id: str, + *, + limit: int = 50, + offset: int = 0, + ) -> tuple[list[VoiceConfirmationAudit], int]: + """从 `voice_confirmation_audits` 表分页读取,供内部查询与报表。""" + async with AsyncSessionLocal() 
as session: + return await self._audits.list_by_surgery( + session, + surgery_id, + limit=limit, + offset=offset, + ) + async def _persist_audit( self, *, diff --git a/app/surgery_errors.py b/app/surgery_errors.py index a67764b..7340a1c 100644 --- a/app/surgery_errors.py +++ b/app/surgery_errors.py @@ -1,10 +1,21 @@ """Errors surfaced by the surgery recording / result pipeline.""" +from __future__ import annotations + +from typing import Any + class SurgeryPipelineError(Exception): """录制未能按约定完成启动或停止。""" - def __init__(self, code: str, message: str) -> None: + def __init__( + self, + code: str, + message: str, + *, + extra: dict[str, Any] | None = None, + ) -> None: self.code = code self.message = message + self.extra = extra super().__init__(message) diff --git a/app/terminal_markdown.py b/app/terminal_markdown.py new file mode 100644 index 0000000..e64ed2a --- /dev/null +++ b/app/terminal_markdown.py @@ -0,0 +1,20 @@ +"""在终端中渲染 Markdown(含 GFM 表格),依赖 Rich。非 TTY 时仍尽量输出为可读表格。""" + +from __future__ import annotations + +from loguru import logger +from rich.console import Console +from rich.markdown import Markdown + + +def print_markdown_stderr(content: str) -> None: + text = (content or "").rstrip() + if not text: + return + try: + # stderr=True 与 loguru 的默认输出一致,便于在同一终端里对齐其它日志 + console = Console(stderr=True, soft_wrap=True) + console.print(Markdown(text)) + except Exception as exc: # pragma: no cover + logger.warning("Rich Markdown 渲染失败 ({}), 回退为纯文本", exc) + logger.info("{}", text) diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml index 29e5d55..9040bcb 100644 --- a/docker-compose.dev.yml +++ b/docker-compose.dev.yml @@ -1,10 +1,11 @@ -# Local development stack. You can either: -# 1. run `docker compose -f docker-compose.dev.yml up --build` for API + DB in containers, or -# 2. run `./start.sh` to start only DB and keep the API on the host with hot reload. 
+# 本地仅起 PostgreSQL + MinIO;FastAPI 在宿主机跑(如 ./start.sh、uv run uvicorn)。 # -# Default host ports avoid common 5432/8000 clashes when many services run in parallel: -# Postgres published: POSTGRES_PORT -> 35432 (container still listens on 5432) -# API published: API_PORT -> 38080 (uvicorn inside container still listens on 8000) +# docker compose -f docker-compose.dev.yml up -d +# +# 默认端口避开工区常见占用: +# Postgres: 宿主机 ${POSTGRES_PORT:-35432} -> 容器 5432 +# MinIO API: 9000;控制台: 9001 + services: db: image: postgres:16-alpine @@ -23,57 +24,19 @@ services: retries: 20 start_period: 5s - api: - build: - context: . - dockerfile: Dockerfile + # S3 兼容:语音确认原始 WAV;与本项目 .env 中 MINIO_ACCESS_KEY / MINIO_SECRET_KEY 一致 + minio: + image: minio/minio:latest + command: server /data --console-address ":9001" environment: - POSTGRES_USER: ${POSTGRES_USER:-postgres} - POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-postgres} - POSTGRES_DB: ${POSTGRES_DB:-operation_room} - POSTGRES_HOST: db - POSTGRES_PORT: 5432 - CONSUMABLE_CLASSIFIER_IMGSZ: ${CONSUMABLE_CLASSIFIER_IMGSZ:-224} - CONSUMABLE_CLASSIFIER_DEVICE: ${CONSUMABLE_CLASSIFIER_DEVICE:-} - CONSUMABLE_CLASSIFIER_TOPK: ${CONSUMABLE_CLASSIFIER_TOPK:-5} - CONSUMABLE_MIN_CLS_CONFIDENCE: ${CONSUMABLE_MIN_CLS_CONFIDENCE:-0.5} - CONSUMABLE_VISION_WINDOW_SEC: ${CONSUMABLE_VISION_WINDOW_SEC:-15} - CONSUMABLE_CATALOG_XLSX_PATH: ${CONSUMABLE_CATALOG_XLSX_PATH:-} - HAND_DETECTION_WEIGHTS: ${HAND_DETECTION_WEIGHTS:-} - HAND_DETECTION_IMGSZ: ${HAND_DETECTION_IMGSZ:-640} - HAND_DETECTION_DEVICE: ${HAND_DETECTION_DEVICE:-} - VIDEO_DEFAULT_BACKEND: ${VIDEO_DEFAULT_BACKEND:-rtsp} - VIDEO_RTSP_URL_TEMPLATE: ${VIDEO_RTSP_URL_TEMPLATE:-} - VIDEO_RTSP_URLS_JSON_FILE: ${VIDEO_RTSP_URLS_JSON_FILE:-} - VIDEO_RTSP_URLS_JSON: ${VIDEO_RTSP_URLS_JSON:-} - VIDEO_CAMERA_BACKEND_OVERRIDES_JSON: ${VIDEO_CAMERA_BACKEND_OVERRIDES_JSON:-} - HIKVISION_SDK_ENABLED: ${HIKVISION_SDK_ENABLED:-false} - HIKVISION_LIB_DIR: ${HIKVISION_LIB_DIR:-/opt/hikvision/lib} - HIKVISION_DEVICE_IP: 
${HIKVISION_DEVICE_IP:-} - HIKVISION_USER: ${HIKVISION_USER:-} - HIKVISION_PASSWORD: ${HIKVISION_PASSWORD:-} - HIKVISION_PREVIEW_RTSP_TEMPLATE: ${HIKVISION_PREVIEW_RTSP_TEMPLATE:-} + MINIO_ROOT_USER: ${MINIO_ACCESS_KEY:-minioadmin} + MINIO_ROOT_PASSWORD: ${MINIO_SECRET_KEY:-minioadmin} ports: - - "${API_PORT:-38080}:8000" - command: ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"] + - "${MINIO_PORT:-9000}:9000" + - "${MINIO_CONSOLE_PORT:-9001}:9001" volumes: - - ./app:/app/app - - ./main.py:/app/main.py - depends_on: - db: - condition: service_healthy - healthcheck: - test: - [ - "CMD", - "python", - "-c", - "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8000/health', timeout=2)", - ] - interval: 10s - timeout: 5s - retries: 5 - start_period: 10s + - minio_data_dev:/data volumes: pgdata_dev: + minio_data_dev: diff --git a/docs/video-backends.md b/docs/video-backends.md index fa6d0d6..6094a8d 100644 --- a/docs/video-backends.md +++ b/docs/video-backends.md @@ -34,8 +34,8 @@ SDK **不作为构建期依赖**:将厂商提供的 Linux x86_64 动态库挂 - 开录后按 `VIDEO_INFERENCE_INTERVAL_SEC` 抽帧,依次调用耗材分类与撕扯动作模型。 - **候选耗材清单**(开始手术请求体中的 `candidate_consumables`)为**硬约束**:若为空,服务端**不会**写入任何消耗明细(仅拉流推理);非空时仅允许清单内标签自动记账。 -- 当分类 Top1 置信度 ≥ `VIDEO_AUTO_CONFIRM_CONFIDENCE` 且标签在候选清单内时,自动写入一条 `source=vision` 的消耗明细。 -- 置信度在 \[`VIDEO_VOICE_CONFIRM_MIN_CONFIDENCE`, `VIDEO_AUTO_CONFIRM_CONFIDENCE`\) 且存在可向医生展示的候选时,会生成一条**待确认**任务(不阻塞后续帧);客户端通过 `GET /client/surgeries/{surgery_id}/pending-confirmation` 拉取话术并播报,确认后 `POST .../pending-confirmation/{id}/resolve`。 +- 当分类 Top1 置信度 **≥** `VIDEO_AUTO_CONFIRM_CONFIDENCE`(**默认 0.9**)且标签在候选清单内时,自动写入一条 `source=vision` 的消耗明细;**低于**该线的识别需人工确认(在语音下沿之上且能展示候选项时入队)。 +- 置信度在 \[`VIDEO_VOICE_CONFIRM_MIN_CONFIDENCE`, `VIDEO_AUTO_CONFIRM_CONFIDENCE`\) 等区间且存在可向医生展示的候选时,会生成**待确认**任务;客户端 `GET /client/surgeries/{surgery_id}/pending-confirmation`,确认后 `POST .../pending-confirmation/{id}/resolve` 等。 - 已有至少一条消耗明细后,`GET /client/surgeries/{surgery_id}/result` 返回 
200;若已开录但尚未产生任何明细,返回 503 `RESULT_NOT_READY`。 - 同类物品写入受 `VIDEO_DETAIL_COOLDOWN_SEC` 节流。 - RTSP 读帧连续失败达到 `VIDEO_READ_FAILURE_RECONNECT_THRESHOLD` 时会 `release` 并尝试重连,间隔 `VIDEO_RECONNECT_BACKOFF_SECONDS`。 diff --git a/main.py b/main.py index 0a6a5b1..7c6d567 100644 --- a/main.py +++ b/main.py @@ -47,6 +47,19 @@ def create_app() -> FastAPI: ) logger.info("CORS enabled for demo client; origins={}", origins) application.include_router(api_router) + if settings.demo_orchestrator_enabled: + from app.routers import demo_orch + + application.include_router(demo_orch.router) + logger.info( + "Demo orchestrator enabled: POST /internal/demo/orchestrate-and-start", + ) + else: + logger.info( + "Demo orchestrator disabled (DEMO_ORCHESTRATOR_ENABLED=false): " + "GET /internal/demo/orchestrator-status for status; " + "POST /internal/demo/orchestrate-and-start is not registered", + ) return application diff --git a/pyproject.toml b/pyproject.toml index 5dc174a..4c30255 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,7 @@ dependencies = [ "sqlalchemy>=2.0.49", "ultralytics>=8.4.40", "uvicorn[standard]>=0.44.0", + "rich>=15.0.0", ] [project.scripts] diff --git a/scripts/demo_client/README.md b/scripts/demo_client/README.md index bb0c784..e1fc0d9 100644 --- a/scripts/demo_client/README.md +++ b/scripts/demo_client/README.md @@ -6,10 +6,77 @@ ``` scripts/demo_client/ - server.py # 基于 stdlib 的静态服务器;额外暴露 /labels.json - index.html # 单文件页面(原生 JS,零构建依赖) + server.py # 基于 stdlib 的静态服务器;额外暴露 /labels.json + index.html # 单文件页面(原生 JS,零构建依赖) + fake_rtsp_from_file.py # 无真摄像头时:把本地视频循环发布为 RTSP(ffmpeg + Docker MediaMTX) ``` +## 调试:无真实摄像头,用录好的视频模拟 RTSP + +监控服务**只从 RTSP URL 拉流**(`cv2.VideoCapture`),**没有**「上传视频文件」的 HTTP 接口;在不改 Python 后端的前提下,只能让「摄像头地址」指向一个**真实可连的 RTSP 源**。 + +推荐做法:在**本机**把视频文件用 **ffmpeg** 推到本机上的 **RTSP 服务**(脚本用 Docker 启动 [MediaMTX](https://github.com/bluenviron/mediamtx)),得到 `rtsp://127.0.0.1:<端口>/<路径>`,再通过**环境变量**告诉后端(**只改配置,不改仓库里的后端代码**): + +**单路**(一个文件、一个 `camera_id`,兼容旧命令): 
+ +```bash +# 依赖:ffmpeg、Docker(首次会拉取 bluenviron/mediamtx) +cd /path/to/operation-room-monitor-server +python3 scripts/demo_client/fake_rtsp_from_file.py /path/to/recording.mp4 --port 18554 --path demo +``` + +**两路**(两路不同视频、两个 `camera_id`;**一个** MediaMTX、**两路** ffmpeg;每路用不同的 `RTSP_PATH`): + +```bash +python3 scripts/demo_client/fake_rtsp_from_file.py --port 18554 \ + --stream 'or-cam-01|./a.mp4|demo1' \ + --stream 'or-cam-02|./b.mp4|demo2' +``` + +`--stream` 格式为 `CAMERA_ID|文件路径|RTSP_PATH`(竖线分隔,整条加引号),生成的 `VIDEO_RTSP_URLS_JSON` 会同时包含 `or-cam-01` 与 `or-cam-02`。 + +在**另一终端**启动监控服务前 `source` 或手动 `export` 上述变量,使 `POST /client/surgeries/start` 里使用的 `camera_ids`(如 `or-cam-01,or-cam-02`)能解析到对应 URL。Demo 页里「将 camera_id 填到开始手术」可一键同步两路 id。 + +### 监控在 Docker、假 RTSP 在宿主机(推荐联调拓扑) + +常见安排是:**假摄像头脚本**(`fake_rtsp_from_file.py` + ffmpeg + MediaMTX)在**宿主机**终端里跑,推流地址是 `rtsp://127.0.0.1:<端口>/...`;**监控 API 服务**在 **Docker 容器**里跑,容器里的进程要访问宿主机上的 RTSP,应使用: + +- **macOS / Windows Docker Desktop**:`rtsp://host.docker.internal:<端口>/<路径>` +- **Linux**:`host.docker.internal` 可能未预置,可任选其一: + - 给该服务容器加 `--add-host=host.docker.internal:host-gateway`(Docker 20.10+),或 + - 直接把 URL 写成宿主在 **docker0/桥接网** 上可达的局域网 IP(如 `192.168.x.x`),保证从容器内 `curl`/`ffprobe` 能通 + +`docker-compose` 里可将 `VIDEO_RTSP_URLS_JSON` 写进 `environment:` 或 env 文件;**不要**在仅容器可解析的配置里写 `127.0.0.1` 去指宿主机上的 RTSP(`127.0.0.1` 在容器内是容器自己)。 + +若监控与假 RTSP **都在宿主机同一系统**里直接跑(非容器),则用 `rtsp://127.0.0.1:...` 即可;否则应使用上面「容器连宿主」的写法。 + +发布失败时,可尝试把输入转码后再推流(示例,需自行调整): + +```bash +ffmpeg -re -stream_loop -1 -i recording.mp4 -c:v libx264 -pix_fmt yuv420p -f rtsp -rtsp_transport tcp rtsp://127.0.0.1:18554/demo +``` + +(仍须先自行启动 MediaMTX 或等价 RTSP 服务端。) + +Demo 页面「调试:两路视频」中可用 **选择视频** / **拖放** 为路1/路2 指定文件,并配合下面 **一键开录** 上传,无需在页面里手抄 `python3` / `export` 命令。若必须完全手跑 `fake_rtsp_from_file.py`,请在上文命令示例与 `export VIDEO_RTSP_URLS_JSON=...` 方式自行在终端完成。 + +## 一键开录(不再手抄命令) + +在 §4.1 勾选 **「一键联调」** 后,在「调试」里为**路1/路2**各选一段视频,再点 **开始手术**,浏览器会把两路视频 **multipart 上传到监控 API**(`POST 
/internal/demo/orchestrate-and-start`),由服务进程依次: + +1. 落盘两路视频到临时目录 +2. 用 Docker 起 MediaMTX、两路 ffmpeg 推 RTSP(与 `fake_rtsp_from_file.py` 等效) +3. 把 `{"or-cam-01":"rtsp://127.0.0.1:…","or-cam-02":"rtsp://127.0.0.1:…"}` 写入 `VIDEO_RTSP_URLS_JSON_FILE`(与开录/拉流同进程,固定本机回环;`DEMO_ORCHESTRATOR_RTSP_JSON_HOST` 仅影响你**手配**假流、给另一进程读 JSON 的用法) +4. 调用与普通开录相同逻辑 + +**需同时满足**: + +- `.env` 中 `DEMO_ORCHESTRATOR_ENABLED=true`(并重启 API) +- 已设置 `VIDEO_RTSP_URLS_JSON_FILE` 指向**可写**的 JSON 文件;Docker 中请用 **bind-mount** 到容器内同一路径 +- **运行 `main.py` 的进程**能执行本机 `docker` 与 `ffmpeg`(与手动跑 `fake_rtsp_from_file` 相同)。**仅将 API 放 Docker、且不挂载** ` /var/run/docker.sock` 时,容器内往往无法为你在宿主机起 MediaMTX,此时请继续用手动假流方式。 + +由于每次解析都会重新读取 `video_rtsp_url_map()`,覆盖 JSON 后**无需重启**主服务即可被下一次开录用到。 + ## 运行方式 ```bash @@ -35,6 +102,7 @@ open http://localhost:38081/ - §4.3 `GET /client/surgeries/{id}/result` — 以表格渲染 `details` 与 `summary` - §4.4 `GET /client/surgeries/{id}/pending-confirmation` — 支持手动拉取与 2s 自动轮询 - §4.5 `POST .../resolve` — 本地麦克风录音 → 16 kHz 单声道 WAV → `multipart/form-data` 上传 +- **调试:无摄像头** — 两路视频选择与 `camera_id`;一键联调见上文;手跑假流见 `fake_rtsp_from_file.py` 与本文「调试:无真实摄像头」 右侧「响应日志」按时间倒序展示每次请求的 method/url/status/body,便于联调截图。 diff --git a/scripts/demo_client/fake_rtsp_from_file.py b/scripts/demo_client/fake_rtsp_from_file.py new file mode 100644 index 0000000..213f58a --- /dev/null +++ b/scripts/demo_client/fake_rtsp_from_file.py @@ -0,0 +1,266 @@ +#!/usr/bin/env python3 +"""Publish local video file(s) as looping RTSP stream(s) (fake camera) for local dev. + +The Operation Room server only opens RTSP URLs (OpenCV); there is no video-upload API. +This script does NOT change the application backend: it runs ffmpeg + a small +RTSP server (MediaMTX) so you can point VIDEO_RTSP_URLS_JSON to rtsp://.../yourpath. + +Requires: + - ffmpeg in PATH + - Docker, with the image pulled: bluenviron/mediamtx (recommended), OR a local + `mediamtx` binary in PATH (advanced). 
+ +Single stream (legacy):: + python3 scripts/demo_client/fake_rtsp_from_file.py /path/to/video.mp4 + python3 scripts/demo_client/fake_rtsp_from_file.py video.mp4 --port 18554 --path demo + +Multiple streams (one MediaMTX, one ffmpeg per camera; different RTSP path per stream):: + + python3 scripts/demo_client/fake_rtsp_from_file.py --port 18554 \\ + --stream 'or-cam-01|./a.mp4|demo1' \\ + --stream 'or-cam-02|./b.mp4|demo2' + +--stream format: ``CAMERA_ID|FILE|RTSP_PATH`` (use quotes in shell; RTSP path is +the last segment, e.g. ``demo1`` -> ``rtsp://127.0.0.1:/demo1``). +""" + +from __future__ import annotations + +import argparse +import atexit +import json +import os +import signal +import shutil +import subprocess +import sys +import time +from pathlib import Path + +MEDIAMTX_IMAGE = os.environ.get("MEDIAMTX_DOCKER_IMAGE", "bluenviron/mediamtx:latest") +CONTAINER_NAME = "orm-fake-rtsp-mediamtx" + + +def _has_docker() -> bool: + return shutil.which("docker") is not None + + +def _has_ffmpeg() -> bool: + return shutil.which("ffmpeg") is not None + + +def _stop_mediamtx_container() -> None: + if not _has_docker(): + return + try: + subprocess.run( + ["docker", "rm", "-f", CONTAINER_NAME], + capture_output=True, + check=False, + timeout=30, + ) + except (OSError, subprocess.SubprocessError): + pass + + +def _start_mediamtx_docker(host_port: int) -> bool: + _stop_mediamtx_container() + cmd = [ + "docker", "run", "-d", + "--name", CONTAINER_NAME, + "-p", f"127.0.0.1:{host_port}:8554", + MEDIAMTX_IMAGE, + ] + print("[fake-rtsp] Starting MediaMTX:", " ".join(cmd), file=sys.stderr) + try: + proc = subprocess.run(cmd, capture_output=True, text=True, timeout=120) + except (OSError, subprocess.SubprocessError) as exc: + print(f"[fake-rtsp] docker run failed: {exc}", file=sys.stderr) + return False + if proc.returncode != 0: + err = (proc.stderr or proc.stdout or "").strip() + print(f"[fake-rtsp] docker run exit {proc.returncode}: {err}", file=sys.stderr) + return False + 
atexit.register(_stop_mediamtx_container) + return True + + +def _parse_stream_arg(spec: str) -> tuple[str, Path, str]: + parts = spec.split("|", 2) + if len(parts) != 3: + raise ValueError( + f"Invalid --stream {spec!r}; expected CAM|FILE|RTSP_PATH (three fields separated by |)" + ) + cam = parts[0].strip() + fpath = Path(parts[1].strip()).expanduser() + rpath = parts[2].strip().strip("/") + if not cam: + raise ValueError("empty camera id in --stream") + if not rpath: + rpath = "demo" + return cam, fpath, rpath + + +def main() -> int: + parser = argparse.ArgumentParser( + description="Loop video file(s) to RTSP URL(s) (dev fake camera; no backend code change).", + ) + parser.add_argument( + "video", + nargs="?", + type=Path, + default=None, + help="(single-stream mode) Path to a video file", + ) + parser.add_argument( + "--path", + default="demo", + help="(single-stream mode) RTSP path segment (rtsp://host:port/)", + ) + parser.add_argument( + "--port", + type=int, + default=18554, + help="Host port mapped to MediaMTX RTSP (container internal 8554). Default: 18554", + ) + parser.add_argument( + "--stream", + action="append", + default=None, + help=( + "Multi-stream mode. Repeat for each camera. " + "Format: CAM|FILE|RTSP_PATH e.g. or-cam-01|./a.mp4|demo1" + ), + ) + parser.add_argument( + "--no-docker", + action="store_true", + help="Do not start Docker; run MediaMTX yourself on the host port mapping.", + ) + args = parser.parse_args() + + if not _has_ffmpeg(): + print("ffmpeg not found in PATH. 
Install ffmpeg and retry.", file=sys.stderr) + return 1 + + streams: list[tuple[str, Path, str]] = [] + if args.stream: + for s in args.stream: + try: + streams.append(_parse_stream_arg(s)) + except ValueError as exc: + print(f"[fake-rtsp] {exc}", file=sys.stderr) + return 1 + elif args.video is not None: + fpath = args.video.resolve() + sp = (args.path or "demo").strip().strip("/") or "demo" + streams = [("or-cam-01", fpath, sp)] + else: + parser.error("Provide a video file (single mode) or one or more --stream CAM|FILE|RTSP_PATH") + + for cam, fpath, rpath in streams: + rp_file = fpath.resolve() + if not rp_file.is_file(): + print(f"File not found: {rp_file} (camera {cam!r})", file=sys.stderr) + return 1 + for ch in rpath: + if ch not in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_.-": + print( + f"[fake-rtsp] RTSP path segment {rpath!r} for {cam!r} should be " + r"[a-zA-Z0-9_.-] only; adjust --path/--stream", + file=sys.stderr, + ) + return 1 + + host_port: int = args.port + if not args.no_docker: + if not _has_docker(): + print("Docker not found. Use --no-docker and start MediaMTX manually.", file=sys.stderr) + return 1 + if not _start_mediamtx_docker(host_port): + return 1 + print("[fake-rtsp] MediaMTX container started. 
Waiting for RTSP…", file=sys.stderr) + time.sleep(1.0) + else: + print( + f"[fake-rtsp] --no-docker: ensure an RTSP server is listening for publish on port {host_port}.", + file=sys.stderr, + ) + + procs: list[subprocess.Popen] = [] + url_map: dict[str, str] = {} + + for cam, fpath, stream_path in streams: + fp = fpath.resolve() + dest_url = f"rtsp://127.0.0.1:{host_port}/{stream_path}" + url_map[cam] = dest_url + publish_cmd: list[str] = [ + "ffmpeg", + "-hide_banner", "-loglevel", "info", + "-re", + "-stream_loop", "-1", + "-i", str(fp), + "-c", "copy", + "-f", "rtsp", + "-rtsp_transport", "tcp", + dest_url, + ] + print("---", file=sys.stderr) + print(f"Publish {cam} -> {dest_url}", file=sys.stderr) + print(" " + " ".join(publish_cmd), file=sys.stderr) + p = subprocess.Popen(publish_cmd) # noqa: S603 + procs.append(p) + + j_compact = json.dumps(url_map, ensure_ascii=False, separators=(",", ":")) + print("---", file=sys.stderr) + print("RTSP mapping (set on monitoring server):", file=sys.stderr) + for k, u in url_map.items(): + print(f" {k}: {u}", file=sys.stderr) + print("", file=sys.stderr) + print("export (same machine as monitoring server, env snippet):", file=sys.stderr) + print(f" export VIDEO_RTSP_URLS_JSON='{j_compact}'", file=sys.stderr) + print("", file=sys.stderr) + print("If the server runs in Docker on Mac/Win, use host.docker.internal, e.g.:", file=sys.stderr) + for cam, u in url_map.items(): + h = u.replace("127.0.0.1", "host.docker.internal", 1) + print(f" {cam}: {h}", file=sys.stderr) + print("---", file=sys.stderr) + print("Fake RTSP running (Ctrl+C to stop; MediaMTX container removed on exit).", file=sys.stderr) + + def on_sigint(_sig: int, _frame) -> None: + for p in procs: + if p.poll() is None: + p.terminate() + _stop_mediamtx_container() + raise SystemExit(130) + + signal.signal(signal.SIGINT, on_sigint) + signal.signal(signal.SIGTERM, on_sigint) + + try: + while True: + time.sleep(0.5) + for p in procs: + if p.poll() is not None: + print( + 
f"[fake-rtsp] ffmpeg ended (code {p.returncode}), stopping all.", + file=sys.stderr, + ) + raise KeyboardInterrupt + except KeyboardInterrupt: + pass + finally: + for p in procs: + if p.poll() is None: + p.terminate() + try: + p.wait(timeout=5) + except subprocess.TimeoutExpired: + p.kill() + _stop_mediamtx_container() + + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/demo_client/index.html b/scripts/demo_client/index.html index 73fcee4..2e3d9b2 100644 --- a/scripts/demo_client/index.html +++ b/scripts/demo_client/index.html @@ -137,6 +137,25 @@ white-space: pre-wrap; word-break: break-word; } + .log-hint { + margin-top: 6px; + padding: 6px 8px; + font-size: 11px; + line-height: 1.4; + color: #fcd34d; + background: rgba(245, 158, 11, 0.12); + border: 1px solid rgba(245, 158, 11, 0.35); + border-radius: 4px; + } + #orch-status-banner { border: 1px solid var(--border); } + .callout-ok { + background: rgba(34, 197, 94, 0.12); + border: 1px solid rgba(34, 197, 94, 0.4); + border-radius: 8px; + padding: 10px 12px; + margin: 0 0 10px; + line-height: 1.5; + } .log-time { color: var(--muted); font-size: 11px; } .badge { display: inline-block; @@ -171,6 +190,7 @@ .muted { color: var(--muted); } .err { color: var(--danger); } .ok { color: var(--accent-2); } + .warn { color: var(--warn); } .small { font-size: 12px; } .grow { flex: 1; } audio { width: 100%; margin-top: 8px; } @@ -180,6 +200,18 @@ .layout { grid-template-columns: 1fr; } .log { position: static; height: auto; max-height: 50vh; } } + pre.cmd { + background: var(--panel-2); + border: 1px solid var(--border); + border-radius: 6px; + padding: 10px 12px; + font-size: 11px; + line-height: 1.45; + overflow-x: auto; + margin: 8px 0 0; + white-space: pre-wrap; + word-break: break-all; + } @@ -187,6 +219,7 @@

Operation Room Monitor · Demo Client

+

手动触发 /client/* 5 个接口;本地麦克风录音后生成 WAV 上传语音确认接口。

@@ -200,16 +233,74 @@
+
+
+

调试:两路视频(与一键联调 / 无真摄像头)

+

+ 在路1 / 路2选好视频、§4.1 勾选「一键联调」后点「开始手术」即可;服务端会起假 RTSP 并写 VIDEO_RTSP_URLS_JSON_FILE。无法使用一键时,请按 scripts/demo_client/README.md 在宿主机手跑 + fake_rtsp_from_file.py 并配置环境变量。 +

+

两路视频(为 §4.1 一键选文件;两路 RTSP_PATH / camera_id 须与 API 配置一致,如 demo1 / demo2

+
+
+

路 1

+ + +
+ + + +
+
+
+ + +
+
+ + +
+
+
+
+

路 2

+ + +
+ + + +
+
+
+ + +
+
+ + +
+
+
+
+

+ 一键联调会直接上传你在此为路1/路2选择的文件。选文件时会把框内填成 ./文件名,仅作展示;真正上传以文件选择器为准,无需在框里改路径。 +

+
+ +
+
+

§4.1 开始手术

- +
@@ -218,8 +309,14 @@
+

+ +

- +
@@ -245,10 +342,14 @@
+
+

默认策略:Top1 置信度 < 0.9 且达语音下沿时多会入队待确认;≥ VIDEO_AUTO_CONFIRM_CONFIDENCE(默认 0.9)且标签在 candidate_consumables 内则直接记 vision,拉取待确认为 404。可在环境变量中调整 VIDEO_AUTO_CONFIRM_CONFIDENCE。确认时在「语音确认(录音)」上传 WAV 即可。

/**
 * POST a multipart/form-data body to the monitoring API and record the outcome
 * in the response log.
 *
 * The browser sets the multipart Content-Type (with boundary) itself because
 * `formData` is passed straight to fetch().
 *
 * @param {string} path - Path appended to the configured base URL.
 * @param {FormData} formData - Multipart payload to upload.
 * @returns {Promise<{res: Response, body: any}>} The raw response plus its body
 *   (parsed JSON, the raw text when it is not JSON, or null when empty).
 * @throws Re-throws the fetch() error after logging it when the request cannot be sent.
 */
async function apiMultipart(path, formData) {
    // Read the base URL once so the logged value and the request URL cannot disagree.
    const base = baseUrl();
    const url = base + path;
    console.info("[demo-client] orchestrate request", { baseUrl: base, path, fullUrl: url });

    let res;
    try {
        res = await fetch(url, { method: "POST", body: formData });
    } catch (e) {
        console.error("[demo-client] orchestrate network error", e);
        const netHint = "无法连接 " + url + "。请确认「服务端 Base URL」指向监控 API(默认 :38080),且本页在 :38081 打开时勿把 Base URL 填成 demo 页自身。";
        addLog("POST (orchestrate)", url, "NETWORK", String(e), { error: true, hint: netHint });
        throw e;
    }

    const text = await res.text();
    let parsed;
    try { parsed = text ? JSON.parse(text) : null; } catch { parsed = text; }

    // `detail` may be a plain string or a structured object; serialise objects so the
    // substring check below sees their content instead of "[object Object]".
    const detail = parsed && typeof parsed === "object" ? parsed.detail : undefined;
    let detailText = "";
    if (detail !== undefined && detail !== null) {
        detailText = typeof detail === "object" ? JSON.stringify(detail) : String(detail);
    }

    const failed = !res.ok;
    let hint = "";
    if (res.status === 404) {
        hint = "HTTP 404:本路径在服务端未注册。常见原因:1) 未设 DEMO_ORCHESTRATOR_ENABLED=true 并重启主进程,POST /internal/demo/orchestrate-and-start 未挂载;2)「服务端 Base URL」填错(须指向主 API 如 http://127.0.0.1:38080,不是本 demo 静态站 :38081)。可点「GET 联调状态」或打开浏览器控制台查看 [demo-client] 日志。";
    } else if (res.status === 400 && detailText.indexOf("VIDEO_RTSP") >= 0) {
        hint = "需配置可写的 VIDEO_RTSP_URLS_JSON_FILE,且 Docker 下请 bind-mount 到容器内同路径。";
    } else if (res.status === 503) {
        hint = "合成假 RTSP 或开录失败,请见响应体与主服务终端 log(demo orchestrate-and-start / ffmpeg / docker)。";
    }

    if (failed) {
        console.error("[demo-client] orchestrate response", { status: res.status, statusText: res.statusText, body: parsed, url });
    } else {
        console.info("[demo-client] orchestrate ok", { status: res.status, url });
    }
    addLog("POST (orchestrate)", url, res.status, parsed, { error: failed, hint });
    return { res, body: parsed };
}
JSON.parse(text) : null; } catch { data = { raw: text }; } + console.info("[demo-client] GET orchestrator-status", { url, httpStatus: res.status, data }); + addLog("GET (联调状态)", url, res.status, data, { error: !res.ok }); + b.style.display = "block"; + if (!res.ok) { + b.style.background = "rgba(239, 68, 68, 0.1)"; + b.style.color = "var(--text)"; + b.textContent = "无法拉取 " + url + "(HTTP " + res.status + ")。请把「服务端 Base URL」设为主 API(如 http://127.0.0.1:38080)。"; + return; + } + const on = data.orchestrator_enabled === true; + const fset = data.video_rtsp_urls_json_file_set === true; + b.style.background = on && fset ? "rgba(34, 197, 94, 0.1)" : "rgba(245, 158, 11, 0.12)"; + b.style.color = "var(--text)"; + const fp = data.video_rtsp_urls_json_file || "(未设)"; + b.innerHTML = on + ? ("一键 POST " + (data.orchestrate_path || "/internal/demo/orchestrate-and-start") + ":" + (fset ? "已开放;RTSP 映射文件 " : "未设 ") + "" + fp + "") + : ("一键开录 未注册:请在主服务 .env 设 DEMO_ORCHESTRATOR_ENABLED=true重启。当前 " + (data.orchestrate_path || "") + " 会 404。"); + } catch (e) { + console.error("[demo-client] orchestrator-status failed", e); + b.style.display = "block"; + b.style.background = "rgba(239, 68, 68, 0.1)"; + b.textContent = "联调状态请求失败: " + e; + } + } + // ============================================================ // §health // ============================================================ @@ -435,6 +612,7 @@ $("health-status").textContent = `HTTP ${res.status}`; $("health-status").className = "small " + (res.ok ? 
"ok" : "err"); }; + $("btn-orch-status").onclick = () => { refreshOrchStatus(); }; // ============================================================ // §4.1 start @@ -442,6 +620,30 @@ $("btn-start").onclick = async () => { const sid = ensureSurgeryId(); if (!sid) return; + if ($("orch-oneclick") && $("orch-oneclick").checked) { + const f1 = $("debug-vfile-1").files[0]; + const f2 = $("debug-vfile-2").files[0]; + if (!f1 || !f2) { + alert("请先在上方「调试」里为 路1 / 路2 各「选择…」一个视频文件。"); + return; + } + const fd = new FormData(); + fd.append("video1", f1, f1.name); + fd.append("video2", f2, f2.name); + fd.append("surgery_id", sid); + fd.append("camera_1", ($("debug-cam-1").value || "or-cam-01").trim() || "or-cam-01"); + fd.append("camera_2", ($("debug-cam-2").value || "or-cam-02").trim() || "or-cam-02"); + fd.append("rtsp_path_1", ($("debug-rpath-1").value || "demo1").trim() || "demo1"); + fd.append("rtsp_path_2", ($("debug-rpath-2").value || "demo2").trim() || "demo2"); + fd.append("candidate_consumables_json", JSON.stringify([...tags])); + const { res, body } = await apiMultipart("/internal/demo/orchestrate-and-start", fd); + if (!res.ok) { + const detail = (body && (body.detail !== undefined)) ? body.detail : body; + const errText = (typeof detail === "object" && detail !== null) ? 
/**
 * Play the confirmation prompt for a pending item.
 *
 * Tries the server-rendered audio from GET .../prompt-audio first. If the request
 * fails, returns a non-OK status, or the audio cannot be played (for example the
 * browser rejects play()), it falls back to the browser's speechSynthesis.
 *
 * @param {string} surgeryId - Surgery id used in the request path.
 * @param {string} confirmationId - Pending confirmation id (URL-encoded here).
 * @param {string} textFallback - Prompt text spoken when server audio is unavailable.
 * @returns {Promise<void>} Resolves when playback (or the fallback speech) finishes.
 */
async function playPromptTts(surgeryId, confirmationId, textFallback) {
    const path = `/client/surgeries/${surgeryId}/pending-confirmation/${encodeURIComponent(confirmationId)}/prompt-audio`;
    const audioUrl = baseUrl() + path;
    try {
        const res = await fetch(audioUrl);
        if (res.ok) {
            const objectUrl = URL.createObjectURL(await res.blob());
            try {
                // Await playback inside the try block: a rejected play() or an <audio>
                // error must reach the catch below so the speech fallback still runs.
                await new Promise((resolve, reject) => {
                    const audio = new Audio();
                    audio.preload = "auto";
                    audio.src = objectUrl;
                    audio.onended = () => resolve();
                    audio.onerror = () => reject(new Error("Audio 元素播放失败"));
                    const started = audio.play();
                    if (started && typeof started.catch === "function") {
                        started.catch(reject);
                    }
                });
                return;
            } finally {
                // Release the blob URL whether playback succeeded or failed.
                URL.revokeObjectURL(objectUrl);
            }
        }
    } catch (e) {
        console.warn("[demo-client] prompt-audio 不可用,回退浏览器 TTS", e);
    }
    return speakTextPromise((textFallback || "").trim());
}
prompt_text: ${body.prompt_text || ""}
Top1: ${body.model_top1_label} (${(body.model_top1_confidence * 100).toFixed(1)}%)
options:${opts || '
(无)
'}
`; + const pt = (body.prompt_text || "").trim(); + const ttsOn = $("tts-pending") && $("tts-pending").checked; + if (ttsOn && pt && body.confirmation_id !== lastTtsConfirmationId) { + lastTtsConfirmationId = body.confirmation_id; + void playPromptTts(sid, body.confirmation_id, pt).catch((e) => console.warn(e)); + } } else if (res.status === 404) { box.hidden = false; box.innerHTML = '暂无待确认项。'; @@ -538,16 +843,20 @@ } $("btn-pending").onclick = fetchPendingOnce; - $("auto-poll").onchange = (e) => { + function applyAutoPoll() { if (pollTimer) { clearInterval(pollTimer); pollTimer = null; } - if (e.target.checked) { + if ($("auto-poll") && $("auto-poll").checked) { $("voice-status").textContent = "自动轮询中…"; - pollTimer = setInterval(fetchPendingOnce, 2000); + pollTimer = setInterval(fetchPendingOnce, 10000); fetchPendingOnce(); } else { $("voice-status").textContent = ""; } - }; + } + $("auto-poll").onchange = applyAutoPoll; + if ($("auto-poll") && $("auto-poll").checked) { + applyAutoPoll(); + } // ============================================================ // §4.5 Recording (mic → WAV 16kHz mono PCM) @@ -706,12 +1015,91 @@ let parsed; try { parsed = text ? 
(function setupDebugVideoDrop() {
    /**
     * Put a dropped File into a file <input> so later reads of `input.files[0]`
     * see it. Returns false when the browser does not allow this.
     */
    function attachDroppedFile(input, file) {
        if (!input || typeof DataTransfer !== "function") return false;
        try {
            const holder = new DataTransfer();
            holder.items.add(file);
            input.files = holder.files;
            return !!(input.files && input.files[0]);
        } catch (e) {
            console.warn("[demo-client] could not attach dropped file", e);
            return false;
        }
    }

    /**
     * Make a stream card accept a dragged video file.
     *
     * @param {HTMLElement|null} el - The card element; nothing is bound when missing.
     * @param {string} vpathId - Id of the text box that displays "./<file name>".
     * @param {string} hintId - Id of the element that shows the selection hint.
     * @param {string} fileInputId - Id of the file input the one-click upload reads from.
     */
    function bindStreamCard(el, vpathId, hintId, fileInputId) {
        if (!el) return;
        el.addEventListener("dragover", (ev) => {
            // preventDefault is required for the element to become a drop target.
            ev.preventDefault();
            el.style.outline = "1px dashed var(--accent)";
        });
        el.addEventListener("dragleave", () => {
            el.style.outline = "";
        });
        el.addEventListener("drop", (ev) => {
            ev.preventDefault();
            el.style.outline = "";
            const file = ev.dataTransfer && ev.dataTransfer.files && ev.dataTransfer.files[0];
            const looksVideo =
                file &&
                (/^video\//.test(file.type || "") ||
                    /\.(mp4|mov|mkv|avi|webm|m4v|mpeg|mpg)$/i.test(file.name || ""));
            if (!looksVideo) {
                $(hintId).textContent = "请拖入视频文件";
                return;
            }
            // The one-click start reads the file from the <input type="file">, not from
            // the text box, so the dropped file has to be attached there as well.
            if (!attachDroppedFile($(fileInputId), file)) {
                $(hintId).textContent = "拖放未能附加文件,请改用「选择…」按钮";
                return;
            }
            $(vpathId).value = "./" + file.name;
            $(hintId).textContent = "已选: " + file.name + "(拖放)";
        });
    }

    bindStreamCard($("debug-stream-1"), "debug-vpath-1", "debug-hint-1", "debug-vfile-1");
    bindStreamCard($("debug-stream-2"), "debug-vpath-2", "debug-hint-2", "debug-vfile-2");
})();
async def _run() -> None:
    """Create the schema if needed, truncate the app tables, and print a summary."""
    # A brand-new database has no tables yet, so make sure they exist first.
    await init_db_schema()
    async with engine.begin() as conn:
        await conn.execute(_TRUNCATE_SQL)

    dsn = settings.sqlalchemy_database_url
    if "@" in dsn and "://" in dsn:
        # Replace the user:password part with *** before printing.
        scheme = dsn.split("://", 1)[0]
        after_credentials = dsn[dsn.rfind("@") + 1:]
        safe = f"{scheme}://***@{after_credentials}"
    else:
        safe = dsn

    print("已清空表:", ", ".join(_TABLES))
    print("数据库:", safe)
#!/usr/bin/env bash
# Same as start.sh, with one extra step: clear this app's business tables before
# starting the API (see scripts/start_fresh.py).
# Usage matches start.sh: SKIP_DOCKER=1, COMPOSE_FILE, PORT, etc.
#
# To only clear the database without starting the server:
#   uv run python scripts/start_fresh.py

set -euo pipefail

ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$ROOT"

COMPOSE_FILE="${COMPOSE_FILE:-docker-compose.dev.yml}"

if [[ "${SKIP_DOCKER:-0}" != "1" ]]; then
  echo "Starting Docker Compose services: db, minio ($COMPOSE_FILE)"
  docker compose -f "$COMPOSE_FILE" up -d db minio
  echo "MinIO API: http://127.0.0.1:${MINIO_PORT:-9000} console: http://127.0.0.1:${MINIO_CONSOLE_PORT:-9001}"
  echo "Waiting for PostgreSQL..."
  pg_ready=0
  for _ in $(seq 1 60); do
    if docker compose -f "$COMPOSE_FILE" exec -T db \
      pg_isready -U "${POSTGRES_USER:-postgres}" -d "${POSTGRES_DB:-operation_room}" \
      >/dev/null 2>&1; then
      echo "PostgreSQL is ready."
      pg_ready=1
      break
    fi
    sleep 1
  done
  # Stop here on timeout instead of running TRUNCATE against a database that is not up.
  if [[ "$pg_ready" != "1" ]]; then
    echo "PostgreSQL did not become ready within 60s; not clearing tables or starting the API." >&2
    exit 1
  fi
else
  echo "SKIP_DOCKER=1: not starting Docker Compose; using POSTGRES_* or DATABASE_URL from the environment."
fi

export POSTGRES_USER="${POSTGRES_USER:-postgres}"
export POSTGRES_PASSWORD="${POSTGRES_PASSWORD:-postgres}"
export POSTGRES_DB="${POSTGRES_DB:-operation_room}"
export POSTGRES_HOST="${POSTGRES_HOST:-localhost}"
export POSTGRES_PORT="${POSTGRES_PORT:-35432}"

echo "start_fresh: clearing app tables (TRUNCATE)..."
uv run python scripts/start_fresh.py

exec uv run uvicorn main:app --host "${HOST:-0.0.0.0}" --port "${PORT:-38080}" --reload
api_app.dependency_overrides[get_surgery_pipeline] = lambda: pipeline + client = TestClient(api_app) + r = client.post( + "/client/surgeries/123456/pending-confirmation/cid/resolve-text", + json={"recognized_text": "随便说说"}, + ) + assert r.status_code == 422 + assert r.json()["detail"]["code"] == "VOICE_PARSE_FAILED" + + def test_resolve_200(api_app: FastAPI) -> None: pipeline = MagicMock() pipeline.resolve_pending_confirmation_from_audio = AsyncMock( @@ -255,3 +309,22 @@ def test_internal_voice_status_404_and_200(api_app: FastAPI) -> None: r2 = client2.get("/internal/surgeries/123456/voice-status") assert r2.status_code == 200 assert r2.json()["pending_queue_approx"] == 2 + + +def test_internal_voice_audits_200_empty(api_app: FastAPI) -> None: + pipeline = MagicMock() + pipeline.list_voice_audits = AsyncMock(return_value=([], 0)) + api_app.dependency_overrides[get_surgery_pipeline] = lambda: pipeline + client = TestClient(api_app) + r = client.get( + "/internal/surgeries/123456/voice-audits", + params={"limit": 1, "offset": 0}, + ) + assert r.status_code == 200 + j = r.json() + assert j["surgery_id"] == "123456" + assert j["total"] == 0 + assert j["limit"] == 1 + assert j["offset"] == 0 + assert j["items"] == [] + pipeline.list_voice_audits.assert_awaited_once_with("123456", limit=1, offset=0) diff --git a/tests/test_consumption_tsv_log.py b/tests/test_consumption_tsv_log.py new file mode 100644 index 0000000..8a8ce2d --- /dev/null +++ b/tests/test_consumption_tsv_log.py @@ -0,0 +1,123 @@ +"""consumption_log.txt 兼容 TSV 格式。""" + +import pytest + +from app.config import settings +from app.services.consumable_vision_algorithm import ClsTop3 +from app.services.consumption_tsv_log import ( + HEADER, + _RANGE_SEP, + append_consumption_tsv_line, + build_consumption_markdown, + build_tsv_line, + init_consumption_log_file, + short_camera_label, +) + + +def test_short_camera_label() -> None: + assert short_camera_label("or-cam-01") == "cam01" + assert 
def test_per_surgery_file_init_and_append(
    tmp_path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Init writes the header, appends accumulate, and a second init resets to the header."""
    surgery_id = "or-001"
    path_template = str(tmp_path / "{surgery_id}.txt")
    monkeypatch.setattr(settings, "consumption_tsv_log_enabled", True)
    monkeypatch.setattr(settings, "consumption_tsv_log_path", path_template)
    log_file = tmp_path / "or-001.txt"

    init_consumption_log_file(surgery_id)
    for row in ("row1\n", "row2\n"):
        append_consumption_tsv_line(surgery_id, row)
    assert log_file.read_text(encoding="utf-8") == HEADER + "row1\n" + "row2\n"

    # Initialising again for the same surgery leaves only the header.
    init_consumption_log_file(surgery_id)
    assert log_file.read_text(encoding="utf-8") == HEADER
t2_name="cls2", + t2_conf=0.0003, + t3_name="cls3", + t3_conf=0.0002, + t1_pid="2237844", + t2_pid="11765-1-101", + t3_pid="21504-1-1", + ) + w0 = 1704067200.0 + md = build_consumption_markdown( + name_to_code={}, + best=best, + doctor_id="DOCTOR_PLACEHOLDER", + camera_id="or-cam-01", + wall_start_epoch=w0, + wall_end_epoch=w0 + 45.0, + ) + assert "Top1 物品id" in md and "Top1 物品名称" in md and "Top1 置信度" in md + assert "Top2 物品名称" in md and "Top3 物品名称" in md + assert "Top2 物品id" not in md + assert "2237844" in md + assert "一次性医用灭菌棉签" in md + assert "0.9997" in md + assert "cls2" in md and "cls3" in md + assert "11765-1-101" not in md and "21504-1-1" not in md + assert "0.0003" not in md and "0.0002" not in md + assert "DOCTOR_PLACEHOLDER" in md + assert "| 1 |" in md + # 终端为可读时间戳,非落盘用 ISO@cam + assert "2024-01-01 00:00:00.000" in md and "2024-01-01 00:00:45.000" in md + assert "cam01" in md and " · " in md and _RANGE_SEP in md + assert "cam01@2024-01" not in md diff --git a/tests/test_session_manager_unit.py b/tests/test_session_manager_unit.py index 152bb91..d6312cc 100644 --- a/tests/test_session_manager_unit.py +++ b/tests/test_session_manager_unit.py @@ -156,9 +156,7 @@ async def test_handle_skips_when_candidate_list_empty() -> None: confidence=0.99, topk=[PredictionCandidate(label="纱布", confidence=0.99)], ) - await mgr._handle_classification_result( - state=state, cls_res=res - ) + await mgr._handle_classification_result(state=state, cls_res=res) assert state.details == [] assert state.pending_fifo == [] @@ -199,9 +197,7 @@ async def test_handle_skips_below_voice_floor() -> None: confidence=0.4, topk=[PredictionCandidate(label="纱布", confidence=0.4)], ) - await mgr._handle_classification_result( - state=state, cls_res=res - ) + await mgr._handle_classification_result(state=state, cls_res=res) assert state.details == [] assert state.pending_fifo == [] @@ -221,9 +217,7 @@ async def test_handle_auto_vision_confirm() -> None: confidence=0.99, 
topk=[PredictionCandidate(label="纱布", confidence=0.99)], ) - await mgr._handle_classification_result( - state=state, cls_res=res - ) + await mgr._handle_classification_result(state=state, cls_res=res) assert len(state.details) == 1 assert state.details[0].source == "vision" assert state.details[0].item_id == "纱布" @@ -247,9 +241,7 @@ async def test_handle_high_conf_top1_not_in_candidates_enqueues_pending() -> Non PredictionCandidate(label="缝线", confidence=0.2), ], ) - await mgr._handle_classification_result( - state=state, cls_res=res - ) + await mgr._handle_classification_result(state=state, cls_res=res) assert state.details == [] assert len(state.pending_fifo) == 1 pid = state.pending_fifo[0] @@ -276,9 +268,7 @@ async def test_handle_mid_confidence_enqueues_pending() -> None: PredictionCandidate(label="缝线", confidence=0.3), ], ) - await mgr._handle_classification_result( - state=state, cls_res=res - ) + await mgr._handle_classification_result(state=state, cls_res=res) assert len(state.pending_fifo) == 1 @@ -299,9 +289,7 @@ async def test_handle_voice_disabled_no_pending_for_mid_conf() -> None: confidence=0.5, topk=[PredictionCandidate(label="纱布", confidence=0.5)], ) - await mgr._handle_classification_result( - state=state, cls_res=res - ) + await mgr._handle_classification_result(state=state, cls_res=res) assert state.pending_fifo == [] assert state.details == [] @@ -322,12 +310,8 @@ async def test_handle_vision_cooldown_skips_duplicate() -> None: confidence=0.99, topk=[PredictionCandidate(label="纱布", confidence=0.99)], ) - await mgr._handle_classification_result( - state=state, cls_res=res - ) - await mgr._handle_classification_result( - state=state, cls_res=res - ) + await mgr._handle_classification_result(state=state, cls_res=res) + await mgr._handle_classification_result(state=state, cls_res=res) assert len(state.details) == 1 @@ -350,12 +334,8 @@ async def test_handle_pending_dedupe_cooldown() -> None: PredictionCandidate(label="缝线", confidence=0.2), ], ) - 
await mgr._handle_classification_result( - state=state, cls_res=res - ) - await mgr._handle_classification_result( - state=state, cls_res=res - ) + await mgr._handle_classification_result(state=state, cls_res=res) + await mgr._handle_classification_result(state=state, cls_res=res) assert len(state.pending_fifo) == 1 diff --git a/tests/test_voice_audit_repository.py b/tests/test_voice_audit_repository.py index c8b731f..493b883 100644 --- a/tests/test_voice_audit_repository.py +++ b/tests/test_voice_audit_repository.py @@ -1,5 +1,6 @@ from __future__ import annotations +import asyncio import json import pytest @@ -55,3 +56,50 @@ async def test_save_audit_persists_fields(db_session: AsyncSession) -> None: assert r.resolved_label == "纱布" assert r.options_snapshot_json == opts assert r.error_message is None + + +@pytest.mark.asyncio +async def test_list_by_surgery_order_and_total(db_session: AsyncSession) -> None: + repo = VoiceAuditRepository() + async with db_session.begin(): + await repo.save_audit( + db_session, + surgery_id="111111", + confirmation_id="a", + status="parse_failed", + audio_object_key=None, + audio_content_type=None, + audio_size_bytes=None, + audio_sha256=None, + asr_text="糊", + resolved_label=None, + options_snapshot_json="[]", + error_message="x", + ) + await asyncio.sleep(0.02) + async with db_session.begin(): + await repo.save_audit( + db_session, + surgery_id="111111", + confirmation_id="b", + status="recognized", + audio_object_key="k.wav", + audio_content_type="audio/wav", + audio_size_bytes=10, + audio_sha256="b" * 64, + asr_text="纱布", + resolved_label="纱布", + options_snapshot_json="[]", + error_message=None, + ) + async with db_session.begin(): + rows, total = await repo.list_by_surgery(db_session, "111111", limit=10, offset=0) + assert total == 2 + assert [r.confirmation_id for r in rows] == ["b", "a"] + async with db_session.begin(): + page2, total2 = await repo.list_by_surgery( + db_session, "111111", limit=1, offset=1 + ) + assert 
total2 == 2 + assert len(page2) == 1 + assert page2[0].confirmation_id == "a" diff --git a/tests/test_voice_confirm.py b/tests/test_voice_confirm.py index e882f91..98eca5b 100644 --- a/tests/test_voice_confirm.py +++ b/tests/test_voice_confirm.py @@ -13,6 +13,21 @@ def test_parse_voice_choice_numeric() -> None: assert parse_voice_choice("第2个", ["纱布", "缝线", "钳子"]) == "缝线" +def test_parse_voice_choice_ordinal_chinese() -> None: + opts = ["纱布", "缝线", "钳子"] + assert parse_voice_choice("第一个", opts) == "纱布" + assert parse_voice_choice("第一个。", opts) == "纱布" + assert parse_voice_choice("第2个", opts) == "缝线" + assert parse_voice_choice("第二", opts) == "缝线" + assert parse_voice_choice("选3", opts) == "钳子" + assert parse_voice_choice("选项2", ["纱布", "缝线"]) == "缝线" + + +def test_parse_voice_choice_single_chinese_digit_with_few_options() -> None: + assert parse_voice_choice("一", ["纱布", "缝线"]) == "纱布" + assert parse_voice_choice("两", ["纱布", "缝线"]) == "缝线" + + def test_parse_voice_choice_negative() -> None: assert parse_voice_choice("不是", ["纱布", "缝线"]) is None diff --git a/tests/test_voice_file_log.py b/tests/test_voice_file_log.py new file mode 100644 index 0000000..c249a4e --- /dev/null +++ b/tests/test_voice_file_log.py @@ -0,0 +1,62 @@ +"""Tests for voice TSV + emit_voice_event (path + TSV line).""" + +from __future__ import annotations + +import tempfile +from pathlib import Path + +from app.config import Settings +from app.services.voice_file_log import ( + append_voice_tsv_line, + emit_voice_event, + init_voice_log_file, + resolved_voice_log_path, +) + + +def test_resolved_voice_log_path_replaces_surgery_id() -> None: + s = Settings() + s.voice_file_log_path = "logs/voice_{surgery_id}.txt" + p = resolved_voice_log_path("123456", s) + assert p.name == "voice_123456.txt" + assert "logs" in str(p) + + +def test_init_and_append_tsv() -> None: + with tempfile.TemporaryDirectory() as d: + base = Path(d) + s = Settings() + s.voice_file_log_enabled = True + s.voice_file_log_path = 
str((base / "v_{surgery_id}.txt").resolve()) + init_voice_log_file("999999", s) + p = resolved_voice_log_path("999999", s) + assert p.exists() + h = p.read_text(encoding="utf-8") + assert "来源" in h and "confirmation_id" in h + line = "ts\ttest\trecognized\tcid1\t同\t品\tfalse\t\tk.wav\n" + append_voice_tsv_line("999999", line, s) + assert p.read_text(encoding="utf-8").endswith(line) + + +def test_emit_voice_event_writes_when_enabled() -> None: + s = Settings() + s.voice_file_log_enabled = True + with tempfile.TemporaryDirectory() as d: + s.voice_file_log_path = str((Path(d) / "v_{surgery_id}.txt").resolve()) + init_voice_log_file("111111", s) + emit_voice_event( + s, + surgery_id="111111", + source="wav", + status="recognized", + confirmation_id="c1", + asr_text="纱布", + resolved_label="纱布", + rejected=False, + audio_object_key="k.wav", + ) + p = resolved_voice_log_path("111111", s) + body = p.read_text(encoding="utf-8") + assert "纱布" in body + assert "recognized" in body + assert "k.wav" in body diff --git a/uv.lock b/uv.lock index 033a8e7..73c7dc2 100644 --- a/uv.lock +++ b/uv.lock @@ -636,6 +636,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0c/29/0348de65b8cc732daa3e33e67806420b2ae89bdce2b04af740289c5c6c8c/loguru-0.7.3-py3-none-any.whl", hash = "sha256:31a33c10c8e1e10422bfd431aeb5d351c7cf7fa671e3c4df004162264b28220c", size = 61595, upload-time = "2024-12-06T11:20:54.538Z" }, ] +[[package]] +name = "markdown-it-py" +version = "4.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mdurl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" }, +] + [[package]] name = "markupsafe" version = "3.0.3" @@ -706,6 +718,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5d/49/d651878698a0b67f23aa28e17f45a6d6dd3d3f933fa29087fa4ce5947b5a/matplotlib-3.10.8-cp314-cp314t-win_arm64.whl", hash = "sha256:113bb52413ea508ce954a02c10ffd0d565f9c3bc7f2eddc27dfe1731e71c7b5f", size = 8192560, upload-time = "2025-12-10T22:56:38.008Z" }, ] +[[package]] +name = "mdurl" +version = "0.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, +] + [[package]] name = "minio" version = "7.2.20" @@ -836,6 +857,7 @@ dependencies = [ { name = "pillow" }, { name = "pydantic-settings" }, { name = "python-multipart" }, + { name = "rich" }, { name = "sqlalchemy" }, { name = "ultralytics" }, { name = "uvicorn", extra = ["standard"] }, @@ -862,6 +884,7 @@ requires-dist = [ { name = "pillow", specifier = ">=12.2.0" }, { name = "pydantic-settings", specifier = ">=2.13.1" }, { name = "python-multipart", specifier = ">=0.0.26" }, + { name = "rich", specifier = ">=15.0.0" }, { name = "sqlalchemy", specifier = ">=2.0.49" }, { name = "ultralytics", specifier = ">=8.4.40" }, { name = 
"uvicorn", extras = ["standard"], specifier = ">=0.44.0" }, @@ -1258,6 +1281,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d7/8e/7540e8a2036f79a125c1d2ebadf69ed7901608859186c856fa0388ef4197/requests-2.33.1-py3-none-any.whl", hash = "sha256:4e6d1ef462f3626a1f0a0a9c42dd93c63bad33f9f1c1937509b8c5c8718ab56a", size = 64947, upload-time = "2026-03-30T16:09:13.83Z" }, ] +[[package]] +name = "rich" +version = "15.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown-it-py" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c0/8f/0722ca900cc807c13a6a0c696dacf35430f72e0ec571c4275d2371fca3e9/rich-15.0.0.tar.gz", hash = "sha256:edd07a4824c6b40189fb7ac9bc4c52536e9780fbbfbddf6f1e2502c31b068c36", size = 230680, upload-time = "2026-04-12T08:24:00.75Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/82/3b/64d4899d73f91ba49a8c18a8ff3f0ea8f1c1d75481760df8c68ef5235bf5/rich-15.0.0-py3-none-any.whl", hash = "sha256:33bd4ef74232fb73fe9279a257718407f169c09b78a87ad3d296f548e27de0bb", size = 310654, upload-time = "2026-04-12T08:24:02.83Z" }, +] + [[package]] name = "scipy" version = "1.17.1"