feat: 语音确认、联调与运维增强

- 语音：序数解析（第一个/第二个等）、解析失败计数与 API detail.retry_remaining；百度 ASR 固定 dev_pid 为普通话；SurgeryPipelineError 支持 extra 并入 HTTP detail。
- Demo：demo 路由与假 RTSP、客户端 index 与 README；BackendResolver 与配置调整。
- 可观测：消耗 TSV 日志、语音文件日志、终端 Markdown 辅助；相关测试与依赖更新。
- 注意：.env 仍被 gitignore，本地密钥不会进入本提交。

Made-with: Cursor
2026-04-23 14:24:20 +08:00
parent 42720f81cf
commit 0c05463617
39 changed files with 3030 additions and 143 deletions
--- a/app/api.py
+++ b/app/api.py
@@ -2,7 +2,8 @@ import asyncio
 from collections.abc import Awaitable, Callable
 from typing import Annotated

-from fastapi import APIRouter, Depends, File, HTTPException, Path, UploadFile, status
+from fastapi import APIRouter, Depends, File, HTTPException, Path, Query, UploadFile, status
+from fastapi.responses import Response
 from fastapi.responses import JSONResponse
 from loguru import logger
 from sqlalchemy.exc import SQLAlchemyError
@@ -17,8 +18,11 @@ from app.schemas import (
    SurgeryEndRequest,
    SurgeryPendingConfirmationResolveResponse,
    SurgeryPendingConfirmationResponse,
+    SurgeryPendingResolveTextRequest,
    SurgeryResultResponse,
    SurgeryStartRequest,
+    SurgeryVoiceAuditItem,
+    SurgeryVoiceAuditsListResponse,
    SurgeryVoiceStatusResponse,
    build_consumption_summary,
 )
@@ -28,14 +32,21 @@ from app.surgery_errors import SurgeryPipelineError
 router = APIRouter()


+def _pipeline_error_detail(exc: SurgeryPipelineError, surgery_id: str) -> dict:
+    d: dict = {
+        "code": exc.code,
+        "message": exc.message,
+        "surgery_id": surgery_id,
+    }
+    if exc.extra:
+        d.update(exc.extra)
+    return d
+
+
 def _raise_surgery_pipeline_http(exc: SurgeryPipelineError, surgery_id: str) -> None:
    raise HTTPException(
        status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
-        detail={
-            "code": exc.code,
-            "message": exc.message,
-            "surgery_id": surgery_id,
-        },
+        detail=_pipeline_error_detail(exc, surgery_id),
    ) from exc


@@ -47,6 +58,9 @@ def _raise_confirmation_http(exc: SurgeryPipelineError, surgery_id: str) -> None
        "CONFIRMATION_INVALID": status.HTTP_422_UNPROCESSABLE_CONTENT,
        "VOICE_ASR_FAILED": status.HTTP_422_UNPROCESSABLE_CONTENT,
        "VOICE_PARSE_FAILED": status.HTTP_422_UNPROCESSABLE_CONTENT,
+        "VOICE_TEXT_EMPTY": status.HTTP_422_UNPROCESSABLE_CONTENT,
+        "TTS_TEXT_EMPTY": status.HTTP_422_UNPROCESSABLE_CONTENT,
+        "TTS_ERROR": status.HTTP_503_SERVICE_UNAVAILABLE,
        "VOICE_AUDIO_INVALID": status.HTTP_422_UNPROCESSABLE_CONTENT,
        "MINIO_NOT_CONFIGURED": status.HTTP_503_SERVICE_UNAVAILABLE,
        "MINIO_UPLOAD_FAILED": status.HTTP_503_SERVICE_UNAVAILABLE,
@@ -55,11 +69,7 @@ def _raise_confirmation_http(exc: SurgeryPipelineError, surgery_id: str) -> None
    st = status_map.get(exc.code, status.HTTP_500_INTERNAL_SERVER_ERROR)
    raise HTTPException(
        status_code=st,
-        detail={
-            "code": exc.code,
-            "message": exc.message,
-            "surgery_id": surgery_id,
-        },
+        detail=_pipeline_error_detail(exc, surgery_id),
    ) from exc


@@ -110,6 +120,25 @@ async def health() -> HealthResponse | JSONResponse:
    return HealthResponse(status="ok", database="connected")


+@router.get(
+    "/internal/demo/orchestrator-status",
+    tags=["demo"],
+    summary="一键联调接口是否可用",
+    description="供 demo 页探测：是否启用 orchestrator、RTSP 文件配置等；此路由始终存在，不依赖 DEMO_ORCHESTRATOR_ENABLED。",
+)
+async def demo_orchestrator_status() -> dict:
+    f = (settings.video_rtsp_urls_json_file or "").strip()
+    return {
+        "orchestrator_enabled": bool(settings.demo_orchestrator_enabled),
+        "orchestrate_method": "POST",
+        "orchestrate_path": "/internal/demo/orchestrate-and-start",
+        "video_rtsp_urls_json_file_set": bool(f),
+        "video_rtsp_urls_json_file": f or None,
+        "orchestrator_rtsp_port": settings.demo_orchestrator_rtsp_port,
+        "orchestrator_rtsp_json_host": settings.demo_orchestrator_rtsp_json_host,
+    }
+
+
@router.post(
    "/client/surgeries/start",
    response_model=SurgeryApiResponse,
@@ -380,6 +409,94 @@ async def resolve_pending_consumable_confirmation(
    )


+@router.post(
+    "/client/surgeries/{surgery_id}/pending-confirmation/{confirmation_id}/resolve-text",
+    response_model=SurgeryPendingConfirmationResolveResponse,
+    responses={
+        status.HTTP_404_NOT_FOUND: {"model": SurgeryClientErrorResponse},
+        status.HTTP_409_CONFLICT: {"model": SurgeryClientErrorResponse},
+        status.HTTP_422_UNPROCESSABLE_CONTENT: {"model": SurgeryClientErrorResponse},
+    },
+    tags=["client"],
+    summary="提交客户端语音识别文本以确认耗材",
+    description=(
+        "由浏览器 Web Speech 等本机 STT 得到的文本，不做 MinIO/百度 ASR；"
+        "候选项解析与上传 WAV 接口一致。"
+    ),
+)
+async def resolve_pending_consumable_confirmation_text(
+    surgery_id: Annotated[
+        str,
+        Path(
+            min_length=6,
+            max_length=6,
+            pattern=r"^\d{6}$",
+            description="手术 6 位号，仅允许 6 位数字。",
+        ),
+    ],
+    confirmation_id: Annotated[str, Path(min_length=1, max_length=128)],
+    body: SurgeryPendingResolveTextRequest,
+    pipeline: Annotated[SurgeryPipeline, Depends(get_surgery_pipeline)],
+) -> SurgeryPendingConfirmationResolveResponse:
+    try:
+        result = await pipeline.resolve_pending_confirmation_from_client_text(
+            surgery_id=surgery_id,
+            confirmation_id=confirmation_id,
+            recognized_text=body.recognized_text,
+        )
+    except SurgeryPipelineError as exc:
+        _raise_confirmation_http(exc, surgery_id)
+    return SurgeryPendingConfirmationResolveResponse(
+        surgery_id=surgery_id,
+        confirmation_id=confirmation_id,
+        status="accepted",
+        message=result.message,
+        resolved_label=result.resolved_label,
+        rejected=result.rejected,
+        asr_text=result.asr_text,
+        audio_object_key=result.audio_object_key,
+    )
+
+
+@router.get(
+    "/client/surgeries/{surgery_id}/pending-confirmation/{confirmation_id}/prompt-audio",
+    responses={
+        status.HTTP_404_NOT_FOUND: {"model": SurgeryClientErrorResponse},
+        status.HTTP_422_UNPROCESSABLE_CONTENT: {"model": SurgeryClientErrorResponse},
+        status.HTTP_503_SERVICE_UNAVAILABLE: {"model": SurgeryClientErrorResponse},
+    },
+    tags=["client"],
+    summary="待确认话术的 TTS 音频（MP3）",
+    description="使用百度在线合成，与 prompt_text 一致；供浏览器 MediaElement 直放。未配置百度语音时返回 503。",
+    response_class=Response,
+)
+async def get_pending_prompt_audio_mpeg(
+    surgery_id: Annotated[
+        str,
+        Path(
+            min_length=6,
+            max_length=6,
+            pattern=r"^\d{6}$",
+            description="手术 6 位号，仅允许 6 位数字。",
+        ),
+    ],
+    confirmation_id: Annotated[str, Path(min_length=1, max_length=128)],
+    pipeline: Annotated[SurgeryPipeline, Depends(get_surgery_pipeline)],
+) -> Response:
+    try:
+        data = await pipeline.get_pending_prompt_audio_mp3(
+            surgery_id=surgery_id,
+            confirmation_id=confirmation_id,
+        )
+    except SurgeryPipelineError as exc:
+        _raise_confirmation_http(exc, surgery_id)
+    return Response(
+        content=data,
+        media_type="audio/mpeg",
+        headers={"Cache-Control": "no-store"},
+    )
+
+
@router.get(
    "/internal/surgeries/{surgery_id}/voice-status",
    response_model=SurgeryVoiceStatusResponse,
@@ -417,3 +534,40 @@ async def get_surgery_voice_status(
        last_asr_text=payload.get("last_asr_text"),
        last_error=payload.get("last_error"),
    )
+
+
+@router.get(
+    "/internal/surgeries/{surgery_id}/voice-audits",
+    response_model=SurgeryVoiceAuditsListResponse,
+    tags=["internal"],
+    summary="语音确认审计记录（按手术号分页）",
+    description=(
+        "查询持久化表 `voice_confirmation_audits`：ASR 文本、解析结果、"
+        "候选项快照、MinIO 对象键、失败原因等。用于追溯、对账与报表；"
+        "不区分手术是否仍进行中，只要库里有记录即返回。"
+    ),
+)
+async def get_surgery_voice_audits(
+    surgery_id: Annotated[
+        str,
+        Path(
+            min_length=6,
+            max_length=6,
+            pattern=r"^\d{6}$",
+            description="手术 6 位号，仅允许 6 位数字。",
+        ),
+    ],
+    pipeline: Annotated[SurgeryPipeline, Depends(get_surgery_pipeline)],
+    limit: Annotated[int, Query(ge=1, le=200, description="每页条数。")] = 50,
+    offset: Annotated[int, Query(ge=0, description="跳过前若干条，供分页。")] = 0,
+) -> SurgeryVoiceAuditsListResponse:
+    rows, total = await pipeline.list_voice_audits(
+        surgery_id, limit=limit, offset=offset
+    )
+    return SurgeryVoiceAuditsListResponse(
+        surgery_id=surgery_id,
+        total=total,
+        limit=limit,
+        offset=offset,
+        items=[SurgeryVoiceAuditItem.model_validate(r) for r in rows],
+    )