feat: align surgery API with schemas and extend client tooling

- Refactor app API and schemas; adjust surgery pipeline, repository, and session manager.

- Improve consumption TSV logging and consumable vision integration; trim voice resolution.

- Add Baidu Face 1:N search script, .env.example entries, and client API integration doc.

- Update demo client, staging checklist, surgery interface doc, and related tests; add sample face image.

Made-with: Cursor
This commit is contained in:
Kevin
2026-04-23 16:09:20 +08:00
parent 0c05463617
commit 69980d8073
20 changed files with 994 additions and 610 deletions

View File

@@ -2,6 +2,8 @@
from __future__ import annotations
import base64
from app.database import AsyncSessionLocal
from app.repositories.surgery_results import SurgeryResultRepository
from app.schemas import (
@@ -78,31 +80,22 @@ class SurgeryPipeline:
return persisted
return self._sessions.archived_consumption_fallback(surgery_id)
def voice_status(self, surgery_id: str) -> dict[str, object] | None:
    """Return the voice status of a surgery as reported by the session manager.

    Thin delegate: the value produced by ``self._sessions.voice_status`` is
    passed through unchanged (per the annotation, a dict or ``None``).

    Args:
        surgery_id: Identifier of the surgery to look up.
    """
    return self._sessions.voice_status(surgery_id)
async def list_voice_audits(
    self,
    surgery_id: str,
    *,
    limit: int = 50,
    offset: int = 0,
):
    """Page through the persisted `voice_confirmation_audits` table.

    Intended for traceability, reconciliation and reporting. The call is
    forwarded to ``self._voice.list_voice_audits_for_surgery`` and its result
    is returned unchanged.

    Args:
        surgery_id: Identifier of the surgery whose audits are listed.
        limit: Page size forwarded to the voice service (default 50).
        offset: Page offset forwarded to the voice service (default 0).
    """
    return await self._voice.list_voice_audits_for_surgery(
        surgery_id, limit=limit, offset=offset
    )
def get_pending_confirmation_for_client(
async def get_pending_confirmation_for_client(
self, surgery_id: str
) -> SurgeryPendingConfirmationResponse | None:
pending = self._sessions.next_pending_confirmation(surgery_id)
if pending is None:
return None
mp3 = await run_in_threadpool(
self._voice.synthesize_prompt_to_mp3,
pending.prompt_text,
)
b64 = base64.b64encode(mp3).decode("ascii")
return SurgeryPendingConfirmationResponse(
surgery_id=surgery_id,
confirmation_id=pending.id,
prompt_text=pending.prompt_text,
prompt_audio_mp3_base64=b64,
options=[
PendingConfirmationOption(label=a, confidence=b)
for a, b in pending.options
@@ -129,34 +122,3 @@ class SurgeryPipeline:
content_type=content_type,
)
async def resolve_pending_confirmation_from_client_text(
    self,
    surgery_id: str,
    confirmation_id: str,
    recognized_text: str,
) -> VoiceResolveResult:
    """Resolve a pending confirmation from text recognised on the client.

    Used when a client such as a browser performs speech recognition locally.
    The resolution rules are the same as for the WAV path, but neither MinIO
    nor Baidu is needed. The call is forwarded to
    ``self._voice.resolve_from_recognized_text`` and its result is returned.

    Args:
        surgery_id: Identifier of the surgery the confirmation belongs to.
        confirmation_id: Identifier of the pending confirmation to resolve.
        recognized_text: Text produced by the client-side recogniser.
    """
    return await self._voice.resolve_from_recognized_text(
        surgery_id=surgery_id,
        confirmation_id=confirmation_id,
        recognized_text=recognized_text,
    )
async def get_pending_prompt_audio_mp3(
    self,
    surgery_id: str,
    confirmation_id: str,
) -> bytes:
    """Return the Baidu TTS MP3 for a pending confirmation's `prompt_text`.

    Meant for the demo client, which plays the audio directly via Audio.

    Args:
        surgery_id: Identifier of the surgery the confirmation belongs to.
        confirmation_id: Identifier of the confirmation whose prompt is
            synthesised.

    Returns:
        The value produced by ``self._voice.synthesize_prompt_to_mp3`` for the
        prompt text (annotated as ``bytes``).

    Raises:
        SurgeryPipelineError: With code ``CONFIRMATION_NOT_FOUND`` when no such
            confirmation exists or its status is no longer ``"pending"``.
    """
    pending = self._sessions.get_pending_confirmation_by_id(
        surgery_id, confirmation_id
    )
    if pending is None or pending.status != "pending":
        raise SurgeryPipelineError(
            "CONFIRMATION_NOT_FOUND",
            "未找到该待确认项或已处理。",
        )
    # The synthesiser is called through run_in_threadpool rather than awaited
    # directly, i.e. it is treated as a synchronous callable.
    return await run_in_threadpool(
        self._voice.synthesize_prompt_to_mp3,
        pending.prompt_text,
    )