feat: 手术视频消耗、待确认与持久化改造

- 新增 Alembic 初始迁移、领域明细模型及归档持久化与重试链路
- 拆分视频会话注册表、分类处理、推理时间窗聚合与流处理
- 消耗日志：TSV/Markdown 含 top2/top3；item_id 优先产品编码；待确认记「待确认」行，语音确认后落正式行并更新汇总
- 待确认时内存/DB 明细为占位行，确认后替换；拒绝时移除占位
- 分类 probs 先 detach/cpu 再转 NumPy，修复 MPS/CUDA 上推理被静默跳过
- 补充集成测试、归档与设备张量等单测

Made-with: Cursor
2026-04-23 20:42:21 +08:00
parent 69980d8073
commit 3d7bd70355
55 changed files with 4544 additions and 2050 deletions
--- a/app/services/voice_audit_emitter.py
+++ b/app/services/voice_audit_emitter.py
@@ -0,0 +1,166 @@
+"""统一语音确认的「审计 + trace + 抛错」三段式。
+
+`VoiceConfirmationService` 过去在 `resolve_from_wav` / `resolve_from_recognized_text` 各分支
+中重复执行 `_persist_audit + record_voice_trace + emit_voice_event + raise
+SurgeryPipelineError` 三件套，本类把它们聚合成一个方法，便于线性化主流程。
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Literal
+
+from loguru import logger
+from sqlalchemy.ext.asyncio import async_sessionmaker
+
+from app.config import Settings
+from app.repositories.voice_audits import VoiceAuditRepository
+from app.services.voice_file_log import emit_voice_event
+from app.surgery_errors import SurgeryPipelineError
+
+VoiceSource = Literal["wav", "text", "n/a"]  # type of the `source` argument of fail()/success()
+
+
+@dataclass(frozen=True)
+class VoiceAuditContext:
+    """Snapshot of the audio-side context required for a voice audit record."""
+
+    audio_object_key: str | None = None  # passed to save_audit() and emit_voice_event()
+    audio_content_type: str | None = None  # passed to save_audit() only in this module
+    audio_size_bytes: int | None = None  # passed to save_audit() only in this module
+    audio_sha256: str | None = None  # passed to save_audit() only in this module
+
+
+class VoiceAuditEmitter:  # Combines audit-row persistence with voice-event emission
+    def __init__(
+        self,
+        *,
+        settings: Settings,
+        audits: VoiceAuditRepository,
+        session_factory: async_sessionmaker,
+    ) -> None:
+        self._s = settings  # handed to emit_voice_event() as its first argument
+        self._audits = audits  # repository whose save_audit() writes the audit row
+        self._session_factory = session_factory  # opens one session per audit write
+
+    async def _persist_audit(  # Save one audit row; any Exception is logged and swallowed
+        self,
+        *,
+        surgery_id: str,
+        confirmation_id: str,
+        status: str,
+        ctx: VoiceAuditContext,
+        asr_text: str | None,
+        resolved_label: str | None,
+        options_snapshot_json: str | None,
+        error_message: str | None,
+    ) -> None:
+        try:
+            async with self._session_factory() as session:  # fresh session for this write
+                async with session.begin():  # transaction scope around the single save
+                    await self._audits.save_audit(
+                        session,
+                        surgery_id=surgery_id,
+                        confirmation_id=confirmation_id,
+                        status=status,
+                        audio_object_key=ctx.audio_object_key,
+                        audio_content_type=ctx.audio_content_type,
+                        audio_size_bytes=ctx.audio_size_bytes,
+                        audio_sha256=ctx.audio_sha256,
+                        asr_text=asr_text,
+                        resolved_label=resolved_label,
+                        options_snapshot_json=options_snapshot_json,
+                        error_message=error_message,
+                    )
+        except Exception as exc:  # not re-raised; presumably deliberate best-effort — confirm
+            logger.error("Persist voice audit failed: {}", exc)
+
+    async def fail(
+        self,
+        *,
+        source: VoiceSource,  # forwarded to emit_voice_event()
+        status: str,  # written to the audit row and to the voice event
+        code: str,  # first argument of the returned SurgeryPipelineError
+        message: str,  # used as audit error_message, event error_message and error text
+        surgery_id: str,
+        confirmation_id: str,
+        ctx: VoiceAuditContext | None = None,  # None is replaced by an all-default context
+        asr_text: str | None = None,
+        options_snapshot_json: str | None = None,
+        record_session_trace: bool = True,  # False skips the session_trace_recorder call
+        session_trace_recorder=None,  # Callable[[str | None, str | None], None]
+        include_extra: dict[str, object] | None = None,  # forwarded as `extra=` when not None
+        persist_audit: bool = True,  # False skips the audit write
+        emit_trace: bool = True,  # False skips emit_voice_event()
+    ) -> SurgeryPipelineError:
+        """Unified failure path: audit row, session trace, voice event; returns the error to raise.
+
+        Callers complete the raise themselves with `raise await emitter.fail(...)`.
+        """
+        ctx = ctx or VoiceAuditContext()
+        if persist_audit:
+            await self._persist_audit(
+                surgery_id=surgery_id,
+                confirmation_id=confirmation_id,
+                status=status,
+                ctx=ctx,
+                asr_text=asr_text,
+                resolved_label=None,  # the failure path records no resolved label
+                options_snapshot_json=options_snapshot_json,
+                error_message=message,
+            )
+        if record_session_trace and session_trace_recorder is not None:
+            try:
+                session_trace_recorder(asr_text, message)  # invoked with (asr_text, message)
+            except Exception as exc:  # recorder errors are logged at debug level and ignored
+                logger.debug("session trace recorder failed: {}", exc)
+        if emit_trace:
+            emit_voice_event(
+                self._s,
+                surgery_id=surgery_id,
+                source=source,
+                status=status,
+                confirmation_id=confirmation_id,
+                asr_text=asr_text,
+                error_message=message,
+                audio_object_key=ctx.audio_object_key,
+            )
+        if include_extra is not None:  # an empty dict is still forwarded as `extra`
+            return SurgeryPipelineError(code, message, extra=include_extra)
+        return SurgeryPipelineError(code, message)
+
+    async def success(  # Success path: persist the audit row, then emit the voice event
+        self,
+        *,
+        source: VoiceSource,  # forwarded to emit_voice_event()
+        status: str,  # written to the audit row and to the voice event
+        surgery_id: str,
+        confirmation_id: str,
+        ctx: VoiceAuditContext | None = None,  # None is replaced by an all-default context
+        asr_text: str | None,
+        resolved_label: str | None,
+        rejected: bool,  # forwarded to emit_voice_event() only, not to the audit row
+        options_snapshot_json: str | None,  # forwarded to the audit row only
+    ) -> None:
+        ctx = ctx or VoiceAuditContext()
+        await self._persist_audit(
+            surgery_id=surgery_id,
+            confirmation_id=confirmation_id,
+            status=status,
+            ctx=ctx,
+            asr_text=asr_text,
+            resolved_label=resolved_label,
+            options_snapshot_json=options_snapshot_json,
+            error_message=None,  # success rows carry no error message
+        )
+        emit_voice_event(
+            self._s,
+            surgery_id=surgery_id,
+            source=source,
+            status=status,
+            confirmation_id=confirmation_id,
+            asr_text=asr_text,
+            resolved_label=resolved_label,
+            rejected=rejected,
+            audio_object_key=ctx.audio_object_key,
+        )