Files
operating-room-monitor-server/app/services/baidu_speech.py
Kevin 3d7bd70355 feat: 手术视频消耗、待确认与持久化改造
- 新增 Alembic 初始迁移、领域明细模型及归档持久化与重试链路\n- 拆分视频会话注册表、分类处理、推理时间窗聚合与流处理\n- 消耗日志:TSV/Markdown 含 top2/top3;item_id 优先产品编码;待确认记「待确认」行,语音确认后落正式行并更新汇总\n- 待确认时内存/DB 明细为占位行,确认后替换;拒绝时移除占位\n- 分类 probs 先 detach/cpu 再转 NumPy,修复 MPS/CUDA 上推理被静默跳过\n- 补充集成测试、归档与设备张量等单测

Made-with: Cursor
2026-04-23 20:42:21 +08:00

73 lines
2.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from __future__ import annotations
from threading import Lock
from typing import Any
from aip import AipSpeech
from app.config import Settings, settings as _default_settings
class BaiduSpeechNotConfiguredError(RuntimeError):
    """Raised when a speech API is called without Baidu credentials configured.

    The required settings are BAIDU_SPEECH_APP_ID, BAIDU_SPEECH_API_KEY and
    BAIDU_SPEECH_SECRET_KEY.
    """
class BaiduSpeechService:
    """Baidu short-speech recognition (``asr``) and online speech synthesis (``synthesis``).

    Thin wrapper around ``AipSpeech`` from the ``baidu-aip`` package. The
    underlying client is created on first use and reused afterwards.
    """

    def __init__(self, app_settings: Settings | None = None) -> None:
        """Bind the service to *app_settings*, or to the module default settings.

        No client is created here; that happens in ``_client_or_raise``.
        """
        self._s = app_settings if app_settings else _default_settings
        self._client: AipSpeech | None = None
        # Held around the check-and-create of ``_client``.
        self._lock = Lock()

    @property
    def configured(self) -> bool:
        """Whether the bound settings report Baidu speech as configured."""
        return self._s.baidu_speech_configured

    def _build_client(self) -> AipSpeech:
        """Create an ``AipSpeech`` client and apply any configured timeouts."""
        cfg = self._s
        new_client = AipSpeech(
            cfg.baidu_speech_app_id,
            cfg.baidu_speech_api_key,
            cfg.baidu_speech_secret_key,
        )
        # A timeout left as ``None`` in settings is not applied to the client.
        connect_ms = cfg.baidu_speech_connection_timeout_ms
        if connect_ms is not None:
            new_client.setConnectionTimeoutInMillis(connect_ms)
        socket_ms = cfg.baidu_speech_socket_timeout_ms
        if socket_ms is not None:
            new_client.setSocketTimeoutInMillis(socket_ms)
        return new_client

    def _client_or_raise(self) -> AipSpeech:
        """Return the shared client, creating it on first call.

        Raises:
            BaiduSpeechNotConfiguredError: if ``configured`` is false.
        """
        if not self.configured:
            raise BaiduSpeechNotConfiguredError(
                "百度语音未配置:请设置 BAIDU_SPEECH_APP_ID、BAIDU_SPEECH_API_KEY、"
                "BAIDU_SPEECH_SECRET_KEY"
            )
        with self._lock:
            if self._client is None:
                # Stored only after construction and timeout setup succeed.
                self._client = self._build_client()
            return self._client

    def asr(
        self,
        speech: bytes | None = None,
        format: str = "pcm",
        rate: int = 16000,
        options: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """Run short-speech recognition and return Baidu's JSON response.

        The response carries fields such as ``err_no`` and ``result``.

        ``dev_pid`` is always taken from configuration and overrides any
        ``dev_pid`` in *options*. This pins the Mandarin model so that an
        omitted parameter cannot fall back to a server-side default and
        produce English-leaning results. The caller's *options* dict is
        copied, not mutated.

        Raises:
            BaiduSpeechNotConfiguredError: if credentials are not configured.
        """
        request_options: dict[str, Any] = dict(options) if options else {}
        request_options["dev_pid"] = int(self._s.baidu_speech_asr_dev_pid)
        client = self._client_or_raise()
        return client.asr(speech, format, rate, request_options)

    def synthesis(
        self,
        text: str,
        lang: str = "zh",
        ctp: int = 1,
        options: dict[str, Any] | None = None,
    ) -> bytes | dict[str, Any]:
        """Run online speech synthesis for *text*.

        Returns the audio as bytes on success, or an error-information dict
        on failure.

        Raises:
            BaiduSpeechNotConfiguredError: if credentials are not configured.
        """
        client = self._client_or_raise()
        return client.synthesis(text, lang, ctp, options)