# Source: operating-room-monitor-server/app/services/baidu_speech.py
# (73 lines, 2.7 KiB, Python)

from __future__ import annotations
from threading import Lock
from typing import Any
from aip import AipSpeech
from app.config import Settings, settings as _default_settings
class BaiduSpeechNotConfiguredError(RuntimeError):
    """Raised when a Baidu speech API is called without credentials.

    The credentials in question are BAIDU_SPEECH_APP_ID, BAIDU_SPEECH_API_KEY
    and BAIDU_SPEECH_SECRET_KEY.
    """
class BaiduSpeechService:
    """Baidu short-speech recognition (ASR) and online speech synthesis.

    Thin wrapper around ``AipSpeech`` from the ``baidu-aip`` package. The
    underlying client is created lazily on first use and then reused.
    """

    def __init__(self, app_settings: Settings | None = None) -> None:
        """Store the settings to use; fall back to the module default."""
        self._s = app_settings if app_settings else _default_settings
        # Created on demand by ``_client_or_raise``; guarded by ``_lock``.
        self._client: AipSpeech | None = None
        self._lock = Lock()

    @property
    def configured(self) -> bool:
        """Value of ``baidu_speech_configured`` on the active settings."""
        cfg = self._s
        return cfg.baidu_speech_configured

    def _new_client(self) -> AipSpeech:
        """Build an ``AipSpeech`` client and apply any configured timeouts."""
        cfg = self._s
        fresh = AipSpeech(
            cfg.baidu_speech_app_id,
            cfg.baidu_speech_api_key,
            cfg.baidu_speech_secret_key,
        )
        connect_ms = cfg.baidu_speech_connection_timeout_ms
        if connect_ms is not None:
            fresh.setConnectionTimeoutInMillis(connect_ms)
        socket_ms = cfg.baidu_speech_socket_timeout_ms
        if socket_ms is not None:
            fresh.setSocketTimeoutInMillis(socket_ms)
        return fresh

    def _client_or_raise(self) -> AipSpeech:
        """Return the shared client, creating it on the first call.

        Raises:
            BaiduSpeechNotConfiguredError: if ``configured`` is falsy.
        """
        if not self.configured:
            message = (
                "百度语音未配置:请设置 BAIDU_SPEECH_APP_ID、BAIDU_SPEECH_API_KEY、"
                "BAIDU_SPEECH_SECRET_KEY"
            )
            raise BaiduSpeechNotConfiguredError(message)
        with self._lock:
            if self._client is not None:
                return self._client
            # Publish the client only after its timeouts have been applied.
            self._client = self._new_client()
            return self._client

    def asr(
        self,
        speech: bytes | None = None,
        format: str = "pcm",
        rate: int = 16000,
        options: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """Run short-speech recognition and return the client's response.

        ``dev_pid`` is always taken from the settings and overrides any value
        supplied in ``options``, so the recognition model is fixed by
        configuration rather than left to the server-side default. The
        caller's ``options`` dict is copied, never mutated.
        """
        request_options: dict[str, Any] = {
            **(options or {}),
            "dev_pid": int(self._s.baidu_speech_asr_dev_pid),
        }
        client = self._client_or_raise()
        return client.asr(speech, format, rate, request_options)

    def synthesis(
        self,
        text: str,
        lang: str = "zh",
        ctp: int = 1,
        options: dict[str, Any] | None = None,
    ) -> bytes | dict[str, Any]:
        """Run online speech synthesis.

        Per the annotated return type, the result is audio bytes on success
        or an error-information dict on failure. Arguments are forwarded to
        the client unchanged.
        """
        client = self._client_or_raise()
        return client.synthesis(text, lang, ctp, options)