Files
operating-room-monitor-server/app/services/baidu_speech.py
Kevin 0c05463617 feat: 语音确认、联调与运维增强
- 语音:序数解析(第一个/第二个等)、解析失败计数与 API detail.retry_remaining;
  百度 ASR 固定 dev_pid 为普通话;SurgeryPipelineError 支持 extra 并入 HTTP detail。
- Demo:demo 路由与假 RTSP、客户端 index 与 README;BackendResolver 与配置调整。
- 可观测:消耗 TSV 日志、语音文件日志、终端 Markdown 辅助;相关测试与依赖更新。
- 注意:.env 仍被 gitignore,本地密钥不会进入本提交。

Made-with: Cursor
2026-04-23 14:24:20 +08:00

72 lines
2.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from __future__ import annotations
from threading import Lock
from typing import Any
from aip import AipSpeech
from app.config import settings
class BaiduSpeechNotConfiguredError(RuntimeError):
    """Raised when an API is called without Baidu speech credentials configured.

    The required settings are BAIDU_SPEECH_APP_ID, BAIDU_SPEECH_API_KEY and
    BAIDU_SPEECH_SECRET_KEY.
    """
class BaiduSpeechService:
    """Baidu short-speech recognition (``asr``) and online speech synthesis (``synthesis``).

    Thin wrapper around ``AipSpeech`` from the ``baidu-aip`` package. The
    underlying client is created on first use and reused afterwards.
    """

    def __init__(self) -> None:
        # The client is built lazily by _client_or_raise(); the lock guards
        # its one-time construction.
        self._client: AipSpeech | None = None
        self._lock = Lock()

    @property
    def configured(self) -> bool:
        """Return ``settings.baidu_speech_configured``."""
        return settings.baidu_speech_configured

    def _client_or_raise(self) -> AipSpeech:
        """Return the shared ``AipSpeech`` client, creating it on first use.

        Raises:
            BaiduSpeechNotConfiguredError: if ``configured`` is false.
        """
        if not self.configured:
            raise BaiduSpeechNotConfiguredError(
                "百度语音未配置:请设置 BAIDU_SPEECH_APP_ID、BAIDU_SPEECH_API_KEY、"
                "BAIDU_SPEECH_SECRET_KEY"
            )
        with self._lock:
            if self._client is None:
                client = AipSpeech(
                    settings.baidu_speech_app_id,
                    settings.baidu_speech_api_key,
                    settings.baidu_speech_secret_key,
                )
                # Timeouts are optional settings; only override the SDK
                # values when one is actually provided.
                if settings.baidu_speech_connection_timeout_ms is not None:
                    client.setConnectionTimeoutInMillis(
                        settings.baidu_speech_connection_timeout_ms
                    )
                if settings.baidu_speech_socket_timeout_ms is not None:
                    client.setSocketTimeoutInMillis(
                        settings.baidu_speech_socket_timeout_ms
                    )
                # Publish the client only after it is fully configured.
                self._client = client
            return self._client

    def asr(
        self,
        speech: bytes | None = None,
        format: str = "pcm",
        rate: int = 16000,
        options: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """Run short-speech recognition and return Baidu's JSON response.

        The response contains fields such as ``err_no`` and ``result``.

        ``dev_pid`` is always taken from configuration (the Mandarin model),
        overriding any ``dev_pid`` in ``options``. This avoids falling back to
        the server-side default, which can yield English-biased results.

        Args:
            speech: Audio bytes to recognise.
            format: Audio format passed through to the SDK.
            rate: Sample rate passed through to the SDK.
            options: Extra request options; copied, never mutated.

        Raises:
            BaiduSpeechNotConfiguredError: if the credentials are not configured.
        """
        # Copy so the caller's dict is not modified when dev_pid is forced.
        merged: dict[str, Any] = dict(options or {})
        merged["dev_pid"] = int(settings.baidu_speech_asr_dev_pid)
        return self._client_or_raise().asr(speech, format, rate, merged)

    def synthesis(
        self,
        text: str,
        lang: str = "zh",
        ctp: int = 1,
        options: dict[str, Any] | None = None,
    ) -> bytes | dict[str, Any]:
        """Run online speech synthesis.

        Returns the audio as bytes on success, or a dict with error
        information on failure.

        Raises:
            BaiduSpeechNotConfiguredError: if the credentials are not configured.
        """
        return self._client_or_raise().synthesis(text, lang, ctp, options)