- 新增 Alembic 初始迁移、领域明细模型及归档持久化与重试链路\n- 拆分视频会话注册表、分类处理、推理时间窗聚合与流处理\n- 消耗日志:TSV/Markdown 含 top2/top3;item_id 优先产品编码;待确认记「待确认」行,语音确认后落正式行并更新汇总\n- 待确认时内存/DB 明细为占位行,确认后替换;拒绝时移除占位\n- 分类 probs 先 detach/cpu 再转 NumPy,修复 MPS/CUDA 上推理被静默跳过\n- 补充集成测试、归档与设备张量等单测 Made-with: Cursor
73 lines
2.7 KiB
Python
73 lines
2.7 KiB
Python
from __future__ import annotations
|
||
|
||
from threading import Lock
|
||
from typing import Any
|
||
|
||
from aip import AipSpeech
|
||
|
||
from app.config import Settings, settings as _default_settings
|
||
|
||
|
||
class BaiduSpeechNotConfiguredError(RuntimeError):
    """Raised when a Baidu speech API is called without credentials configured.

    The required settings are BAIDU_SPEECH_APP_ID, BAIDU_SPEECH_API_KEY and
    BAIDU_SPEECH_SECRET_KEY.
    """
|
||
|
||
|
||
class BaiduSpeechService:
    """Baidu short-speech recognition (ASR) and online speech synthesis (TTS).

    A thin wrapper around ``AipSpeech`` from the ``baidu-aip`` package. The
    SDK client is built on first use and then reused for later calls.
    """

    def __init__(self, app_settings: Settings | None = None) -> None:
        """Store the settings to use; the SDK client is not created here.

        Args:
            app_settings: Settings object to read Baidu credentials and
                options from. A falsy value selects the module default.
        """
        self._s = app_settings if app_settings else _default_settings
        self._lock = Lock()
        self._client: AipSpeech | None = None

    @property
    def configured(self) -> bool:
        """Value of the settings' ``baidu_speech_configured`` flag."""
        return self._s.baidu_speech_configured

    def _client_or_raise(self) -> AipSpeech:
        """Return the cached SDK client, building it on the first call.

        Returns:
            The ``AipSpeech`` instance held by this service.

        Raises:
            BaiduSpeechNotConfiguredError: If ``configured`` is false.
        """
        if not self.configured:
            raise BaiduSpeechNotConfiguredError(
                "百度语音未配置:请设置 BAIDU_SPEECH_APP_ID、BAIDU_SPEECH_API_KEY、"
                "BAIDU_SPEECH_SECRET_KEY"
            )

        # Creation happens under the lock so the client is only built once.
        with self._lock:
            existing = self._client
            if existing is not None:
                return existing

            cfg = self._s
            fresh = AipSpeech(
                cfg.baidu_speech_app_id,
                cfg.baidu_speech_api_key,
                cfg.baidu_speech_secret_key,
            )

            # Timeouts are optional: only applied when the setting is not None.
            connect_ms = cfg.baidu_speech_connection_timeout_ms
            if connect_ms is not None:
                fresh.setConnectionTimeoutInMillis(connect_ms)

            socket_ms = cfg.baidu_speech_socket_timeout_ms
            if socket_ms is not None:
                fresh.setSocketTimeoutInMillis(socket_ms)

            self._client = fresh
            return fresh

    def asr(
        self,
        speech: bytes | None = None,
        format: str = "pcm",
        rate: int = 16000,
        options: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """Run short-speech recognition and return Baidu's JSON response.

        The response carries fields such as ``err_no`` and ``result``.

        ``dev_pid`` is always taken from the settings, replacing any value in
        ``options``. This pins the Mandarin model so that a call without an
        explicit model does not fall back to a server-side default that may
        skew results towards English.

        Args:
            speech: Raw audio bytes to recognise.
            format: Audio format name forwarded to the SDK.
            rate: Sample rate forwarded to the SDK.
            options: Extra request options; copied, never mutated.

        Returns:
            The dict returned by ``AipSpeech.asr``.

        Raises:
            BaiduSpeechNotConfiguredError: If the service is not configured.
        """
        request_options: dict[str, Any] = dict(options) if options else {}
        request_options.update(dev_pid=int(self._s.baidu_speech_asr_dev_pid))
        sdk = self._client_or_raise()
        return sdk.asr(speech, format, rate, request_options)

    def synthesis(
        self,
        text: str,
        lang: str = "zh",
        ctp: int = 1,
        options: dict[str, Any] | None = None,
    ) -> bytes | dict[str, Any]:
        """Run online speech synthesis for ``text``.

        Args:
            text: Text to synthesise.
            lang: Language code forwarded to the SDK.
            ctp: Client type value forwarded to the SDK.
            options: Extra request options, passed through unchanged.

        Returns:
            Audio bytes on success, or an error-information dict on failure.

        Raises:
            BaiduSpeechNotConfiguredError: If the service is not configured.
        """
        sdk = self._client_or_raise()
        return sdk.synthesis(text, lang, ctp, options)
|