ver0.1
This commit is contained in:
@@ -6,6 +6,7 @@ from typing import Any
|
||||
from aip import AipSpeech
|
||||
|
||||
from app.config import Settings, settings as _default_settings
|
||||
from app.services.audio_wav import pcm_s16le_to_wav_bytes
|
||||
|
||||
|
||||
class BaiduSpeechNotConfiguredError(RuntimeError):
|
||||
@@ -60,6 +61,31 @@ class BaiduSpeechService:
|
||||
merged["dev_pid"] = int(self._s.baidu_speech_asr_dev_pid)
|
||||
return self._client_or_raise().asr(speech, format, rate, merged)
|
||||
|
||||
def asr_16k_mono_pcm_or_wav_fallback(
    self,
    pcm_s16le: bytes,
    *,
    rate: int = 16000,
    options: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Recognize raw PCM first; on ``err_no == 3301`` retry once as WAV.

    The audio is first submitted through ``self.asr`` with format ``"pcm"``.
    If that response is a dict whose ``err_no`` is 3301 (speech quality
    error) and the payload is at least 1000 bytes long, the same samples
    are wrapped with ``pcm_s16le_to_wav_bytes`` and submitted once more
    with format ``"wav"``.

    In some environments the PCM and WAV paths behave differently on
    borderline samples, so the retry can improve the success rate.

    Args:
        pcm_s16le: Audio payload passed to the recognizer as ``"pcm"``.
        rate: Sample rate forwarded to both attempts and to the WAV
            wrapper.
        options: Extra options forwarded unchanged to ``self.asr``.

    Returns:
        The response of the WAV retry when a retry was made and it
        returned a dict; otherwise the response of the first PCM attempt
        (including when the first response is not a dict, when its
        ``err_no`` is not 3301, when the payload is shorter than 1000
        bytes, or when WAV wrapping raised).
    """
    first = self.asr(pcm_s16le, "pcm", rate, options)

    # Retry only for a dict response reporting err_no 3301 on a payload of
    # at least 1000 bytes; everything else is returned untouched.
    retry_worthwhile = (
        isinstance(first, dict)
        and first.get("err_no") == 3301
        and len(pcm_s16le) >= 1000
    )
    if not retry_worthwhile:
        return first

    try:
        wav_payload = pcm_s16le_to_wav_bytes(pcm_s16le, sample_rate=rate)
    except Exception:
        # Best effort: if WAV wrapping fails, keep the original PCM result.
        return first

    second = self.asr(wav_payload, "wav", rate, options)
    if isinstance(second, dict):
        return second
    return first
|
||||
|
||||
def synthesis(
|
||||
self,
|
||||
text: str,
|
||||
|
||||
Reference in New Issue
Block a user