This commit is contained in:
Kevin
2026-04-28 10:41:48 +08:00
parent 482b016872
commit 15884bd68e
60 changed files with 2092 additions and 1994 deletions

View File

@@ -6,6 +6,7 @@ from typing import Any
from aip import AipSpeech
from app.config import Settings, settings as _default_settings
from app.services.audio_wav import pcm_s16le_to_wav_bytes
class BaiduSpeechNotConfiguredError(RuntimeError):
@@ -60,6 +61,31 @@ class BaiduSpeechService:
merged["dev_pid"] = int(self._s.baidu_speech_asr_dev_pid)
return self._client_or_raise().asr(speech, format, rate, merged)
def asr_16k_mono_pcm_or_wav_fallback(
self,
pcm_s16le: bytes,
*,
rate: int = 16000,
options: dict[str, Any] | None = None,
) -> dict[str, Any]:
"""先按 raw PCM 识别;若返回 err_no=3301语音质量错误再用 WAV 封装重试一次。
部分环境下 PCM 与 WAV 路径对边界样本表现不一致,重试可提高成功率。
"""
r = self.asr(pcm_s16le, "pcm", rate, options)
if not isinstance(r, dict):
return r
if r.get("err_no") != 3301:
return r
if len(pcm_s16le) < 1000:
return r
try:
wav = pcm_s16le_to_wav_bytes(pcm_s16le, sample_rate=rate)
except Exception:
return r
r2 = self.asr(wav, "wav", rate, options)
return r2 if isinstance(r2, dict) else r
def synthesis(
self,
text: str,