ver0.1
This commit is contained in:
@@ -2,6 +2,7 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import array
|
||||
import io
|
||||
import shutil
|
||||
import subprocess
|
||||
@@ -9,12 +10,57 @@ import wave
|
||||
from typing import Final
|
||||
|
||||
_BAIDU_RATE: Final[int] = 16000
|
||||
# 诊室麦克风常见音量偏小,百度短语音 3301「语音质量错误」多与有效幅度过低有关。
|
||||
_NORM_TARGET_PEAK: Final[int] = 12000
|
||||
_NORM_MAX_GAIN: Final[float] = 80.0
|
||||
|
||||
|
||||
class WavDecodeError(ValueError):
|
||||
"""Uploaded bytes are not a valid WAV or cannot be converted."""
|
||||
|
||||
|
||||
def pcm_s16le_to_wav_bytes(pcm: bytes, *, sample_rate: int = _BAIDU_RATE) -> bytes:
|
||||
"""将 raw s16le mono PCM 打成标准 WAV,供百度 ``format=wav`` 重试等场景。"""
|
||||
if not pcm:
|
||||
raise WavDecodeError("Empty PCM")
|
||||
buf = io.BytesIO()
|
||||
with wave.open(buf, "wb") as wf:
|
||||
wf.setnchannels(1)
|
||||
wf.setsampwidth(2)
|
||||
wf.setframerate(sample_rate)
|
||||
wf.writeframes(pcm)
|
||||
return buf.getvalue()
|
||||
|
||||
|
||||
def normalize_pcm_s16le_for_baidu(pcm: bytes) -> bytes:
|
||||
"""提升过弱信号幅度,降低 err_no=3301(speech quality)概率;已足够响的音频不改。"""
|
||||
if len(pcm) < 2 or len(pcm) % 2 != 0:
|
||||
return pcm
|
||||
samples = array.array("h")
|
||||
samples.frombytes(pcm)
|
||||
if not samples:
|
||||
return pcm
|
||||
peak = 0
|
||||
for s in samples:
|
||||
a = abs(int(s))
|
||||
if a > peak:
|
||||
peak = a
|
||||
if peak == 0 or peak >= _NORM_TARGET_PEAK:
|
||||
return pcm
|
||||
scale = min(_NORM_MAX_GAIN, float(_NORM_TARGET_PEAK) / float(peak))
|
||||
if scale <= 1.0:
|
||||
return pcm
|
||||
out = array.array("h")
|
||||
for s in samples:
|
||||
v = int(round(float(s) * scale))
|
||||
if v > 32767:
|
||||
v = 32767
|
||||
elif v < -32768:
|
||||
v = -32768
|
||||
out.append(v)
|
||||
return out.tobytes()
|
||||
|
||||
|
||||
def wav_bytes_to_pcm16k_mono_s16le(wav_bytes: bytes) -> bytes:
|
||||
"""
|
||||
Prefer ffmpeg for arbitrary channel count / sample rate.
|
||||
@@ -57,7 +103,7 @@ def _ffmpeg_to_pcm16k(wav_bytes: bytes, ffmpeg: str) -> bytes:
|
||||
raise WavDecodeError(f"ffmpeg wav decode failed: {err or proc.returncode}")
|
||||
if not proc.stdout:
|
||||
raise WavDecodeError("ffmpeg produced empty PCM")
|
||||
return proc.stdout
|
||||
return normalize_pcm_s16le_for_baidu(proc.stdout)
|
||||
|
||||
|
||||
def _stdlib_wave_to_pcm16k(wav_bytes: bytes) -> bytes:
|
||||
@@ -96,6 +142,6 @@ def _stdlib_wave_to_pcm16k(wav_bytes: bytes) -> bytes:
|
||||
l_s, r_s = struct.unpack("<hh", chunk)
|
||||
m = max(min((l_s + r_s) // 2, 32767), -32768)
|
||||
out.extend(struct.pack("<h", m))
|
||||
return bytes(out)
|
||||
return normalize_pcm_s16le_for_baidu(bytes(out))
|
||||
|
||||
return raw
|
||||
return normalize_pcm_s16le_for_baidu(raw)
|
||||
|
||||
Reference in New Issue
Block a user