feat: 新增腾讯云ASR服务,支持按配置切换ASR提供商
- 新增 tencent_asr_service.py(腾讯云一句话识别)
- 优化 asr_service.py
- 更新 services/__init__.py,按 ASR_PROVIDER 切换 whisper/tencent
- 更新 requirements.txt

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -44,7 +44,7 @@ faster-whisper>=1.0.0
|
|||||||
# Image Processing
|
# Image Processing
|
||||||
Pillow>=10.0.0
|
Pillow>=10.0.0
|
||||||
|
|
||||||
# Tencent Cloud SMS
|
# Tencent Cloud SDK(全量包,包含 SMS、ASR 语音识别等模块)
|
||||||
tencentcloud-sdk-python>=3.0.1000
|
tencentcloud-sdk-python>=3.0.1000
|
||||||
|
|
||||||
openai
|
openai
|
||||||
|
|||||||
@@ -1,7 +1,28 @@
|
|||||||
"""
|
"""
|
||||||
服务模块
|
服务模块
|
||||||
|
通过环境变量 ASR_PROVIDER 选择 ASR 实现:
|
||||||
|
- ASR_PROVIDER: whisper(默认,本地 faster-whisper)| tencent(腾讯云一句话识别)
|
||||||
"""
|
"""
|
||||||
|
import os
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# ASR Provider 选择
|
||||||
|
# =============================================================================
|
||||||
|
# Normalise the configured provider: surrounding whitespace and letter case are
# ignored, and an unset or empty variable means the default ("whisper").
ASR_PROVIDER = os.getenv("ASR_PROVIDER", "").strip().lower() or "whisper"

if ASR_PROVIDER == "tencent":
    # Remote provider: Tencent Cloud one-sentence recognition.
    from .tencent_asr_service import tencent_asr_service as asr_service
    logger.info("ASR Provider: 腾讯云一句话识别")
else:
    if ASR_PROVIDER != "whisper":
        # An unrecognised value (e.g. a typo) still falls back to the local
        # model, but no longer does so silently.
        logger.warning(
            "未知的 ASR_PROVIDER=%r,回退到 faster-whisper(本地)", ASR_PROVIDER
        )
    # Default provider: local faster-whisper model.
    from .asr_service import asr_service
    logger.info("ASR Provider: faster-whisper(本地)")
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# TTS 及其他服务
|
||||||
|
# =============================================================================
|
||||||
from .tts_service import tts_service
|
from .tts_service import tts_service
|
||||||
from .llm_service import llm_service
|
from .llm_service import llm_service
|
||||||
from .redis_service import redis_service
|
from .redis_service import redis_service
|
||||||
@@ -12,4 +33,3 @@ __all__ = [
|
|||||||
"llm_service",
|
"llm_service",
|
||||||
"redis_service",
|
"redis_service",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
@@ -16,11 +16,16 @@ logger = logging.getLogger(__name__)
|
|||||||
ASR_MODEL_SIZE = os.getenv("ASR_MODEL_SIZE", "small")
|
ASR_MODEL_SIZE = os.getenv("ASR_MODEL_SIZE", "small")
|
||||||
ASR_DEVICE = os.getenv("ASR_DEVICE", "auto") # auto, cpu, cuda
|
ASR_DEVICE = os.getenv("ASR_DEVICE", "auto") # auto, cpu, cuda
|
||||||
ASR_COMPUTE_TYPE = os.getenv("ASR_COMPUTE_TYPE", "auto") # auto, int8, float16, float32
|
ASR_COMPUTE_TYPE = os.getenv("ASR_COMPUTE_TYPE", "auto") # auto, int8, float16, float32
|
||||||
# 镜像内预置模型目录,设置后直接使用本地模型不联网下载(与 Dockerfile 中 download_root 一致)
|
# 模型缓存目录:每次启动优先从该目录加载,不设置则使用默认本地路径(api/models/whisper)
|
||||||
|
# 设置 ASR_MODEL_CACHE_DIR 时仅使用本地模型不联网(与 Dockerfile 中 download_root 一致)
|
||||||
ASR_MODEL_CACHE_DIR = os.getenv("ASR_MODEL_CACHE_DIR")
|
ASR_MODEL_CACHE_DIR = os.getenv("ASR_MODEL_CACHE_DIR")
|
||||||
|
# 默认本地缓存目录(相对 api 目录),确保每次启动都先从本地加载
|
||||||
|
_DEFAULT_ASR_CACHE_DIR = os.path.normpath(
|
||||||
|
os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "models", "whisper")
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class ASRService:
|
class WhisperASRService:
|
||||||
"""
|
"""
|
||||||
ASR 服务(语音转文字)
|
ASR 服务(语音转文字)
|
||||||
使用 faster-whisper 本地模型
|
使用 faster-whisper 本地模型
|
||||||
@@ -60,8 +65,12 @@ class ASRService:
|
|||||||
else:
|
else:
|
||||||
compute_type = "int8" # CPU 使用 int8 量化,速度更快
|
compute_type = "int8" # CPU 使用 int8 量化,速度更快
|
||||||
|
|
||||||
download_root = ASR_MODEL_CACHE_DIR if ASR_MODEL_CACHE_DIR else None
|
# 每次启动都先从本地目录加载:优先用环境变量,否则用默认 api/models/whisper
|
||||||
local_files_only = bool(ASR_MODEL_CACHE_DIR)
|
download_root = ASR_MODEL_CACHE_DIR if ASR_MODEL_CACHE_DIR else _DEFAULT_ASR_CACHE_DIR
|
||||||
|
local_files_only = bool(ASR_MODEL_CACHE_DIR) # 仅当显式设置缓存目录时禁止联网(如 Docker)
|
||||||
|
if not os.path.isdir(download_root):
|
||||||
|
os.makedirs(download_root, exist_ok=True)
|
||||||
|
logger.info(f"Whisper 模型从本地加载: download_root={download_root}, local_files_only={local_files_only}")
|
||||||
self.model = WhisperModel(
|
self.model = WhisperModel(
|
||||||
ASR_MODEL_SIZE,
|
ASR_MODEL_SIZE,
|
||||||
device=device,
|
device=device,
|
||||||
@@ -156,4 +165,4 @@ class ASRService:
|
|||||||
|
|
||||||
|
|
||||||
# 全局实例
|
# 全局实例
|
||||||
asr_service = ASRService()
|
asr_service = WhisperASRService()
|
||||||
|
|||||||
113
api/services/tencent_asr_service.py
Normal file
113
api/services/tencent_asr_service.py
Normal file
@@ -0,0 +1,113 @@
|
|||||||
|
"""
|
||||||
|
ASR 服务:语音转文字(腾讯云一句话识别)
|
||||||
|
使用腾讯云 ASR API 进行语音识别
|
||||||
|
"""
|
||||||
|
import base64
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import uuid
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class TencentASRService:
    """
    ASR service (speech-to-text) backed by Tencent Cloud.

    Uses the Tencent Cloud one-sentence recognition API (SentenceRecognition).
    Docs: https://cloud.tencent.com/document/product/1093/35646

    Credentials are read from the environment once, at construction time:
    ``TENCENT_SECRET_ID`` and ``TENCENT_SECRET_KEY`` are required;
    ``TENCENT_ASR_APP_ID`` is read and stored but not used by any method of
    this class.
    """

    def __init__(self):
        """Read credentials from the environment; the SDK client is created lazily."""
        # Strip surrounding whitespace so a stray space or newline in the
        # environment does not turn into an authentication failure later.
        self._secret_id = os.getenv("TENCENT_SECRET_ID", "").strip()
        self._secret_key = os.getenv("TENCENT_SECRET_KEY", "").strip()
        self._app_id = os.getenv("TENCENT_ASR_APP_ID", "").strip()
        self._ready = bool(self._secret_id and self._secret_key)
        self._client = None

        if not self._ready:
            logger.warning("腾讯云 ASR 未配置:缺少 TENCENT_SECRET_ID 或 TENCENT_SECRET_KEY")

    def _get_client(self):
        """
        Return the Tencent Cloud ASR client, creating it on first use.

        Returns:
            The cached ``AsrClient`` instance, or ``None`` when the SDK cannot
            be imported or the client cannot be constructed (the error is
            logged, not raised).
        """
        if self._client is not None:
            return self._client

        try:
            # Imported lazily so this module can be imported without the SDK.
            from tencentcloud.common import credential
            from tencentcloud.common.profile.client_profile import ClientProfile
            from tencentcloud.common.profile.http_profile import HttpProfile
            from tencentcloud.asr.v20190614 import asr_client

            cred = credential.Credential(self._secret_id, self._secret_key)

            http_profile = HttpProfile()
            http_profile.endpoint = "asr.tencentcloudapi.com"

            client_profile = ClientProfile()
            client_profile.httpProfile = http_profile

            # The region argument is passed as an empty string.
            self._client = asr_client.AsrClient(cred, "", client_profile)
            logger.info("腾讯云 ASR 客户端初始化成功")
            return self._client
        except Exception as e:
            # Best-effort boundary: callers only need to know that no client
            # is available, so the failure is logged and reported as None.
            logger.error(f"腾讯云 ASR 客户端初始化失败: {e}", exc_info=True)
            return None

    def ensure_ready(self) -> bool:
        """
        Make sure the ASR service is ready for use.

        Tencent Cloud ASR is a remote API, so there is no model to preload;
        this checks that credentials are configured and that the SDK client
        can be created.

        Returns:
            True when credentials are present and a client is available.
        """
        if not self._ready:
            return False
        # Creating the client verifies that the SDK is importable.
        return self._get_client() is not None

    def is_ready(self) -> bool:
        """Return whether credentials are configured (does not create the client)."""
        return self._ready

    async def transcribe(
        self,
        audio_base64: str,
        *,
        voice_format: str = "m4a",
        engine_type: str = "16k_zh",
    ) -> Optional[str]:
        """
        Transcribe audio to text with Tencent Cloud one-sentence recognition.

        Args:
            audio_base64: Base64-encoded audio data.
            voice_format: Audio container/codec sent as ``VoiceFormat``
                (default ``"m4a"``, the value previously hard-coded).
            engine_type: Recognition engine sent as ``EngSerViceType``
                (default ``"16k_zh"``, 16 kHz Mandarin).

        Returns:
            The transcribed text (stripped; empty string when the API returns
            no text). On failure, a message starting with ``"转写失败: "`` is
            returned instead of raising.
        """
        if not self._ready:
            return "转写失败: 腾讯云 ASR 未配置"

        # Reject empty input locally instead of spending a remote API call
        # that can only fail.
        if not audio_base64 or not audio_base64.strip():
            return "转写失败: 音频数据为空"

        client = self._get_client()
        if not client:
            return "转写失败: 腾讯云 ASR 客户端初始化失败"

        try:
            import asyncio

            from tencentcloud.asr.v20190614 import models

            req = models.SentenceRecognitionRequest()
            req.EngSerViceType = engine_type
            req.SourceType = 1  # 1 = audio supplied as Base64 data in the request
            req.VoiceFormat = voice_format
            req.Data = audio_base64
            # DataLen is the size of the raw (decoded) audio in bytes.
            req.DataLen = len(base64.b64decode(audio_base64))

            # The SDK call is a blocking HTTP request; run it in the default
            # executor so it does not stall the event loop for other tasks.
            # NOTE(review): this allows concurrent calls on the shared client;
            # confirm the SDK client is safe to use from multiple threads.
            loop = asyncio.get_running_loop()
            resp = await loop.run_in_executor(None, client.SentenceRecognition, req)
            result = resp.Result

            logger.info(
                f"腾讯云 ASR 转写完成: 文本长度={len(result) if result else 0}"
            )

            return result.strip() if result else ""

        except Exception as e:
            # Failures (bad Base64, SDK/network errors) are reported to the
            # caller as a message rather than raised.
            logger.error(f"腾讯云 ASR 转写失败: {e}", exc_info=True)
            return f"转写失败: {str(e)}"
|
||||||
|
|
||||||
|
|
||||||
|
# 全局实例
|
||||||
|
tencent_asr_service = TencentASRService()
|
||||||
Reference in New Issue
Block a user