"""
Service package.

The ASR implementation bound to ``asr_service`` is chosen by the
``ASR_PROVIDER`` environment variable:
  - ``whisper`` (default): local faster-whisper model
  - ``tencent``: Tencent Cloud one-sentence recognition
"""
import os
import logging

logger = logging.getLogger(__name__)

# =============================================================================
# ASR provider selection
# =============================================================================
# An unset or empty variable means "use the default"; surrounding whitespace
# is stripped so a value such as "tencent " still selects the Tencent backend.
ASR_PROVIDER = (os.getenv("ASR_PROVIDER") or "whisper").strip().lower()

if ASR_PROVIDER == "tencent":
    from .tencent_asr_service import tencent_asr_service as asr_service
    logger.info("ASR Provider: 腾讯云一句话识别")
else:
    if ASR_PROVIDER != "whisper":
        # Surface typos instead of silently falling back to the local model.
        logger.warning(
            "未知的 ASR_PROVIDER=%r,回退到 faster-whisper(本地)", ASR_PROVIDER
        )
    from .asr_service import asr_service
    logger.info("ASR Provider: faster-whisper(本地)")
"""
ASR service: speech-to-text via Tencent Cloud one-sentence recognition.

Uses the Tencent Cloud ASR ``SentenceRecognition`` API to transcribe a
Base64-encoded audio clip.
"""
import asyncio
import base64
import logging
import os
import uuid
from typing import Optional

logger = logging.getLogger(__name__)


class TencentASRService:
    """
    ASR service (speech-to-text) backed by the Tencent Cloud
    ``SentenceRecognition`` API.

    Credentials are read from the ``TENCENT_SECRET_ID`` and
    ``TENCENT_SECRET_KEY`` environment variables when the instance is
    created. The SDK client is built lazily on first use.

    Docs: https://cloud.tencent.com/document/product/1093/35646
    """

    def __init__(self):
        self._secret_id = os.getenv("TENCENT_SECRET_ID", "")
        self._secret_key = os.getenv("TENCENT_SECRET_KEY", "")
        # Read for completeness; not used by any method in this class.
        self._app_id = os.getenv("TENCENT_ASR_APP_ID", "")
        # "Ready" only means both credentials are non-empty; it does not
        # prove that they are valid.
        self._ready = bool(self._secret_id and self._secret_key)
        self._client = None

        if not self._ready:
            logger.warning("腾讯云 ASR 未配置:缺少 TENCENT_SECRET_ID 或 TENCENT_SECRET_KEY")

    def _get_client(self):
        """
        Return the cached Tencent Cloud ASR client, creating it on first use.

        Returns:
            The SDK client, or ``None`` when the SDK cannot be imported or
            the client cannot be constructed (the error is logged).
        """
        if self._client is not None:
            return self._client

        try:
            # Imported lazily so this module can be imported without the SDK.
            from tencentcloud.common import credential
            from tencentcloud.common.profile.client_profile import ClientProfile
            from tencentcloud.common.profile.http_profile import HttpProfile
            from tencentcloud.asr.v20190614 import asr_client

            cred = credential.Credential(self._secret_id, self._secret_key)

            http_profile = HttpProfile()
            http_profile.endpoint = "asr.tencentcloudapi.com"

            client_profile = ClientProfile()
            client_profile.httpProfile = http_profile

            # The region argument is passed as an empty string.
            self._client = asr_client.AsrClient(cred, "", client_profile)
            logger.info("腾讯云 ASR 客户端初始化成功")
            return self._client
        except Exception as e:
            logger.error(f"腾讯云 ASR 客户端初始化失败: {e}", exc_info=True)
            return None

    def ensure_ready(self) -> bool:
        """
        Make sure the service can be used.

        There is no model to preload for the remote API, so this checks that
        credentials are configured and that the SDK client can be created.

        Returns:
            ``True`` when credentials are present and a client is available.
        """
        if not self._ready:
            return False
        # Building the client verifies that the SDK is importable.
        return self._get_client() is not None

    def is_ready(self) -> bool:
        """Return whether both credentials were configured at construction."""
        return self._ready

    async def transcribe(self, audio_base64: str) -> Optional[str]:
        """
        Transcribe audio to text with Tencent Cloud one-sentence recognition.

        Args:
            audio_base64: Base64-encoded audio data.

        Returns:
            The recognised text with surrounding whitespace stripped, or an
            empty string when the API returns no text. On failure a message
            starting with ``转写失败`` is returned instead of raising.
        """
        if not self._ready:
            return "转写失败: 腾讯云 ASR 未配置"

        client = self._get_client()
        if not client:
            return "转写失败: 腾讯云 ASR 客户端初始化失败"

        try:
            from tencentcloud.asr.v20190614 import models

            req = models.SentenceRecognitionRequest()
            req.EngSerViceType = "16k_zh"  # 16 kHz Mandarin Chinese engine
            req.SourceType = 1  # 1 = audio sent as Base64 in the request
            req.VoiceFormat = "m4a"  # audio container format
            req.Data = audio_base64
            req.DataLen = len(base64.b64decode(audio_base64))

            # The SDK call is synchronous network I/O. Run it on the default
            # executor so the event loop keeps serving other coroutines while
            # the request is in flight.
            # NOTE(review): concurrent transcriptions will now call the shared
            # client from worker threads - confirm the SDK client tolerates it.
            loop = asyncio.get_running_loop()
            resp = await loop.run_in_executor(
                None, client.SentenceRecognition, req
            )
            result = resp.Result

            logger.info(
                f"腾讯云 ASR 转写完成: 文本长度={len(result) if result else 0}"
            )

            return result.strip() if result else ""

        except Exception as e:
            # Failures are reported to the caller as text, not as exceptions.
            logger.error(f"腾讯云 ASR 转写失败: {e}", exc_info=True)
            return f"转写失败: {str(e)}"


# Module-level shared instance
tencent_asr_service = TencentASRService()