Files
life-echo/api/app/adapters/asr/tencent_asr.py
Kevin 22d282dc01 feat(api): use Tencent 16k_zh_large ASR and remove local Whisper
Standardize ASR on Tencent's dialect-capable engine across all environments,
drop faster-whisper from dependencies and deployment images, and add an
expo-sqlite iOS vendor sync plus pod install in prebuild to prevent native
build failures after npm install.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-25 10:21:41 +08:00

91 lines
3.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Tencent Cloud ASR adapter — implements ASRProvider port."""
import asyncio
import base64
from app.core.business_telemetry import business_span
from app.core.logging import get_logger
from app.ports.asr import ASRTranscriptionError
# Module-level logger, requested from the project's logging helper under this module's name.
logger = get_logger(__name__)
class TencentASRProvider:
    """Speech-to-text provider backed by Tencent Cloud ``SentenceRecognition``.

    The SDK client is created lazily on first use and cached on the instance.
    Every transcription failure is surfaced as ``ASRTranscriptionError``.
    """

    def __init__(
        self,
        secret_id: str,
        secret_key: str,
        *,
        engine_type: str = "16k_zh_large",
    ):
        """Store credentials and engine selection; no SDK call is made here.

        Args:
            secret_id: Tencent Cloud API secret id.
            secret_key: Tencent Cloud API secret key.
            engine_type: Value sent as ``EngSerViceType`` on each request.
        """
        self._secret_id = secret_id
        self._secret_key = secret_key
        self._engine_type = engine_type
        # Populated on demand by ``_get_client``.
        self._client = None

    def _get_client(self):
        """Return the cached SDK client, building it on first use.

        Returns:
            The ASR client, or ``None`` when importing the SDK or constructing
            the client raised. The failure is logged and nothing is cached, so
            the next call tries again.
        """
        if self._client is not None:
            return self._client
        try:
            # Imported inside the ``try`` so that a failing SDK import is
            # handled the same way as any other initialization failure.
            from tencentcloud.asr.v20190614 import asr_client
            from tencentcloud.common import credential
            from tencentcloud.common.profile.client_profile import ClientProfile
            from tencentcloud.common.profile.http_profile import HttpProfile

            cred = credential.Credential(self._secret_id, self._secret_key)
            http_profile = HttpProfile()
            http_profile.endpoint = "asr.tencentcloudapi.com"
            client_profile = ClientProfile()
            client_profile.httpProfile = http_profile
            self._client = asr_client.AsrClient(cred, "", client_profile)
            return self._client
        except Exception as e:
            logger.error("Tencent ASR client init failed: {}", e)
            return None

    def ensure_ready(self) -> bool:
        """Return True when both credentials are non-empty and a client exists.

        The client is only built (or looked up) when both credentials are
        truthy, because the ``and`` chain short-circuits.
        """
        return bool(self._secret_id and self._secret_key and self._get_client())

    async def transcribe(self, audio: bytes, format: str = "m4a") -> str:
        """Transcribe ``audio`` inside an ``asr.transcribe`` business span.

        Args:
            audio: Raw audio bytes.
            format: Audio container/codec name; defaults to ``"m4a"``.

        Returns:
            The recognized text, stripped of surrounding whitespace.

        Raises:
            ASRTranscriptionError: On any failure (see ``_transcribe_inner``).
        """
        with business_span("asr.transcribe", provider="tencent"):
            return await self._transcribe_inner(audio, format)

    async def _transcribe_inner(self, audio: bytes, format: str) -> str:
        """Send one ``SentenceRecognition`` request and return its text.

        Args:
            audio: Raw audio bytes; must be non-empty.
            format: Audio format name; falsy values fall back to ``"m4a"``.

        Returns:
            The non-empty, stripped ``Result`` field of the response.

        Raises:
            ASRTranscriptionError: If ``audio`` is empty, the client could not
                be initialized, the SDK call raised, or the response carried
                an empty ``Result``.
        """
        # Empty audio can never produce a transcript; fail fast instead of
        # spending a blocking network round trip on a zero-length payload.
        if not audio:
            raise ASRTranscriptionError("Tencent ASR received empty audio")

        client = self._get_client()
        if not client:
            raise ASRTranscriptionError(
                "Tencent ASR client not initialized (check credentials)"
            )
        try:
            from tencentcloud.asr.v20190614 import models

            audio_base64 = base64.b64encode(audio).decode("utf-8")
            req = models.SentenceRecognitionRequest()
            req.EngSerViceType = self._engine_type
            req.SourceType = 1
            # Lowercase to match the documentation. iOS commonly produces an
            # m4a (AAC) container, which matches the 16k engine.
            req.VoiceFormat = (format or "m4a").lower()
            req.Data = audio_base64
            # Length of the raw audio, not of the base64 text.
            req.DataLen = len(audio)
            # The Tencent SDK call is synchronous and blocking; run it in a
            # worker thread so it does not stall the event loop.
            resp = await asyncio.to_thread(client.SentenceRecognition, req)
            text = (resp.Result or "").strip()
            if text:
                return text
            err = getattr(resp, "Error", None) or getattr(resp, "Message", None)
            logger.warning(
                "Tencent ASR empty Result, audio_len={} format={} err={}",
                len(audio),
                req.VoiceFormat,
                err,
            )
            raise ASRTranscriptionError(
                "Tencent ASR empty Result (check sample rate / format / audio)"
            )
        except ASRTranscriptionError:
            # Already in the caller-facing form; do not re-wrap.
            raise
        except Exception as e:
            logger.error("Tencent ASR transcribe failed: {}", e, exc_info=True)
            raise ASRTranscriptionError(f"Tencent ASR transcribe failed: {e!s}") from e