feat(api): use Tencent ASR flash with 16k_zh_large and dev transcript logs

Replace CreateRecTask polling with the recording-file flash API, add TENCENT_APP_ID,
remove server-side pydub slicing, and log ASR recognition text at INFO level in development.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Kevin
2026-05-25 11:28:22 +08:00
parent 22d282dc01
commit 07979bfb09
22 changed files with 354 additions and 185 deletions

View File

@@ -1,7 +1,4 @@
import asyncio
import sys
from types import ModuleType, SimpleNamespace
import httpx
import pytest
from app.adapters.asr.tencent_asr import TencentASRProvider
@@ -58,46 +55,52 @@ def test_post_commit_reuses_singleton_redis_client(
# NOTE(review): this hunk shows two versions of one test interleaved, with no
# +/- markers and no indentation. The first `async def` line is presumably the
# removed name and the second its replacement — confirm against the real file
# before editing any of these lines.
@pytest.mark.asyncio
async def test_tencent_asr_transcribe_uses_to_thread(
async def test_tencent_asr_flash_transcribe(
monkeypatch: pytest.MonkeyPatch,
) -> None:
# `to_thread_calls` collects the (fn, args) pairs seen by the patched
# asyncio.to_thread; `captured` collects the arguments of the faked HTTP POST.
to_thread_calls: list[tuple[object, tuple[object, ...]]] = []
captured: dict[str, object] = {}
# Stand-in request class; it is exposed further down as
# `models.SentenceRecognitionRequest` on the fake SDK module.
class FakeRequest:
EngSerViceType: str | None = None
SourceType: int | None = None
VoiceFormat: str | None = None
Data: str | None = None
DataLen: int | None = None
# Replacement for httpx.AsyncClient (patched in below): an async context
# manager whose post() records its arguments and returns a canned response.
class FakeAsyncClient:
async def __aenter__(self):
return self
# Fake SDK client; its result is padded with spaces so the test can check that
# transcribe() returns the text with that padding removed.
class FakeClient:
def SentenceRecognition(self, req: FakeRequest) -> SimpleNamespace:
return SimpleNamespace(Result=" 你好，世界 ")
async def __aexit__(self, *args):
return None
# Recording wrapper for asyncio.to_thread: logs the call, then runs `fn`
# directly and returns its result.
async def fake_to_thread(fn, *args):
to_thread_calls.append((fn, args))
return fn(*args)
async def post(self, url, *, headers=None, content=None, timeout=None):
captured["url"] = url
captured["headers"] = headers
captured["content"] = content
captured["timeout"] = timeout
# Canned 200 response: code 0 and a single flash_result entry whose text is
# padded with spaces (the assertion below expects the stripped form).
return httpx.Response(
200,
json={
"code": 0,
"request_id": "req-1",
"flash_result": [{"channel_id": 0, "text": " 你好，世界 "}],
},
)
# Register a fake `tencentcloud.asr.v20190614` package in sys.modules whose
# `models` attribute exposes FakeRequest as SentenceRecognitionRequest.
models_module = ModuleType("tencentcloud.asr.v20190614.models")
models_module.SentenceRecognitionRequest = FakeRequest
package_module = ModuleType("tencentcloud.asr.v20190614")
package_module.models = models_module
monkeypatch.setitem(sys.modules, "tencentcloud.asr.v20190614", package_module)
monkeypatch.setattr(asyncio, "to_thread", fake_to_thread)
provider = TencentASRProvider("sid", "skey", engine_type="16k_zh_large")
client = FakeClient()
monkeypatch.setattr(provider, "_get_client", lambda: client)
# Patch httpx.AsyncClient with the fake and build the provider. The third
# positional argument reappears in the URL path asserted below — presumably
# the Tencent app id; confirm against TencentASRProvider's signature.
monkeypatch.setattr(httpx, "AsyncClient", FakeAsyncClient)
provider = TencentASRProvider(
"sid",
"skey",
"1259220000",
engine_type="16k_zh_large",
)
# Run the transcription and check the returned text has no surrounding spaces.
text = await provider.transcribe(b"fake-audio", format="m4a")
assert text == "你好，世界"
# to_thread assertions: exactly one call, bound to the fake client's
# SentenceRecognition, with a request carrying engine type, format and length.
assert len(to_thread_calls) == 1
fn, args = to_thread_calls[0]
assert getattr(fn, "__self__", None) is client
assert getattr(fn, "__name__", "") == "SentenceRecognition"
request = args[0]
assert request.EngSerViceType == "16k_zh_large"
assert request.VoiceFormat == "m4a"
assert request.DataLen == len(b"fake-audio")
# HTTP assertions: the raw audio bytes are the request body, the timeout is
# 60.0, and the URL carries the engine type, voice format, the
# "/asr/flash/v1/1259220000?" path segment and the secret id.
assert captured["content"] == b"fake-audio"
assert captured["timeout"] == 60.0
url = str(captured["url"])
assert "engine_type=16k_zh_large" in url
assert "voice_format=m4a" in url
assert "/asr/flash/v1/1259220000?" in url
assert "secretid=sid" in url
# Header assertions: a non-empty Authorization value, an octet-stream
# Content-Type, and a Content-Length equal to the payload size as a string.
headers = captured["headers"]
assert headers is not None
assert headers["Authorization"]
assert headers["Content-Type"] == "application/octet-stream"
assert headers["Content-Length"] == str(len(b"fake-audio"))