feat(api): use Tencent ASR flash with 16k_zh_large and dev transcript logs

Replace CreateRecTask polling with recording-file flash API, add TENCENT_APP_ID, remove server-side pydub slicing, and log ASR recognition text at INFO in development.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-25 11:28:22 +08:00
parent 22d282dc01
commit 07979bfb09
22 changed files with 354 additions and 185 deletions
--- a/api/tests/test_infra_regressions.py
+++ b/api/tests/test_infra_regressions.py
@@ -1,7 +1,4 @@
-import asyncio
-import sys
-from types import ModuleType, SimpleNamespace
-
+import httpx
 import pytest

 from app.adapters.asr.tencent_asr import TencentASRProvider
@@ -58,46 +55,52 @@ def test_post_commit_reuses_singleton_redis_client(


@pytest.mark.asyncio
-async def test_tencent_asr_transcribe_uses_to_thread(
+async def test_tencent_asr_flash_transcribe(
    monkeypatch: pytest.MonkeyPatch,
 ) -> None:
-    to_thread_calls: list[tuple[object, tuple[object, ...]]] = []
+    captured: dict[str, object] = {}

-    class FakeRequest:
-        EngSerViceType: str | None = None
-        SourceType: int | None = None
-        VoiceFormat: str | None = None
-        Data: str | None = None
-        DataLen: int | None = None
+    class FakeAsyncClient:
+        async def __aenter__(self):
+            return self

-    class FakeClient:
-        def SentenceRecognition(self, req: FakeRequest) -> SimpleNamespace:
-            return SimpleNamespace(Result=" 你好，世界 ")
+        async def __aexit__(self, *args):
+            return None

-    async def fake_to_thread(fn, *args):
-        to_thread_calls.append((fn, args))
-        return fn(*args)
+        async def post(self, url, *, headers=None, content=None, timeout=None):
+            captured["url"] = url
+            captured["headers"] = headers
+            captured["content"] = content
+            captured["timeout"] = timeout
+            return httpx.Response(
+                200,
+                json={
+                    "code": 0,
+                    "request_id": "req-1",
+                    "flash_result": [{"channel_id": 0, "text": " 你好，世界 "}],
+                },
+            )

-    models_module = ModuleType("tencentcloud.asr.v20190614.models")
-    models_module.SentenceRecognitionRequest = FakeRequest
-    package_module = ModuleType("tencentcloud.asr.v20190614")
-    package_module.models = models_module
-
-    monkeypatch.setitem(sys.modules, "tencentcloud.asr.v20190614", package_module)
-    monkeypatch.setattr(asyncio, "to_thread", fake_to_thread)
-
-    provider = TencentASRProvider("sid", "skey", engine_type="16k_zh_large")
-    client = FakeClient()
-    monkeypatch.setattr(provider, "_get_client", lambda: client)
+    monkeypatch.setattr(httpx, "AsyncClient", FakeAsyncClient)

+    provider = TencentASRProvider(
+        "sid",
+        "skey",
+        "1259220000",
+        engine_type="16k_zh_large",
+    )
    text = await provider.transcribe(b"fake-audio", format="m4a")

    assert text == "你好，世界"
-    assert len(to_thread_calls) == 1
-    fn, args = to_thread_calls[0]
-    assert getattr(fn, "__self__", None) is client
-    assert getattr(fn, "__name__", "") == "SentenceRecognition"
-    request = args[0]
-    assert request.EngSerViceType == "16k_zh_large"
-    assert request.VoiceFormat == "m4a"
-    assert request.DataLen == len(b"fake-audio")
+    assert captured["content"] == b"fake-audio"
+    assert captured["timeout"] == 60.0
+    url = str(captured["url"])
+    assert "engine_type=16k_zh_large" in url
+    assert "voice_format=m4a" in url
+    assert "/asr/flash/v1/1259220000?" in url
+    assert "secretid=sid" in url
+    headers = captured["headers"]
+    assert headers is not None
+    assert headers["Authorization"]
+    assert headers["Content-Type"] == "application/octet-stream"
+    assert headers["Content-Length"] == str(len(b"fake-audio"))