feat(api): use Tencent ASR flash with 16k_zh_large and dev transcript logs
Replace CreateRecTask polling with the recording-file flash API, add TENCENT_APP_ID, remove server-side pydub slicing, and log ASR recognition text at INFO in development.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -1,7 +1,4 @@
|
||||
import asyncio
|
||||
import sys
|
||||
from types import ModuleType, SimpleNamespace
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
from app.adapters.asr.tencent_asr import TencentASRProvider
|
||||
@@ -58,46 +55,52 @@ def test_post_commit_reuses_singleton_redis_client(
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_tencent_asr_transcribe_uses_to_thread(
|
||||
async def test_tencent_asr_flash_transcribe(
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
) -> None:
|
||||
to_thread_calls: list[tuple[object, tuple[object, ...]]] = []
|
||||
captured: dict[str, object] = {}
|
||||
|
||||
class FakeRequest:
|
||||
EngSerViceType: str | None = None
|
||||
SourceType: int | None = None
|
||||
VoiceFormat: str | None = None
|
||||
Data: str | None = None
|
||||
DataLen: int | None = None
|
||||
class FakeAsyncClient:
|
||||
async def __aenter__(self):
|
||||
return self
|
||||
|
||||
class FakeClient:
|
||||
def SentenceRecognition(self, req: FakeRequest) -> SimpleNamespace:
|
||||
return SimpleNamespace(Result=" 你好,世界 ")
|
||||
async def __aexit__(self, *args):
|
||||
return None
|
||||
|
||||
async def fake_to_thread(fn, *args):
|
||||
to_thread_calls.append((fn, args))
|
||||
return fn(*args)
|
||||
async def post(self, url, *, headers=None, content=None, timeout=None):
|
||||
captured["url"] = url
|
||||
captured["headers"] = headers
|
||||
captured["content"] = content
|
||||
captured["timeout"] = timeout
|
||||
return httpx.Response(
|
||||
200,
|
||||
json={
|
||||
"code": 0,
|
||||
"request_id": "req-1",
|
||||
"flash_result": [{"channel_id": 0, "text": " 你好,世界 "}],
|
||||
},
|
||||
)
|
||||
|
||||
models_module = ModuleType("tencentcloud.asr.v20190614.models")
|
||||
models_module.SentenceRecognitionRequest = FakeRequest
|
||||
package_module = ModuleType("tencentcloud.asr.v20190614")
|
||||
package_module.models = models_module
|
||||
|
||||
monkeypatch.setitem(sys.modules, "tencentcloud.asr.v20190614", package_module)
|
||||
monkeypatch.setattr(asyncio, "to_thread", fake_to_thread)
|
||||
|
||||
provider = TencentASRProvider("sid", "skey", engine_type="16k_zh_large")
|
||||
client = FakeClient()
|
||||
monkeypatch.setattr(provider, "_get_client", lambda: client)
|
||||
monkeypatch.setattr(httpx, "AsyncClient", FakeAsyncClient)
|
||||
|
||||
provider = TencentASRProvider(
|
||||
"sid",
|
||||
"skey",
|
||||
"1259220000",
|
||||
engine_type="16k_zh_large",
|
||||
)
|
||||
text = await provider.transcribe(b"fake-audio", format="m4a")
|
||||
|
||||
assert text == "你好,世界"
|
||||
assert len(to_thread_calls) == 1
|
||||
fn, args = to_thread_calls[0]
|
||||
assert getattr(fn, "__self__", None) is client
|
||||
assert getattr(fn, "__name__", "") == "SentenceRecognition"
|
||||
request = args[0]
|
||||
assert request.EngSerViceType == "16k_zh_large"
|
||||
assert request.VoiceFormat == "m4a"
|
||||
assert request.DataLen == len(b"fake-audio")
|
||||
assert captured["content"] == b"fake-audio"
|
||||
assert captured["timeout"] == 60.0
|
||||
url = str(captured["url"])
|
||||
assert "engine_type=16k_zh_large" in url
|
||||
assert "voice_format=m4a" in url
|
||||
assert "/asr/flash/v1/1259220000?" in url
|
||||
assert "secretid=sid" in url
|
||||
headers = captured["headers"]
|
||||
assert headers is not None
|
||||
assert headers["Authorization"]
|
||||
assert headers["Content-Type"] == "application/octet-stream"
|
||||
assert headers["Content-Length"] == str(len(b"fake-audio"))
|
||||
|
||||
Reference in New Issue
Block a user