Add Volcengine support for TTS and ASR services

- Introduced Volcengine as a new provider for both TTS and ASR services.
- Updated configuration files to include Volcengine-specific parameters such as app_id, resource_id, and uid.
- Enhanced the ASR service to support streaming mode with Volcengine's API.
- Modified existing tests to validate the integration of Volcengine services.
- Updated documentation to reflect the addition of Volcengine as a supported provider for TTS and ASR.
- Refactored service factory to accommodate Volcengine alongside existing providers.
This commit is contained in:
Xin Wang
2026-03-08 23:09:50 +08:00
parent 3604db21eb
commit aeeeee20d1
18 changed files with 1256 additions and 12 deletions

View File

@@ -0,0 +1,86 @@
import gzip
import json
from providers.asr.volcengine import VolcengineRealtimeASRService
def test_volcengine_seed_protocol_defaults_and_headers():
    """Verify the seed-protocol attributes and the headers built for a request id.

    The service is created for the ``openspeech.bytedance.com`` endpoint without
    an explicit protocol or resource id; the test pins the values the service
    reports for both, then checks the header mapping produced for ``"req-1"``.
    """
    asr = VolcengineRealtimeASRService(
        api_key="access-token",
        api_url="wss://openspeech.bytedance.com/api/v3/sauc/bigmodel",
        app_id="app-1",
        uid="caller-1",
    )

    # Neither value is supplied to the constructor above.
    assert asr.protocol == "seed"
    assert asr.resource_id == "volc.bigasr.sauc.duration"

    # app_id and api_key are expected to surface as the app/access key headers.
    expected_headers = {
        "X-Api-App-Key": "app-1",
        "X-Api-Access-Key": "access-token",
        "X-Api-Resource-Id": "volc.bigasr.sauc.duration",
        "X-Api-Request-Id": "req-1",
    }
    built_headers = asr._build_seed_headers("req-1")
    assert built_headers == expected_headers
def test_volcengine_seed_start_payload_merges_request_params():
    """Verify that caller-supplied ``request_params`` end up in the start payload.

    The constructor receives extra ``request`` and ``audio`` entries plus a
    language; the payload from ``_build_seed_start_payload`` must contain them
    alongside the other audio/request fields asserted below.
    """
    hotword_context = "{\"hotwords\":[{\"word\":\"doubao\"}]}"
    extra_params = {
        "request": {
            "end_window_size": 800,
            "force_to_speech_time": 1000,
            "context": hotword_context,
        },
        "audio": {"codec": "raw"},
    }
    asr = VolcengineRealtimeASRService(
        api_key="access-token",
        api_url="wss://openspeech.bytedance.com/api/v3/sauc/bigmodel",
        app_id="app-1",
        uid="caller-1",
        language="zh-CN",
        request_params=extra_params,
    )

    start_payload = asr._build_seed_start_payload()

    assert start_payload["user"] == {"uid": "caller-1"}

    # "codec" comes from request_params and "language" from the constructor;
    # the remaining audio fields were not passed in by this test.
    expected_audio = {
        "format": "pcm",
        "rate": 16000,
        "bits": 16,
        "channels": 1,
        "codec": "raw",
        "language": "zh-CN",
    }
    assert start_payload["audio"] == expected_audio

    # Checked in the same order as listed: model name first, then the three
    # values that were passed through request_params["request"].
    expected_request_fields = {
        "model_name": "bigmodel",
        "end_window_size": 800,
        "force_to_speech_time": 1000,
        "context": hotword_context,
    }
    request_section = start_payload["request"]
    for field, expected in expected_request_fields.items():
        assert request_section[field] == expected
def test_volcengine_seed_start_request_encodes_gzip_json_payload():
    """Verify the binary start frame carries a gzip-compressed JSON payload.

    The test inspects the first two bytes of the frame, reads a big-endian
    length from bytes 8..11, and decodes the bytes that follow as gzip'd
    UTF-8 JSON.
    """
    asr = VolcengineRealtimeASRService(
        api_key="access-token",
        api_url="wss://openspeech.bytedance.com/api/v3/sauc/bigmodel",
        app_id="app-1",
        uid="caller-1",
    )

    frame = asr._build_seed_start_request()

    # NOTE(review): presumably the version/header-size and message-type/flags
    # bytes of the Volcengine binary header - confirm against the protocol spec.
    assert frame[0] == 0x11
    assert frame[1] == 0x11

    # The 4-byte length field ends at offset 12, which is where the body starts.
    body_offset = 12
    body_size = int.from_bytes(frame[8:body_offset], "big")
    compressed_body = frame[body_offset : body_offset + body_size]
    json_text = gzip.decompress(compressed_body).decode("utf-8")
    decoded = json.loads(json_text)

    assert decoded["user"]["uid"] == "caller-1"
    assert decoded["request"]["model_name"] == "bigmodel"
def test_volcengine_gateway_protocol_keeps_model_query():
    """Verify gateway-endpoint handling: protocol value and ``model`` query string.

    For the ``ai-gateway.vei.volces.com`` realtime URL with ``model="bigmodel"``,
    the service must report the ``"gateway"`` protocol and expose an ``api_url``
    that ends with ``?model=bigmodel``.
    """
    gateway_url = "wss://ai-gateway.vei.volces.com/v1/realtime"
    expected_url = "wss://ai-gateway.vei.volces.com/v1/realtime?model=bigmodel"

    gateway_asr = VolcengineRealtimeASRService(
        api_key="access-token",
        api_url=gateway_url,
        model="bigmodel",
    )

    assert gateway_asr.protocol == "gateway"
    assert gateway_asr.api_url == expected_url