Add Volcengine support for TTS and ASR services
- Introduced Volcengine as a new provider for both TTS and ASR services.
- Updated configuration files to include Volcengine-specific parameters such as app_id, resource_id, and uid.
- Enhanced the ASR service to support streaming mode with Volcengine's API.
- Modified existing tests to validate the integration of Volcengine services.
- Updated documentation to reflect the addition of Volcengine as a supported provider for TTS and ASR.
- Refactored service factory to accommodate Volcengine alongside existing providers.
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
from providers.asr.buffered import BufferedASRService
|
||||
from providers.asr.dashscope import DashScopeRealtimeASRService
|
||||
from providers.asr.openai_compatible import OpenAICompatibleASRService
|
||||
from providers.asr.volcengine import VolcengineRealtimeASRService
|
||||
from providers.factory.default import DefaultRealtimeServiceFactory
|
||||
from runtime.ports import ASRServiceSpec
|
||||
|
||||
@@ -35,6 +36,29 @@ def test_create_asr_service_openai_compatible_returns_offline_provider():
|
||||
assert service.enable_interim is False
|
||||
|
||||
|
||||
def test_create_asr_service_volcengine_returns_streaming_provider():
    """A ``volcengine`` streaming spec yields a VolcengineRealtimeASRService.

    The created service is expected to expose the streaming mode, the
    ``seed`` protocol, and the app id, uid and request params taken from
    the spec.
    """
    asr_factory = DefaultRealtimeServiceFactory()
    volcengine_spec = ASRServiceSpec(
        provider="volcengine",
        mode="streaming",
        sample_rate=16000,
        api_key="test-key",
        api_url="wss://openspeech.bytedance.com/api/v3/sauc/bigmodel",
        model="bigmodel",
        app_id="app-1",
        uid="caller-1",
        request_params={"end_window_size": 800},
    )

    asr = asr_factory.create_asr_service(volcengine_spec)

    # Provider selection.
    assert isinstance(asr, VolcengineRealtimeASRService)
    # Values carried over from (or derived from) the spec.
    assert asr.mode == "streaming"
    assert asr.protocol == "seed"
    assert asr.app_id == "app-1"
    assert asr.uid == "caller-1"
    assert asr.request_params["end_window_size"] == 800
|
||||
|
||||
|
||||
def test_create_asr_service_fallback_buffered_for_unsupported_provider():
|
||||
factory = DefaultRealtimeServiceFactory()
|
||||
service = factory.create_asr_service(
|
||||
|
||||
@@ -227,6 +227,62 @@ async def test_with_backend_url_uses_backend_for_assistant_config(monkeypatch, t
|
||||
assert payload["assistant"]["systemPrompt"] == "backend prompt"
|
||||
|
||||
|
||||
def test_translate_agent_schema_maps_volcengine_fields():
    """Agent-schema Volcengine settings are translated to camelCase service config.

    Checks that the snake_case ``tts`` / ``asr`` sections under ``agent``
    come back under ``services`` with ``apiKey``, ``baseUrl``, ``appId``,
    ``resourceId`` and ``requestParams`` keys.
    """
    tts_section = {
        "provider": "volcengine",
        "api_key": "tts-key",
        "api_url": "https://openspeech.bytedance.com/api/v3/tts/unidirectional",
        "app_id": "app-123",
        "resource_id": "seed-tts-2.0",
        "uid": "caller-1",
        "voice": "zh_female_shuangkuaisisi_moon_bigtts",
        "speed": 1.1,
    }
    asr_section = {
        "provider": "volcengine",
        "api_key": "asr-key",
        "api_url": "wss://openspeech.bytedance.com/api/v3/sauc/bigmodel",
        "model": "bigmodel",
        "app_id": "app-123",
        "resource_id": "volc.bigasr.sauc.duration",
        "uid": "caller-1",
        "request_params": {
            "end_window_size": 800,
            "force_to_speech_time": 1000,
        },
    }
    payload = {"agent": {"tts": tts_section, "asr": asr_section}}

    translated = LocalYamlAssistantConfigAdapter._translate_agent_schema(
        "assistant_demo", payload
    )
    assert translated is not None

    expected_tts = {
        "provider": "volcengine",
        "apiKey": "tts-key",
        "baseUrl": "https://openspeech.bytedance.com/api/v3/tts/unidirectional",
        "voice": "zh_female_shuangkuaisisi_moon_bigtts",
        "appId": "app-123",
        "resourceId": "seed-tts-2.0",
        "uid": "caller-1",
        "speed": 1.1,
    }
    expected_asr = {
        "provider": "volcengine",
        "model": "bigmodel",
        "apiKey": "asr-key",
        "baseUrl": "wss://openspeech.bytedance.com/api/v3/sauc/bigmodel",
        "appId": "app-123",
        "resourceId": "volc.bigasr.sauc.duration",
        "uid": "caller-1",
        "requestParams": {
            "end_window_size": 800,
            "force_to_speech_time": 1000,
        },
    }

    services = translated["services"]
    assert services["tts"] == expected_tts
    assert services["asr"] == expected_asr
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_backend_mode_disabled_uses_local_assistant_config_even_with_url(monkeypatch, tmp_path):
|
||||
class _FailIfCalledClientSession:
|
||||
|
||||
45
engine/tests/test_tts_factory_modes.py
Normal file
45
engine/tests/test_tts_factory_modes.py
Normal file
@@ -0,0 +1,45 @@
|
||||
from providers.factory.default import DefaultRealtimeServiceFactory
|
||||
from providers.tts.mock import MockTTSService
|
||||
from providers.tts.openai_compatible import OpenAICompatibleTTSService
|
||||
from providers.tts.volcengine import VolcengineTTSService
|
||||
from runtime.ports import TTSServiceSpec
|
||||
|
||||
|
||||
def test_create_tts_service_volcengine_returns_native_provider():
    """A fully specified ``volcengine`` TTS spec yields VolcengineTTSService."""
    tts_factory = DefaultRealtimeServiceFactory()
    volcengine_spec = TTSServiceSpec(
        provider="volcengine",
        api_key="test-key",
        app_id="app-1",
        resource_id="seed-tts-2.0",
        voice="zh_female_shuangkuaisisi_moon_bigtts",
        sample_rate=16000,
    )

    tts = tts_factory.create_tts_service(volcengine_spec)

    assert isinstance(tts, VolcengineTTSService)
|
||||
|
||||
|
||||
def test_create_tts_service_openai_compatible_returns_provider():
    """An ``openai_compatible`` spec with a key yields OpenAICompatibleTTSService."""
    tts_factory = DefaultRealtimeServiceFactory()
    openai_spec = TTSServiceSpec(
        provider="openai_compatible",
        api_key="test-key",
        voice="anna",
        sample_rate=16000,
    )

    tts = tts_factory.create_tts_service(openai_spec)

    assert isinstance(tts, OpenAICompatibleTTSService)
|
||||
|
||||
|
||||
def test_create_tts_service_fallbacks_to_mock_without_key():
    """A ``volcengine`` spec built without ``api_key`` yields MockTTSService."""
    tts_factory = DefaultRealtimeServiceFactory()
    # No api_key is passed here; the test expects the mock service in that case.
    keyless_spec = TTSServiceSpec(
        provider="volcengine",
        voice="anna",
        sample_rate=16000,
    )

    tts = tts_factory.create_tts_service(keyless_spec)

    assert isinstance(tts, MockTTSService)
|
||||
86
engine/tests/test_volcengine_asr_provider.py
Normal file
86
engine/tests/test_volcengine_asr_provider.py
Normal file
@@ -0,0 +1,86 @@
|
||||
import gzip
|
||||
import json
|
||||
|
||||
from providers.asr.volcengine import VolcengineRealtimeASRService
|
||||
|
||||
|
||||
def test_volcengine_seed_protocol_defaults_and_headers():
    """The service defaults to the seed protocol and builds X-Api-* headers.

    With no explicit ``resource_id`` the service is expected to report
    ``volc.bigasr.sauc.duration``, and the seed headers should carry the
    app id, access key, resource id and the supplied request id.
    """
    asr = VolcengineRealtimeASRService(
        api_key="access-token",
        api_url="wss://openspeech.bytedance.com/api/v3/sauc/bigmodel",
        app_id="app-1",
        uid="caller-1",
    )

    # Defaults resolved by the constructor.
    assert asr.protocol == "seed"
    assert asr.resource_id == "volc.bigasr.sauc.duration"

    expected_headers = {
        "X-Api-App-Key": "app-1",
        "X-Api-Access-Key": "access-token",
        "X-Api-Resource-Id": "volc.bigasr.sauc.duration",
        "X-Api-Request-Id": "req-1",
    }
    seed_headers = asr._build_seed_headers("req-1")
    assert seed_headers == expected_headers
|
||||
|
||||
|
||||
def test_volcengine_seed_start_payload_merges_request_params():
    """Caller ``request_params`` are merged into the seed start payload.

    The ``request`` and ``audio`` overrides supplied by the caller should
    appear in the payload alongside the values the test expects the
    service to fill in (``model_name``, the PCM audio description, and
    the language).
    """
    hotword_context = "{\"hotwords\":[{\"word\":\"doubao\"}]}"
    overrides = {
        "request": {
            "end_window_size": 800,
            "force_to_speech_time": 1000,
            "context": hotword_context,
        },
        "audio": {"codec": "raw"},
    }
    asr = VolcengineRealtimeASRService(
        api_key="access-token",
        api_url="wss://openspeech.bytedance.com/api/v3/sauc/bigmodel",
        app_id="app-1",
        uid="caller-1",
        language="zh-CN",
        request_params=overrides,
    )

    start_payload = asr._build_seed_start_payload()

    assert start_payload["user"] == {"uid": "caller-1"}

    # The audio section combines the expected base fields with the caller's codec.
    expected_audio = {
        "format": "pcm",
        "rate": 16000,
        "bits": 16,
        "channels": 1,
        "codec": "raw",
        "language": "zh-CN",
    }
    assert start_payload["audio"] == expected_audio

    request_section = start_payload["request"]
    assert request_section["model_name"] == "bigmodel"
    assert request_section["end_window_size"] == 800
    assert request_section["force_to_speech_time"] == 1000
    assert request_section["context"] == hotword_context
|
||||
|
||||
|
||||
def test_volcengine_seed_start_request_encodes_gzip_json_payload():
    """The seed start frame carries a gzip-compressed JSON payload.

    The test reads a big-endian length from bytes 8..12 of the frame and
    decodes that many bytes starting at offset 12 as gzip-compressed
    UTF-8 JSON.
    """
    asr = VolcengineRealtimeASRService(
        api_key="access-token",
        api_url="wss://openspeech.bytedance.com/api/v3/sauc/bigmodel",
        app_id="app-1",
        uid="caller-1",
    )

    frame = asr._build_seed_start_request()

    # NOTE(review): the first two header bytes are both expected to be 0x11;
    # their field meaning is not visible here; confirm against the protocol docs.
    assert frame[0] == 0x11
    assert frame[1] == 0x11

    body_offset = 12
    body_size = int.from_bytes(frame[8:body_offset], "big")
    compressed_body = frame[body_offset : body_offset + body_size]
    decoded_body = gzip.decompress(compressed_body).decode("utf-8")
    start_payload = json.loads(decoded_body)

    assert start_payload["user"]["uid"] == "caller-1"
    assert start_payload["request"]["model_name"] == "bigmodel"
|
||||
|
||||
|
||||
def test_volcengine_gateway_protocol_keeps_model_query():
    """An AI-gateway URL selects the gateway protocol with a ``model`` query.

    The service's ``api_url`` is expected to equal the gateway URL with
    ``?model=bigmodel`` appended.
    """
    gateway_url = "wss://ai-gateway.vei.volces.com/v1/realtime"
    asr = VolcengineRealtimeASRService(
        api_key="access-token",
        api_url=gateway_url,
        model="bigmodel",
    )

    assert asr.protocol == "gateway"
    assert asr.api_url == "wss://ai-gateway.vei.volces.com/v1/realtime?model=bigmodel"
|
||||
Reference in New Issue
Block a user