Add Volcengine support for TTS and ASR services
- Introduced Volcengine as a new provider for both TTS and ASR services.
- Updated configuration files to include Volcengine-specific parameters such as app_id, resource_id, and uid.
- Enhanced the ASR service to support streaming mode with Volcengine's API.
- Modified existing tests to validate the integration of Volcengine services.
- Updated documentation to reflect the addition of Volcengine as a supported provider for TTS and ASR.
- Refactored service factory to accommodate Volcengine alongside existing providers.
This commit is contained in:
@@ -17,14 +17,17 @@ from runtime.ports import (
|
||||
)
|
||||
from providers.asr.buffered import BufferedASRService
|
||||
from providers.asr.dashscope import DashScopeRealtimeASRService
|
||||
from providers.asr.volcengine import VolcengineRealtimeASRService
|
||||
from providers.tts.dashscope import DashScopeTTSService
|
||||
from providers.llm.openai import MockLLMService, OpenAILLMService
|
||||
from providers.asr.openai_compatible import OpenAICompatibleASRService
|
||||
from providers.tts.openai_compatible import OpenAICompatibleTTSService
|
||||
from providers.tts.mock import MockTTSService
|
||||
from providers.tts.volcengine import VolcengineTTSService
|
||||
|
||||
_OPENAI_COMPATIBLE_PROVIDERS = {"openai_compatible", "openai-compatible", "siliconflow"}
|
||||
_DASHSCOPE_PROVIDERS = {"dashscope"}
|
||||
_VOLCENGINE_PROVIDERS = {"volcengine"}
|
||||
_SUPPORTED_LLM_PROVIDERS = {"openai", *_OPENAI_COMPATIBLE_PROVIDERS}
|
||||
|
||||
|
||||
@@ -37,6 +40,10 @@ class DefaultRealtimeServiceFactory(RealtimeServiceFactory):
|
||||
_DEFAULT_DASHSCOPE_ASR_MODEL = "qwen3-asr-flash-realtime"
|
||||
_DEFAULT_OPENAI_COMPATIBLE_TTS_MODEL = "FunAudioLLM/CosyVoice2-0.5B"
|
||||
_DEFAULT_OPENAI_COMPATIBLE_ASR_MODEL = "FunAudioLLM/SenseVoiceSmall"
|
||||
_DEFAULT_VOLCENGINE_TTS_URL = "https://openspeech.bytedance.com/api/v3/tts/unidirectional"
|
||||
_DEFAULT_VOLCENGINE_TTS_RESOURCE_ID = "seed-tts-2.0"
|
||||
_DEFAULT_VOLCENGINE_ASR_REALTIME_URL = "wss://openspeech.bytedance.com/api/v3/sauc/bigmodel"
|
||||
_DEFAULT_VOLCENGINE_ASR_MODEL = "bigmodel"
|
||||
|
||||
@staticmethod
|
||||
def _normalize_provider(provider: Any) -> str:
|
||||
@@ -81,6 +88,19 @@ class DefaultRealtimeServiceFactory(RealtimeServiceFactory):
|
||||
speed=spec.speed,
|
||||
)
|
||||
|
||||
if provider in _VOLCENGINE_PROVIDERS and spec.api_key:
|
||||
return VolcengineTTSService(
|
||||
api_key=spec.api_key,
|
||||
api_url=spec.api_url or self._DEFAULT_VOLCENGINE_TTS_URL,
|
||||
voice=spec.voice,
|
||||
model=spec.model,
|
||||
app_id=spec.app_id,
|
||||
resource_id=spec.resource_id or self._DEFAULT_VOLCENGINE_TTS_RESOURCE_ID,
|
||||
uid=spec.uid,
|
||||
sample_rate=spec.sample_rate,
|
||||
speed=spec.speed,
|
||||
)
|
||||
|
||||
if provider in _OPENAI_COMPATIBLE_PROVIDERS and spec.api_key:
|
||||
return OpenAICompatibleTTSService(
|
||||
api_key=spec.api_key,
|
||||
@@ -110,6 +130,20 @@ class DefaultRealtimeServiceFactory(RealtimeServiceFactory):
|
||||
on_transcript=spec.on_transcript,
|
||||
)
|
||||
|
||||
if provider in _VOLCENGINE_PROVIDERS and spec.api_key:
|
||||
return VolcengineRealtimeASRService(
|
||||
api_key=spec.api_key,
|
||||
api_url=spec.api_url or self._DEFAULT_VOLCENGINE_ASR_REALTIME_URL,
|
||||
model=spec.model or self._DEFAULT_VOLCENGINE_ASR_MODEL,
|
||||
sample_rate=spec.sample_rate,
|
||||
language=spec.language,
|
||||
app_id=spec.app_id,
|
||||
resource_id=spec.resource_id,
|
||||
uid=spec.uid,
|
||||
request_params=spec.request_params,
|
||||
on_transcript=spec.on_transcript,
|
||||
)
|
||||
|
||||
if provider in _OPENAI_COMPATIBLE_PROVIDERS and spec.api_key:
|
||||
return OpenAICompatibleASRService(
|
||||
api_key=spec.api_key,
|
||||
|
||||
Reference in New Issue
Block a user