Add Volcengine support for TTS and ASR services

- Introduced Volcengine as a new provider for both TTS and ASR services.
- Updated configuration files to include Volcengine-specific parameters such as app_id, resource_id, and uid.
- Enhanced the ASR service to support streaming mode with Volcengine's API.
- Modified existing tests to validate the integration of Volcengine services.
- Updated documentation to reflect the addition of Volcengine as a supported provider for TTS and ASR.
- Refactored service factory to accommodate Volcengine alongside existing providers.
This commit is contained in:
Xin Wang
2026-03-08 23:09:50 +08:00
parent 3604db21eb
commit aeeeee20d1
18 changed files with 1256 additions and 12 deletions

View File

@@ -793,6 +793,23 @@ class DuplexPipeline:
return False
return None
@staticmethod
def _coerce_json_object(value: Any) -> Optional[Dict[str, Any]]:
if isinstance(value, dict):
return dict(value)
if isinstance(value, str):
raw = value.strip()
if not raw:
return None
try:
parsed = json.loads(raw)
except json.JSONDecodeError:
logger.warning("Ignoring invalid JSON object config: {}", raw[:120])
return None
if isinstance(parsed, dict):
return parsed
return None
@staticmethod
def _is_dashscope_tts_provider(provider: Any) -> bool:
normalized = str(provider or "").strip().lower()
@@ -804,7 +821,7 @@ class DuplexPipeline:
if normalized_mode in {"offline", "streaming"}:
return normalized_mode # type: ignore[return-value]
normalized_provider = str(provider or "").strip().lower()
if normalized_provider == "dashscope":
if normalized_provider in {"dashscope", "volcengine"}:
return "streaming"
return "offline"
@@ -963,6 +980,10 @@ class DuplexPipeline:
tts_api_url = self._runtime_tts.get("baseUrl") or settings.tts_api_url
tts_voice = self._runtime_tts.get("voice") or settings.tts_voice
tts_model = self._runtime_tts.get("model") or settings.tts_model
tts_app_id = self._runtime_tts.get("appId") or settings.tts_app_id
tts_resource_id = self._runtime_tts.get("resourceId") or settings.tts_resource_id
tts_cluster = self._runtime_tts.get("cluster") or settings.tts_cluster
tts_uid = self._runtime_tts.get("uid") or settings.tts_uid
tts_speed = float(self._runtime_tts.get("speed") or settings.tts_speed)
tts_mode = self._resolved_dashscope_tts_mode()
runtime_mode = str(self._runtime_tts.get("mode") or "").strip()
@@ -978,6 +999,10 @@ class DuplexPipeline:
api_url=str(tts_api_url).strip() if tts_api_url else None,
voice=str(tts_voice),
model=str(tts_model).strip() if tts_model else None,
app_id=str(tts_app_id).strip() if tts_app_id else None,
resource_id=str(tts_resource_id).strip() if tts_resource_id else None,
cluster=str(tts_cluster).strip() if tts_cluster else None,
uid=str(tts_uid).strip() if tts_uid else None,
sample_rate=settings.sample_rate,
speed=tts_speed,
mode=str(tts_mode),
@@ -1006,6 +1031,13 @@ class DuplexPipeline:
asr_api_key = self._runtime_asr.get("apiKey")
asr_api_url = self._runtime_asr.get("baseUrl") or settings.asr_api_url
asr_model = self._runtime_asr.get("model") or settings.asr_model
asr_app_id = self._runtime_asr.get("appId") or settings.asr_app_id
asr_resource_id = self._runtime_asr.get("resourceId") or settings.asr_resource_id
asr_cluster = self._runtime_asr.get("cluster") or settings.asr_cluster
asr_uid = self._runtime_asr.get("uid") or settings.asr_uid
asr_request_params = self._coerce_json_object(self._runtime_asr.get("requestParams"))
if asr_request_params is None:
asr_request_params = self._coerce_json_object(settings.asr_request_params_json)
asr_enable_interim = self._coerce_bool(self._runtime_asr.get("enableInterim"))
if asr_enable_interim is None:
asr_enable_interim = bool(settings.asr_enable_interim)
@@ -1022,6 +1054,11 @@ class DuplexPipeline:
api_key=str(asr_api_key).strip() if asr_api_key else None,
api_url=str(asr_api_url).strip() if asr_api_url else None,
model=str(asr_model).strip() if asr_model else None,
app_id=str(asr_app_id).strip() if asr_app_id else None,
resource_id=str(asr_resource_id).strip() if asr_resource_id else None,
cluster=str(asr_cluster).strip() if asr_cluster else None,
uid=str(asr_uid).strip() if asr_uid else None,
request_params=asr_request_params,
enable_interim=asr_enable_interim,
interim_interval_ms=asr_interim_interval,
min_audio_for_interim_ms=asr_min_audio_ms,