Add Volcengine support for TTS and ASR services

- Introduced Volcengine as a new provider for both TTS and ASR services.
- Updated configuration files to include Volcengine-specific parameters such as app_id, resource_id, and uid.
- Enhanced the ASR service to support streaming mode with Volcengine's API.
- Modified existing tests to validate the integration of Volcengine services.
- Updated documentation to reflect the addition of Volcengine as a supported provider for TTS and ASR.
- Refactored service factory to accommodate Volcengine alongside existing providers.
2026-03-08 23:09:50 +08:00
parent 3604db21eb
commit aeeeee20d1
18 changed files with 1256 additions and 12 deletions
--- a/engine/config/agents/tools.yaml
+++ b/engine/config/agents/tools.yaml
@@ -18,12 +18,17 @@ agent:
    api_url: https://api.qnaigc.com/v1

  tts:
-    # provider: openai_compatible | siliconflow | dashscope
+    # provider: openai_compatible | siliconflow | dashscope | volcengine
    # dashscope defaults (if omitted):
    #   api_url: wss://dashscope.aliyuncs.com/api-ws/v1/realtime
    #   model: qwen3-tts-flash-realtime
    #   dashscope_mode: commit (engine splits) | server_commit (dashscope splits)
    #   note: dashscope_mode/mode is ONLY used when provider=dashscope.
+    # volcengine defaults (if omitted):
+    #   api_url: https://openspeech.bytedance.com/api/v3/tts/unidirectional
+    #   resource_id: seed-tts-2.0
+    #   app_id: your volcengine app key (credential placeholder, not a default value)
+    #   api_key: your volcengine access key (credential placeholder, not a default value)
    provider: openai_compatible
    api_key: your_tts_api_key
    api_url: https://api.siliconflow.cn/v1/audio/speech
@@ -32,11 +37,21 @@ agent:
    speed: 1.0

  asr:
-    # provider: buffered | openai_compatible | siliconflow | dashscope
+    # provider: buffered | openai_compatible | siliconflow | dashscope | volcengine
    # dashscope defaults (if omitted):
    #   api_url: wss://dashscope.aliyuncs.com/api-ws/v1/realtime
    #   model: qwen3-asr-flash-realtime
    #   note: dashscope uses streaming ASR mode (chunk-by-chunk).
+    # volcengine defaults (if omitted):
+    #   api_url: wss://openspeech.bytedance.com/api/v3/sauc/bigmodel
+    #   model: bigmodel
+    #   resource_id: volc.bigasr.sauc.duration
+    #   app_id: your volcengine app key (credential placeholder, not a default value)
+    #   api_key: your volcengine access key (credential placeholder, not a default value)
+    #   request_params:
+    #     end_window_size: 800
+    #     force_to_speech_time: 1000
+    #   note: volcengine uses streaming ASR mode (chunk-by-chunk).
    provider: openai_compatible
    api_key: your_asr_api_key
    api_url: https://api.siliconflow.cn/v1/audio/transcriptions