Add DashScope agent configuration files for VAD, LLM, TTS, and ASR services

- Introduced new YAML configuration files for DashScope, detailing agent behavior settings for VAD, LLM, TTS, and ASR.
- Configured parameters including model paths, API keys, and service URLs for real-time processing.
- Ensured compatibility with existing agent-side behavior management while providing specific settings for DashScope integration.
This commit is contained in:
Xin Wang
2026-03-08 23:28:08 +08:00
parent aeeeee20d1
commit e41d34fe23
2 changed files with 115 additions and 0 deletions

View File

@@ -0,0 +1,68 @@
# Agent behavior configuration (safe to edit per profile)
# This file only controls agent-side behavior (VAD/LLM/TTS/ASR providers).
# Infra/server/network settings should stay in .env.
agent:
vad:
type: silero
model_path: data/vad/silero_vad.onnx
threshold: 0.5
min_speech_duration_ms: 100
eou_threshold_ms: 800
llm:
# provider: openai | openai_compatible | siliconflow
provider: openai_compatible
model: deepseek-v3
temperature: 0.7
# Required: no fallback. You can still reference env explicitly.
api_key: your_llm_api_key
# Optional for OpenAI-compatible endpoints:
api_url: https://api.qnaigc.com/v1
tts:
# provider: edge | openai_compatible | siliconflow | dashscope
# dashscope defaults (if omitted):
# api_url: wss://dashscope.aliyuncs.com/api-ws/v1/realtime
# model: qwen3-tts-flash-realtime
# dashscope_mode: commit (engine splits) | server_commit (dashscope splits)
# note: dashscope_mode/mode is ONLY used when provider=dashscope.
# volcengine defaults (if omitted):
provider: volcengine
api_url: https://openspeech.bytedance.com/api/v3/tts/unidirectional
resource_id: seed-tts-2.0
app_id: your_tts_app_id
api_key: your_tts_api_key
speed: 1.1
voice: zh_female_vv_uranus_bigtts
asr:
asr:
provider: volcengine
api_url: wss://openspeech.bytedance.com/api/v3/sauc/bigmodel
app_id: your_asr_app_id
api_key: your_asr_api_key
resource_id: volc.bigasr.sauc.duration
uid: caller-1
model: bigmodel
request_params:
end_window_size: 800
force_to_speech_time: 1000
enable_punc: true
enable_itn: false
enable_ddc: false
show_utterance: true
result_type: single
interim_interval_ms: 500
min_audio_ms: 300
start_min_speech_ms: 160
pre_speech_ms: 240
final_tail_ms: 120
duplex:
enabled: true
system_prompt: 你是一个人工智能助手你用简答语句回答避免使用标点符号和emoji。
barge_in:
min_duration_ms: 200
silence_tolerance_ms: 60