diff --git a/engine/config/agents/dashscope.yaml b/engine/config/agents/dashscope.yaml
new file mode 100644
index 0000000..3491d68
--- /dev/null
+++ b/engine/config/agents/dashscope.yaml
@@ -0,0 +1,50 @@
+# DashScope profile: agent behavior for realtime ASR and TTS.
+# Scope: agent-side behavior only (VAD / LLM / TTS / ASR providers).
+# Keep infra, server, and network settings in .env.
+
+agent:
+  vad:
+    type: 'silero'
+    model_path: 'data/vad/silero_vad.onnx'
+    threshold: 0.5
+    min_speech_duration_ms: 100
+    eou_threshold_ms: 800
+
+  llm:
+    # Accepted provider values: openai | openai_compatible | siliconflow
+    provider: 'openai_compatible'
+    model: 'deepseek-v3'
+    temperature: 0.7
+    api_key: 'your_llm_api_key'
+    api_url: 'https://api.qnaigc.com/v1'
+
+  tts:
+    provider: 'dashscope'
+    api_key: 'your_tts_api_key'
+    api_url: 'wss://dashscope.aliyuncs.com/api-ws/v1/realtime'
+    model: 'qwen3-tts-flash-realtime'
+    voice: 'Cherry'
+    # dashscope_mode: commit (engine splits) | server_commit (dashscope splits)
+    dashscope_mode: 'commit'
+    speed: 1.0
+
+  asr:
+    provider: 'dashscope'
+    api_key: 'your_asr_api_key'
+    api_url: 'wss://dashscope.aliyuncs.com/api-ws/v1/realtime'
+    model: 'qwen3-asr-flash-realtime'
+    interim_interval_ms: 500
+    min_audio_ms: 300
+    start_min_speech_ms: 160
+    pre_speech_ms: 240
+    final_tail_ms: 120
+
+  duplex:
+    enabled: true
+    system_prompt: >-
+      You are a helpful, friendly voice assistant.
+      Keep your responses concise and conversational.
+
+  barge_in:
+    min_duration_ms: 200
+    silence_tolerance_ms: 60
diff --git a/engine/config/agents/volcengine.yaml b/engine/config/agents/volcengine.yaml
new file mode 100644
index 0000000..acd66b3
--- /dev/null
+++ b/engine/config/agents/volcengine.yaml
@@ -0,0 +1,68 @@
+# Volcengine profile: agent behavior configuration (safe to edit per profile).
+# Scope: agent-side behavior only (VAD / LLM / TTS / ASR providers).
+# Keep infra, server, and network settings in .env.
+
+agent:
+  vad:
+    type: silero
+    model_path: data/vad/silero_vad.onnx
+    threshold: 0.5
+    min_speech_duration_ms: 100
+    eou_threshold_ms: 800
+
+  llm:
+    # provider: openai | openai_compatible | siliconflow
+    provider: openai_compatible
+    model: deepseek-v3
+    temperature: 0.7
+    # Required: there is no fallback. You can still reference env explicitly.
+    api_key: your_llm_api_key
+    # Optional for OpenAI-compatible endpoints:
+    api_url: https://api.qnaigc.com/v1
+
+  tts:
+    # provider: edge | openai_compatible | siliconflow | dashscope | volcengine
+    # dashscope defaults (if omitted):
+    #   api_url: wss://dashscope.aliyuncs.com/api-ws/v1/realtime
+    #   model: qwen3-tts-flash-realtime
+    #   dashscope_mode: commit (engine splits) | server_commit (dashscope splits)
+    #   note: dashscope_mode/mode is ONLY used when provider=dashscope.
+    # volcengine settings used by this profile:
+    provider: volcengine
+    api_url: https://openspeech.bytedance.com/api/v3/tts/unidirectional
+    resource_id: seed-tts-2.0
+    app_id: your_tts_app_id
+    api_key: your_tts_api_key
+    speed: 1.1
+    voice: zh_female_vv_uranus_bigtts
+
+  asr:
+    # Provider settings sit directly under asr (no nested asr key).
+    provider: volcengine
+    api_url: wss://openspeech.bytedance.com/api/v3/sauc/bigmodel
+    app_id: your_asr_app_id
+    api_key: your_asr_api_key
+    resource_id: volc.bigasr.sauc.duration
+    uid: caller-1
+    model: bigmodel
+    request_params:
+      end_window_size: 800
+      force_to_speech_time: 1000
+      enable_punc: true
+      enable_itn: false
+      enable_ddc: false
+      show_utterance: true
+      result_type: single
+    interim_interval_ms: 500
+    min_audio_ms: 300
+    start_min_speech_ms: 160
+    pre_speech_ms: 240
+    final_tail_ms: 120
+
+  duplex:
+    enabled: true
+    system_prompt: 你是一个人工智能助手,你用简答语句回答,避免使用标点符号和emoji。
+
+  barge_in:
+    min_duration_ms: 200
+    silence_tolerance_ms: 60