Voice library support dashscope
This commit is contained in:
62
engine/agents/default.yaml
Normal file
62
engine/agents/default.yaml
Normal file
@@ -0,0 +1,62 @@
|
||||
# Agent behavior configuration (safe to edit per profile)
|
||||
# This file only controls agent-side behavior (VAD/LLM/TTS/ASR providers).
|
||||
# Infra/server/network settings should stay in .env.
|
||||
|
||||
agent:
|
||||
vad:
|
||||
type: silero
|
||||
model_path: data/vad/silero_vad.onnx
|
||||
threshold: 0.5
|
||||
min_speech_duration_ms: 100
|
||||
eou_threshold_ms: 800
|
||||
|
||||
llm:
|
||||
# provider: openai | openai_compatible | siliconflow
|
||||
provider: openai_compatible
|
||||
model: deepseek-v3
|
||||
temperature: 0.7
|
||||
# Required: no fallback. You can still reference env explicitly.
|
||||
# SECURITY: placeholder only. Never commit a real key; supply it locally or via an explicit env reference.
# The key previously committed on this line is exposed in history and should be revoked.
api_key: your_llm_api_key
|
||||
# Optional for OpenAI-compatible endpoints:
|
||||
api_url: https://api.qnaigc.com/v1
|
||||
|
||||
tts:
|
||||
# provider: edge | openai_compatible | siliconflow | dashscope
|
||||
# dashscope settings (the api_url and model below are also the defaults used if omitted):
|
||||
provider: dashscope
|
||||
api_url: wss://dashscope.aliyuncs.com/api-ws/v1/realtime
|
||||
model: qwen3-tts-flash-realtime
|
||||
# SECURITY: placeholder only. Never commit a real credential to version control.
# The key previously committed on this line is exposed in history and should be revoked.
api_key: your_tts_api_key
|
||||
mode: commit
|
||||
voice: Cherry
|
||||
speed: 1.0
|
||||
# provider: openai_compatible
|
||||
# api_key: your_tts_api_key
|
||||
# api_url: https://api.siliconflow.cn/v1/audio/speech
|
||||
# model: FunAudioLLM/CosyVoice2-0.5B
|
||||
# voice: anna
|
||||
# speed: 1.0
|
||||
|
||||
asr:
|
||||
# provider: buffered | openai_compatible | siliconflow
|
||||
provider: openai_compatible
|
||||
# SECURITY: placeholder only. Never commit a real credential to version control.
# The key previously committed on this line is exposed in history and should be revoked.
api_key: your_asr_api_key
|
||||
api_url: https://api.siliconflow.cn/v1/audio/transcriptions
|
||||
model: FunAudioLLM/SenseVoiceSmall
|
||||
interim_interval_ms: 500
|
||||
min_audio_ms: 300
|
||||
start_min_speech_ms: 160
|
||||
pre_speech_ms: 240
|
||||
final_tail_ms: 120
|
||||
|
||||
duplex:
|
||||
enabled: true
|
||||
system_prompt: You are a helpful, friendly voice assistant. Keep your responses concise and conversational.
|
||||
|
||||
barge_in:
|
||||
min_duration_ms: 200
|
||||
silence_tolerance_ms: 60
|
||||
|
||||
tools:
|
||||
- calculator
|
||||
- current_time
|
||||
55
engine/agents/example.yaml
Normal file
55
engine/agents/example.yaml
Normal file
@@ -0,0 +1,55 @@
|
||||
# Agent behavior configuration (safe to edit per profile)
|
||||
# This file only controls agent-side behavior (VAD/LLM/TTS/ASR providers).
|
||||
# Infra/server/network settings should stay in .env.
|
||||
|
||||
agent:
|
||||
vad:
|
||||
type: silero
|
||||
model_path: data/vad/silero_vad.onnx
|
||||
threshold: 0.5
|
||||
min_speech_duration_ms: 100
|
||||
eou_threshold_ms: 800
|
||||
|
||||
llm:
|
||||
# provider: openai | openai_compatible | siliconflow
|
||||
provider: openai_compatible
|
||||
model: deepseek-v3
|
||||
temperature: 0.7
|
||||
# Required: no fallback. You can still reference env explicitly.
|
||||
api_key: your_llm_api_key
|
||||
# Optional for OpenAI-compatible endpoints:
|
||||
api_url: https://api.qnaigc.com/v1
|
||||
|
||||
tts:
|
||||
# provider: edge | openai_compatible | siliconflow | dashscope
|
||||
# dashscope defaults (if omitted):
|
||||
# api_url: wss://dashscope.aliyuncs.com/api-ws/v1/realtime
|
||||
# model: qwen3-tts-flash-realtime
|
||||
# dashscope_mode: commit (engine splits) | server_commit (dashscope splits)
|
||||
# note: dashscope_mode/mode is ONLY used when provider=dashscope.
|
||||
provider: openai_compatible
|
||||
api_key: your_tts_api_key
|
||||
api_url: https://api.siliconflow.cn/v1/audio/speech
|
||||
model: FunAudioLLM/CosyVoice2-0.5B
|
||||
voice: anna
|
||||
speed: 1.0
|
||||
|
||||
asr:
|
||||
# provider: buffered | openai_compatible | siliconflow
|
||||
provider: openai_compatible
|
||||
# Placeholder: replace with your ASR provider key (do not commit the real value).
api_key: your_asr_api_key
|
||||
api_url: https://api.siliconflow.cn/v1/audio/transcriptions
|
||||
model: FunAudioLLM/SenseVoiceSmall
|
||||
interim_interval_ms: 500
|
||||
min_audio_ms: 300
|
||||
start_min_speech_ms: 160
|
||||
pre_speech_ms: 240
|
||||
final_tail_ms: 120
|
||||
|
||||
duplex:
|
||||
enabled: true
|
||||
system_prompt: You are a helpful, friendly voice assistant. Keep your responses concise and conversational.
|
||||
|
||||
barge_in:
|
||||
min_duration_ms: 200
|
||||
silence_tolerance_ms: 60
|
||||
78
engine/agents/tools.yaml
Normal file
78
engine/agents/tools.yaml
Normal file
@@ -0,0 +1,78 @@
|
||||
# Agent behavior configuration with tool declarations.
|
||||
# This profile is an example only.
|
||||
|
||||
agent:
|
||||
vad:
|
||||
type: silero
|
||||
model_path: data/vad/silero_vad.onnx
|
||||
threshold: 0.5
|
||||
min_speech_duration_ms: 100
|
||||
eou_threshold_ms: 800
|
||||
|
||||
llm:
|
||||
# provider: openai | openai_compatible | siliconflow
|
||||
provider: openai_compatible
|
||||
model: deepseek-v3
|
||||
temperature: 0.7
|
||||
api_key: your_llm_api_key
|
||||
api_url: https://api.qnaigc.com/v1
|
||||
|
||||
tts:
|
||||
# provider: edge | openai_compatible | siliconflow | dashscope
|
||||
# dashscope defaults (if omitted):
|
||||
# api_url: wss://dashscope.aliyuncs.com/api-ws/v1/realtime
|
||||
# model: qwen3-tts-flash-realtime
|
||||
# dashscope_mode: commit (engine splits) | server_commit (dashscope splits)
|
||||
# note: dashscope_mode/mode is ONLY used when provider=dashscope.
|
||||
provider: openai_compatible
|
||||
api_key: your_tts_api_key
|
||||
api_url: https://api.siliconflow.cn/v1/audio/speech
|
||||
model: FunAudioLLM/CosyVoice2-0.5B
|
||||
voice: anna
|
||||
speed: 1.0
|
||||
|
||||
asr:
|
||||
# provider: buffered | openai_compatible | siliconflow
|
||||
provider: openai_compatible
|
||||
api_key: your_asr_api_key
|
||||
api_url: https://api.siliconflow.cn/v1/audio/transcriptions
|
||||
model: FunAudioLLM/SenseVoiceSmall
|
||||
interim_interval_ms: 500
|
||||
min_audio_ms: 300
|
||||
start_min_speech_ms: 160
|
||||
pre_speech_ms: 240
|
||||
final_tail_ms: 120
|
||||
|
||||
duplex:
|
||||
enabled: true
|
||||
system_prompt: You are a helpful voice assistant with tool-calling support.
|
||||
|
||||
barge_in:
|
||||
min_duration_ms: 200
|
||||
silence_tolerance_ms: 60
|
||||
|
||||
# Tool declarations consumed by the engine at startup.
|
||||
# - String form enables built-in/default tool schema when available.
|
||||
# - Object form provides OpenAI function schema + executor hint.
|
||||
tools:
|
||||
- current_time
|
||||
- calculator
|
||||
- name: weather
|
||||
description: Get weather by city name.
|
||||
parameters:
|
||||
type: object
|
||||
properties:
|
||||
city:
|
||||
type: string
|
||||
description: City name, for example "San Francisco".
|
||||
required: [city]
|
||||
executor: server
|
||||
- name: open_map
|
||||
description: Open map app on the client device.
|
||||
parameters:
|
||||
type: object
|
||||
properties:
|
||||
query:
|
||||
type: string
|
||||
required: [query]
|
||||
executor: client
|
||||
Reference in New Issue
Block a user