Organize config

This commit is contained in:
Xin Wang
2026-02-25 15:52:55 +08:00
parent 2b2193557d
commit 8b9064f6e6
12 changed files with 1248 additions and 92 deletions

View File

@@ -23,57 +23,21 @@ CHUNK_SIZE_MS=20
DEFAULT_CODEC=pcm
MAX_AUDIO_BUFFER_SECONDS=30
# VAD / EOU
VAD_TYPE=silero
VAD_MODEL_PATH=data/vad/silero_vad.onnx
# Higher = stricter speech detection (fewer false positives, more misses).
VAD_THRESHOLD=0.5
# Require this much continuous speech (ms) before an utterance can be considered valid.
VAD_MIN_SPEECH_DURATION_MS=100
# Silence duration required to finalize one user turn.
VAD_EOU_THRESHOLD_MS=800
# Agent profile selection (optional fallback when CLI args are not used)
# Prefer CLI:
# python -m app.main --agent-config config/agents/default.yaml
# python -m app.main --agent-profile default
# AGENT_CONFIG_PATH=config/agents/default.yaml
# AGENT_PROFILE=default
AGENT_CONFIG_DIR=config/agents
# LLM
OPENAI_API_KEY=your_openai_api_key_here
# Optional for OpenAI-compatible providers.
# OPENAI_API_URL=https://api.openai.com/v1
LLM_MODEL=gpt-4o-mini
LLM_TEMPERATURE=0.7
# TTS
# edge: no API key needed
# openai_compatible: compatible with SiliconFlow-style endpoints
TTS_PROVIDER=openai_compatible
TTS_VOICE=anna
TTS_SPEED=1.0
# SiliconFlow (used by TTS and/or ASR when provider=openai_compatible)
SILICONFLOW_API_KEY=your_siliconflow_api_key_here
SILICONFLOW_TTS_MODEL=FunAudioLLM/CosyVoice2-0.5B
SILICONFLOW_ASR_MODEL=FunAudioLLM/SenseVoiceSmall
# ASR
ASR_PROVIDER=openai_compatible
# Interim-result cadence (ms) and minimum audio (ms) required before an interim decode.
ASR_INTERIM_INTERVAL_MS=500
ASR_MIN_AUDIO_MS=300
# ASR start gate: ignore micro-noise, then commit to one turn once started.
ASR_START_MIN_SPEECH_MS=160
# Pre-roll protects beginning phonemes.
ASR_PRE_SPEECH_MS=240
# Tail silence protects ending phonemes.
ASR_FINAL_TAIL_MS=120
# Duplex behavior
DUPLEX_ENABLED=true
# DUPLEX_GREETING=Hello! How can I help you today?
DUPLEX_SYSTEM_PROMPT=You are a helpful, friendly voice assistant. Keep your responses concise and conversational.
# Barge-in (user interrupting assistant)
# Minimum user speech duration (ms) needed to interrupt assistant audio.
BARGE_IN_MIN_DURATION_MS=200
# Silence (ms) tolerated during a potential barge-in before barge-in detection resets.
BARGE_IN_SILENCE_TOLERANCE_MS=60
# Optional: provider credentials referenced from YAML, e.g. ${LLM_API_KEY}
# LLM_API_KEY=your_llm_api_key_here
# LLM_API_URL=https://api.openai.com/v1
# TTS_API_KEY=your_tts_api_key_here
# TTS_API_URL=https://api.example.com/v1/audio/speech
# ASR_API_KEY=your_asr_api_key_here
# ASR_API_URL=https://api.example.com/v1/audio/transcriptions
# Logging
LOG_LEVEL=INFO