# NOTE(review): unified git diff for engine/.env.example (single hunk, @@ -1,53 +1,92 @@). The line breaks of the patch are collapsed here, so the +/- hunk markers appear inline rather than at the start of a line.
# What the hunk shows: section headers are shortened, new keys are added (backend timeout, codec, VAD type/model path, ASR gating, barge-in silence tolerance, WebSocket, CORS/ICE), and the OPENAI_API_KEY / SILICONFLOW_API_KEY values on the removed (-) side are replaced by placeholders on the added (+) side.
# NOTE(review): the removed key values look like real credentials. Replacing them in this patch does not remove them from earlier revisions -- presumably they are still in repository history; confirm, and rotate both keys.
# The removed OPENAI_API_URL line had a trailing "# Optional: ..." comment after its value; the added side moves that note onto its own line and leaves OPENAI_API_URL commented out.
diff --git a/engine/.env.example b/engine/.env.example index 0d7ae7f..db4aa5a 100644 --- a/engine/.env.example +++ b/engine/.env.example @@ -1,53 +1,92 @@ -# Server Configuration +# ----------------------------------------------------------------------------- +# Engine .env example (safe template) +# Notes: +# - Never commit real API keys. +# - Start with defaults below, then tune from logs. +# ----------------------------------------------------------------------------- + +# Server HOST=0.0.0.0 PORT=8000 +# EXTERNAL_IP=1.2.3.4 +# Backend bridge (optional) BACKEND_URL=http://127.0.0.1:8100 +BACKEND_TIMEOUT_SEC=10 +HISTORY_DEFAULT_USER_ID=1 -# Audio Configuration +# Audio SAMPLE_RATE=16000 +# 20ms is recommended for VAD stability and latency. +# 100ms works but usually worsens start-of-speech accuracy. CHUNK_SIZE_MS=20 +DEFAULT_CODEC=pcm +MAX_AUDIO_BUFFER_SECONDS=30 -# VAD Configuration +# VAD / EOU +VAD_TYPE=silero +VAD_MODEL_PATH=data/vad/silero_vad.onnx +# Higher = stricter speech detection (fewer false positives, more misses). VAD_THRESHOLD=0.5 -VAD_EOU_THRESHOLD_MS=600 -VAD_MIN_SPEECH_DURATION_MS=160 +# Require this much continuous speech before utterance can be valid. +VAD_MIN_SPEECH_DURATION_MS=100 +# Silence duration required to finalize one user turn. +VAD_EOU_THRESHOLD_MS=800 -# OpenAI / LLM Configuration (required for duplex voice) -OPENAI_API_KEY=sk-fc4d59b360475f53401a864db8ce0985010acc4e696723d20a90d6569f38d80a -OPENAI_API_URL=https://api.qnaigc.com/v1 # Optional: for Azure or compatible APIs -LLM_MODEL=deepseek-v3 +# LLM +OPENAI_API_KEY=your_openai_api_key_here +# Optional for OpenAI-compatible providers. 
+# OPENAI_API_URL=https://api.openai.com/v1 +LLM_MODEL=gpt-4o-mini LLM_TEMPERATURE=0.7 -# TTS Configuration +# TTS +# edge: no SiliconFlow key needed +# siliconflow: requires SILICONFLOW_API_KEY TTS_PROVIDER=siliconflow TTS_VOICE=anna TTS_SPEED=1.0 -# SiliconFlow Configuration (for TTS and ASR) -SILICONFLOW_API_KEY=sk-thmzysdpqqmhqxxshyqoxvjeiflexjdgaftyufrsgrhpjnyx +# SiliconFlow (used by TTS and/or ASR when provider=siliconflow) +SILICONFLOW_API_KEY=your_siliconflow_api_key_here SILICONFLOW_TTS_MODEL=FunAudioLLM/CosyVoice2-0.5B - -# ASR Configuration -ASR_PROVIDER=siliconflow SILICONFLOW_ASR_MODEL=FunAudioLLM/SenseVoiceSmall + +# ASR +ASR_PROVIDER=siliconflow +# Interim cadence and minimum audio before interim decode. ASR_INTERIM_INTERVAL_MS=500 ASR_MIN_AUDIO_MS=300 +# ASR start gate: ignore micro-noise, then commit to one turn once started. +ASR_START_MIN_SPEECH_MS=160 +# Pre-roll protects beginning phonemes. +ASR_PRE_SPEECH_MS=240 +# Tail silence protects ending phonemes. +ASR_FINAL_TAIL_MS=120 -# Duplex Pipeline Configuration +# Duplex behavior DUPLEX_ENABLED=true # DUPLEX_GREETING=Hello! How can I help you today? DUPLEX_SYSTEM_PROMPT=You are a helpful, friendly voice assistant. Keep your responses concise and conversational. -# Barge-in Configuration -# Minimum speech duration (ms) to trigger interruption - filters out brief noises -# Lower = more sensitive (50-100ms recommended), Higher = filters more noise -BARGE_IN_MIN_DURATION_MS=100 - -ASR_START_MIN_SPEECH_MS=100 -ASR_PRE_SPEECH_MS=320 +# Barge-in (user interrupting assistant) +# Min user speech duration needed to interrupt assistant audio. +BARGE_IN_MIN_DURATION_MS=200 +# Allowed silence during potential barge-in (ms) before reset. +BARGE_IN_SILENCE_TOLERANCE_MS=60 # Logging LOG_LEVEL=INFO -LOG_FORMAT=text +# json is better for production/observability; text is easier locally. 
+LOG_FORMAT=json +# WebSocket behavior +INACTIVITY_TIMEOUT_SEC=60 +HEARTBEAT_INTERVAL_SEC=50 +WS_PROTOCOL_VERSION=v1 +# WS_API_KEY=replace_with_shared_secret +WS_REQUIRE_AUTH=false + +# CORS / ICE (JSON strings) +CORS_ORIGINS=["http://localhost:3000","http://localhost:8080"] +ICE_SERVERS=[{"urls":"stun:stun.l.google.com:19302"}]