93 lines
2.7 KiB
Plaintext
93 lines
2.7 KiB
Plaintext
# -----------------------------------------------------------------------------
|
|
# Engine .env example (safe template)
|
|
# Notes:
|
|
# - Never commit real API keys.
|
|
# - Start with defaults below, then tune from logs.
|
|
# -----------------------------------------------------------------------------
|
|
|
|
# Server
|
|
# 0.0.0.0 is the all-interfaces wildcard address.
# NOTE(review): presumably the listen/bind address; use 127.0.0.1 for local-only access (confirm how HOST is consumed).
HOST=0.0.0.0
|
|
# TCP port number.
PORT=8000
|
|
# Disabled by default; 1.2.3.4 is a placeholder, not a real address.
# NOTE(review): presumably the externally reachable IP advertised to clients (confirm before enabling).
# EXTERNAL_IP=1.2.3.4
|
|
|
|
# Backend bridge (optional)
|
|
# Base URL of the backend service; the default targets port 8100 on the loopback address.
BACKEND_URL=http://127.0.0.1:8100
|
|
# Timeout in seconds (per the _SEC suffix).
BACKEND_TIMEOUT_SEC=10
|
|
# NOTE(review): presumably the user id used for history when a client supplies none (confirm against the engine code).
HISTORY_DEFAULT_USER_ID=1
|
|
|
|
# Audio
|
|
# NOTE(review): presumably in Hz (16000 = 16 kHz); confirm it matches what the VAD/ASR models expect.
SAMPLE_RATE=16000
|
|
# Audio chunk duration in milliseconds.
# 20ms is recommended for VAD stability and latency.
|
|
# 100ms works but usually worsens start-of-speech accuracy.
|
|
CHUNK_SIZE_MS=20
|
|
# NOTE(review): other accepted codec names are not listed in this template; check the engine docs.
DEFAULT_CODEC=pcm
|
|
# Upper bound on buffered audio, in seconds (per the key name).
MAX_AUDIO_BUFFER_SECONDS=30
|
|
|
|
# VAD / EOU (voice activity detection / end of utterance)
|
|
VAD_TYPE=silero
|
|
# Relative path to the ONNX model file.
# NOTE(review): presumably resolved against the process working directory (confirm).
VAD_MODEL_PATH=data/vad/silero_vad.onnx
|
|
# Higher = stricter speech detection (fewer false positives, more misses).
# NOTE(review): presumably a speech probability in the 0..1 range (confirm).
|
|
VAD_THRESHOLD=0.5
|
|
# Require this much continuous speech (milliseconds) before an utterance can be valid.
|
|
VAD_MIN_SPEECH_DURATION_MS=100
|
|
# Silence duration (milliseconds) required to finalize one user turn.
|
|
VAD_EOU_THRESHOLD_MS=800
|
|
|
|
# LLM
|
|
# Placeholder value: replace it in your local .env and never commit the real key.
OPENAI_API_KEY=your_openai_api_key_here
|
|
# Optional for OpenAI-compatible providers.
# Uncomment the next line and point it at the provider's base URL to override the endpoint.
|
|
# OPENAI_API_URL=https://api.openai.com/v1
|
|
# Model identifier sent to the LLM provider.
LLM_MODEL=gpt-4o-mini
|
|
# Sampling temperature.
# NOTE(review): the valid range is provider-specific (confirm before raising it).
LLM_TEMPERATURE=0.7
|
|
|
|
# TTS
|
|
# Provider options documented in this template:
# edge: no SiliconFlow key needed
|
|
# siliconflow: requires SILICONFLOW_API_KEY
|
|
# The default below is siliconflow, so SILICONFLOW_API_KEY must be set to a real key unless you switch to edge.
TTS_PROVIDER=siliconflow
|
|
# NOTE(review): available voice names presumably depend on the selected provider (confirm).
TTS_VOICE=anna
|
|
# NOTE(review): presumably a playback-rate multiplier where 1.0 is normal speed (confirm).
TTS_SPEED=1.0
|
|
|
|
# SiliconFlow (used by TTS and/or ASR when provider=siliconflow)
|
|
# Placeholder value: replace it in your local .env and never commit the real key.
SILICONFLOW_API_KEY=your_siliconflow_api_key_here
|
|
# Model identifier used for text-to-speech requests.
SILICONFLOW_TTS_MODEL=FunAudioLLM/CosyVoice2-0.5B
|
|
# Model identifier used for speech-recognition requests.
SILICONFLOW_ASR_MODEL=FunAudioLLM/SenseVoiceSmall
|
|
|
|
# ASR
|
|
# The default is siliconflow, which uses SILICONFLOW_API_KEY and SILICONFLOW_ASR_MODEL from this file.
ASR_PROVIDER=siliconflow
|
|
# Interim cadence and minimum audio before interim decode (both in milliseconds).
|
|
ASR_INTERIM_INTERVAL_MS=500
|
|
ASR_MIN_AUDIO_MS=300
|
|
# ASR start gate (milliseconds): ignore micro-noise, then commit to one turn once started.
|
|
ASR_START_MIN_SPEECH_MS=160
|
|
# Pre-roll (milliseconds) protects beginning phonemes.
|
|
ASR_PRE_SPEECH_MS=240
|
|
# Tail silence (milliseconds) protects ending phonemes.
|
|
ASR_FINAL_TAIL_MS=120
|
|
|
|
# Duplex behavior
|
|
DUPLEX_ENABLED=true
|
|
# Optional greeting, disabled by default; uncomment to set one.
# DUPLEX_GREETING=Hello! How can I help you today?
|
|
# The value below is unquoted and contains spaces. Loaders that read to end of line accept it,
# but `source`-ing this file from a shell would not; keep it on a single line.
# NOTE(review): confirm the loader's quoting rules before adding quotes, since some parsers keep them literally.
DUPLEX_SYSTEM_PROMPT=You are a helpful, friendly voice assistant. Keep your responses concise and conversational.
|
|
|
|
# Barge-in (user interrupting assistant)
|
|
# Min user speech duration (milliseconds) needed to interrupt assistant audio.
|
|
BARGE_IN_MIN_DURATION_MS=200
|
|
# Allowed silence (milliseconds) during a potential barge-in before the detection resets.
|
|
BARGE_IN_SILENCE_TOLERANCE_MS=60
|
|
|
|
# Logging
|
|
# NOTE(review): accepted level names are not listed in this template (confirm, e.g. DEBUG/INFO/WARNING/ERROR).
LOG_LEVEL=INFO
|
|
# Formats mentioned here: json, text.
# json is better for production/observability; text is easier locally.
|
|
LOG_FORMAT=json
|
|
|
|
# WebSocket behavior
|
|
# Both values below are in seconds (per the _SEC suffix).
INACTIVITY_TIMEOUT_SEC=60
|
|
# The default heartbeat interval (50) is shorter than the inactivity timeout (60).
# NOTE(review): presumably it must stay shorter so heartbeats keep idle sessions alive (confirm).
HEARTBEAT_INTERVAL_SEC=50
|
|
WS_PROTOCOL_VERSION=v1
|
|
# Shared secret, disabled by default; the value shown is a placeholder. Never commit a real secret.
# WS_API_KEY=replace_with_shared_secret
|
|
# NOTE(review): with HOST=0.0.0.0 and auth disabled, the WebSocket endpoint is presumably open to any network peer;
# for non-local deployments set this to true and uncomment WS_API_KEY (confirm how the two settings interact).
WS_REQUIRE_AUTH=false
|
|
|
|
# CORS / ICE (JSON strings)
# Each value must be valid JSON kept on a single line; do not wrap the whole value in extra quotes
# unless the loader is known to strip them.
|
|
# JSON array of allowed origins; the defaults are local development hosts.
CORS_ORIGINS=["http://localhost:3000","http://localhost:8080"]
|
|
# JSON array of ICE server objects; the default is a single public STUN server.
ICE_SERVERS=[{"urls":"stun:stun.l.google.com:19302"}]
|