Init commit

2026-02-17 10:39:23 +08:00
commit 30eb4397c2
56 changed files with 11983 additions and 0 deletions
--- a/.env.example
+++ b/.env.example
@@ -0,0 +1,92 @@
+# -----------------------------------------------------------------------------
+# Engine .env example (safe template)
+# Notes:
+# - Never commit real API keys.
+# - Start with the defaults below, then tune them based on what the logs show.
+# -----------------------------------------------------------------------------
+
+# Server
+HOST=0.0.0.0
+PORT=8000
+# EXTERNAL_IP=1.2.3.4
+
+# Backend bridge (optional)
+BACKEND_URL=http://127.0.0.1:8100
+BACKEND_TIMEOUT_SEC=10
+HISTORY_DEFAULT_USER_ID=1
+
+# Audio
+SAMPLE_RATE=16000
+# 20ms is recommended for VAD stability and latency.
+# 100ms works but usually worsens start-of-speech accuracy.
+CHUNK_SIZE_MS=20
+DEFAULT_CODEC=pcm
+MAX_AUDIO_BUFFER_SECONDS=30
+
+# VAD / EOU
+VAD_TYPE=silero
+VAD_MODEL_PATH=data/vad/silero_vad.onnx
+# Higher = stricter speech detection (fewer false positives, more misses).
+VAD_THRESHOLD=0.5
+# Require this much continuous speech before an utterance can count as valid.
+VAD_MIN_SPEECH_DURATION_MS=100
+# Silence duration required to finalize one user turn.
+VAD_EOU_THRESHOLD_MS=800
+
+# LLM
+OPENAI_API_KEY=your_openai_api_key_here
+# Optional for OpenAI-compatible providers.
+# OPENAI_API_URL=https://api.openai.com/v1
+LLM_MODEL=gpt-4o-mini
+LLM_TEMPERATURE=0.7
+
+# TTS
+# edge: no API key needed
+# openai_compatible: compatible with SiliconFlow-style endpoints
+TTS_PROVIDER=openai_compatible
+TTS_VOICE=anna
+TTS_SPEED=1.0
+
+# SiliconFlow (used by TTS and/or ASR when provider=openai_compatible)
+SILICONFLOW_API_KEY=your_siliconflow_api_key_here
+SILICONFLOW_TTS_MODEL=FunAudioLLM/CosyVoice2-0.5B
+SILICONFLOW_ASR_MODEL=FunAudioLLM/SenseVoiceSmall
+
+# ASR
+ASR_PROVIDER=openai_compatible
+# Interval between interim decodes, and minimum audio required before an interim decode.
+ASR_INTERIM_INTERVAL_MS=500
+ASR_MIN_AUDIO_MS=300
+# ASR start gate: ignore micro-noise, then commit to one turn once started.
+ASR_START_MIN_SPEECH_MS=160
+# Pre-roll protects beginning phonemes.
+ASR_PRE_SPEECH_MS=240
+# Tail silence protects ending phonemes.
+ASR_FINAL_TAIL_MS=120
+
+# Duplex behavior
+DUPLEX_ENABLED=true
+# DUPLEX_GREETING=Hello! How can I help you today?
+DUPLEX_SYSTEM_PROMPT=You are a helpful, friendly voice assistant. Keep your responses concise and conversational.
+
+# Barge-in (user interrupting assistant)
+# Min user speech duration needed to interrupt assistant audio.
+BARGE_IN_MIN_DURATION_MS=200
+# Silence (ms) tolerated during a potential barge-in before it is reset.
+BARGE_IN_SILENCE_TOLERANCE_MS=60
+
+# Logging
+LOG_LEVEL=INFO
+# json is better for production/observability; text is easier locally.
+LOG_FORMAT=json
+
+# WebSocket behavior
+INACTIVITY_TIMEOUT_SEC=60
+HEARTBEAT_INTERVAL_SEC=50
+WS_PROTOCOL_VERSION=v1
+# WS_API_KEY=replace_with_shared_secret
+WS_REQUIRE_AUTH=false
+
+# CORS / ICE (values are JSON arrays written on a single line)
+CORS_ORIGINS=["http://localhost:3000","http://localhost:8080"]
+ICE_SERVERS=[{"urls":"stun:stun.l.google.com:19302"}]