"""Configuration management using Pydantic settings.""" from typing import List, Optional from pydantic import Field from pydantic_settings import BaseSettings, SettingsConfigDict import json class Settings(BaseSettings): """Application settings loaded from environment variables.""" model_config = SettingsConfigDict( env_file=".env", env_file_encoding="utf-8", case_sensitive=False, extra="ignore" ) # Server Configuration host: str = Field(default="0.0.0.0", description="Server host address") port: int = Field(default=8000, description="Server port") external_ip: Optional[str] = Field(default=None, description="External IP for NAT traversal") # Audio Configuration sample_rate: int = Field(default=16000, description="Audio sample rate in Hz") chunk_size_ms: int = Field(default=20, description="Audio chunk duration in milliseconds") default_codec: str = Field(default="pcm", description="Default audio codec") # VAD Configuration vad_type: str = Field(default="silero", description="VAD algorithm type") vad_model_path: str = Field(default="data/vad/silero_vad.onnx", description="Path to VAD model") vad_threshold: float = Field(default=0.5, description="VAD detection threshold") vad_min_speech_duration_ms: int = Field(default=250, description="Minimum speech duration in milliseconds") vad_eou_threshold_ms: int = Field(default=800, description="End of utterance (silence) threshold in milliseconds") # OpenAI / LLM Configuration openai_api_key: Optional[str] = Field(default=None, description="OpenAI API key") openai_api_url: Optional[str] = Field(default=None, description="OpenAI API base URL (for Azure/compatible)") llm_model: str = Field(default="gpt-4o-mini", description="LLM model name") llm_temperature: float = Field(default=0.7, description="LLM temperature for response generation") # TTS Configuration tts_provider: str = Field(default="siliconflow", description="TTS provider (edge, siliconflow)") tts_voice: str = Field(default="anna", description="TTS voice name") tts_speed: float = Field(default=1.0, description="TTS speech speed multiplier") # SiliconFlow Configuration siliconflow_api_key: Optional[str] = Field(default=None, description="SiliconFlow API key") siliconflow_tts_model: str = Field(default="FunAudioLLM/CosyVoice2-0.5B", description="SiliconFlow TTS model") # ASR Configuration asr_provider: str = Field(default="siliconflow", description="ASR provider (siliconflow, buffered)") siliconflow_asr_model: str = Field(default="FunAudioLLM/SenseVoiceSmall", description="SiliconFlow ASR model") asr_interim_interval_ms: int = Field(default=500, description="Interval for interim ASR results in ms") asr_min_audio_ms: int = Field(default=300, description="Minimum audio duration before first ASR result") # Duplex Pipeline Configuration duplex_enabled: bool = Field(default=True, description="Enable duplex voice pipeline") duplex_greeting: Optional[str] = Field(default=None, description="Optional greeting message") duplex_system_prompt: Optional[str] = Field( default="You are a helpful, friendly voice assistant. Keep your responses concise and conversational.", description="System prompt for LLM" ) # Barge-in (interruption) Configuration barge_in_min_duration_ms: int = Field( default=200, description="Minimum speech duration (ms) required to trigger barge-in. Lower=more sensitive." ) # Logging log_level: str = Field(default="INFO", description="Logging level") log_format: str = Field(default="json", description="Log format (json or text)") # CORS cors_origins: str = Field( default='["http://localhost:3000", "http://localhost:8080"]', description="CORS allowed origins" ) # ICE Servers (WebRTC) ice_servers: str = Field( default='[{"urls": "stun:stun.l.google.com:19302"}]', description="ICE servers configuration" ) # WebSocket heartbeat and inactivity inactivity_timeout_sec: int = Field(default=60, description="Close connection after no message from client (seconds)") heartbeat_interval_sec: int = Field(default=50, description="Send heartBeat event to client every N seconds") @property def chunk_size_bytes(self) -> int: """Calculate chunk size in bytes based on sample rate and duration.""" # 16-bit (2 bytes) per sample, mono channel return int(self.sample_rate * 2 * (self.chunk_size_ms / 1000.0)) @property def cors_origins_list(self) -> List[str]: """Parse CORS origins from JSON string.""" try: return json.loads(self.cors_origins) except json.JSONDecodeError: return ["http://localhost:3000", "http://localhost:8080"] @property def ice_servers_list(self) -> List[dict]: """Parse ICE servers from JSON string.""" try: return json.loads(self.ice_servers) except json.JSONDecodeError: return [{"urls": "stun:stun.l.google.com:19302"}] # Global settings instance settings = Settings() def get_settings() -> Settings: """Get application settings instance.""" return settings