"""Configuration management using Pydantic settings.""" import json import os from pathlib import Path from typing import Any, List, Optional from pydantic import Field from pydantic_settings import BaseSettings, SettingsConfigDict try: from dotenv import load_dotenv except ImportError: # pragma: no cover - optional dependency in some runtimes load_dotenv = None def _prime_process_env_from_dotenv() -> None: """Load .env into process env early.""" if load_dotenv is None: return cwd_env = Path.cwd() / ".env" engine_env = Path(__file__).resolve().parent.parent / ".env" load_dotenv(dotenv_path=cwd_env, override=False) if engine_env != cwd_env: load_dotenv(dotenv_path=engine_env, override=False) _prime_process_env_from_dotenv() class Settings(BaseSettings): """Application settings loaded from environment variables.""" model_config = SettingsConfigDict( env_file=".env", env_file_encoding="utf-8", case_sensitive=False, extra="ignore" ) # Server Configuration host: str = Field(default="0.0.0.0", description="Server host address") port: int = Field(default=8000, description="Server port") external_ip: Optional[str] = Field(default=None, description="External IP for NAT traversal") # Audio Configuration sample_rate: int = Field(default=16000, description="Audio sample rate in Hz") chunk_size_ms: int = Field(default=20, description="Audio chunk duration in milliseconds") default_codec: str = Field(default="pcm", description="Default audio codec") max_audio_buffer_seconds: int = Field( default=30, description="Maximum buffered user audio duration kept in memory for current turn" ) # VAD Configuration vad_type: str = Field(default="silero", description="VAD algorithm type") vad_model_path: str = Field(default="data/vad/silero_vad.onnx", description="Path to VAD model") vad_threshold: float = Field(default=0.5, description="VAD detection threshold") vad_min_speech_duration_ms: int = Field(default=100, description="Minimum speech duration in milliseconds") vad_eou_threshold_ms: int = Field(default=800, description="End of utterance (silence) threshold in milliseconds") # LLM Configuration llm_provider: str = Field( default="openai", description="LLM provider (openai, openai_compatible, siliconflow)" ) llm_api_url: Optional[str] = Field(default=None, description="LLM provider API base URL") llm_model: str = Field(default="gpt-4o-mini", description="LLM model name") llm_temperature: float = Field(default=0.7, description="LLM temperature for response generation") # TTS Configuration tts_provider: str = Field( default="openai_compatible", description="TTS provider (openai_compatible, siliconflow, dashscope)" ) tts_api_url: Optional[str] = Field(default=None, description="TTS provider API URL") tts_model: Optional[str] = Field(default=None, description="TTS model name") tts_voice: str = Field(default="anna", description="TTS voice name") tts_mode: str = Field( default="commit", description="DashScope-only TTS mode (commit, server_commit). Ignored for non-dashscope providers." ) tts_speed: float = Field(default=1.0, description="TTS speech speed multiplier") # ASR Configuration asr_provider: str = Field( default="openai_compatible", description="ASR provider (openai_compatible, buffered, siliconflow, dashscope)" ) asr_api_url: Optional[str] = Field(default=None, description="ASR provider API URL") asr_model: Optional[str] = Field(default=None, description="ASR model name") asr_interim_interval_ms: int = Field(default=500, description="Interval for interim ASR results in ms") asr_min_audio_ms: int = Field(default=300, description="Minimum audio duration before first ASR result") asr_start_min_speech_ms: int = Field( default=160, description="Minimum continuous speech duration before ASR capture starts" ) asr_pre_speech_ms: int = Field( default=240, description="Audio context (ms) prepended before detected speech to avoid clipping first phoneme" ) asr_final_tail_ms: int = Field( default=120, description="Silence tail (ms) appended before final ASR decode to protect utterance ending" ) # Duplex Pipeline Configuration duplex_enabled: bool = Field(default=True, description="Enable duplex voice pipeline") duplex_greeting: Optional[str] = Field(default=None, description="Optional greeting message") duplex_system_prompt: Optional[str] = Field( default="You are a helpful, friendly voice assistant. Keep your responses concise and conversational.", description="System prompt for LLM" ) duplex_opener_audio_file: Optional[str] = Field( default=None, description="Optional opener audio file path for standalone engine mode (.pcm or .wav)" ) # Barge-in (interruption) Configuration barge_in_min_duration_ms: int = Field( default=200, description="Minimum speech duration (ms) required to trigger barge-in. Lower=more sensitive." ) barge_in_silence_tolerance_ms: int = Field( default=60, description="How much silence (ms) is tolerated during potential barge-in before reset" ) # Optional tool declarations from agent YAML. # Supports OpenAI function schema style entries and/or shorthand string names. tools: List[Any] = Field(default_factory=list, description="Default tool definitions for runtime") # Logging log_level: str = Field(default="INFO", description="Logging level") log_format: str = Field(default="json", description="Log format (json or text)") # CORS cors_origins: str = Field( default='["http://localhost:3000", "http://localhost:8080"]', description="CORS allowed origins" ) # ICE Servers (WebRTC) ice_servers: str = Field( default='[{"urls": "stun:stun.l.google.com:19302"}]', description="ICE servers configuration" ) # WebSocket heartbeat and inactivity inactivity_timeout_sec: int = Field(default=60, description="Close connection after no message from client (seconds)") heartbeat_interval_sec: int = Field(default=50, description="Send heartBeat event to client every N seconds") ws_protocol_version: str = Field(default="v1", description="Public WS protocol version") ws_emit_config_resolved: bool = Field( default=False, description="Emit config.resolved after session.started (debug/internal use; disabled for public SaaS by default)", ) # Backend bridge configuration (for call/transcript persistence) backend_mode: str = Field( default="auto", description="Backend integration mode: auto | http | disabled" ) backend_url: Optional[str] = Field(default=None, description="Backend API base URL (e.g. http://localhost:8787)") backend_timeout_sec: int = Field(default=10, description="Backend API request timeout in seconds") assistant_local_config_dir: str = Field( default="engine/config/agents", description="Directory containing local assistant runtime YAML files" ) history_enabled: bool = Field(default=True, description="Enable history write bridge") history_default_user_id: int = Field(default=1, description="Fallback user_id for history records") history_queue_max_size: int = Field(default=256, description="Max buffered transcript writes per session") history_retry_max_attempts: int = Field(default=2, description="Retry attempts for each transcript write") history_retry_backoff_sec: float = Field(default=0.2, description="Base retry backoff for transcript writes") history_finalize_drain_timeout_sec: float = Field( default=1.5, description="Max wait before finalizing history when queue is still draining" ) @property def chunk_size_bytes(self) -> int: """Calculate chunk size in bytes based on sample rate and duration.""" # 16-bit (2 bytes) per sample, mono channel return int(self.sample_rate * 2 * (self.chunk_size_ms / 1000.0)) @property def cors_origins_list(self) -> List[str]: """Parse CORS origins from JSON string.""" try: return json.loads(self.cors_origins) except json.JSONDecodeError: return ["http://localhost:3000", "http://localhost:8080"] @property def ice_servers_list(self) -> List[dict]: """Parse ICE servers from JSON string.""" try: return json.loads(self.ice_servers) except json.JSONDecodeError: return [{"urls": "stun:stun.l.google.com:19302"}] # Global settings instance settings = Settings() def get_settings() -> Settings: """Get application settings instance.""" return settings