"""Configuration models for call options.""" from typing import Optional, Dict, Any, List from pydantic import BaseModel, Field class VADOption(BaseModel): """Voice Activity Detection configuration.""" type: str = Field(default="silero", description="VAD algorithm type (silero, webrtc)") samplerate: int = Field(default=16000, description="Audio sample rate for VAD") speech_padding: int = Field(default=250, description="Speech padding in milliseconds") silence_padding: int = Field(default=100, description="Silence padding in milliseconds") ratio: float = Field(default=0.5, description="Voice detection ratio threshold") voice_threshold: float = Field(default=0.5, description="Voice energy threshold") max_buffer_duration_secs: int = Field(default=50, description="Maximum buffer duration in seconds") silence_timeout: Optional[int] = Field(default=None, description="Silence timeout in milliseconds") endpoint: Optional[str] = Field(default=None, description="Custom VAD service endpoint") secret_key: Optional[str] = Field(default=None, description="VAD service secret key") secret_id: Optional[str] = Field(default=None, description="VAD service secret ID") class ASROption(BaseModel): """Automatic Speech Recognition configuration.""" provider: str = Field(..., description="ASR provider (tencent, aliyun, openai, etc.)") language: Optional[str] = Field(default=None, description="Language code (zh-CN, en-US)") app_id: Optional[str] = Field(default=None, description="Application ID") secret_id: Optional[str] = Field(default=None, description="Secret ID for authentication") secret_key: Optional[str] = Field(default=None, description="Secret key for authentication") model_type: Optional[str] = Field(default=None, description="ASR model type (16k_zh, 8k_en)") buffer_size: Optional[int] = Field(default=None, description="Audio buffer size in bytes") samplerate: Optional[int] = Field(default=None, description="Audio sample rate") endpoint: Optional[str] = Field(default=None, description="Custom ASR service endpoint") extra: Optional[Dict[str, Any]] = Field(default=None, description="Additional parameters") start_when_answer: bool = Field(default=False, description="Start ASR when call is answered") class TTSOption(BaseModel): """Text-to-Speech configuration.""" samplerate: Optional[int] = Field(default=None, description="TTS output sample rate") provider: str = Field(default="msedge", description="TTS provider (tencent, aliyun, deepgram, msedge)") speed: float = Field(default=1.0, description="Speech speed multiplier") app_id: Optional[str] = Field(default=None, description="Application ID") secret_id: Optional[str] = Field(default=None, description="Secret ID for authentication") secret_key: Optional[str] = Field(default=None, description="Secret key for authentication") volume: Optional[int] = Field(default=None, description="Speech volume level (1-10)") speaker: Optional[str] = Field(default=None, description="Voice speaker name") codec: Optional[str] = Field(default=None, description="Audio codec") subtitle: bool = Field(default=False, description="Enable subtitle generation") emotion: Optional[str] = Field(default=None, description="Speech emotion") endpoint: Optional[str] = Field(default=None, description="Custom TTS service endpoint") extra: Optional[Dict[str, Any]] = Field(default=None, description="Additional parameters") max_concurrent_tasks: Optional[int] = Field(default=None, description="Max concurrent tasks") class RecorderOption(BaseModel): """Call recording configuration.""" recorder_file: str = Field(..., description="Path to recording file") samplerate: int = Field(default=16000, description="Recording sample rate") ptime: int = Field(default=200, description="Packet time in milliseconds") class MediaPassOption(BaseModel): """Media pass-through configuration for external audio processing.""" url: str = Field(..., description="WebSocket URL for media streaming") input_sample_rate: int = Field(default=16000, description="Sample rate of audio received from WebSocket") output_sample_rate: int = Field(default=16000, description="Sample rate of audio sent to WebSocket") packet_size: int = Field(default=2560, description="Packet size in bytes") ptime: Optional[int] = Field(default=None, description="Buffered playback period in milliseconds") class SipOption(BaseModel): """SIP protocol configuration.""" username: Optional[str] = Field(default=None, description="SIP username") password: Optional[str] = Field(default=None, description="SIP password") realm: Optional[str] = Field(default=None, description="SIP realm/domain") headers: Optional[Dict[str, str]] = Field(default=None, description="Additional SIP headers") class HandlerRule(BaseModel): """Handler routing rule.""" caller: Optional[str] = Field(default=None, description="Caller pattern (regex)") callee: Optional[str] = Field(default=None, description="Callee pattern (regex)") playbook: Optional[str] = Field(default=None, description="Playbook file path") webhook: Optional[str] = Field(default=None, description="Webhook URL") class CallOption(BaseModel): """Comprehensive call configuration options.""" # Basic options denoise: bool = Field(default=False, description="Enable noise reduction") offer: Optional[str] = Field(default=None, description="SDP offer string") callee: Optional[str] = Field(default=None, description="Callee SIP URI or phone number") caller: Optional[str] = Field(default=None, description="Caller SIP URI or phone number") # Audio codec codec: str = Field(default="pcm", description="Audio codec (pcm, pcma, pcmu, g722)") # Component configurations recorder: Optional[RecorderOption] = Field(default=None, description="Call recording config") asr: Optional[ASROption] = Field(default=None, description="ASR configuration") vad: Optional[VADOption] = Field(default=None, description="VAD configuration") tts: Optional[TTSOption] = Field(default=None, description="TTS configuration") media_pass: Optional[MediaPassOption] = Field(default=None, description="Media pass-through config") sip: Optional[SipOption] = Field(default=None, description="SIP configuration") # Timeouts and networking handshake_timeout: Optional[int] = Field(default=None, description="Handshake timeout in seconds") enable_ipv6: bool = Field(default=False, description="Enable IPv6 support") inactivity_timeout: Optional[int] = Field(default=None, description="Inactivity timeout in seconds") # EOU configuration eou: Optional[Dict[str, Any]] = Field(default=None, description="End of utterance detection config") # Extra parameters extra: Optional[Dict[str, Any]] = Field(default=None, description="Additional custom parameters") class Config: populate_by_name = True