- Introduced Volcengine as a new provider for both TTS and ASR services. - Updated configuration files to include Volcengine-specific parameters such as app_id, resource_id, and uid. - Enhanced the ASR service to support streaming mode with Volcengine's API. - Modified existing tests to validate the integration of Volcengine services. - Updated documentation to reflect the addition of Volcengine as a supported provider for TTS and ASR. - Refactored service factory to accommodate Volcengine alongside existing providers.
222 lines
9.9 KiB
Python
222 lines
9.9 KiB
Python
"""Configuration management using Pydantic settings."""
|
|
|
|
import json
|
|
import os
|
|
from pathlib import Path
|
|
from typing import Any, List, Optional
|
|
|
|
from pydantic import Field
|
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
|
|
try:
|
|
from dotenv import load_dotenv
|
|
except ImportError: # pragma: no cover - optional dependency in some runtimes
|
|
load_dotenv = None
|
|
|
|
def _prime_process_env_from_dotenv() -> None:
    """Feed ``.env`` files into the process environment when python-dotenv is installed.

    Two candidate files are considered, in this order:

    1. ``.env`` in the current working directory.
    2. ``.env`` in the parent of the directory that contains this (resolved) file.

    Each file is handed to ``load_dotenv`` with ``override=False``. The second
    candidate is skipped when it is the same path as the first. When the optional
    ``dotenv`` import failed (``load_dotenv is None``) the function does nothing.
    """
    if load_dotenv is not None:
        local_dotenv = Path.cwd() / ".env"
        package_dotenv = Path(__file__).resolve().parent.parent / ".env"

        # Working-directory file goes first; the package-level file is only
        # added when it is a different path.
        candidates = [local_dotenv]
        if package_dotenv != local_dotenv:
            candidates.append(package_dotenv)

        for dotenv_file in candidates:
            load_dotenv(dotenv_path=dotenv_file, override=False)
|
|
|
|
|
|
# Runs at import time, ahead of the module-level Settings() instantiation further down.
_prime_process_env_from_dotenv()
|
|
|
|
|
|
class Settings(BaseSettings):
    """Application settings loaded from environment variables.

    ``model_config`` points pydantic-settings at a ``.env`` file (UTF-8), matches
    names case-insensitively and ignores keys that do not correspond to a field.

    Besides the plain fields, three read-only properties expose derived values:
    ``chunk_size_bytes``, ``cors_origins_list`` and ``ice_servers_list``.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore",
    )

    # Server Configuration
    host: str = Field(default="0.0.0.0", description="Server host address")
    port: int = Field(default=8000, description="Server port")
    external_ip: Optional[str] = Field(default=None, description="External IP for NAT traversal")

    # Audio Configuration
    sample_rate: int = Field(default=16000, description="Audio sample rate in Hz")
    chunk_size_ms: int = Field(default=20, description="Audio chunk duration in milliseconds")
    default_codec: str = Field(default="pcm", description="Default audio codec")
    max_audio_buffer_seconds: int = Field(
        default=30,
        description="Maximum buffered user audio duration kept in memory for current turn"
    )

    # VAD Configuration
    vad_type: str = Field(default="silero", description="VAD algorithm type")
    vad_model_path: str = Field(default="data/vad/silero_vad.onnx", description="Path to VAD model")
    vad_threshold: float = Field(default=0.5, description="VAD detection threshold")
    vad_min_speech_duration_ms: int = Field(default=100, description="Minimum speech duration in milliseconds")
    vad_eou_threshold_ms: int = Field(default=800, description="End of utterance (silence) threshold in milliseconds")

    # LLM Configuration
    llm_provider: str = Field(
        default="openai",
        description="LLM provider (openai, openai_compatible, siliconflow)"
    )
    llm_api_url: Optional[str] = Field(default=None, description="LLM provider API base URL")
    llm_model: str = Field(default="gpt-4o-mini", description="LLM model name")
    llm_temperature: float = Field(default=0.7, description="LLM temperature for response generation")

    # TTS Configuration
    tts_provider: str = Field(
        default="openai_compatible",
        description="TTS provider (openai_compatible, siliconflow, dashscope, volcengine)"
    )
    tts_api_url: Optional[str] = Field(default=None, description="TTS provider API URL")
    tts_model: Optional[str] = Field(default=None, description="TTS model name")
    tts_voice: str = Field(default="anna", description="TTS voice name")
    tts_app_id: Optional[str] = Field(default=None, description="Provider-specific TTS app ID")
    tts_resource_id: Optional[str] = Field(default=None, description="Provider-specific TTS resource ID")
    tts_cluster: Optional[str] = Field(default=None, description="Provider-specific TTS cluster")
    tts_uid: Optional[str] = Field(default=None, description="Provider-specific TTS user ID")
    tts_mode: str = Field(
        default="commit",
        description="DashScope-only TTS mode (commit, server_commit). Ignored for non-dashscope providers."
    )
    tts_speed: float = Field(default=1.0, description="TTS speech speed multiplier")

    # ASR Configuration
    asr_provider: str = Field(
        default="openai_compatible",
        description="ASR provider (openai_compatible, buffered, siliconflow, dashscope, volcengine)"
    )
    asr_api_url: Optional[str] = Field(default=None, description="ASR provider API URL")
    asr_model: Optional[str] = Field(default=None, description="ASR model name")
    asr_app_id: Optional[str] = Field(default=None, description="Provider-specific ASR app ID")
    asr_resource_id: Optional[str] = Field(default=None, description="Provider-specific ASR resource ID")
    asr_cluster: Optional[str] = Field(default=None, description="Provider-specific ASR cluster")
    asr_uid: Optional[str] = Field(default=None, description="Provider-specific ASR user ID")
    asr_request_params_json: Optional[str] = Field(
        default=None,
        description="Provider-specific ASR request params as JSON string"
    )
    asr_enable_interim: bool = Field(default=False, description="Enable interim transcripts for offline ASR")
    asr_interim_interval_ms: int = Field(default=500, description="Interval for interim ASR results in ms")
    asr_min_audio_ms: int = Field(default=300, description="Minimum audio duration before first ASR result")
    asr_start_min_speech_ms: int = Field(
        default=160,
        description="Minimum continuous speech duration before ASR capture starts"
    )
    asr_pre_speech_ms: int = Field(
        default=240,
        description="Audio context (ms) prepended before detected speech to avoid clipping first phoneme"
    )
    asr_final_tail_ms: int = Field(
        default=120,
        description="Silence tail (ms) appended before final ASR decode to protect utterance ending"
    )

    # Duplex Pipeline Configuration
    duplex_enabled: bool = Field(default=True, description="Enable duplex voice pipeline")
    duplex_greeting: Optional[str] = Field(default=None, description="Optional greeting message")
    duplex_system_prompt: Optional[str] = Field(
        default="You are a helpful, friendly voice assistant. Keep your responses concise and conversational.",
        description="System prompt for LLM"
    )
    duplex_opener_audio_file: Optional[str] = Field(
        default=None,
        description="Optional opener audio file path for standalone engine mode (.pcm or .wav)"
    )

    # Barge-in (interruption) Configuration
    barge_in_min_duration_ms: int = Field(
        default=200,
        description="Minimum speech duration (ms) required to trigger barge-in. Lower=more sensitive."
    )
    barge_in_silence_tolerance_ms: int = Field(
        default=60,
        description="How much silence (ms) is tolerated during potential barge-in before reset"
    )

    # Optional tool declarations from agent YAML.
    # Supports OpenAI function schema style entries and/or shorthand string names.
    tools: List[Any] = Field(default_factory=list, description="Default tool definitions for runtime")

    # Logging
    log_level: str = Field(default="INFO", description="Logging level")
    log_format: str = Field(default="json", description="Log format (json or text)")

    # CORS (stored as a JSON string; use ``cors_origins_list`` for the parsed form)
    cors_origins: str = Field(
        default='["http://localhost:3000", "http://localhost:8080"]',
        description="CORS allowed origins"
    )

    # ICE Servers (WebRTC) (stored as a JSON string; use ``ice_servers_list`` for the parsed form)
    ice_servers: str = Field(
        default='[{"urls": "stun:stun.l.google.com:19302"}]',
        description="ICE servers configuration"
    )

    # WebSocket heartbeat and inactivity
    inactivity_timeout_sec: int = Field(default=60, description="Close connection after no message from client (seconds)")
    heartbeat_interval_sec: int = Field(default=50, description="Send heartBeat event to client every N seconds")
    ws_protocol_version: str = Field(default="v1", description="Public WS protocol version")
    ws_emit_config_resolved: bool = Field(
        default=False,
        description="Emit config.resolved after session.started (debug/internal use; disabled for public SaaS by default)",
    )

    # Backend bridge configuration (for call/transcript persistence)
    backend_mode: str = Field(
        default="auto",
        description="Backend integration mode: auto | http | disabled"
    )
    backend_url: Optional[str] = Field(default=None, description="Backend API base URL (e.g. http://localhost:8787)")
    backend_timeout_sec: int = Field(default=10, description="Backend API request timeout in seconds")
    assistant_local_config_dir: str = Field(
        default="engine/config/agents",
        description="Directory containing local assistant runtime YAML files"
    )
    history_enabled: bool = Field(default=True, description="Enable history write bridge")
    history_default_user_id: int = Field(default=1, description="Fallback user_id for history records")
    history_queue_max_size: int = Field(default=256, description="Max buffered transcript writes per session")
    history_retry_max_attempts: int = Field(default=2, description="Retry attempts for each transcript write")
    history_retry_backoff_sec: float = Field(default=0.2, description="Base retry backoff for transcript writes")
    history_finalize_drain_timeout_sec: float = Field(
        default=1.5,
        description="Max wait before finalizing history when queue is still draining"
    )

    @property
    def chunk_size_bytes(self) -> int:
        """Return the size in bytes of one audio chunk.

        The chunk is ``chunk_size_ms`` milliseconds of 16-bit (2 bytes per sample)
        mono audio at ``sample_rate`` Hz.

        Returns:
            Byte count covering a whole number of samples, so it is always even.
        """
        # Pure integer arithmetic: float math followed by int() can truncate one
        # below the exact value (e.g. 100 * 0.29 == 28.999999999999996).
        # Counting whole samples first also keeps the result 2-byte aligned, so
        # a chunk never ends in the middle of a 16-bit sample.
        samples_per_chunk = self.sample_rate * self.chunk_size_ms // 1000
        return samples_per_chunk * 2

    @property
    def cors_origins_list(self) -> List[str]:
        """Parse CORS origins from the ``cors_origins`` JSON string.

        Returns:
            The decoded list when ``cors_origins`` is a JSON array of strings;
            a one-element list when it is a single JSON string; otherwise
            (invalid JSON or any other JSON shape) the built-in localhost defaults.
        """
        fallback = ["http://localhost:3000", "http://localhost:8080"]
        try:
            parsed = json.loads(self.cors_origins)
        except json.JSONDecodeError:
            return fallback

        # A lone JSON string is treated as a single origin.
        if isinstance(parsed, str):
            return [parsed]
        # Only hand back something that really is a list of strings; ``null``,
        # numbers, objects or mixed lists would break the declared return type.
        if isinstance(parsed, list) and all(isinstance(origin, str) for origin in parsed):
            return parsed
        return fallback

    @property
    def ice_servers_list(self) -> List[dict]:
        """Parse ICE servers from the ``ice_servers`` JSON string.

        Returns:
            The decoded list when ``ice_servers`` is a JSON array of objects;
            a one-element list when it is a single JSON object; otherwise
            (invalid JSON or any other JSON shape) the built-in STUN default.
        """
        fallback = [{"urls": "stun:stun.l.google.com:19302"}]
        try:
            parsed = json.loads(self.ice_servers)
        except json.JSONDecodeError:
            return fallback

        # A lone JSON object is treated as a single server entry.
        if isinstance(parsed, dict):
            return [parsed]
        # Reject shapes that are valid JSON but not a list of objects.
        if isinstance(parsed, list) and all(isinstance(server, dict) for server in parsed):
            return parsed
        return fallback
|
|
|
|
|
|
# Global settings instance
|
|
# Created once when this module is imported; get_settings() below returns this same object.
settings = Settings()
|
|
|
|
|
|
def get_settings() -> Settings:
    """Return the shared module-level ``settings`` object.

    Nothing is constructed here: every call yields the same ``Settings``
    instance that was created at module level.
    """
    shared_instance = settings
    return shared_instance
|