diff --git a/examples/foundational/07ag-interruptible-hathora.py b/examples/foundational/07ag-interruptible-hathora.py index 9a90efb35..f4bd169b1 100644 --- a/examples/foundational/07ag-interruptible-hathora.py +++ b/examples/foundational/07ag-interruptible-hathora.py @@ -9,7 +9,6 @@ import os from dotenv import load_dotenv from loguru import logger -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams diff --git a/src/pipecat/services/hathora/stt.py b/src/pipecat/services/hathora/stt.py index 0608cb39e..b9de7ac8d 100644 --- a/src/pipecat/services/hathora/stt.py +++ b/src/pipecat/services/hathora/stt.py @@ -40,18 +40,18 @@ class HathoraSTTService(SegmentedSTTService): config: Some models support additional config, refer to [docs](https://models.hathora.dev) for each model to see what is supported. - base_url: Base API URL for the Hathora STT service. """ language: Optional[str] = None config: Optional[list[ConfigOption]] = None - base_url: str = "https://api.models.hathora.dev/inference/v1/stt", def __init__( self, *, model: str, + sample_rate: Optional[int] = None, api_key: Optional[str] = None, + base_url: str = "https://api.models.hathora.dev/inference/v1/stt", params: Optional[InputParams] = None, **kwargs, ): @@ -60,17 +60,21 @@ class HathoraSTTService(SegmentedSTTService): Args: model: Model to use; find available models [here](https://models.hathora.dev). + sample_rate: The sample rate for audio input. If None, will be determined + from the start frame. api_key: API key for authentication with the Hathora service; provision one [here](https://models.hathora.dev/tokens). + base_url: Base API URL for the Hathora STT service. params: Configuration parameters. **kwargs: Additional arguments passed to the parent class. """ super().__init__( + sample_rate=sample_rate, **kwargs, ) self._model = model self._api_key = api_key or os.getenv("HATHORA_API_KEY") - self._base_url = params.base_url + self._base_url = base_url params = params or HathoraSTTService.InputParams() diff --git a/src/pipecat/services/hathora/tts.py b/src/pipecat/services/hathora/tts.py index 749cb46c8..43c1cfeca 100644 --- a/src/pipecat/services/hathora/tts.py +++ b/src/pipecat/services/hathora/tts.py @@ -59,19 +59,19 @@ class HathoraTTSService(TTSService): config: Some models support additional config, refer to [docs](https://models.hathora.dev) for each model to see what is supported. - base_url: Base API URL for the Hathora TTS service. """ speed: Optional[float] = None config: Optional[list[ConfigOption]] = None - base_url: str = "https://api.models.hathora.dev/inference/v1/tts", def __init__( self, *, model: str, voice_id: Optional[str] = None, + sample_rate: Optional[int] = None, api_key: Optional[str] = None, + base_url: str = "https://api.models.hathora.dev/inference/v1/tts", params: Optional[InputParams] = None, **kwargs, ): @@ -81,17 +81,20 @@ class HathoraTTSService(TTSService): model: Model to use; find available models [here](https://models.hathora.dev). voice_id: Voice to use for synthesis (if supported by model). + sample_rate: Output sample rate for generated audio. api_key: API key for authentication with the Hathora service; provision one [here](https://models.hathora.dev/tokens). + base_url: Base API URL for the Hathora TTS service. params: Configuration parameters. **kwargs: Additional arguments passed to the parent class. """ super().__init__( + sample_rate=sample_rate, **kwargs, ) self._model = model self._api_key = api_key or os.getenv("HATHORA_API_KEY") - self._base_url = params.base_url + self._base_url = base_url params = params or HathoraTTSService.InputParams() @@ -155,7 +158,7 @@ class HathoraTTSService(TTSService): frame = TTSAudioRawFrame( audio=pcm_audio, - sample_rate=sample_rate, + sample_rate=self.sample_rate, num_channels=num_channels, )