diff --git a/CHANGELOG.md b/CHANGELOG.md index 362116d5d..9d2f77647 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Deprecated +- `TransportParams.vad_enabled` parameter is now deprecated, use + `TransportParams.audio_in_enabled` and `TransportParams.vad_analyzer` instead. + - `TransportParams.vad_audio_passthrough` parameter is now deprecated, use `TransportParams.audio_in_passthrough` instead. diff --git a/src/pipecat/transports/base_input.py b/src/pipecat/transports/base_input.py index 0f206f08e..2d8027262 100644 --- a/src/pipecat/transports/base_input.py +++ b/src/pipecat/transports/base_input.py @@ -55,6 +55,17 @@ class BaseInputTransport(FrameProcessor): # if passthrough is enabled. self._audio_task = None + if self._params.vad_enabled: + import warnings + + with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "Parameter 'vad_enabled' is deprecated, use 'audio_in_enabled' and 'vad_analyzer' instead.", + DeprecationWarning, + ) + self._params.audio_in_enabled = True + if self._params.vad_audio_passthrough: import warnings @@ -89,7 +100,7 @@ class BaseInputTransport(FrameProcessor): self._sample_rate = self._params.audio_in_sample_rate or frame.audio_in_sample_rate # Configure VAD analyzer. - if self._params.vad_enabled and self._params.vad_analyzer: + if self._params.vad_analyzer: self._params.vad_analyzer.set_sample_rate(self._sample_rate) # Configure End of turn analyzer. if self._params.turn_analyzer: @@ -99,13 +110,13 @@ class BaseInputTransport(FrameProcessor): if self._params.audio_in_filter: await self._params.audio_in_filter.start(self._sample_rate) # Create audio input queue and task if needed. - if not self._audio_task and (self._params.audio_in_enabled or self._params.vad_enabled): + if not self._audio_task and self._params.audio_in_enabled: self._audio_in_queue = asyncio.Queue() self._audio_task = self.create_task(self._audio_task_handler()) async def stop(self, frame: EndFrame): # Cancel and wait for the audio input task to finish. - if self._audio_task and (self._params.audio_in_enabled or self._params.vad_enabled): + if self._audio_task and self._params.audio_in_enabled: await self.cancel_task(self._audio_task) self._audio_task = None # Stop audio filter. @@ -114,12 +125,12 @@ class BaseInputTransport(FrameProcessor): async def cancel(self, frame: CancelFrame): # Cancel and wait for the audio input task to finish. - if self._audio_task and (self._params.audio_in_enabled or self._params.vad_enabled): + if self._audio_task and self._params.audio_in_enabled: await self.cancel_task(self._audio_task) self._audio_task = None async def push_audio_frame(self, frame: InputAudioRawFrame): - if self._params.audio_in_enabled or self._params.vad_enabled: + if self._params.audio_in_enabled: await self._audio_in_queue.put(frame) # @@ -265,7 +276,7 @@ class BaseInputTransport(FrameProcessor): # Check VAD and push event if necessary. We just care about # changes from QUIET to SPEAKING and vice versa. previous_vad_state = vad_state - if self._params.vad_enabled: + if self._params.vad_analyzer: vad_state = await self._handle_vad(frame, vad_state) if self._params.turn_analyzer: diff --git a/src/pipecat/transports/network/small_webrtc.py b/src/pipecat/transports/network/small_webrtc.py index 343469a71..80463c924 100644 --- a/src/pipecat/transports/network/small_webrtc.py +++ b/src/pipecat/transports/network/small_webrtc.py @@ -389,9 +389,7 @@ class SmallWebRTCInputTransport(BaseInputTransport): await super().start(frame) await self._client.setup(self._params, frame) await self._client.connect() - if not self._receive_audio_task and ( - self._params.audio_in_enabled or self._params.vad_enabled - ): + if not self._receive_audio_task and self._params.audio_in_enabled: self._receive_audio_task = self.create_task(self._receive_audio()) if not self._receive_video_task and self._params.camera_in_enabled: self._receive_video_task = self.create_task(self._receive_video()) diff --git a/src/pipecat/transports/services/daily.py b/src/pipecat/transports/services/daily.py index 6c71662a8..2efd7c433 100644 --- a/src/pipecat/transports/services/daily.py +++ b/src/pipecat/transports/services/daily.py @@ -399,7 +399,7 @@ class DailyTransportClient(EventHandler): non_blocking=True, ) - if (self._params.audio_in_enabled or self._params.vad_enabled) and not self._speaker: + if self._params.audio_in_enabled and not self._speaker: self._speaker = Daily.create_speaker_device( self._speaker_name(), sample_rate=self._in_sample_rate, @@ -846,7 +846,7 @@ class DailyInputTransport(BaseInputTransport): def start_audio_in_streaming(self): # Create audio task. It reads audio frames from Daily and push them # internally for VAD processing. - if not self._audio_in_task and (self._params.audio_in_enabled or self._params.vad_enabled): + if not self._audio_in_task and self._params.audio_in_enabled: logger.debug(f"Start receiving audio") self._audio_in_task = self.create_task(self._audio_in_task_handler()) @@ -863,9 +863,6 @@ class DailyInputTransport(BaseInputTransport): await self._client.setup(frame) # Join the room. await self._client.join() - # Inialize WebRTC VAD if needed. - if self._params.vad_enabled and not self._params.vad_analyzer: - self._vad_analyzer = WebRTCVADAnalyzer(sample_rate=self.sample_rate) if self._params.audio_in_stream_on_start: self.start_audio_in_streaming() @@ -875,7 +872,7 @@ class DailyInputTransport(BaseInputTransport): # Leave the room. await self._client.leave() # Stop audio thread. - if self._audio_in_task and (self._params.audio_in_enabled or self._params.vad_enabled): + if self._audio_in_task and self._params.audio_in_enabled: await self.cancel_task(self._audio_in_task) self._audio_in_task = None @@ -885,7 +882,7 @@ class DailyInputTransport(BaseInputTransport): # Leave the room. await self._client.leave() # Stop audio thread. - if self._audio_in_task and (self._params.audio_in_enabled or self._params.vad_enabled): + if self._audio_in_task and self._params.audio_in_enabled: await self.cancel_task(self._audio_in_task) self._audio_in_task = None diff --git a/src/pipecat/transports/services/livekit.py b/src/pipecat/transports/services/livekit.py index 8ce5c885c..2e56ebddf 100644 --- a/src/pipecat/transports/services/livekit.py +++ b/src/pipecat/transports/services/livekit.py @@ -368,7 +368,7 @@ class LiveKitInputTransport(BaseInputTransport): await super().start(frame) await self._client.setup(frame) await self._client.connect() - if not self._audio_in_task and (self._params.audio_in_enabled or self._params.vad_enabled): + if not self._audio_in_task and self._params.audio_in_enabled: self._audio_in_task = self.create_task(self._audio_in_task_handler()) logger.info("LiveKitInputTransport started") @@ -382,7 +382,7 @@ class LiveKitInputTransport(BaseInputTransport): async def cancel(self, frame: CancelFrame): await super().cancel(frame) await self._client.disconnect() - if self._audio_in_task and (self._params.audio_in_enabled or self._params.vad_enabled): + if self._audio_in_task and self._params.audio_in_enabled: await self.cancel_task(self._audio_in_task) async def cleanup(self):