diff --git a/src/pipecat/transports/smallwebrtc/transport.py b/src/pipecat/transports/smallwebrtc/transport.py index 7e22fb00c..4389ff5d9 100644 --- a/src/pipecat/transports/smallwebrtc/transport.py +++ b/src/pipecat/transports/smallwebrtc/transport.py @@ -233,9 +233,8 @@ class SmallWebRTCClient: self._out_sample_rate = None self._leave_counter = 0 - # We are always resampling it for 16000 if the sample_rate that we receive is bigger than that. - # otherwise we face issues with Silero VAD - self._pipecat_resampler = AudioResampler("s16", "mono", 16000) + # Audio resampler - will be configured during setup with target sample rate + self._audio_in_resampler = None @self._webrtc_connection.event_handler("connected") async def on_connected(connection: SmallWebRTCConnection): @@ -375,32 +374,22 @@ class SmallWebRTCClient: await asyncio.sleep(0.01) continue - if frame.sample_rate > self._in_sample_rate: - resampled_frames = self._pipecat_resampler.resample(frame) - for resampled_frame in resampled_frames: - # 16-bit PCM bytes - pcm_array = resampled_frame.to_ndarray().astype(np.int16) - pcm_bytes = pcm_array.tobytes() - del pcm_array # free NumPy array immediately + # Resample if needed, otherwise use the frame as-is + frames_to_process = ( + self._audio_in_resampler.resample(frame) + if frame.sample_rate != self._in_sample_rate + else [frame] + ) - audio_frame = InputAudioRawFrame( - audio=pcm_bytes, - sample_rate=resampled_frame.sample_rate, - num_channels=self._audio_in_channels, - ) - audio_frame.pts = frame.pts - del pcm_bytes # reference kept in audio_frame - - yield audio_frame - else: - # 16-bit PCM bytes - pcm_array = frame.to_ndarray().astype(np.int16) + for processed_frame in frames_to_process: + # Convert to 16-bit PCM bytes + pcm_array = processed_frame.to_ndarray().astype(np.int16) pcm_bytes = pcm_array.tobytes() del pcm_array # free NumPy array immediately audio_frame = InputAudioRawFrame( audio=pcm_bytes, - sample_rate=frame.sample_rate, + sample_rate=self._in_sample_rate, num_channels=self._audio_in_channels, ) audio_frame.pts = frame.pts @@ -450,6 +439,7 @@ class SmallWebRTCClient: self._out_sample_rate = _params.audio_out_sample_rate or frame.audio_out_sample_rate self._params = _params self._leave_counter += 1 + self._audio_in_resampler = AudioResampler("s16", "mono", self._in_sample_rate) async def connect(self): """Establish the WebRTC connection."""