From f44e2c86ea3d60d2a5eb3d017a1fafcbca3e3fca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= <aleix@daily.co>
Date: Thu, 1 May 2025 05:27:51 -0700
Subject: [PATCH] BaseOutputTransport: compute sample_rate and audio_chunk_size
 in main class

---
 src/pipecat/transports/base_output.py | 52 ++++++++++++++++-----------
 1 file changed, 32 insertions(+), 20 deletions(-)

diff --git a/src/pipecat/transports/base_output.py b/src/pipecat/transports/base_output.py
index 7e893d042..31a86f92d 100644
--- a/src/pipecat/transports/base_output.py
+++ b/src/pipecat/transports/base_output.py
@@ -47,6 +47,15 @@ class BaseOutputTransport(FrameProcessor):
 
         self._params = params
 
+        # Output sample rate. It will be initialized on StartFrame.
+        self._sample_rate = 0
+
+        # We write 10ms*CHUNKS of audio at a time (where CHUNKS is the
+        # `audio_out_10ms_chunks` parameter). If we receive long audio frames we
+        # will chunk them. This helps with interruption handling. It will be
+        # initialized on StartFrame.
+        self._audio_chunk_size = 0
+
         # We will have one media sender per output frame destination. This allow
         # us to send multiple streams at the same time if the transport allows
         # it.
@@ -54,15 +63,21 @@ class BaseOutputTransport(FrameProcessor):
 
     @property
     def sample_rate(self) -> int:
-        sender = self._media_senders.get(None, None)
-        return sender.sample_rate if sender else 0
+        return self._sample_rate
 
     @property
     def audio_chunk_size(self) -> int:
-        sender = self._media_senders.get(None, None)
-        return sender.audio_chunk_size if sender else 0
+        return self._audio_chunk_size
 
     async def start(self, frame: StartFrame):
+        self._sample_rate = self._params.audio_out_sample_rate or frame.audio_out_sample_rate
+
+        # We will write 10ms*CHUNKS of audio at a time (where CHUNKS is the
+        # `audio_out_10ms_chunks` parameter). If we receive long audio frames we
+        # will chunk them. This will help with interruption handling.
+        audio_bytes_10ms = int(self._sample_rate / 100) * self._params.audio_out_channels * 2
+        self._audio_chunk_size = audio_bytes_10ms * self._params.audio_out_10ms_chunks
+
         # Register destinations.
         for destination in self._params.audio_out_destinations:
             await self.register_audio_destination(destination)
@@ -72,7 +87,11 @@ class BaseOutputTransport(FrameProcessor):
 
         # Start default media sender.
         self._media_senders[None] = BaseOutputTransport.MediaSender(
-            self, destination=None, sample_rate=self.sample_rate, params=self._params
+            self,
+            destination=None,
+            sample_rate=self.sample_rate,
+            audio_chunk_size=self.audio_chunk_size,
+            params=self._params,
         )
         await self._media_senders[None].start(frame)
 
@@ -85,7 +104,11 @@ class BaseOutputTransport(FrameProcessor):
         # Start media senders.
         for destination in destinations:
             self._media_senders[destination] = BaseOutputTransport.MediaSender(
-                self, destination=destination, sample_rate=self.sample_rate, params=self._params
+                self,
+                destination=destination,
+                sample_rate=self.sample_rate,
+                audio_chunk_size=self.audio_chunk_size,
+                params=self._params,
             )
             await self._media_senders[destination].start(frame)
 
@@ -200,20 +223,16 @@ class BaseOutputTransport(FrameProcessor):
             *,
             destination: Optional[str],
             sample_rate: int,
+            audio_chunk_size: int,
             params: TransportParams,
         ):
             self._transport = transport
             self._destination = destination
             self._sample_rate = sample_rate
+            self._audio_chunk_size = audio_chunk_size
             self._params = params
 
-            # Output sample rate. It will be initialized on StartFrame.
-            self._sample_rate = 0
-
-            # We write 10ms*CHUNKS of audio at a time (where CHUNKS is the
-            # `audio_out_10ms_chunks` parameter). If we receive long audio
-            # frames we will chunk them. This helps with interruption handling.
-            self._audio_chunk_size = 0
+            # Buffer to keep track of incoming audio.
             self._audio_buffer = bytearray()
 
             # This will be used to resample incoming audio to the output sample rate.
@@ -242,13 +261,6 @@ class BaseOutputTransport(FrameProcessor):
             return self._audio_chunk_size
 
         async def start(self, frame: StartFrame):
-            self._sample_rate = self._params.audio_out_sample_rate or frame.audio_out_sample_rate
-
-            # We will write 10ms*CHUNKS of audio at a time (where CHUNKS is the
-            # `audio_out_10ms_chunks` parameter). If we receive long audio frames we
-            # will chunk them. This will help with interruption handling.
-            audio_bytes_10ms = int(self._sample_rate / 100) * self._params.audio_out_channels * 2
-            self._audio_chunk_size = audio_bytes_10ms * self._params.audio_out_10ms_chunks
             self._audio_buffer = bytearray()
 
             # Create all tasks.