From 6cea71270ef1358309b25452d87f3aa842d29600 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Wed, 16 Apr 2025 11:35:36 -0700 Subject: [PATCH] tts: use smaller audio chunk sizes --- src/pipecat/services/aws/tts.py | 6 +++--- src/pipecat/services/elevenlabs/tts.py | 2 +- src/pipecat/services/google/tts.py | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/pipecat/services/aws/tts.py b/src/pipecat/services/aws/tts.py index cc9ce6457..db6e168ab 100644 --- a/src/pipecat/services/aws/tts.py +++ b/src/pipecat/services/aws/tts.py @@ -231,9 +231,9 @@ class PollyTTSService(TTSService): yield TTSStartedFrame() - chunk_size = 8192 - for i in range(0, len(audio_data), chunk_size): - chunk = audio_data[i : i + chunk_size] + CHUNK_SIZE = 1024 + for i in range(0, len(audio_data), CHUNK_SIZE): + chunk = audio_data[i : i + CHUNK_SIZE] if len(chunk) > 0: await self.stop_ttfb_metrics() frame = TTSAudioRawFrame(chunk, self.sample_rate, 1) diff --git a/src/pipecat/services/elevenlabs/tts.py b/src/pipecat/services/elevenlabs/tts.py index d3a066882..cc9a72889 100644 --- a/src/pipecat/services/elevenlabs/tts.py +++ b/src/pipecat/services/elevenlabs/tts.py @@ -550,7 +550,7 @@ class ElevenLabsHttpTTSService(TTSService): if self._settings["optimize_streaming_latency"] is not None: params["optimize_streaming_latency"] = self._settings["optimize_streaming_latency"] - logger.debug(f"ElevenLabs request - payload: {payload}, params: {params}") + logger.debug(f"{self} ElevenLabs request - payload: {payload}, params: {params}") try: await self.start_ttfb_metrics() diff --git a/src/pipecat/services/google/tts.py b/src/pipecat/services/google/tts.py index ef9023a8c..5bfdada21 100644 --- a/src/pipecat/services/google/tts.py +++ b/src/pipecat/services/google/tts.py @@ -346,9 +346,9 @@ class GoogleTTSService(TTSService): audio_content = response.audio_content[44:] # Read and yield audio data in chunks - chunk_size = 8192 - for i in range(0, len(audio_content), chunk_size): - chunk = audio_content[i : i + chunk_size] + CHUNK_SIZE = 1024 + for i in range(0, len(audio_content), CHUNK_SIZE): + chunk = audio_content[i : i + CHUNK_SIZE] if not chunk: break await self.stop_ttfb_metrics()