From 92cc6d39f2728a06d46a527992ccdd1dd5a7c2a0 Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Tue, 25 Feb 2025 12:48:38 -0500 Subject: [PATCH] TTSService: Remove leading newlines before sending text to the TTS service --- CHANGELOG.md | 4 ++++ src/pipecat/services/ai_services.py | 3 +++ 2 files changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index aba68530d..72f00278c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed +- The base `TTSService` class now strips leading newlines before sending text + to the TTS provider. This change solves issues where some TTS providers, + like Azure, would not output audio when the text began with newlines. + - `GrokLLMSService` now uses `grok-2` as the default model. - `AnthropicLLMService` now uses `claude-3-7-sonnet-20250219` as the default diff --git a/src/pipecat/services/ai_services.py b/src/pipecat/services/ai_services.py index 8f0df1b37..4fdcd0afa 100644 --- a/src/pipecat/services/ai_services.py +++ b/src/pipecat/services/ai_services.py @@ -399,6 +399,9 @@ class TTSService(AIService): await self._push_tts_frames(text) async def _push_tts_frames(self, text: str): + # Remove leading newlines only + text = text.lstrip("\n") + # Don't send only whitespace. This causes problems for some TTS models. But also don't # strip all whitespace, as whitespace can influence prosody. if not text.strip():