diff --git a/changelog/3247.fixed.md b/changelog/3247.fixed.md new file mode 100644 index 000000000..a3b5d80f7 --- /dev/null +++ b/changelog/3247.fixed.md @@ -0,0 +1 @@ +- Fixed an issue in `SimpleTextAggregator` where spaces were not being stripped before returning the aggregation. This resulted in an extra space for TTS services that don't support word-timestamp alignment data. diff --git a/src/pipecat/utils/text/simple_text_aggregator.py b/src/pipecat/utils/text/simple_text_aggregator.py index 1d123e8fb..e58b785fc 100644 --- a/src/pipecat/utils/text/simple_text_aggregator.py +++ b/src/pipecat/utils/text/simple_text_aggregator.py @@ -40,7 +40,7 @@ class SimpleTextAggregator(BaseTextAggregator): Returns: The text that has been accumulated in the buffer. """ - return Aggregation(text=self._text.strip(), type=AggregationType.SENTENCE) + return Aggregation(text=self._text.strip(" "), type=AggregationType.SENTENCE) async def aggregate(self, text: str) -> AsyncIterator[Aggregation]: """Aggregate text and yield completed sentences. @@ -97,7 +97,7 @@ class SimpleTextAggregator(BaseTextAggregator): # NLTK confirmed a sentence - return it result = self._text[:eos_marker] self._text = self._text[eos_marker:] - return Aggregation(text=result, type=AggregationType.SENTENCE) + return Aggregation(text=result.strip(" "), type=AggregationType.SENTENCE) # No sentence found - keep accumulating return None # Still whitespace, keep waiting @@ -123,7 +123,7 @@ class SimpleTextAggregator(BaseTextAggregator): # Return whatever we have in the buffer result = self._text await self.reset() - return Aggregation(text=result.strip(), type=AggregationType.SENTENCE) + return Aggregation(text=result.strip(" "), type=AggregationType.SENTENCE) return None async def handle_interruption(self):