Merge pull request #3247 from pipecat-ai/mb/strip-whitespace-simple-text-agg

SimpleTextAggregator: Strip whitespace in the returned aggregation
This commit is contained in:
Mark Backman
2025-12-17 11:43:37 -05:00
committed by GitHub
2 changed files with 4 additions and 3 deletions

1
changelog/3247.fixed.md Normal file
View File

@@ -0,0 +1 @@
- Fixed an issue in `SimpleTextAggregator` where spaces were not being stripped before returning the aggregation. This resulted in an extra space for TTS services that don't support word-timestamp alignment data.

View File

@@ -40,7 +40,7 @@ class SimpleTextAggregator(BaseTextAggregator):
Returns:
The text that has been accumulated in the buffer.
"""
return Aggregation(text=self._text.strip(), type=AggregationType.SENTENCE)
return Aggregation(text=self._text.strip(" "), type=AggregationType.SENTENCE)
async def aggregate(self, text: str) -> AsyncIterator[Aggregation]:
"""Aggregate text and yield completed sentences.
@@ -97,7 +97,7 @@ class SimpleTextAggregator(BaseTextAggregator):
# NLTK confirmed a sentence - return it
result = self._text[:eos_marker]
self._text = self._text[eos_marker:]
return Aggregation(text=result, type=AggregationType.SENTENCE)
return Aggregation(text=result.strip(" "), type=AggregationType.SENTENCE)
# No sentence found - keep accumulating
return None
# Still whitespace, keep waiting
@@ -123,7 +123,7 @@ class SimpleTextAggregator(BaseTextAggregator):
# Return whatever we have in the buffer
result = self._text
await self.reset()
return Aggregation(text=result.strip(), type=AggregationType.SENTENCE)
return Aggregation(text=result.strip(" "), type=AggregationType.SENTENCE)
return None
async def handle_interruption(self):