diff --git a/CHANGELOG.md b/CHANGELOG.md index 7cda253c7..6a4a64f2d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,10 +12,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added `ElevenLabsRealtimeSTTService` which implements the Realtime STT service from ElevenLabs. -- Added a `TTSService.includes_inter_frame_spaces` property getter, so that TTS - services that subclass `TTSService` can indicate whether the text in the - `TTSTextFrame`s they push already contain any necessary inter-frame spaces. - ### Changed - Updated all STT and TTS services to use consistent error handling pattern with @@ -56,8 +52,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added -- Added ai-coustics integrated VAD (`AICVADAnalyzer`) with `AICFilter` factory and - example wiring; leverages the enhancement model for robust detection with no +- Added ai-coustics integrated VAD (`AICVADAnalyzer`) with `AICFilter` factory and + example wiring; leverages the enhancement model for robust detection with no ONNX dependency or added processing complexity. ## [0.0.94] - 2025-11-10 diff --git a/src/pipecat/frames/frames.py b/src/pipecat/frames/frames.py index 6f48f79f7..ddb4e5a14 100644 --- a/src/pipecat/frames/frames.py +++ b/src/pipecat/frames/frames.py @@ -352,7 +352,10 @@ class TextFrame(DataFrame): class LLMTextFrame(TextFrame): """Text frame generated by LLM services.""" - pass + def __post_init__(self): + super().__post_init__() + # LLM services send text frames with all necessary spaces included + self.includes_inter_frame_spaces = True @dataclass diff --git a/src/pipecat/services/anthropic/llm.py b/src/pipecat/services/anthropic/llm.py index a5c9ee791..2e3e0272d 100644 --- a/src/pipecat/services/anthropic/llm.py +++ b/src/pipecat/services/anthropic/llm.py @@ -373,9 +373,7 @@ class AnthropicLLMService(LLMService): if event.type == "content_block_delta": if hasattr(event.delta, "text"): - frame = LLMTextFrame(event.delta.text) - frame.includes_inter_frame_spaces = True - await self.push_frame(frame) + await self.push_frame(LLMTextFrame(event.delta.text)) completion_tokens_estimate += self._estimate_tokens(event.delta.text) elif hasattr(event.delta, "partial_json") and tool_use_block: json_accumulator += event.delta.partial_json diff --git a/src/pipecat/services/asyncai/tts.py b/src/pipecat/services/asyncai/tts.py index 8df597c56..f916d9bdd 100644 --- a/src/pipecat/services/asyncai/tts.py +++ b/src/pipecat/services/asyncai/tts.py @@ -146,15 +146,6 @@ class AsyncAITTSService(InterruptibleTTSService): """ return True - @property - def includes_inter_frame_spaces(self) -> bool: - """Indicates that AsyncAI TTSTextFrames include necessary inter-frame spaces. - - Returns: - True, indicating that AsyncAI's text frames include necessary inter-frame spaces. - """ - return True - def language_to_service_language(self, language: Language) -> Optional[str]: """Convert a Language enum to Async language format. @@ -433,15 +424,6 @@ class AsyncAIHttpTTSService(TTSService): """ return True - @property - def includes_inter_frame_spaces(self) -> bool: - """Indicates that AsyncAI TTSTextFrames include necessary inter-frame spaces. - - Returns: - True, indicating that AsyncAI's text frames include necessary inter-frame spaces. - """ - return True - def language_to_service_language(self, language: Language) -> Optional[str]: """Convert a Language enum to Async language format. diff --git a/src/pipecat/services/aws/llm.py b/src/pipecat/services/aws/llm.py index 147a8c12a..ccbac43b7 100644 --- a/src/pipecat/services/aws/llm.py +++ b/src/pipecat/services/aws/llm.py @@ -1078,9 +1078,7 @@ class AWSBedrockLLMService(LLMService): if "contentBlockDelta" in event: delta = event["contentBlockDelta"]["delta"] if "text" in delta: - frame = LLMTextFrame(delta["text"]) - frame.includes_inter_frame_spaces = True - await self.push_frame(frame) + await self.push_frame(LLMTextFrame(delta["text"])) completion_tokens_estimate += self._estimate_tokens(delta["text"]) elif "toolUse" in delta and "input" in delta["toolUse"]: # Handle partial JSON for tool use diff --git a/src/pipecat/services/aws/tts.py b/src/pipecat/services/aws/tts.py index cbc35b123..f22c42399 100644 --- a/src/pipecat/services/aws/tts.py +++ b/src/pipecat/services/aws/tts.py @@ -209,15 +209,6 @@ class AWSPollyTTSService(TTSService): """ return True - @property - def includes_inter_frame_spaces(self) -> bool: - """Indicates that AWS TTSTextFrames include necessary inter-frame spaces. - - Returns: - True, indicating that AWS's text frames include necessary inter-frame spaces. - """ - return True - def language_to_service_language(self, language: Language) -> Optional[str]: """Convert a Language enum to AWS Polly language format. diff --git a/src/pipecat/services/azure/tts.py b/src/pipecat/services/azure/tts.py index 8ac8b70e3..a1040f312 100644 --- a/src/pipecat/services/azure/tts.py +++ b/src/pipecat/services/azure/tts.py @@ -151,15 +151,6 @@ class AzureBaseTTSService(TTSService): """ return True - @property - def includes_inter_frame_spaces(self) -> bool: - """Indicates that Azure TTSTextFrames include necessary inter-frame spaces. - - Returns: - True, indicating that Azure's text frames include necessary inter-frame spaces. - """ - return True - def language_to_service_language(self, language: Language) -> Optional[str]: """Convert a Language enum to Azure language format. diff --git a/src/pipecat/services/deepgram/tts.py b/src/pipecat/services/deepgram/tts.py index f6045c1f3..f75d40b09 100644 --- a/src/pipecat/services/deepgram/tts.py +++ b/src/pipecat/services/deepgram/tts.py @@ -79,15 +79,6 @@ class DeepgramTTSService(TTSService): """ return True - @property - def includes_inter_frame_spaces(self) -> bool: - """Indicates that Deepgram TTSTextFrames include necessary inter-frame spaces. - - Returns: - True, indicating that Deepgram's text frames include necessary inter-frame spaces. - """ - return True - @traced_tts async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]: """Generate speech from text using Deepgram's TTS API. @@ -177,15 +168,6 @@ class DeepgramHttpTTSService(TTSService): """ return True - @property - def includes_inter_frame_spaces(self) -> bool: - """Indicates that Deepgram TTSTextFrames include necessary inter-frame spaces. - - Returns: - True, indicating that Deepgram's text frames include necessary inter-frame spaces. - """ - return True - @traced_tts async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]: """Generate speech from text using Deepgram's TTS API. diff --git a/src/pipecat/services/fish/tts.py b/src/pipecat/services/fish/tts.py index 0acb12a96..5fe129998 100644 --- a/src/pipecat/services/fish/tts.py +++ b/src/pipecat/services/fish/tts.py @@ -159,15 +159,6 @@ class FishAudioTTSService(InterruptibleTTSService): """ return True - @property - def includes_inter_frame_spaces(self) -> bool: - """Indicates that Fish Audio TTSTextFrames include necessary inter-frame spaces. - - Returns: - True, indicating that Fish Audio's text frames include necessary inter-frame spaces. - """ - return True - async def set_model(self, model: str): """Set the TTS model and reconnect. diff --git a/src/pipecat/services/google/gemini_live/llm.py b/src/pipecat/services/google/gemini_live/llm.py index 11632968e..7e0b0f494 100644 --- a/src/pipecat/services/google/gemini_live/llm.py +++ b/src/pipecat/services/google/gemini_live/llm.py @@ -1452,8 +1452,6 @@ class GeminiLiveLLMService(LLMService): self._bot_text_buffer += text self._search_result_buffer += text # Also accumulate for grounding frame = LLMTextFrame(text=text) - # Gemini Live text already includes any necessary inter-chunk spaces - frame.includes_inter_frame_spaces = True await self.push_frame(frame) # Check for grounding metadata in server content diff --git a/src/pipecat/services/google/llm.py b/src/pipecat/services/google/llm.py index ad5fd70a7..883932b76 100644 --- a/src/pipecat/services/google/llm.py +++ b/src/pipecat/services/google/llm.py @@ -920,9 +920,7 @@ class GoogleLLMService(LLMService): for part in candidate.content.parts: if not part.thought and part.text: search_result += part.text - frame = LLMTextFrame(part.text) - frame.includes_inter_frame_spaces = True - await self.push_frame(frame) + await self.push_frame(LLMTextFrame(part.text)) elif part.function_call: function_call = part.function_call id = function_call.id or str(uuid.uuid4()) diff --git a/src/pipecat/services/google/tts.py b/src/pipecat/services/google/tts.py index 449b2bca2..cf03e2d52 100644 --- a/src/pipecat/services/google/tts.py +++ b/src/pipecat/services/google/tts.py @@ -596,15 +596,6 @@ class GoogleHttpTTSService(TTSService): """ return True - @property - def includes_inter_frame_spaces(self) -> bool: - """Indicates that Google TTSTextFrames include necessary inter-frame spaces. - - Returns: - True, indicating that Google's text frames include necessary inter-frame spaces. - """ - return True - def language_to_service_language(self, language: Language) -> Optional[str]: """Convert a Language enum to Google TTS language format. @@ -803,15 +794,6 @@ class GoogleBaseTTSService(TTSService): """ return True - @property - def includes_inter_frame_spaces(self) -> bool: - """Indicates that Google and Gemini TTSTextFrames include necessary inter-frame spaces. - - Returns: - True, indicating that Google's text frames include necessary inter-frame spaces. - """ - return True - def language_to_service_language(self, language: Language) -> Optional[str]: """Convert a Language enum to Google TTS language format. diff --git a/src/pipecat/services/groq/tts.py b/src/pipecat/services/groq/tts.py index d2dcb73a1..9026c4c4c 100644 --- a/src/pipecat/services/groq/tts.py +++ b/src/pipecat/services/groq/tts.py @@ -111,15 +111,6 @@ class GroqTTSService(TTSService): """ return True - @property - def includes_inter_frame_spaces(self) -> bool: - """Indicates that Groq TTSTextFrames include necessary inter-frame spaces. - - Returns: - True, indicating that Groq's text frames include necessary inter-frame spaces. - """ - return True - @traced_tts async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]: """Generate speech from text using Groq's TTS API. diff --git a/src/pipecat/services/hume/tts.py b/src/pipecat/services/hume/tts.py index 6760c8121..a3a7e9a4c 100644 --- a/src/pipecat/services/hume/tts.py +++ b/src/pipecat/services/hume/tts.py @@ -110,15 +110,6 @@ class HumeTTSService(TTSService): """ return True - @property - def includes_inter_frame_spaces(self) -> bool: - """Indicates that Hume TTSTextFrames include necessary inter-frame spaces. - - Returns: - True, indicating that Hume's text frames include necessary inter-frame spaces. - """ - return True - async def start(self, frame: StartFrame) -> None: """Start the service. diff --git a/src/pipecat/services/inworld/tts.py b/src/pipecat/services/inworld/tts.py index 067ea6ab6..dc2282b91 100644 --- a/src/pipecat/services/inworld/tts.py +++ b/src/pipecat/services/inworld/tts.py @@ -250,15 +250,6 @@ class InworldTTSService(TTSService): """ return True - @property - def includes_inter_frame_spaces(self) -> bool: - """Indicates that Inworld TTSTextFrames include necessary inter-frame spaces. - - Returns: - True, indicating that Inworld's text frames include necessary inter-frame spaces. - """ - return True - async def start(self, frame: StartFrame): """Start the Inworld TTS service. diff --git a/src/pipecat/services/lmnt/tts.py b/src/pipecat/services/lmnt/tts.py index d98d3f76c..ebcad0f20 100644 --- a/src/pipecat/services/lmnt/tts.py +++ b/src/pipecat/services/lmnt/tts.py @@ -124,15 +124,6 @@ class LmntTTSService(InterruptibleTTSService): """ return True - @property - def includes_inter_frame_spaces(self) -> bool: - """Indicates that LMNT TTSTextFrames include necessary inter-frame spaces. - - Returns: - True, indicating that LMNT's text frames include necessary inter-frame spaces. - """ - return True - def language_to_service_language(self, language: Language) -> Optional[str]: """Convert a Language enum to LMNT service language format. diff --git a/src/pipecat/services/minimax/tts.py b/src/pipecat/services/minimax/tts.py index b8e984eeb..05e2dac3b 100644 --- a/src/pipecat/services/minimax/tts.py +++ b/src/pipecat/services/minimax/tts.py @@ -194,15 +194,6 @@ class MiniMaxHttpTTSService(TTSService): """ return True - @property - def includes_inter_frame_spaces(self) -> bool: - """Indicates that MiniMax TTSTextFrames include necessary inter-frame spaces. - - Returns: - True, indicating that MiniMax's text frames include necessary inter-frame spaces. - """ - return True - def language_to_service_language(self, language: Language) -> Optional[str]: """Convert a Language enum to MiniMax service language format. diff --git a/src/pipecat/services/neuphonic/tts.py b/src/pipecat/services/neuphonic/tts.py index 992abbe67..60b0ebcb1 100644 --- a/src/pipecat/services/neuphonic/tts.py +++ b/src/pipecat/services/neuphonic/tts.py @@ -151,15 +151,6 @@ class NeuphonicTTSService(InterruptibleTTSService): """ return True - @property - def includes_inter_frame_spaces(self) -> bool: - """Indicates that Neuphonic TTSTextFrames include necessary inter-frame spaces. - - Returns: - True, indicating that Neuphonic's text frames include necessary inter-frame spaces. - """ - return True - def language_to_service_language(self, language: Language) -> Optional[str]: """Convert a Language enum to Neuphonic service language format. @@ -449,15 +440,6 @@ class NeuphonicHttpTTSService(TTSService): """ return True - @property - def includes_inter_frame_spaces(self) -> bool: - """Indicates that Neuphonic TTSTextFrames include necessary inter-frame spaces. - - Returns: - True, indicating that Neuphonic's text frames include necessary inter-frame spaces. - """ - return True - def language_to_service_language(self, language: Language) -> Optional[str]: """Convert a Language enum to Neuphonic service language format. diff --git a/src/pipecat/services/openai/base_llm.py b/src/pipecat/services/openai/base_llm.py index 5a8c1ab31..d020e1106 100644 --- a/src/pipecat/services/openai/base_llm.py +++ b/src/pipecat/services/openai/base_llm.py @@ -390,9 +390,7 @@ class BaseOpenAILLMService(LLMService): # Keep iterating through the response to collect all the argument fragments arguments += tool_call.function.arguments elif chunk.choices[0].delta.content: - frame = LLMTextFrame(chunk.choices[0].delta.content) - frame.includes_inter_frame_spaces = True - await self.push_frame(frame) + await self.push_frame(LLMTextFrame(chunk.choices[0].delta.content)) # When gpt-4o-audio / gpt-4o-mini-audio is used for llm or stt+llm # we need to get LLMTextFrame for the transcript diff --git a/src/pipecat/services/openai/realtime/llm.py b/src/pipecat/services/openai/realtime/llm.py index f66e6e8e1..8eaa3d6fa 100644 --- a/src/pipecat/services/openai/realtime/llm.py +++ b/src/pipecat/services/openai/realtime/llm.py @@ -678,8 +678,6 @@ class OpenAIRealtimeLLMService(LLMService): # the output modality is "text" if evt.delta: frame = LLMTextFrame(evt.delta) - # OpenAI Realtime text already includes any necessary inter-chunk spaces - frame.includes_inter_frame_spaces = True await self.push_frame(frame) async def _handle_evt_audio_transcript_delta(self, evt): diff --git a/src/pipecat/services/openai/tts.py b/src/pipecat/services/openai/tts.py index a5231170e..23cb75324 100644 --- a/src/pipecat/services/openai/tts.py +++ b/src/pipecat/services/openai/tts.py @@ -131,15 +131,6 @@ class OpenAITTSService(TTSService): """ return True - @property - def includes_inter_frame_spaces(self) -> bool: - """Indicates that OpenAI TTSTextFrames include necessary inter-frame spaces. - - Returns: - True, indicating that OpenAI's text frames include necessary inter-frame spaces. - """ - return True - async def set_model(self, model: str): """Set the TTS model to use. diff --git a/src/pipecat/services/piper/tts.py b/src/pipecat/services/piper/tts.py index 3752418b5..dd842ff11 100644 --- a/src/pipecat/services/piper/tts.py +++ b/src/pipecat/services/piper/tts.py @@ -66,15 +66,6 @@ class PiperTTSService(TTSService): """ return True - @property - def includes_inter_frame_spaces(self) -> bool: - """Indicates that Piper TTSTextFrames include necessary inter-frame spaces. - - Returns: - True, indicating that Piper's text frames include necessary inter-frame spaces. - """ - return True - @traced_tts async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]: """Generate speech from text using Piper's HTTP API. diff --git a/src/pipecat/services/rime/tts.py b/src/pipecat/services/rime/tts.py index 329aecd7d..7b62f20fa 100644 --- a/src/pipecat/services/rime/tts.py +++ b/src/pipecat/services/rime/tts.py @@ -501,15 +501,6 @@ class RimeHttpTTSService(TTSService): """ return True - @property - def includes_inter_frame_spaces(self) -> bool: - """Indicates that Rime TTSTextFrames include necessary inter-frame spaces. - - Returns: - True, indicating that Rime's text frames include necessary inter-frame spaces. - """ - return True - def language_to_service_language(self, language: Language) -> str | None: """Convert pipecat language to Rime language code. diff --git a/src/pipecat/services/riva/tts.py b/src/pipecat/services/riva/tts.py index 07aa4d105..6de6f9332 100644 --- a/src/pipecat/services/riva/tts.py +++ b/src/pipecat/services/riva/tts.py @@ -113,15 +113,6 @@ class RivaTTSService(TTSService): riva.client.proto.riva_tts_pb2.RivaSynthesisConfigRequest() ) - @property - def includes_inter_frame_spaces(self) -> bool: - """Indicates that Riva TTSTextFrames include necessary inter-frame spaces. - - Returns: - True, indicating that Riva's text frames include necessary inter-frame spaces. - """ - return True - async def set_model(self, model: str): """Attempt to set the TTS model. diff --git a/src/pipecat/services/sambanova/llm.py b/src/pipecat/services/sambanova/llm.py index 76f11e81c..5ed600457 100644 --- a/src/pipecat/services/sambanova/llm.py +++ b/src/pipecat/services/sambanova/llm.py @@ -176,9 +176,7 @@ class SambaNovaLLMService(OpenAILLMService): # type: ignore # Keep iterating through the response to collect all the argument fragments arguments += tool_call.function.arguments elif chunk.choices[0].delta.content: - frame = LLMTextFrame(chunk.choices[0].delta.content) - frame.includes_inter_frame_spaces = True - await self.push_frame(frame) + await self.push_frame(LLMTextFrame(chunk.choices[0].delta.content)) # When gpt-4o-audio / gpt-4o-mini-audio is used for llm or stt+llm # we need to get LLMTextFrame for the transcript diff --git a/src/pipecat/services/sarvam/tts.py b/src/pipecat/services/sarvam/tts.py index 6ce45d27d..127a0d589 100644 --- a/src/pipecat/services/sarvam/tts.py +++ b/src/pipecat/services/sarvam/tts.py @@ -195,15 +195,6 @@ class SarvamHttpTTSService(TTSService): """ return True - @property - def includes_inter_frame_spaces(self) -> bool: - """Indicates that Sarvam TTSTextFrames include necessary inter-frame spaces. - - Returns: - True, indicating that Sarvam's text frames include necessary inter-frame spaces. - """ - return True - def language_to_service_language(self, language: Language) -> Optional[str]: """Convert a Language enum to Sarvam AI language format. @@ -467,15 +458,6 @@ class SarvamTTSService(InterruptibleTTSService): """ return True - @property - def includes_inter_frame_spaces(self) -> bool: - """Indicates that Sarvam TTSTextFrames include necessary inter-frame spaces. - - Returns: - True, indicating that Sarvam's text frames include necessary inter-frame spaces. - """ - return True - def language_to_service_language(self, language: Language) -> Optional[str]: """Convert a Language enum to Sarvam AI language format. diff --git a/src/pipecat/services/speechmatics/tts.py b/src/pipecat/services/speechmatics/tts.py index e115d5a7c..b8fe172e7 100644 --- a/src/pipecat/services/speechmatics/tts.py +++ b/src/pipecat/services/speechmatics/tts.py @@ -105,15 +105,6 @@ class SpeechmaticsTTSService(TTSService): """ return True - @property - def includes_inter_frame_spaces(self) -> bool: - """Indicates that Speechmatics TTSTextFrames include necessary inter-frame spaces. - - Returns: - True, indicating that Speechmatics's text frames include necessary inter-frame spaces. - """ - return True - @traced_tts async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]: """Generate speech from text using Speechmatics' HTTP API. diff --git a/src/pipecat/services/tts_service.py b/src/pipecat/services/tts_service.py index 29c54f497..f0d602a40 100644 --- a/src/pipecat/services/tts_service.py +++ b/src/pipecat/services/tts_service.py @@ -142,6 +142,7 @@ class TTSService(AIService): self._voice_id: str = "" self._settings: Dict[str, Any] = {} self._text_aggregator: BaseTextAggregator = text_aggregator or SimpleTextAggregator() + self._aggregated_text_includes_inter_frame_spaces: bool = False self._text_filters: Sequence[BaseTextFilter] = text_filters or [] self._transport_destination: Optional[str] = transport_destination self._tracing_enabled: bool = False @@ -192,23 +193,6 @@ class TTSService(AIService): CHUNK_SECONDS = 0.5 return int(self.sample_rate * CHUNK_SECONDS * 2) # 2 bytes/sample - @property - def includes_inter_frame_spaces(self) -> bool: - """Indicates whether TTSTextFrames include necesary inter-frame spaces. - - When True, the TTSTextFrame objects pushed by this service already - include all necessary spaces between subsequent frames. When False, - downstream processors (like the assistant context aggregator) may need - to add spacing. - - Subclasses should override this property to return True if their text - generation process already includes necessary inter-frame spaces. - - Returns: - False by default. Subclasses can override to return True. - """ - return False - async def set_model(self, model: str): """Set the TTS model to use. @@ -369,9 +353,16 @@ class TTSService(AIService): await self._maybe_pause_frame_processing() sentence = self._text_aggregator.text + includes_inter_frame_spaces = self._aggregated_text_includes_inter_frame_spaces + + # Reset aggregator state await self._text_aggregator.reset() self._processing_text = False - await self._push_tts_frames(sentence) + self._aggregated_text_includes_inter_frame_spaces = False + + await self._push_tts_frames( + sentence, includes_inter_frame_spaces=includes_inter_frame_spaces + ) if isinstance(frame, LLMFullResponseEndFrame): if self._push_text_frames: await self.push_frame(frame, direction) @@ -380,7 +371,8 @@ class TTSService(AIService): elif isinstance(frame, TTSSpeakFrame): # Store if we were processing text or not so we can set it back. processing_text = self._processing_text - await self._push_tts_frames(frame.text) + # Assumption: text in TTSSpeakFrame does not include inter-frame spaces + await self._push_tts_frames(frame.text, includes_inter_frame_spaces=False) # We pause processing incoming frames because we are sending data to # the TTS. We pause to avoid audio overlapping. await self._maybe_pause_frame_processing() @@ -474,11 +466,17 @@ class TTSService(AIService): text = frame.text else: text = await self._text_aggregator.aggregate(frame.text) + # Assumption: whether inter-frame spaces are included shouldn't + # change during aggregation, so we can just use the latest frame's + # value + self._aggregated_text_includes_inter_frame_spaces = frame.includes_inter_frame_spaces if text: - await self._push_tts_frames(text) + await self._push_tts_frames( + text, includes_inter_frame_spaces=frame.includes_inter_frame_spaces + ) - async def _push_tts_frames(self, text: str): + async def _push_tts_frames(self, text: str, includes_inter_frame_spaces: bool): # Remove leading newlines only text = text.lstrip("\n") @@ -508,7 +506,7 @@ class TTSService(AIService): # We send the original text after the audio. This way, if we are # interrupted, the text is not added to the assistant context. frame = TTSTextFrame(text) - frame.includes_inter_frame_spaces = self.includes_inter_frame_spaces + frame.includes_inter_frame_spaces = includes_inter_frame_spaces await self.push_frame(frame) async def _stop_frame_handler(self): @@ -635,6 +633,8 @@ class WordTTSService(TTSService): frame = TTSStoppedFrame() frame.pts = last_pts else: + # Assumption: word-by-word text frames don't include spaces, so + # we can rely on the default includes_inter_frame_spaces=False frame = TTSTextFrame(word) frame.pts = self._initial_word_timestamp + timestamp if frame: