Merge pull request #3041 from pipecat-ai/pk/apply-includes-inter-frame-spaces-wherever-necessary

Apply `includes_inter_frame_spaces = True` in all LLM and TTS service…
2025-11-12 16:49:14 -05:00
parent 203a627707 5222ff99de
commit 4c25555396
28 changed files with 527 additions and 10 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,8 +7,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ## [Unreleased]

+### Added
+
+- Added a `TTSService.includes_inter_frame_spaces` property getter, so that TTS
+  services that subclass `TTSService` can indicate whether the text in the
+  `TTSTextFrame`s they push already contain any necessary inter-frame spaces.
+
 ### Fixed

+- Fixed subtle issue of assistant context messages ending up with double spaces
+  between words or sentences.
+
+- Fixed an issue where `NeuphonicTTSService` wasn't pushing `TTSTextFrame`s,
+  meaning assistant messages weren't being written to context.
+
 - Fixed an issue with OpenTelemetry where tracing wasn't correctly displaying
  LLM completions and tools when using the universal `LLMContext`.

--- a/examples/foundational/07f-interruptible-azure-http.py
+++ b/examples/foundational/07f-interruptible-azure-http.py
@@ -0,0 +1,135 @@
+#
+# Copyright (c) 2024–2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+
+import os
+
+from dotenv import load_dotenv
+from loguru import logger
+
+from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
+from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
+from pipecat.audio.vad.silero import SileroVADAnalyzer
+from pipecat.audio.vad.vad_analyzer import VADParams
+from pipecat.frames.frames import LLMRunFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineParams, PipelineTask
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
+from pipecat.runner.types import RunnerArguments
+from pipecat.runner.utils import create_transport
+from pipecat.services.azure.llm import AzureLLMService
+from pipecat.services.azure.stt import AzureSTTService
+from pipecat.services.azure.tts import AzureHttpTTSService
+from pipecat.transports.base_transport import BaseTransport, TransportParams
+from pipecat.transports.daily.transport import DailyParams
+from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
+
+load_dotenv(override=True)
+
+# We store functions so objects (e.g. SileroVADAnalyzer) don't get
+# instantiated. The function will be called when the desired transport gets
+# selected.
+transport_params = {
+    "daily": lambda: DailyParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+    "twilio": lambda: FastAPIWebsocketParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+    "webrtc": lambda: TransportParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+}
+
+
+async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
+    logger.info(f"Starting bot")
+
+    stt = AzureSTTService(
+        api_key=os.getenv("AZURE_SPEECH_API_KEY"),
+        region=os.getenv("AZURE_SPEECH_REGION"),
+    )
+
+    tts = AzureHttpTTSService(
+        api_key=os.getenv("AZURE_SPEECH_API_KEY"),
+        region=os.getenv("AZURE_SPEECH_REGION"),
+    )
+
+    llm = AzureLLMService(
+        api_key=os.getenv("AZURE_CHATGPT_API_KEY"),
+        endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"),
+        model=os.getenv("AZURE_CHATGPT_MODEL"),
+    )
+
+    messages = [
+        {
+            "role": "system",
+            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
+        },
+    ]
+
+    context = LLMContext(messages)
+    context_aggregator = LLMContextAggregatorPair(context)
+
+    pipeline = Pipeline(
+        [
+            transport.input(),  # Transport user input
+            stt,  # STT
+            context_aggregator.user(),  # User responses
+            llm,  # LLM
+            tts,  # TTS
+            transport.output(),  # Transport bot output
+            context_aggregator.assistant(),  # Assistant spoken responses
+        ]
+    )
+
+    task = PipelineTask(
+        pipeline,
+        params=PipelineParams(
+            enable_metrics=True,
+            enable_usage_metrics=True,
+        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )
+
+    @transport.event_handler("on_client_connected")
+    async def on_client_connected(transport, client):
+        logger.info(f"Client connected")
+        # Kick off the conversation.
+        messages.append({"role": "system", "content": "Please introduce yourself to the user."})
+        await task.queue_frames([LLMRunFrame()])
+
+    @transport.event_handler("on_client_disconnected")
+    async def on_client_disconnected(transport, client):
+        logger.info(f"Client disconnected")
+        await task.cancel()
+
+    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+
+    await runner.run(task)
+
+
+async def bot(runner_args: RunnerArguments):
+    """Main bot entry point compatible with Pipecat Cloud."""
+    transport = await create_transport(runner_args, transport_params)
+    await run_bot(transport, runner_args)
+
+
+if __name__ == "__main__":
+    from pipecat.runner.run import main
+
+    main()
--- a/examples/foundational/07n-interruptible-google-http.py
+++ b/examples/foundational/07n-interruptible-google-http.py
@@ -0,0 +1,139 @@
+#
+# Copyright (c) 2024–2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+
+import os
+
+from dotenv import load_dotenv
+from loguru import logger
+
+from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
+from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
+from pipecat.audio.vad.silero import SileroVADAnalyzer
+from pipecat.audio.vad.vad_analyzer import VADParams
+from pipecat.frames.frames import LLMRunFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineParams, PipelineTask
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
+from pipecat.runner.types import RunnerArguments
+from pipecat.runner.utils import create_transport
+from pipecat.services.google.llm import GoogleLLMService
+from pipecat.services.google.stt import GoogleSTTService
+from pipecat.services.google.tts import GoogleHttpTTSService, GoogleTTSService
+from pipecat.transcriptions.language import Language
+from pipecat.transports.base_transport import BaseTransport, TransportParams
+from pipecat.transports.daily.transport import DailyParams
+from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
+
+load_dotenv(override=True)
+
+# We store functions so objects (e.g. SileroVADAnalyzer) don't get
+# instantiated. The function will be called when the desired transport gets
+# selected.
+transport_params = {
+    "daily": lambda: DailyParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+    "twilio": lambda: FastAPIWebsocketParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+    "webrtc": lambda: TransportParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
+    ),
+}
+
+
+async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
+    logger.info(f"Starting bot")
+
+    stt = GoogleSTTService(
+        params=GoogleSTTService.InputParams(languages=Language.EN_US, model="chirp_3"),
+        credentials=os.getenv("GOOGLE_TEST_CREDENTIALS"),
+        location="us",
+    )
+
+    tts = GoogleHttpTTSService(
+        voice_id="en-US-Chirp3-HD-Charon",
+        params=GoogleHttpTTSService.InputParams(language=Language.EN_US),
+        credentials=os.getenv("GOOGLE_TEST_CREDENTIALS"),
+    )
+
+    llm = GoogleLLMService(
+        api_key=os.getenv("GOOGLE_API_KEY"),
+        model="gemini-2.5-flash",
+        # turn on thinking if you want it
+        # params=GoogleLLMService.InputParams(extra={"thinking_config": {"thinking_budget": 4096}}),)
+    )
+
+    messages = [
+        {
+            "role": "system",
+            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
+        },
+    ]
+
+    context = LLMContext(messages)
+    context_aggregator = LLMContextAggregatorPair(context)
+
+    pipeline = Pipeline(
+        [
+            transport.input(),  # Transport user input
+            stt,  # STT
+            context_aggregator.user(),  # User respones
+            llm,  # LLM
+            tts,  # TTS
+            transport.output(),  # Transport bot output
+            context_aggregator.assistant(),  # Assistant spoken responses
+        ]
+    )
+
+    task = PipelineTask(
+        pipeline,
+        params=PipelineParams(
+            enable_metrics=True,
+            enable_usage_metrics=True,
+        ),
+        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
+    )
+
+    @transport.event_handler("on_client_connected")
+    async def on_client_connected(transport, client):
+        logger.info(f"Client connected")
+        # Kick off the conversation.
+        messages.append({"role": "system", "content": "Please introduce yourself to the user."})
+        await task.queue_frames([LLMRunFrame()])
+
+    @transport.event_handler("on_client_disconnected")
+    async def on_client_disconnected(transport, client):
+        logger.info(f"Client disconnected")
+        await task.cancel()
+
+    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
+
+    await runner.run(task)
+
+
+async def bot(runner_args: RunnerArguments):
+    """Main bot entry point compatible with Pipecat Cloud."""
+    transport = await create_transport(runner_args, transport_params)
+    await run_bot(transport, runner_args)
+
+
+if __name__ == "__main__":
+    from pipecat.runner.run import main
+
+    main()
--- a/examples/foundational/07z-interruptible-sarvam-http.py
+++ b/examples/foundational/07z-interruptible-sarvam-http.py
@@ -15,6 +15,7 @@ from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
 from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
 from pipecat.audio.vad.silero import SileroVADAnalyzer
 from pipecat.audio.vad.vad_analyzer import VADParams
+from pipecat.frames.frames import LLMRunFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -112,7 +113,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
            logger.info(f"Client connected")
            # Kick off the conversation.
            messages.append({"role": "system", "content": "Please introduce yourself to the user."})
-            await task.queue_frames([context_aggregator.user().get_context_frame()])
+            await task.queue_frames([LLMRunFrame()])

        @transport.event_handler("on_client_disconnected")
        async def on_client_disconnected(transport, client):
--- a/src/pipecat/services/anthropic/llm.py
+++ b/src/pipecat/services/anthropic/llm.py
@@ -373,7 +373,9 @@ class AnthropicLLMService(LLMService):

                if event.type == "content_block_delta":
                    if hasattr(event.delta, "text"):
-                        await self.push_frame(LLMTextFrame(event.delta.text))
+                        frame = LLMTextFrame(event.delta.text)
+                        frame.includes_inter_frame_spaces = True
+                        await self.push_frame(frame)
                        completion_tokens_estimate += self._estimate_tokens(event.delta.text)
                    elif hasattr(event.delta, "partial_json") and tool_use_block:
                        json_accumulator += event.delta.partial_json
--- a/src/pipecat/services/asyncai/tts.py
+++ b/src/pipecat/services/asyncai/tts.py
@@ -146,6 +146,15 @@ class AsyncAITTSService(InterruptibleTTSService):
        """
        return True

+    @property
+    def includes_inter_frame_spaces(self) -> bool:
+        """Indicates that AsyncAI TTSTextFrames include necessary inter-frame spaces.
+
+        Returns:
+            True, indicating that AsyncAI's text frames include necessary inter-frame spaces.
+        """
+        return True
+
    def language_to_service_language(self, language: Language) -> Optional[str]:
        """Convert a Language enum to Async language format.

@@ -420,6 +429,15 @@ class AsyncAIHttpTTSService(TTSService):
        """
        return True

+    @property
+    def includes_inter_frame_spaces(self) -> bool:
+        """Indicates that AsyncAI TTSTextFrames include necessary inter-frame spaces.
+
+        Returns:
+            True, indicating that AsyncAI's text frames include necessary inter-frame spaces.
+        """
+        return True
+
    def language_to_service_language(self, language: Language) -> Optional[str]:
        """Convert a Language enum to Async language format.

--- a/src/pipecat/services/aws/llm.py
+++ b/src/pipecat/services/aws/llm.py
@@ -1078,7 +1078,9 @@ class AWSBedrockLLMService(LLMService):
                    if "contentBlockDelta" in event:
                        delta = event["contentBlockDelta"]["delta"]
                        if "text" in delta:
-                            await self.push_frame(LLMTextFrame(delta["text"]))
+                            frame = LLMTextFrame(delta["text"])
+                            frame.includes_inter_frame_spaces = True
+                            await self.push_frame(frame)
                            completion_tokens_estimate += self._estimate_tokens(delta["text"])
                        elif "toolUse" in delta and "input" in delta["toolUse"]:
                            # Handle partial JSON for tool use
--- a/src/pipecat/services/aws/nova_sonic/llm.py
+++ b/src/pipecat/services/aws/nova_sonic/llm.py
@@ -1027,7 +1027,9 @@ class AWSNovaSonicLLMService(LLMService):
        logger.debug(f"Assistant response text added: {text}")

        # Report the text of the assistant response.
-        await self.push_frame(TTSTextFrame(text))
+        frame = TTSTextFrame(text)
+        frame.includes_inter_frame_spaces = True
+        await self.push_frame(frame)

        # HACK: here we're also buffering the assistant text ourselves as a
        # backup rather than relying solely on the assistant context aggregator
@@ -1060,7 +1062,9 @@ class AWSNovaSonicLLMService(LLMService):
                # TTSTextFrame would be ignored otherwise (the interruption frame
                # would have cleared the assistant aggregator state).
                await self.push_frame(LLMFullResponseStartFrame())
-                await self.push_frame(TTSTextFrame(self._assistant_text_buffer))
+                frame = TTSTextFrame(self._assistant_text_buffer)
+                frame.includes_inter_frame_spaces = True
+                await self.push_frame(frame)
            self._may_need_repush_assistant_text = False

        # Report the end of the assistant response.
--- a/src/pipecat/services/aws/tts.py
+++ b/src/pipecat/services/aws/tts.py
@@ -209,6 +209,15 @@ class AWSPollyTTSService(TTSService):
        """
        return True

+    @property
+    def includes_inter_frame_spaces(self) -> bool:
+        """Indicates that AWS TTSTextFrames include necessary inter-frame spaces.
+
+        Returns:
+            True, indicating that AWS's text frames include necessary inter-frame spaces.
+        """
+        return True
+
    def language_to_service_language(self, language: Language) -> Optional[str]:
        """Convert a Language enum to AWS Polly language format.

--- a/src/pipecat/services/azure/tts.py
+++ b/src/pipecat/services/azure/tts.py
@@ -151,6 +151,15 @@ class AzureBaseTTSService(TTSService):
        """
        return True

+    @property
+    def includes_inter_frame_spaces(self) -> bool:
+        """Indicates that Azure TTSTextFrames include necessary inter-frame spaces.
+
+        Returns:
+            True, indicating that Azure's text frames include necessary inter-frame spaces.
+        """
+        return True
+
    def language_to_service_language(self, language: Language) -> Optional[str]:
        """Convert a Language enum to Azure language format.

--- a/src/pipecat/services/deepgram/tts.py
+++ b/src/pipecat/services/deepgram/tts.py
@@ -79,6 +79,15 @@ class DeepgramTTSService(TTSService):
        """
        return True

+    @property
+    def includes_inter_frame_spaces(self) -> bool:
+        """Indicates that Deepgram TTSTextFrames include necessary inter-frame spaces.
+
+        Returns:
+            True, indicating that Deepgram's text frames include necessary inter-frame spaces.
+        """
+        return True
+
    @traced_tts
    async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
        """Generate speech from text using Deepgram's TTS API.
@@ -168,6 +177,15 @@ class DeepgramHttpTTSService(TTSService):
        """
        return True

+    @property
+    def includes_inter_frame_spaces(self) -> bool:
+        """Indicates that Deepgram TTSTextFrames include necessary inter-frame spaces.
+
+        Returns:
+            True, indicating that Deepgram's text frames include necessary inter-frame spaces.
+        """
+        return True
+
    @traced_tts
    async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
        """Generate speech from text using Deepgram's TTS API.
--- a/src/pipecat/services/fish/tts.py
+++ b/src/pipecat/services/fish/tts.py
@@ -159,6 +159,15 @@ class FishAudioTTSService(InterruptibleTTSService):
        """
        return True

+    @property
+    def includes_inter_frame_spaces(self) -> bool:
+        """Indicates that Fish Audio TTSTextFrames include necessary inter-frame spaces.
+
+        Returns:
+            True, indicating that Fish Audio's text frames include necessary inter-frame spaces.
+        """
+        return True
+
    async def set_model(self, model: str):
        """Set the TTS model and reconnect.

--- a/src/pipecat/services/google/llm.py
+++ b/src/pipecat/services/google/llm.py
@@ -920,7 +920,9 @@ class GoogleLLMService(LLMService):
                        for part in candidate.content.parts:
                            if not part.thought and part.text:
                                search_result += part.text
-                                await self.push_frame(LLMTextFrame(part.text))
+                                frame = LLMTextFrame(part.text)
+                                frame.includes_inter_frame_spaces = True
+                                await self.push_frame(frame)
                            elif part.function_call:
                                function_call = part.function_call
                                id = function_call.id or str(uuid.uuid4())
--- a/src/pipecat/services/google/tts.py
+++ b/src/pipecat/services/google/tts.py
@@ -606,6 +606,15 @@ class GoogleTTSService(TTSService):
        """
        return True

+    @property
+    def includes_inter_frame_spaces(self) -> bool:
+        """Indicates that Google TTSTextFrames include necessary inter-frame spaces.
+
+        Returns:
+            True, indicating that Google's text frames include necessary inter-frame spaces.
+        """
+        return True
+
    def language_to_service_language(self, language: Language) -> Optional[str]:
        """Convert a Language enum to Google TTS language format.

@@ -840,6 +849,15 @@ class GeminiTTSService(TTSService):
        """
        return True

+    @property
+    def includes_inter_frame_spaces(self) -> bool:
+        """Indicates that Gemini TTSTextFrames include necessary inter-frame spaces.
+
+        Returns:
+            True, indicating that Gemini's text frames include necessary inter-frame spaces.
+        """
+        return True
+
    def language_to_service_language(self, language: Language) -> Optional[str]:
        """Convert a Language enum to Gemini TTS language format.

--- a/src/pipecat/services/groq/tts.py
+++ b/src/pipecat/services/groq/tts.py
@@ -105,6 +105,15 @@ class GroqTTSService(TTSService):
        """
        return True

+    @property
+    def includes_inter_frame_spaces(self) -> bool:
+        """Indicates that Groq TTSTextFrames include necessary inter-frame spaces.
+
+        Returns:
+            True, indicating that Groq's text frames include necessary inter-frame spaces.
+        """
+        return True
+
    @traced_tts
    async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
        """Generate speech from text using Groq's TTS API.
--- a/src/pipecat/services/hume/tts.py
+++ b/src/pipecat/services/hume/tts.py
@@ -110,6 +110,15 @@ class HumeTTSService(TTSService):
        """
        return True

+    @property
+    def includes_inter_frame_spaces(self) -> bool:
+        """Indicates that Hume TTSTextFrames include necessary inter-frame spaces.
+
+        Returns:
+            True, indicating that Hume's text frames include necessary inter-frame spaces.
+        """
+        return True
+
    async def start(self, frame: StartFrame) -> None:
        """Start the service.

--- a/src/pipecat/services/inworld/tts.py
+++ b/src/pipecat/services/inworld/tts.py
@@ -250,6 +250,15 @@ class InworldTTSService(TTSService):
        """
        return True

+    @property
+    def includes_inter_frame_spaces(self) -> bool:
+        """Indicates that Inworld TTSTextFrames include necessary inter-frame spaces.
+
+        Returns:
+            True, indicating that Inworld's text frames include necessary inter-frame spaces.
+        """
+        return True
+
    async def start(self, frame: StartFrame):
        """Start the Inworld TTS service.

--- a/src/pipecat/services/lmnt/tts.py
+++ b/src/pipecat/services/lmnt/tts.py
@@ -124,6 +124,15 @@ class LmntTTSService(InterruptibleTTSService):
        """
        return True

+    @property
+    def includes_inter_frame_spaces(self) -> bool:
+        """Indicates that LMNT TTSTextFrames include necessary inter-frame spaces.
+
+        Returns:
+            True, indicating that LMNT's text frames include necessary inter-frame spaces.
+        """
+        return True
+
    def language_to_service_language(self, language: Language) -> Optional[str]:
        """Convert a Language enum to LMNT service language format.

--- a/src/pipecat/services/minimax/tts.py
+++ b/src/pipecat/services/minimax/tts.py
@@ -194,6 +194,15 @@ class MiniMaxHttpTTSService(TTSService):
        """
        return True

+    @property
+    def includes_inter_frame_spaces(self) -> bool:
+        """Indicates that MiniMax TTSTextFrames include necessary inter-frame spaces.
+
+        Returns:
+            True, indicating that MiniMax's text frames include necessary inter-frame spaces.
+        """
+        return True
+
    def language_to_service_language(self, language: Language) -> Optional[str]:
        """Convert a Language enum to MiniMax service language format.

--- a/src/pipecat/services/neuphonic/tts.py
+++ b/src/pipecat/services/neuphonic/tts.py
@@ -117,7 +117,6 @@ class NeuphonicTTSService(InterruptibleTTSService):
        """
        super().__init__(
            aggregate_sentences=aggregate_sentences,
-            push_text_frames=False,
            push_stop_frames=True,
            stop_frame_timeout_s=2.0,
            sample_rate=sample_rate,
@@ -152,6 +151,15 @@ class NeuphonicTTSService(InterruptibleTTSService):
        """
        return True

+    @property
+    def includes_inter_frame_spaces(self) -> bool:
+        """Indicates that Neuphonic TTSTextFrames include necessary inter-frame spaces.
+
+        Returns:
+            True, indicating that Neuphonic's text frames include necessary inter-frame spaces.
+        """
+        return True
+
    def language_to_service_language(self, language: Language) -> Optional[str]:
        """Convert a Language enum to Neuphonic service language format.

@@ -437,6 +445,15 @@ class NeuphonicHttpTTSService(TTSService):
        """
        return True

+    @property
+    def includes_inter_frame_spaces(self) -> bool:
+        """Indicates that Neuphonic TTSTextFrames include necessary inter-frame spaces.
+
+        Returns:
+            True, indicating that Neuphonic's text frames include necessary inter-frame spaces.
+        """
+        return True
+
    def language_to_service_language(self, language: Language) -> Optional[str]:
        """Convert a Language enum to Neuphonic service language format.

--- a/src/pipecat/services/openai/base_llm.py
+++ b/src/pipecat/services/openai/base_llm.py
@@ -390,7 +390,9 @@ class BaseOpenAILLMService(LLMService):
                    # Keep iterating through the response to collect all the argument fragments
                    arguments += tool_call.function.arguments
            elif chunk.choices[0].delta.content:
-                await self.push_frame(LLMTextFrame(chunk.choices[0].delta.content))
+                frame = LLMTextFrame(chunk.choices[0].delta.content)
+                frame.includes_inter_frame_spaces = True
+                await self.push_frame(frame)

            # When gpt-4o-audio / gpt-4o-mini-audio is used for llm or stt+llm
            # we need to get LLMTextFrame for the transcript
--- a/src/pipecat/services/piper/tts.py
+++ b/src/pipecat/services/piper/tts.py
@@ -66,6 +66,15 @@ class PiperTTSService(TTSService):
        """
        return True

+    @property
+    def includes_inter_frame_spaces(self) -> bool:
+        """Indicates that Piper TTSTextFrames include necessary inter-frame spaces.
+
+        Returns:
+            True, indicating that Piper's text frames include necessary inter-frame spaces.
+        """
+        return True
+
    @traced_tts
    async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
        """Generate speech from text using Piper's HTTP API.
--- a/src/pipecat/services/rime/tts.py
+++ b/src/pipecat/services/rime/tts.py
@@ -496,6 +496,15 @@ class RimeHttpTTSService(TTSService):
        """
        return True

+    @property
+    def includes_inter_frame_spaces(self) -> bool:
+        """Indicates that Rime TTSTextFrames include necessary inter-frame spaces.
+
+        Returns:
+            True, indicating that Rime's text frames include necessary inter-frame spaces.
+        """
+        return True
+
    def language_to_service_language(self, language: Language) -> str | None:
        """Convert pipecat language to Rime language code.

--- a/src/pipecat/services/riva/tts.py
+++ b/src/pipecat/services/riva/tts.py
@@ -112,6 +112,15 @@ class RivaTTSService(TTSService):
            riva.client.proto.riva_tts_pb2.RivaSynthesisConfigRequest()
        )

+    @property
+    def includes_inter_frame_spaces(self) -> bool:
+        """Indicates that Riva TTSTextFrames include necessary inter-frame spaces.
+
+        Returns:
+            True, indicating that Riva's text frames include necessary inter-frame spaces.
+        """
+        return True
+
    async def set_model(self, model: str):
        """Attempt to set the TTS model.

--- a/src/pipecat/services/sambanova/llm.py
+++ b/src/pipecat/services/sambanova/llm.py
@@ -176,7 +176,9 @@ class SambaNovaLLMService(OpenAILLMService):  # type: ignore
                    # Keep iterating through the response to collect all the argument fragments
                    arguments += tool_call.function.arguments
            elif chunk.choices[0].delta.content:
-                await self.push_frame(LLMTextFrame(chunk.choices[0].delta.content))
+                frame = LLMTextFrame(chunk.choices[0].delta.content)
+                frame.includes_inter_frame_spaces = True
+                await self.push_frame(frame)

            # When gpt-4o-audio / gpt-4o-mini-audio is used for llm or stt+llm
            # we need to get LLMTextFrame for the transcript
--- a/src/pipecat/services/sarvam/tts.py
+++ b/src/pipecat/services/sarvam/tts.py
@@ -195,6 +195,15 @@ class SarvamHttpTTSService(TTSService):
        """
        return True

+    @property
+    def includes_inter_frame_spaces(self) -> bool:
+        """Indicates that Sarvam TTSTextFrames include necessary inter-frame spaces.
+
+        Returns:
+            True, indicating that Sarvam's text frames include necessary inter-frame spaces.
+        """
+        return True
+
    def language_to_service_language(self, language: Language) -> Optional[str]:
        """Convert a Language enum to Sarvam AI language format.

@@ -458,6 +467,15 @@ class SarvamTTSService(InterruptibleTTSService):
        """
        return True

+    @property
+    def includes_inter_frame_spaces(self) -> bool:
+        """Indicates that Sarvam TTSTextFrames include necessary inter-frame spaces.
+
+        Returns:
+            True, indicating that Sarvam's text frames include necessary inter-frame spaces.
+        """
+        return True
+
    def language_to_service_language(self, language: Language) -> Optional[str]:
        """Convert a Language enum to Sarvam AI language format.

--- a/src/pipecat/services/speechmatics/tts.py
+++ b/src/pipecat/services/speechmatics/tts.py
@@ -105,6 +105,15 @@ class SpeechmaticsTTSService(TTSService):
        """
        return True

+    @property
+    def includes_inter_frame_spaces(self) -> bool:
+        """Indicates that Speechmatics TTSTextFrames include necessary inter-frame spaces.
+
+        Returns:
+            True, indicating that Speechmatics's text frames include necessary inter-frame spaces.
+        """
+        return True
+
    @traced_tts
    async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
        """Generate speech from text using Speechmatics' HTTP API.
--- a/src/pipecat/services/tts_service.py
+++ b/src/pipecat/services/tts_service.py
@@ -192,6 +192,23 @@ class TTSService(AIService):
        CHUNK_SECONDS = 0.5
        return int(self.sample_rate * CHUNK_SECONDS * 2)  # 2 bytes/sample

+    @property
+    def includes_inter_frame_spaces(self) -> bool:
+        """Indicates whether TTSTextFrames include necesary inter-frame spaces.
+
+        When True, the TTSTextFrame objects pushed by this service already
+        include all necessary spaces between subsequent frames. When False,
+        downstream processors (like the assistant context aggregator) may need
+        to add spacing.
+
+        Subclasses should override this property to return True if their text
+        generation process already includes necessary inter-frame spaces.
+
+        Returns:
+            False by default. Subclasses can override to return True.
+        """
+        return False
+
    async def set_model(self, model: str):
        """Set the TTS model to use.

@@ -490,7 +507,9 @@ class TTSService(AIService):
        if self._push_text_frames:
            # We send the original text after the audio. This way, if we are
            # interrupted, the text is not added to the assistant context.
-            await self.push_frame(TTSTextFrame(text))
+            frame = TTSTextFrame(text)
+            frame.includes_inter_frame_spaces = self.includes_inter_frame_spaces
+            await self.push_frame(frame)

    async def _stop_frame_handler(self):
        has_started = False