From d146a7f8e0e2bb367c5116c44bd4ed10aed3382f Mon Sep 17 00:00:00 2001
From: filipi87 <filipi87@gmail.com>
Date: Wed, 6 May 2026 14:55:49 -0300
Subject: [PATCH 1/3] Refactoring ElevenLabs to send close_context as soon as
 the turn context is complete.

---
 src/pipecat/services/elevenlabs/tts.py | 40 ++++++++++----------------
 1 file changed, 15 insertions(+), 25 deletions(-)

diff --git a/src/pipecat/services/elevenlabs/tts.py b/src/pipecat/services/elevenlabs/tts.py
index a52582758..8a44efa9d 100644
--- a/src/pipecat/services/elevenlabs/tts.py
+++ b/src/pipecat/services/elevenlabs/tts.py
@@ -558,7 +558,7 @@ class ElevenLabsTTSService(WebsocketTTSService):
             text_aggregation_mode=text_aggregation_mode,
             aggregate_sentences=aggregate_sentences,
             push_text_frames=False,
-            push_stop_frames=True,
+            push_stop_frames=False,
             pause_frame_processing=True,
             sample_rate=sample_rate,
             settings=default_settings,
@@ -825,15 +825,15 @@ class ElevenLabsTTSService(WebsocketTTSService):
         await self._close_context(context_id)
         await super().on_audio_context_interrupted(context_id)
 
-    async def on_audio_context_completed(self, context_id: str):
-        """Close the ElevenLabs context after all audio has been played.
+    async def on_turn_context_completed(self):
+        """Close the server-side context at end of turn.
 
-        ElevenLabs does not send a server-side signal when a context is
-        exhausted, so Pipecat must explicitly close it with
-        ``close_context: True`` to free server-side resources.
+        Sends close_context so isFinal arrives immediately after the last audio byte.
         """
-        await self._close_context(context_id)
-        await super().on_audio_context_completed(context_id)
+        context_id = self._turn_context_id
+        await super().on_turn_context_completed()
+        if context_id:
+            await self._close_context(context_id)
 
     async def _receive_messages(self):
         """Handle incoming WebSocket messages from ElevenLabs."""
@@ -843,25 +843,15 @@ class ElevenLabsTTSService(WebsocketTTSService):
             received_ctx_id = msg.get("contextId")
 
             # Handle final messages first, regardless of context availability
-            # At the moment, this message is received AFTER the close_context message is
-            # sent, so it doesn't serve any functional purpose. For now, we'll just log it.
             if msg.get("isFinal") is True:
-                logger.trace(f"Received final message for context {received_ctx_id}")
-                continue
-
-            # Check if this message belongs to the current context.
-            if not self.audio_context_available(received_ctx_id):
-                if self.get_active_audio_context_id() == received_ctx_id:
-                    logger.debug(
-                        f"Received a delayed message, recreating the context: {received_ctx_id}"
+                logger.debug(f"Received final message for context {received_ctx_id}")
+                # On interruption the audio context is no longer available; nothing to do.
+                if self.audio_context_available(received_ctx_id):
+                    await self.append_to_audio_context(
+                        received_ctx_id, TTSStoppedFrame(context_id=received_ctx_id)
                     )
-                    await self.create_audio_context(received_ctx_id)
-                else:
-                    # This can happen if a message is received _after_ we have closed a context
-                    # due to user interruption but _before_ the `isFinal` message for the context
-                    # is received.
-                    logger.debug(f"Ignoring message from unavailable context: {received_ctx_id}")
-                    continue
+                    await self.remove_audio_context(received_ctx_id)
+                continue
 
             if msg.get("audio"):
                 audio = base64.b64decode(msg["audio"])

From fda18a9afa9bb305270b927c4cc495af25a94da2 Mon Sep 17 00:00:00 2001
From: filipi87 <filipi87@gmail.com>
Date: Wed, 6 May 2026 14:58:18 -0300
Subject: [PATCH 2/3] Adding changelog for the elevenlabs improvement.

---
 changelog/4433.changed.md | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog/4433.changed.md

diff --git a/changelog/4433.changed.md b/changelog/4433.changed.md
new file mode 100644
index 000000000..fc2931638
--- /dev/null
+++ b/changelog/4433.changed.md
@@ -0,0 +1 @@
+- `ElevenLabsTTSService` now sends `close_context` to the server as soon as the turn is complete (in `on_turn_context_completed`) rather than waiting until all audio has finished playing back. The `isFinal` message from ElevenLabs is now used to emit `TTSStoppedFrame` and clean up the audio context, improving turn transition timing.

From a445399337ed963b96a2485db182e7c2249752d7 Mon Sep 17 00:00:00 2001
From: filipi87 <filipi87@gmail.com>
Date: Thu, 7 May 2026 10:10:54 -0300
Subject: [PATCH 3/3] Fixing a bug in the ElevenLabs TTS refactor where
 alignment state was reset too early mid-turn.

---
 src/pipecat/services/elevenlabs/tts.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/pipecat/services/elevenlabs/tts.py b/src/pipecat/services/elevenlabs/tts.py
index 8a44efa9d..b8f6a9abf 100644
--- a/src/pipecat/services/elevenlabs/tts.py
+++ b/src/pipecat/services/elevenlabs/tts.py
@@ -660,6 +660,7 @@ class ElevenLabsTTSService(WebsocketTTSService):
             if audio_contexts:
                 for ctx_id in audio_contexts:
                     await self._close_context(ctx_id)
+                    self._reset_alignment_state(ctx_id)
 
         if not url_changed:
             # Reconnect applies all settings; only warn about fields not handled
@@ -815,6 +816,8 @@ class ElevenLabsTTSService(WebsocketTTSService):
                 )
             except Exception as e:
                 await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e)
+
+    def _reset_alignment_state(self, context_id: str):
         self._cumulative_time = 0.0
         self._partial_word = ""
         self._partial_word_start_time = 0.0
@@ -823,8 +826,14 @@ class ElevenLabsTTSService(WebsocketTTSService):
     async def on_audio_context_interrupted(self, context_id: str):
         """Close the ElevenLabs context when the bot is interrupted."""
         await self._close_context(context_id)
+        self._reset_alignment_state(context_id)
         await super().on_audio_context_interrupted(context_id)
 
+    async def on_audio_context_completed(self, context_id: str):
+        """Reset alignment state after all audio for the context has played."""
+        self._reset_alignment_state(context_id)
+        await super().on_audio_context_completed(context_id)
+
     async def on_turn_context_completed(self):
         """Close the server-side context at end of turn.