Merge pull request #3156 from pipecat-ai/mb/deepgram-stt-stopped-frame

fix: DeepgramTTSService, let the base class push TTSStoppedFrame
2025-12-01 17:18:19 -05:00
parent 2235d8f5a2 e72b135a4c
commit b84a40666c
2 changed files with 12 additions and 3 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -25,6 +25,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Fixed an issue in `AWSTranscribeSTTService` where the `region` arg was
  always set to `us-east-1` when providing an AWS_REGION env var.

+- Fixed an issue in `DeepgramTTSService` where a `TTSStoppedFrame` was
+  incorrectly pushed after a function call. This caused an issue with the
+  voice-ui-kit's conversational panel rendering of the LLM output after a
+  function call.
+
 ## [0.0.96] - 2025-11-26 🦃 "Happy Thanksgiving!" 🦃

 ### Added
--- a/src/pipecat/services/deepgram/tts.py
+++ b/src/pipecat/services/deepgram/tts.py
@@ -71,7 +71,12 @@ class DeepgramTTSService(WebsocketTTSService):
            encoding: Audio encoding format. Defaults to "linear16".
            **kwargs: Additional arguments passed to the parent WebsocketTTSService class.
        """
-        super().__init__(sample_rate=sample_rate, **kwargs)
+        super().__init__(
+            sample_rate=sample_rate,
+            pause_frame_processing=True,
+            push_stop_frames=True,
+            **kwargs,
+        )

        self._api_key = api_key
        self._base_url = base_url
@@ -231,7 +236,6 @@ class DeepgramTTSService(WebsocketTTSService):
                        logger.trace(f"Received Flushed: {msg}")
                        # Flushed indicates the end of audio generation for the current buffer
                        # This happens after flush_audio() is called
-                        await self.push_frame(TTSStoppedFrame())
                    elif msg_type == "Cleared":
                        logger.trace(f"Received Cleared: {msg}")
                        # Buffer has been cleared after interruption
@@ -286,7 +290,7 @@ class DeepgramTTSService(WebsocketTTSService):
            speak_msg = {"type": "Speak", "text": text}
            await self._get_websocket().send(json.dumps(speak_msg))

-            # The actual audio frames will be handled in _receive_messages
+            # The audio frames will be handled in _receive_messages
            yield None

        except Exception as e: