services(cartesia): schedule TTSStoppedFrame after text

2024-10-15 09:02:51 -07:00
parent 616aa54f75
commit 3a3bf3fe34
2 changed files with 6 additions and 2 deletions
--- a/src/pipecat/services/ai_services.py
+++ b/src/pipecat/services/ai_services.py
@@ -434,6 +434,9 @@ class WordTTSService(TTSService):
                if word == "LLMFullResponseEndFrame" and timestamp == 0:
                    frame = LLMFullResponseEndFrame()
                    frame.pts = last_pts
+                elif word == "TTSStoppedFrame" and timestamp == 0:
+                    frame = TTSStoppedFrame()
+                    frame.pts = last_pts
                else:
                    frame = TextFrame(word)
                    frame.pts = self._initial_word_timestamp + timestamp
--- a/src/pipecat/services/cartesia.py
+++ b/src/pipecat/services/cartesia.py
@@ -227,12 +227,13 @@ class CartesiaTTSService(WordTTSService):
                    continue
                if msg["type"] == "done":
                    await self.stop_ttfb_metrics()
-                    await self.push_frame(TTSStoppedFrame())
                    # Unset _context_id but not the _context_id_start_timestamp
                    # because we are likely still playing out audio and need the
                    # timestamp to set send context frames.
                    self._context_id = None
-                    await self.add_word_timestamps([("LLMFullResponseEndFrame", 0)])
+                    await self.add_word_timestamps(
+                        [("TTSStoppedFrame", 0), ("LLMFullResponseEndFrame", 0)]
+                    )
                elif msg["type"] == "timestamps":
                    await self.add_word_timestamps(
                        list(zip(msg["word_timestamps"]["words"], msg["word_timestamps"]["start"]))