From 3a3bf3fe34e5684b77e4e5eee441aa4294d518d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Tue, 15 Oct 2024 09:02:51 -0700 Subject: [PATCH] services(cartesia): schedule TTSStoppedFrame after text --- src/pipecat/services/ai_services.py | 3 +++ src/pipecat/services/cartesia.py | 5 +++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/pipecat/services/ai_services.py b/src/pipecat/services/ai_services.py index f1e01a18e..15b9cb9b7 100644 --- a/src/pipecat/services/ai_services.py +++ b/src/pipecat/services/ai_services.py @@ -434,6 +434,9 @@ class WordTTSService(TTSService): if word == "LLMFullResponseEndFrame" and timestamp == 0: frame = LLMFullResponseEndFrame() frame.pts = last_pts + elif word == "TTSStoppedFrame" and timestamp == 0: + frame = TTSStoppedFrame() + frame.pts = last_pts else: frame = TextFrame(word) frame.pts = self._initial_word_timestamp + timestamp diff --git a/src/pipecat/services/cartesia.py b/src/pipecat/services/cartesia.py index 21074bbcc..f5657126c 100644 --- a/src/pipecat/services/cartesia.py +++ b/src/pipecat/services/cartesia.py @@ -227,12 +227,13 @@ class CartesiaTTSService(WordTTSService): continue if msg["type"] == "done": await self.stop_ttfb_metrics() - await self.push_frame(TTSStoppedFrame()) # Unset _context_id but not the _context_id_start_timestamp # because we are likely still playing out audio and need the # timestamp to set send context frames. self._context_id = None - await self.add_word_timestamps([("LLMFullResponseEndFrame", 0)]) + await self.add_word_timestamps( + [("TTSStoppedFrame", 0), ("LLMFullResponseEndFrame", 0)] + ) elif msg["type"] == "timestamps": await self.add_word_timestamps( list(zip(msg["word_timestamps"]["words"], msg["word_timestamps"]["start"]))