services(cartesia): schedule TTSStoppedFrame after text

This commit is contained in:
Aleix Conchillo Flaqué
2024-10-15 09:02:51 -07:00
parent 616aa54f75
commit 3a3bf3fe34
2 changed files with 6 additions and 2 deletions

View File

@@ -434,6 +434,9 @@ class WordTTSService(TTSService):
if word == "LLMFullResponseEndFrame" and timestamp == 0:
frame = LLMFullResponseEndFrame()
frame.pts = last_pts
elif word == "TTSStoppedFrame" and timestamp == 0:
frame = TTSStoppedFrame()
frame.pts = last_pts
else:
frame = TextFrame(word)
frame.pts = self._initial_word_timestamp + timestamp

View File

@@ -227,12 +227,13 @@ class CartesiaTTSService(WordTTSService):
continue
if msg["type"] == "done":
await self.stop_ttfb_metrics()
await self.push_frame(TTSStoppedFrame())
# Unset _context_id but not the _context_id_start_timestamp
# because we are likely still playing out audio and need the
# timestamp to send context frames.
self._context_id = None
await self.add_word_timestamps([("LLMFullResponseEndFrame", 0)])
await self.add_word_timestamps(
[("TTSStoppedFrame", 0), ("LLMFullResponseEndFrame", 0)]
)
elif msg["type"] == "timestamps":
await self.add_word_timestamps(
list(zip(msg["word_timestamps"]["words"], msg["word_timestamps"]["start"]))