Compare commits

...

2 Commits

Author SHA1 Message Date
Mark Backman
5bb2201fa1 Add changelog for #3727 2026-02-12 08:58:03 -05:00
Mark Backman
7850517ba4 Fix ElevenLabs TTS word timestamp interleaving across sentences
Move cumulative_time and partial word state resets from every run_tts
call into the new-context creation block. With the multi-stream WebSocket
API, multiple sentences share the same context, so resetting timing state
on each run_tts call caused sentences to get overlapping timestamps,
resulting in interleaved word ordering.
2026-02-12 08:57:02 -05:00
2 changed files with 7 additions and 3 deletions

1
changelog/3727.fixed.md Normal file
View File

@@ -0,0 +1 @@
- Fixed `ElevenLabsTTSService` word timestamps being interleaved across sentences within the same context, causing incorrect word ordering.

View File

@@ -700,9 +700,6 @@ class ElevenLabsTTSService(AudioContextWordTTSService):
try:
await self.start_ttfb_metrics()
yield TTSStartedFrame(context_id=context_id)
-self._cumulative_time = 0
-self._partial_word = ""
-self._partial_word_start_time = 0.0
# If a context ID does not exist, use the provided one.
# If an ID exists, that means the Pipeline doesn't allow
# user interruptions, so continue using the current ID.
@@ -710,6 +707,12 @@ class ElevenLabsTTSService(AudioContextWordTTSService):
# an interruption, which resets the context ID.
if not self._context_id:
self._context_id = context_id
+# Reset timing state only when starting a new context.
+# Within a context, cumulative_time must accumulate across
+# sentences so word timestamps are properly sequenced.
+self._cumulative_time = 0
+self._partial_word = ""
+self._partial_word_start_time = 0.0
if not self.audio_context_available(self._context_id):
await self.create_audio_context(self._context_id)