From b76831e67797672f30abe84dd88e2e01cdcced61 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?=
Date: Thu, 14 May 2026 13:22:00 -0700
Subject: [PATCH] Fall back to _turn_context_id in get_active_audio_context_id
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

TTS services whose wire protocol does not echo the context_id back on
incoming audio (Sarvam, Smallest, Soniox, Inworld, ...) call
``get_active_audio_context_id()`` to tag each chunk. That accessor
returned only ``_playing_context_id`` — the playback-side cursor set
asynchronously by ``_audio_context_task_handler`` when it pops a context
off the serialization queue. Result: incoming audio that arrived in the
gap between contexts or at the very start of a turn (before the playback
loop popped) had ``context_id=None`` and was dropped with ``unable to
append audio to context: no context ID provided``.

Fall back to ``_turn_context_id`` (the synthesis-side cursor, set as
soon as the turn's context is created) so the gap is covered without
prematurely nulling the playback cursor.
---
 src/pipecat/services/tts_service.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/pipecat/services/tts_service.py b/src/pipecat/services/tts_service.py
index 65718fee6..61eddb20c 100644
--- a/src/pipecat/services/tts_service.py
+++ b/src/pipecat/services/tts_service.py
@@ -1283,10 +1283,17 @@ class TTSService(AIService):
     def get_active_audio_context_id(self) -> str | None:
         """Get the active audio context ID.
 
+        Returns the playback cursor when set (during active playback), falling
+        back to the current turn's synthesis context_id. The fallback covers
+        the gap between contexts and the start of a turn before the playback
+        task has popped the just-created context off the serialization queue —
+        important for services whose wire protocol does not echo context_id
+        back on incoming audio.
+
         Returns:
-            The active context ID, or None if no context is active.
+            The active context ID, or None if neither cursor is set.
         """
-        return self._playing_context_id
+        return self._playing_context_id or self._turn_context_id
 
     async def remove_active_audio_context(self):
         """Remove the active audio context."""