From 3b668dc93761fb84161a6f9fcab0d96f471bfb18 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Thu, 21 May 2026 12:37:04 -0400 Subject: [PATCH] Broadcast Nova Sonic interruption on FINAL TEXT contentEnd unconditionally MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The TEXT INTERRUPTED branch gated broadcast_interruption() on _assistant_is_responding, but Nova Sonic's mid-audio barge-in sequence fires AUDIO contentEnd with stopReason=END_TURN first (per AWS docs), which already flips _assistant_is_responding=False. By the time FINAL TEXT contentEnd with stopReason=INTERRUPTED arrives — the actual interruption notification — the guard skipped the broadcast and the output transport's buffered audio kept playing. Always broadcast on TEXT INTERRUPTED; keep the guard around _report_assistant_response_ended() so we don't double-close the response when AUDIO contentEnd already did it. --- src/pipecat/services/aws/nova_sonic/llm.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/pipecat/services/aws/nova_sonic/llm.py b/src/pipecat/services/aws/nova_sonic/llm.py index afd47a3fa..fb56c7d21 100644 --- a/src/pipecat/services/aws/nova_sonic/llm.py +++ b/src/pipecat/services/aws/nova_sonic/llm.py @@ -1443,16 +1443,15 @@ class AWSNovaSonicLLMService(LLMService[AWSNovaSonicLLMAdapter]): if self._sc.on_content_end_assistant_final_text(content.text_content): self.create_task(self._run_sc_handoff(), name="sc_handoff") else: + # FINAL TEXT INTERRUPTED is the canonical barge-in + # signal. The AUDIO branch usually closed the + # response already (AUDIO contentEnd arrives with + # END_TURN on barge-in, before this), but the + # output transport's audio buffer is still draining + # — broadcast unconditionally to clear it. + await self.broadcast_interruption() if self._assistant_is_responding: - # TEXT INTERRUPTED before audio started means no AUDIO - # contentEnd will arrive — end the response here. Emit - # InterruptionFrame upstream so the assistant aggregator - # marks the message interrupted=True, and downstream so - # BaseOutputTransport can clear any audio it had already - # buffered. Must fire before _report_assistant_response_ended - # so the aggregator handles InterruptionFrame before - # LLMFullResponseEndFrame closes the turn. - await self.broadcast_interruption() + # No AUDIO contentEnd will arrive — close here. self._assistant_is_responding = False await self._report_assistant_response_ended() # Session continuation: TEXT INTERRUPTED is a completion