From 575376235027ac6c1becc2544724bb228a53b4e2 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Thu, 15 Jan 2026 15:16:08 -0500 Subject: [PATCH] Fix "bot-llm-text" not firing when using OpenAI Realtime --- src/pipecat/services/openai/realtime/llm.py | 24 +++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/src/pipecat/services/openai/realtime/llm.py b/src/pipecat/services/openai/realtime/llm.py index 7c48d7e34..11a83741e 100644 --- a/src/pipecat/services/openai/realtime/llm.py +++ b/src/pipecat/services/openai/realtime/llm.py @@ -724,10 +724,26 @@ class OpenAIRealtimeLLMService(LLMService): # We receive audio transcript deltas (as opposed to text deltas) when # the output modality is "audio" (the default) if evt.delta: - frame = TTSTextFrame(evt.delta, aggregated_by=AggregationType.SENTENCE) - # OpenAI Realtime text already includes any necessary inter-chunk spaces - frame.includes_inter_frame_spaces = True - await self.push_frame(frame) + await self._push_output_transcript_text_frames(evt.delta) + + async def _push_output_transcript_text_frames(self, text: str): + # In a typical "cascade" LLM + TTS setup, LLMTextFrames would not + # proceed beyond the TTS service. Therefore, since a speech-to-speech + # service like OpenAI Realtime combines both LLM and TTS functionality, + # you might think we wouldn't need to push LLMTextFrames at all. + # However, RTVI relies on LLMTextFrames being pushed to trigger its + # "bot-llm-text" event. So here we push an LLMTextFrame, too, but set its + # append_to_context to False to avoid context message duplication. 
+ + # Push LLMTextFrame + llm_text_frame = LLMTextFrame(text) + llm_text_frame.append_to_context = False + await self.push_frame(llm_text_frame) + + # Push TTSTextFrame + tts_text_frame = TTSTextFrame(text, aggregated_by=AggregationType.SENTENCE) + tts_text_frame.includes_inter_frame_spaces = True + await self.push_frame(tts_text_frame) async def _handle_evt_function_call_arguments_done(self, evt): """Handle completion of function call arguments.