From b640b2d02417eef44cb641d9644caf356f068ea8 Mon Sep 17 00:00:00 2001
From: Kwindla Hultman Kramer <kwindla@gmail.com>
Date: Mon, 7 Oct 2024 20:58:17 -0700
Subject: [PATCH] send user started/stopped speaking event from openai realtime
 events

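Send user started/stopped speaking frames in response to OpenAI Realtime
events.

When the service receives "input_audio_buffer.speech_started" it now pushes
UserStartedSpeakingFrame followed by StartInterruptionFrame; when it receives
"input_audio_buffer.speech_stopped" it pushes UserStoppedSpeakingFrame
followed by StopInterruptionFrame. Both are gated by a new constructor
option, send_user_started_speaking_frames (default True).

The foundational example 19-openai-realtime-beta.py now sets
vad_enabled=False.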
---
 examples/foundational/19-openai-realtime-beta.py   |  2 +-
 .../openai_realtime_beta/llm_and_context.py        | 14 ++++++++++++++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/examples/foundational/19-openai-realtime-beta.py b/examples/foundational/19-openai-realtime-beta.py
index feab9c10f..16b291ef2 100644
--- a/examples/foundational/19-openai-realtime-beta.py
+++ b/examples/foundational/19-openai-realtime-beta.py
@@ -76,7 +76,7 @@ async def main():
                 audio_out_enabled=True,
                 audio_out_sample_rate=24000,
                 transcription_enabled=False,
-                vad_enabled=True,
+                vad_enabled=False,
                 vad_analyzer=SileroVADAnalyzer(),
                 vad_audio_passthrough=True,
             ),
diff --git a/src/pipecat/services/openai_realtime_beta/llm_and_context.py b/src/pipecat/services/openai_realtime_beta/llm_and_context.py
index 5cf4f6985..35397d95d 100644
--- a/src/pipecat/services/openai_realtime_beta/llm_and_context.py
+++ b/src/pipecat/services/openai_realtime_beta/llm_and_context.py
@@ -22,11 +22,14 @@ from pipecat.frames.frames import (
     LLMUpdateSettingsFrame,
     StartFrame,
     StartInterruptionFrame,
+    StopInterruptionFrame,
     TextFrame,
     TranscriptionFrame,
     TTSAudioRawFrame,
     TTSStartedFrame,
     TTSStoppedFrame,
+    UserStartedSpeakingFrame,
+    UserStoppedSpeakingFrame,
 )
 from pipecat.metrics.metrics import LLMTokenUsage
 from pipecat.processors.aggregators.openai_llm_context import (
@@ -120,6 +123,7 @@ class OpenAILLMServiceRealtimeBeta(LLMService):
         session_properties: events.SessionProperties = events.SessionProperties(),
         start_audio_paused: bool = False,
         send_transcription_frames: bool = True,
+        send_user_started_speaking_frames: bool = True,
         **kwargs,
     ):
         super().__init__(base_url=base_url, **kwargs)
@@ -129,6 +133,7 @@ class OpenAILLMServiceRealtimeBeta(LLMService):
         self._session_properties = session_properties
         self._audio_input_paused = start_audio_paused
         self._send_transcription_frames = send_transcription_frames
+        self._send_user_started_speaking_frames = send_user_started_speaking_frames
         self._websocket = None
         self._receive_task = None
         self._context = None
@@ -213,10 +218,19 @@ class OpenAILLMServiceRealtimeBeta(LLMService):
                 elif evt.type == "input_audio_buffer.speech_started":
                     # user started speaking
                     # todo: send user started speaking if configured
+                    if self._send_user_started_speaking_frames:
+                        await self.push_frame(UserStartedSpeakingFrame())
+                        await self.push_frame(StartInterruptionFrame())
+                        logger.debug("User started speaking")
                     pass
                 elif evt.type == "input_audio_buffer.speech_stopped":
                     # user stopped speaking
                     # todo: send user stopped speaking if configured
+                    if self._send_user_started_speaking_frames:
+                        await self.push_frame(UserStoppedSpeakingFrame())
+                        await self.push_frame(StopInterruptionFrame())
+
+                        logger.debug("User stopped speaking")
                     await self.start_processing_metrics()
                     await self.start_ttfb_metrics()
                 elif evt.type == "conversation.item.created":