Send user started/stopped speaking events from OpenAI Realtime events

Send user started/stopped speaking events from OpenAI Realtime events
This commit is contained in:
Kwindla Hultman Kramer
2024-10-07 20:58:17 -07:00
parent bd0649e3ed
commit b640b2d024
2 changed files with 15 additions and 1 deletions

View File

@@ -76,7 +76,7 @@ async def main():
audio_out_enabled=True,
audio_out_sample_rate=24000,
transcription_enabled=False,
vad_enabled=True,
vad_enabled=False,
vad_analyzer=SileroVADAnalyzer(),
vad_audio_passthrough=True,
),

View File

@@ -22,11 +22,14 @@ from pipecat.frames.frames import (
LLMUpdateSettingsFrame,
StartFrame,
StartInterruptionFrame,
StopInterruptionFrame,
TextFrame,
TranscriptionFrame,
TTSAudioRawFrame,
TTSStartedFrame,
TTSStoppedFrame,
UserStartedSpeakingFrame,
UserStoppedSpeakingFrame,
)
from pipecat.metrics.metrics import LLMTokenUsage
from pipecat.processors.aggregators.openai_llm_context import (
@@ -120,6 +123,7 @@ class OpenAILLMServiceRealtimeBeta(LLMService):
session_properties: events.SessionProperties = events.SessionProperties(),
start_audio_paused: bool = False,
send_transcription_frames: bool = True,
send_user_started_speaking_frames: bool = True,
**kwargs,
):
super().__init__(base_url=base_url, **kwargs)
@@ -129,6 +133,7 @@ class OpenAILLMServiceRealtimeBeta(LLMService):
self._session_properties = session_properties
self._audio_input_paused = start_audio_paused
self._send_transcription_frames = send_transcription_frames
self._send_user_started_speaking_frames = send_user_started_speaking_frames
self._websocket = None
self._receive_task = None
self._context = None
@@ -213,10 +218,19 @@ class OpenAILLMServiceRealtimeBeta(LLMService):
elif evt.type == "input_audio_buffer.speech_started":
# user started speaking
# push user-started-speaking and start-interruption frames if configured
if self._send_user_started_speaking_frames:
await self.push_frame(UserStartedSpeakingFrame())
await self.push_frame(StartInterruptionFrame())
logger.debug("User started speaking")
pass
elif evt.type == "input_audio_buffer.speech_stopped":
# user stopped speaking
# push user-stopped-speaking and stop-interruption frames if configured (same flag as the started-speaking frames)
if self._send_user_started_speaking_frames:
await self.push_frame(UserStoppedSpeakingFrame())
await self.push_frame(StopInterruptionFrame())
logger.debug("User stopped speaking")
await self.start_processing_metrics()
await self.start_ttfb_metrics()
elif evt.type == "conversation.item.created":