From 7ee6e7193d119bb89e9c73f45502af03b88029fb Mon Sep 17 00:00:00 2001 From: Vaibhav159 Date: Thu, 16 Jan 2025 21:23:56 +0530 Subject: [PATCH 1/3] adding metric generation without deepgram VAD --- CHANGELOG.md | 2 ++ src/pipecat/services/deepgram.py | 15 +++++++++++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 232bf0f4f..127695c36 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -74,6 +74,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 `UserStoppedSpeakingFrame`. This helps in faster transcriptions and clearing the `Deepgram` audio buffer. +- Changed `DeepgramSTTService` to generate metrics using pipeline VAD. + ### Fixed - Fixed an issue where websocket based TTS services could incorrectly terminate diff --git a/src/pipecat/services/deepgram.py b/src/pipecat/services/deepgram.py index bba2e72ae..f13928af1 100644 --- a/src/pipecat/services/deepgram.py +++ b/src/pipecat/services/deepgram.py @@ -21,6 +21,7 @@ from pipecat.frames.frames import ( TTSStartedFrame, TTSStoppedFrame, UserStoppedSpeakingFrame, + UserStartedSpeakingFrame, ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.ai_services import STTService, TTSService @@ -169,7 +170,7 @@ class DeepgramSTTService(STTService): return self._settings["vad_events"] def can_generate_metrics(self) -> bool: - return self.vad_enabled + return True async def set_model(self, model: str): await super().set_model(model) @@ -210,9 +211,12 @@ class DeepgramSTTService(STTService): logger.debug("Disconnecting from Deepgram") await self._connection.finish() - async def _on_speech_started(self, *args, **kwargs): + async def start_metrics(self): await self.start_ttfb_metrics() await self.start_processing_metrics() + + async def _on_speech_started(self, *args, **kwargs): + await self.start_metrics() await self._call_event_handler("on_speech_started", *args, **kwargs) async def _on_utterance_end(self, *args, **kwargs): @@ -243,7 +247,10 @@ class DeepgramSTTService(STTService): async def process_frame(self, frame: Frame, direction: FrameDirection): await super().process_frame(frame, direction) - if isinstance(frame, UserStoppedSpeakingFrame): + if isinstance(frame, UserStartedSpeakingFrame) and not self.vad_enabled: + # Start metrics if Deepgram VAD is disabled & pipeline VAD has detected speech + await self.start_metrics() + elif isinstance(frame, UserStoppedSpeakingFrame): # https://developers.deepgram.com/docs/finalize await self._connection.finalize() - logger.debug(f"Triggering finalize event on: {frame.name=}, {direction=}") + logger.trace(f"Triggering finalize event on: {frame.name=}, {direction=}") From 923d33eeffa28f64035f3e45707a2db819251af0 Mon Sep 17 00:00:00 2001 From: Vaibhav159 Date: Thu, 16 Jan 2025 21:32:48 +0530 Subject: [PATCH 2/3] fixing ruff --- src/pipecat/services/deepgram.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pipecat/services/deepgram.py b/src/pipecat/services/deepgram.py index f13928af1..bd54c4245 100644 --- a/src/pipecat/services/deepgram.py +++ b/src/pipecat/services/deepgram.py @@ -20,8 +20,8 @@ from pipecat.frames.frames import ( TTSAudioRawFrame, TTSStartedFrame, TTSStoppedFrame, - UserStoppedSpeakingFrame, UserStartedSpeakingFrame, + UserStoppedSpeakingFrame, ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.ai_services import STTService, TTSService From 85e7d62f94e9ed4af9536b09ece92cc64ae06ea2 Mon Sep 17 00:00:00 2001 From: Vaibhav159 Date: Thu, 16 Jan 2025 21:36:51 +0530 Subject: [PATCH 3/3] fixing log text --- src/pipecat/services/deepgram.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pipecat/services/deepgram.py b/src/pipecat/services/deepgram.py index bd54c4245..a5d36370f 100644 --- a/src/pipecat/services/deepgram.py +++ b/src/pipecat/services/deepgram.py @@ -253,4 +253,4 @@ class DeepgramSTTService(STTService): elif isinstance(frame, UserStoppedSpeakingFrame): # https://developers.deepgram.com/docs/finalize await self._connection.finalize() - logger.trace(f"Triggering finalize event on: {frame.name=}, {direction=}") + logger.trace(f"Triggered finalize event on: {frame.name=}, {direction=}")