Merge pull request #1011 from Vaibhav159/vl_deepgram_metrics_without_vad
adding metric generation without deepgram VAD
This commit is contained in:
@@ -89,6 +89,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
`UserStoppedSpeakingFrame`. This helps in faster transcriptions and clearing
|
||||
the `Deepgram` audio buffer.
|
||||
|
||||
- Changed `DeepgramSTTService` to generate metrics using pipeline VAD.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed `UserIdleProcessor` not properly propagating `EndFrame`s through the
|
||||
|
||||
@@ -20,6 +20,7 @@ from pipecat.frames.frames import (
|
||||
TTSAudioRawFrame,
|
||||
TTSStartedFrame,
|
||||
TTSStoppedFrame,
|
||||
UserStartedSpeakingFrame,
|
||||
UserStoppedSpeakingFrame,
|
||||
)
|
||||
from pipecat.processors.frame_processor import FrameDirection
|
||||
@@ -169,7 +170,7 @@ class DeepgramSTTService(STTService):
|
||||
return self._settings["vad_events"]
|
||||
|
||||
def can_generate_metrics(self) -> bool:
|
||||
return self.vad_enabled
|
||||
return True
|
||||
|
||||
async def set_model(self, model: str):
|
||||
await super().set_model(model)
|
||||
@@ -210,9 +211,12 @@ class DeepgramSTTService(STTService):
|
||||
logger.debug("Disconnecting from Deepgram")
|
||||
await self._connection.finish()
|
||||
|
||||
async def _on_speech_started(self, *args, **kwargs):
|
||||
async def start_metrics(self):
|
||||
await self.start_ttfb_metrics()
|
||||
await self.start_processing_metrics()
|
||||
|
||||
async def _on_speech_started(self, *args, **kwargs):
|
||||
await self.start_metrics()
|
||||
await self._call_event_handler("on_speech_started", *args, **kwargs)
|
||||
|
||||
async def _on_utterance_end(self, *args, **kwargs):
|
||||
@@ -243,7 +247,10 @@ class DeepgramSTTService(STTService):
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, UserStoppedSpeakingFrame):
|
||||
if isinstance(frame, UserStartedSpeakingFrame) and not self.vad_enabled:
|
||||
# Start metrics if Deepgram VAD is disabled & pipeline VAD has detected speech
|
||||
await self.start_metrics()
|
||||
elif isinstance(frame, UserStoppedSpeakingFrame):
|
||||
# https://developers.deepgram.com/docs/finalize
|
||||
await self._connection.finalize()
|
||||
logger.debug(f"Triggering finalize event on: {frame.name=}, {direction=}")
|
||||
logger.trace(f"Triggered finalize event on: {frame.name=}, {direction=}")
|
||||
|
||||
Reference in New Issue
Block a user