From 925e80bb20aeaeb9d6ee0a2ff91c291b1b565cad Mon Sep 17 00:00:00 2001 From: Kwindla Hultman Kramer Date: Sun, 18 Aug 2024 13:34:46 -0700 Subject: [PATCH 1/2] VADParamsUpdateFrame and handling thereof --- src/pipecat/frames/frames.py | 8 +++++++ src/pipecat/transports/base_input.py | 10 +++++++-- src/pipecat/vad/vad_analyzer.py | 31 +++++++++++++++++++--------- 3 files changed, 37 insertions(+), 12 deletions(-) diff --git a/src/pipecat/frames/frames.py b/src/pipecat/frames/frames.py index 9cd4288fe..5444e501a 100644 --- a/src/pipecat/frames/frames.py +++ b/src/pipecat/frames/frames.py @@ -456,3 +456,11 @@ class FunctionCallResultFrame(DataFrame): tool_call_id: str arguments: str result: any + + +@dataclass +class VADParamsUpdateFrame(ControlFrame): + """A control frame containing a request to update VAD params. Intended + to be pushed upstream from RTVI processor. + """ + params: dict diff --git a/src/pipecat/transports/base_input.py b/src/pipecat/transports/base_input.py index 5eb5da16c..2e6ed75fb 100644 --- a/src/pipecat/transports/base_input.py +++ b/src/pipecat/transports/base_input.py @@ -20,9 +20,10 @@ from pipecat.frames.frames import ( StopInterruptionFrame, SystemFrame, UserStartedSpeakingFrame, - UserStoppedSpeakingFrame) + UserStoppedSpeakingFrame, + VADParamsUpdateFrame) from pipecat.transports.base_transport import TransportParams -from pipecat.vad.vad_analyzer import VADAnalyzer, VADState +from pipecat.vad.vad_analyzer import VADAnalyzer, VADParams, VADState from loguru import logger @@ -102,6 +103,11 @@ class BaseInputTransport(FrameProcessor): # finish and the task finishes when EndFrame is processed. await self._internal_push_frame(frame, direction) await self.stop(frame) + elif isinstance(frame, VADParamsUpdateFrame): + vad_analyzer = self.vad_analyzer() + if not vad_analyzer: + pass + vad_analyzer._set_params(frame.params) # Other frames else: await self._internal_push_frame(frame, direction) diff --git a/src/pipecat/vad/vad_analyzer.py b/src/pipecat/vad/vad_analyzer.py index f6b9b61a7..021b1b802 100644 --- a/src/pipecat/vad/vad_analyzer.py +++ b/src/pipecat/vad/vad_analyzer.py @@ -11,6 +11,8 @@ from pydantic.main import BaseModel from pipecat.utils.audio import calculate_audio_volume, exp_smoothing +from loguru import logger + class VADState(Enum): QUIET = 1 @@ -31,17 +33,8 @@ class VADAnalyzer: def __init__(self, *, sample_rate: int, num_channels: int, params: VADParams): self._sample_rate = sample_rate self._num_channels = num_channels - self._params = params - self._vad_frames = self.num_frames_required() - self._vad_frames_num_bytes = self._vad_frames * num_channels * 2 - vad_frames_per_sec = self._vad_frames / self._sample_rate - - self._vad_start_frames = round(self._params.start_secs / vad_frames_per_sec) - self._vad_stop_frames = round(self._params.stop_secs / vad_frames_per_sec) - self._vad_starting_count = 0 - self._vad_stopping_count = 0 - self._vad_state: VADState = VADState.QUIET + self._set_params(params) self._vad_buffer = b"" @@ -53,6 +46,10 @@ class VADAnalyzer: def sample_rate(self): return self._sample_rate + @property + def num_channels(self): + return self._num_channels + @abstractmethod def num_frames_required(self) -> int: pass @@ -61,6 +58,20 @@ class VADAnalyzer: def voice_confidence(self, buffer) -> float: pass + def _set_params(self, params: VADParams): + logger.debug(f"Setting VAD params to: {params}") + self._params = params + self._vad_frames = self.num_frames_required() + self._vad_frames_num_bytes = self._vad_frames * self._num_channels * 2 + + vad_frames_per_sec = self._vad_frames / self._sample_rate + + self._vad_start_frames = round(self._params.start_secs / vad_frames_per_sec) + self._vad_stop_frames = round(self._params.stop_secs / vad_frames_per_sec) + self._vad_starting_count = 0 + self._vad_stopping_count = 0 + self._vad_state: VADState = VADState.QUIET + def _get_smoothed_volume(self, audio: bytes) -> float: volume = calculate_audio_volume(audio, self._sample_rate) return exp_smoothing(volume, self._prev_volume, self._smoothing_factor) From e6b05005682cb5461bcc9a2f99a96c4f392429f5 Mon Sep 17 00:00:00 2001 From: Kwindla Hultman Kramer Date: Sun, 18 Aug 2024 21:11:18 -0700 Subject: [PATCH 2/2] make VADAnalyzer:set_params() public --- src/pipecat/transports/base_input.py | 2 +- src/pipecat/vad/vad_analyzer.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pipecat/transports/base_input.py b/src/pipecat/transports/base_input.py index 2e6ed75fb..91052788c 100644 --- a/src/pipecat/transports/base_input.py +++ b/src/pipecat/transports/base_input.py @@ -107,7 +107,7 @@ class BaseInputTransport(FrameProcessor): vad_analyzer = self.vad_analyzer() if not vad_analyzer: pass - vad_analyzer._set_params(frame.params) + vad_analyzer.set_params(frame.params) # Other frames else: await self._internal_push_frame(frame, direction) diff --git a/src/pipecat/vad/vad_analyzer.py b/src/pipecat/vad/vad_analyzer.py index 021b1b802..3b7f9931d 100644 --- a/src/pipecat/vad/vad_analyzer.py +++ b/src/pipecat/vad/vad_analyzer.py @@ -34,7 +34,7 @@ class VADAnalyzer: self._sample_rate = sample_rate self._num_channels = num_channels - self._set_params(params) + self.set_params(params) self._vad_buffer = b"" @@ -58,7 +58,7 @@ class VADAnalyzer: def voice_confidence(self, buffer) -> float: pass - def _set_params(self, params: VADParams): + def set_params(self, params: VADParams): logger.debug(f"Setting VAD params to: {params}") self._params = params self._vad_frames = self.num_frames_required()