Merge pull request #397 from pipecat-ai/khk/rtvi-vad-params
VADParamsUpdateFrame and handling thereof
This commit is contained in:
@@ -456,3 +456,11 @@ class FunctionCallResultFrame(DataFrame):
|
||||
tool_call_id: str
|
||||
arguments: str
|
||||
result: any
|
||||
|
||||
|
||||
@dataclass
class VADParamsUpdateFrame(ControlFrame):
    """Control frame that asks for the VAD parameters to be replaced.

    It is meant to be pushed upstream by the RTVI processor.
    """

    # Replacement VAD settings carried by this request.
    # NOTE(review): annotated as ``dict``, while the analyzer's ``set_params``
    # is annotated to take ``VADParams`` — confirm which type producers send.
    params: dict
|
||||
|
||||
@@ -20,9 +20,10 @@ from pipecat.frames.frames import (
|
||||
StopInterruptionFrame,
|
||||
SystemFrame,
|
||||
UserStartedSpeakingFrame,
|
||||
UserStoppedSpeakingFrame)
|
||||
UserStoppedSpeakingFrame,
|
||||
VADParamsUpdateFrame)
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.vad.vad_analyzer import VADAnalyzer, VADState
|
||||
from pipecat.vad.vad_analyzer import VADAnalyzer, VADParams, VADState
|
||||
|
||||
from loguru import logger
|
||||
|
||||
@@ -102,6 +103,11 @@ class BaseInputTransport(FrameProcessor):
|
||||
# finish and the task finishes when EndFrame is processed.
|
||||
await self._internal_push_frame(frame, direction)
|
||||
await self.stop(frame)
|
||||
elif isinstance(frame, VADParamsUpdateFrame):
|
||||
vad_analyzer = self.vad_analyzer()
|
||||
if not vad_analyzer:
|
||||
pass
|
||||
vad_analyzer.set_params(frame.params)
|
||||
# Other frames
|
||||
else:
|
||||
await self._internal_push_frame(frame, direction)
|
||||
|
||||
@@ -11,6 +11,8 @@ from pydantic.main import BaseModel
|
||||
|
||||
from pipecat.utils.audio import calculate_audio_volume, exp_smoothing
|
||||
|
||||
from loguru import logger
|
||||
|
||||
|
||||
class VADState(Enum):
|
||||
QUIET = 1
|
||||
@@ -31,17 +33,8 @@ class VADAnalyzer:
|
||||
def __init__(self, *, sample_rate: int, num_channels: int, params: VADParams):
|
||||
self._sample_rate = sample_rate
|
||||
self._num_channels = num_channels
|
||||
self._params = params
|
||||
self._vad_frames = self.num_frames_required()
|
||||
self._vad_frames_num_bytes = self._vad_frames * num_channels * 2
|
||||
|
||||
vad_frames_per_sec = self._vad_frames / self._sample_rate
|
||||
|
||||
self._vad_start_frames = round(self._params.start_secs / vad_frames_per_sec)
|
||||
self._vad_stop_frames = round(self._params.stop_secs / vad_frames_per_sec)
|
||||
self._vad_starting_count = 0
|
||||
self._vad_stopping_count = 0
|
||||
self._vad_state: VADState = VADState.QUIET
|
||||
self.set_params(params)
|
||||
|
||||
self._vad_buffer = b""
|
||||
|
||||
@@ -53,6 +46,10 @@ class VADAnalyzer:
|
||||
def sample_rate(self):
|
||||
return self._sample_rate
|
||||
|
||||
@property
def num_channels(self):
    """Read-only access to the channel count stored in ``_num_channels``."""
    return self._num_channels
|
||||
|
||||
@abstractmethod
def num_frames_required(self) -> int:
    """Return the frame count the analyzer stores as ``_vad_frames``.

    Abstract hook: this base version has no implementation and yields
    ``None``; concrete analyzers are expected to override it.
    """
|
||||
@@ -61,6 +58,20 @@ class VADAnalyzer:
|
||||
def voice_confidence(self, buffer) -> float:
|
||||
pass
|
||||
|
||||
def set_params(self, params: VADParams):
    """Install new VAD parameters and re-derive everything that depends on them.

    Stores ``params``, recomputes the window size (in frames and in bytes)
    and the start/stop thresholds, then resets the start/stop counters and
    puts the state back to ``VADState.QUIET``.
    """
    logger.debug(f"Setting VAD params to: {params}")
    self._params = params

    # The window size in frames is supplied by the concrete analyzer.
    window_frames = self.num_frames_required()
    self._vad_frames = window_frames
    # The factor 2 is presumably bytes per 16-bit sample — TODO confirm.
    self._vad_frames_num_bytes = window_frames * self._num_channels * 2

    # Frames divided by sample rate: the duration of one window
    # (assuming the sample rate is expressed per second).
    window_duration = window_frames / self._sample_rate

    # Express the configured start/stop durations as a number of windows.
    start_windows = round(self._params.start_secs / window_duration)
    stop_windows = round(self._params.stop_secs / window_duration)
    self._vad_start_frames = start_windows
    self._vad_stop_frames = stop_windows

    # Begin again from a clean slate: zeroed counters, quiet state.
    self._vad_starting_count = 0
    self._vad_stopping_count = 0
    self._vad_state: VADState = VADState.QUIET
|
||||
|
||||
def _get_smoothed_volume(self, audio: bytes) -> float:
    """Return the volume of ``audio`` after exponential smoothing.

    The raw volume comes from ``calculate_audio_volume`` and is combined
    with ``self._prev_volume`` by ``exp_smoothing`` using
    ``self._smoothing_factor``. This method does not update
    ``self._prev_volume`` itself.
    """
    raw_volume = calculate_audio_volume(audio, self._sample_rate)
    smoothed_volume = exp_smoothing(
        raw_volume, self._prev_volume, self._smoothing_factor
    )
    return smoothed_volume
|
||||
|
||||
Reference in New Issue
Block a user