Merge pull request #266 from pipecat-ai/aleix/silero-num-frames-fixes

vad: fix Silero VAD required number of frames
This commit is contained in:
Aleix Conchillo Flaqué
2024-06-28 11:25:55 -07:00
committed by GitHub
2 changed files with 17 additions and 1 deletions

View File

@@ -5,6 +5,19 @@ All notable changes to **pipecat** will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [0.0.35] - 2024-06-28
### Changed
- `FastAPIWebsocketParams` now requires a serializer.
- `TwilioFrameSerializer` now requires a `streamSid`.
### Fixed
- Silero VAD number of frames needs to be 512 for 16000 sample rate or 256 for
8000 sample rate.
## [0.0.34] - 2024-06-25
### Fixed

View File

@@ -36,6 +36,9 @@ class SileroVADAnalyzer(VADAnalyzer):
def __init__(self, sample_rate=16000, params: VADParams = VADParams()):
super().__init__(sample_rate=sample_rate, num_channels=1, params=params)
if sample_rate != 16000 and sample_rate != 8000:
raise Exception("Silero VAD sample rate needs to be 16000 or 8000")
logger.debug("Loading Silero VAD model...")
(self._model, utils) = torch.hub.load(
@@ -51,7 +54,7 @@ class SileroVADAnalyzer(VADAnalyzer):
#
def num_frames_required(self) -> int:
# Returns the number of frames this analyzer requires, as an int.
# NOTE(review): this is a rendered diff hunk with the +/- markers stripped, so
# two return statements appear back to back. The hunk header (-51,7 +54,7)
# indicates a one-line replacement: presumably the first return is the removed
# line and the second is the added one -- confirm against the actual file.
# Earlier computation: sample_rate / 100 is 10 ms worth of frames, times 4.
return int(self.sample_rate / 100) * 4 # 40ms
# Replacement: fixed counts of 512 at a 16000 sample rate and 256 otherwise;
# the constructor in this class rejects sample rates other than 16000 and 8000.
return 512 if self.sample_rate == 16000 else 256
def voice_confidence(self, buffer) -> float:
try: