Merge pull request #266 from pipecat-ai/aleix/silero-num-frames-fixes
vad: fix Silero VAD required number of frames
This commit is contained in:
13
CHANGELOG.md
13
CHANGELOG.md
@@ -5,6 +5,19 @@ All notable changes to **pipecat** will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [0.0.35] - 2024-06-28
|
||||
|
||||
### Changed
|
||||
|
||||
- `FastAPIWebsocketParams` now requires a serializer.
|
||||
|
||||
- `TwilioFrameSerializer` now requires a `streamSid`.
|
||||
|
||||
### Fixed
|
||||
|
||||
- The number of frames passed to Silero VAD needs to be 512 for a 16000 Hz sample rate or 256 for
|
||||
an 8000 Hz sample rate.
|
||||
|
||||
## [0.0.34] - 2024-06-25
|
||||
|
||||
### Fixed
|
||||
|
||||
@@ -36,6 +36,9 @@ class SileroVADAnalyzer(VADAnalyzer):
|
||||
def __init__(self, sample_rate=16000, params: VADParams = VADParams()):
|
||||
super().__init__(sample_rate=sample_rate, num_channels=1, params=params)
|
||||
|
||||
if sample_rate != 16000 and sample_rate != 8000:
|
||||
raise Exception("Silero VAD sample rate needs to be 16000 or 8000")
|
||||
|
||||
logger.debug("Loading Silero VAD model...")
|
||||
|
||||
(self._model, utils) = torch.hub.load(
|
||||
@@ -51,7 +54,7 @@ class SileroVADAnalyzer(VADAnalyzer):
|
||||
#
|
||||
|
||||
def num_frames_required(self) -> int:
|
||||
return int(self.sample_rate / 100) * 4 # 40ms
|
||||
return 512 if self.sample_rate == 16000 else 256
|
||||
|
||||
def voice_confidence(self, buffer) -> float:
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user