audio(mixer): SoundfileMixer doesn't resample files anymore

This commit is contained in:
Aleix Conchillo Flaqué
2024-12-04 15:14:02 -08:00
parent 2a0e01898f
commit e0f7a8a9f4
2 changed files with 16 additions and 13 deletions

View File

@@ -34,6 +34,10 @@ async def on_audio_data(processor, audio, sample_rate, num_channels):
### Changed
- `SoundfileMixer` doesn't resample input files anymore to avoid startup
delays. The sample rate of the provided sound files now needs to match the
sample rate of the output transport.
- All input frames (text, audio, image, etc.) are now system frames. This means
they are processed immediately by all processors instead of being queued
internally.

View File

@@ -11,7 +11,6 @@ import numpy as np
from loguru import logger
from pipecat.audio.mixers.base_audio_mixer import BaseAudioMixer
from pipecat.audio.utils import resample_audio
from pipecat.frames.frames import MixerControlFrame, MixerEnableFrame, MixerUpdateSettingsFrame
try:
@@ -27,9 +26,8 @@ except ModuleNotFoundError as e:
class SoundfileMixer(BaseAudioMixer):
"""This is an audio mixer that mixes incoming audio with audio from a
file. It uses the soundfile library to load files so it supports multiple
formats. The audio files need to only have one channel (mono) but they can
have any sample rate that will be resampled to the output transport sample
rate.
formats. The audio files must have only one channel (mono) and their sample
rate must match the sample rate of the output transport.
Multiple files can be loaded, each with a different name. The
`MixerUpdateSettingsFrame` has the following settings available: `sound`
@@ -103,16 +101,17 @@ class SoundfileMixer(BaseAudioMixer):
def _load_sound_file(self, sound_name: str, file_name: str):
try:
logger.debug(f"Loading background sound from {file_name}")
logger.debug(f"Loading mixer sound from {file_name}")
sound, sample_rate = sf.read(file_name, dtype="int16")
audio = sound.tobytes()
if sample_rate != self._sample_rate:
logger.debug(f"Resampling background sound to {self._sample_rate}")
audio = resample_audio(audio, sample_rate, self._sample_rate)
# Convert from np to bytes again.
self._sounds[sound_name] = np.frombuffer(audio, dtype=np.int16)
if sample_rate == self._sample_rate:
audio = sound.tobytes()
# Convert from np to bytes again.
self._sounds[sound_name] = np.frombuffer(audio, dtype=np.int16)
else:
logger.warning(
f"Sound file {file_name} has incorrect sample rate {sample_rate} (should be {self._sample_rate})"
)
except Exception as e:
logger.error(f"Unable to open file {file_name}: {e}")
@@ -121,7 +120,7 @@ class SoundfileMixer(BaseAudioMixer):
file.
"""
if not self._mixing:
if not self._mixing or not self._current_sound in self._sounds:
return audio
audio_np = np.frombuffer(audio, dtype=np.int16)