Merge pull request #1057 from pipecat-ai/aleix/replace-resampy-soxr

Improve audio resampling by switching from resampy to soxr.
2025-01-21 17:52:49 -08:00
parent 20d5824e56 f23baa78d8
commit f89b9ec23f
4 changed files with 8 additions and 4 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,6 +16,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 - Fixed a type error when using `voice_settings` in `ElevenLabsHttpTTSService`.

+### Performance
+
+- Replaced audio resampling library `resampy` with `soxr`.
+
 ## [0.0.53] - 2025-01-18

 ### Added
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,7 +32,7 @@ dependencies = [
    "protobuf~=5.29.3",
    "pydantic~=2.10.5",
    "pyloudnorm~=0.1.1",
-    "resampy~=0.4.3"
+    "soxr~=0.5.0"
 ]

 [project.urls]
--- a/src/pipecat/audio/utils.py
+++ b/src/pipecat/audio/utils.py
@@ -8,14 +8,14 @@ import audioop

 import numpy as np
 import pyloudnorm as pyln
-import resampy
+import soxr


 def resample_audio(audio: bytes, original_rate: int, target_rate: int) -> bytes:
    if original_rate == target_rate:
        return audio
    audio_data = np.frombuffer(audio, dtype=np.int16)
-    resampled_audio = resampy.resample(audio_data, original_rate, target_rate)
+    resampled_audio = soxr.resample(audio_data, original_rate, target_rate)
    return resampled_audio.astype(np.int16).tobytes()


--- a/test-requirements.txt
+++ b/test-requirements.txt
@@ -22,8 +22,8 @@ pydantic~=2.8.2
 pyloudnorm~=0.1.1
 pyht~=0.1.4
 python-dotenv~=1.0.1
-resampy~=0.4.3
 silero-vad~=5.1
+soxr~=0.5.0
 together~=1.2.7
 transformers~=4.44.0
 websockets~=13.1