From ba59736f873702a67f10430a296d46f55be630c7 Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Thu, 6 Feb 2025 09:55:15 -0500 Subject: [PATCH 1/2] fix: Use audio_in_sample_rate to deserialize data for TwilioFrameSerializer --- CHANGELOG.md | 8 ++++++++ src/pipecat/serializers/twilio.py | 13 ++++++++----- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2162047cd..88880656e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,14 @@ All notable changes to **Pipecat** will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Fixed + +- Fixed an issue with the `TwilioFrameSerializer` where + `frame.audio_out_sample_rate` was incorrectly used in place of + `frame.audio_in_sample_rate`. + ## [0.0.55] - 2025-02-05 ### Added diff --git a/src/pipecat/serializers/twilio.py b/src/pipecat/serializers/twilio.py index 0ca37c221..81f4d542d 100644 --- a/src/pipecat/serializers/twilio.py +++ b/src/pipecat/serializers/twilio.py @@ -27,15 +27,15 @@ from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializer class TwilioFrameSerializer(FrameSerializer): class InputParams(BaseModel): - twilio_sample_rate: Optional[int] = None - sample_rate: Optional[int] = None + twilio_sample_rate: Optional[int] = None # Twilio-side rate; None falls back to StartFrame.audio_in_sample_rate + sample_rate: Optional[int] = None # Pipeline input rate def __init__(self, stream_sid: str, params: InputParams = InputParams()): self._stream_sid = stream_sid self._params = params - self._twilio_sample_rate = 0 - self._sample_rate = 0 + self._twilio_sample_rate = 0 # Twilio-side rate; resolved in setup() + self._sample_rate = 0 # Pipeline input rate self._resampler = create_default_resampler() @@ -44,8 +44,9 @@ class TwilioFrameSerializer(FrameSerializer): return FrameSerializerType.TEXT async def setup(self, frame: 
StartFrame): + # Configure rates for input path: Twilio (8kHz μ-law) -> Pipeline (PCM) self._twilio_sample_rate = self._params.twilio_sample_rate or frame.audio_in_sample_rate - self._sample_rate = self._params.sample_rate or frame.audio_out_sample_rate + self._sample_rate = self._params.sample_rate or frame.audio_in_sample_rate async def serialize(self, frame: Frame) -> str | bytes | None: if isinstance(frame, StartInterruptionFrame): @@ -54,6 +55,7 @@ class TwilioFrameSerializer(FrameSerializer): elif isinstance(frame, AudioRawFrame): data = frame.audio + # Output: Convert PCM at frame's rate to 8kHz μ-law for Twilio serialized_data = await pcm_to_ulaw( data, frame.sample_rate, self._twilio_sample_rate, self._resampler ) @@ -75,6 +77,7 @@ class TwilioFrameSerializer(FrameSerializer): payload_base64 = message["media"]["payload"] payload = base64.b64decode(payload_base64) + # Input: Convert Twilio's 8kHz μ-law to PCM at pipeline input rate deserialized_data = await ulaw_to_pcm( payload, self._twilio_sample_rate, self._sample_rate, self._resampler ) From c8ccf13bc7c3b646f1bf5dee79e7157f65077be5 Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Thu, 6 Feb 2025 09:57:50 -0500 Subject: [PATCH 2/2] fix: Use audio_in_sample_rate to deserialize data for TelnyxFrameSerializer --- CHANGELOG.md | 7 ++++--- src/pipecat/serializers/telnyx.py | 12 +++++++++--- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 88880656e..e41abf532 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,9 +9,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed -- Fixed an issue with the `TwilioFrameSerializer` where - `frame.audio_out_sample_rate` was incorrectly used in place of - `frame.audio_in_sample_rate`. 
+- Fixed an issue with the `TwilioFrameSerializer` and `TelnyxFrameSerializer` + where `frame.audio_out_sample_rate` was incorrectly used in place of + `frame.audio_in_sample_rate`, which caused audio input detection to fail when + using different input and output sample rates. ## [0.0.55] - 2025-02-05 diff --git a/src/pipecat/serializers/telnyx.py b/src/pipecat/serializers/telnyx.py index 9bff7535f..df184cec3 100644 --- a/src/pipecat/serializers/telnyx.py +++ b/src/pipecat/serializers/telnyx.py @@ -31,8 +31,8 @@ from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializer class TelnyxFrameSerializer(FrameSerializer): class InputParams(BaseModel): - telnyx_sample_rate: Optional[int] = None - sample_rate: Optional[int] = None + telnyx_sample_rate: Optional[int] = None # Telnyx-side rate; None falls back to StartFrame.audio_in_sample_rate + sample_rate: Optional[int] = None # Pipeline input rate inbound_encoding: str = "PCMU" outbound_encoding: str = "PCMU" @@ -48,6 +48,9 @@ class TelnyxFrameSerializer(FrameSerializer): params.inbound_encoding = inbound_encoding self._params = params + self._telnyx_sample_rate = 0 # Telnyx-side rate; resolved in setup() + self._sample_rate = 0 # Pipeline input rate + self._resampler = create_default_resampler() @property @@ -55,13 +58,15 @@ class TelnyxFrameSerializer(FrameSerializer): return FrameSerializerType.TEXT async def setup(self, frame: StartFrame): + # Configure rates for input path: Telnyx (8kHz encoded) -> Pipeline (PCM) self._telnyx_sample_rate = self._params.telnyx_sample_rate or frame.audio_in_sample_rate - self._sample_rate = self._params.sample_rate or frame.audio_out_sample_rate + self._sample_rate = self._params.sample_rate or frame.audio_in_sample_rate async def serialize(self, frame: Frame) -> str | bytes | None: if isinstance(frame, AudioRawFrame): data = frame.audio + # Output: Convert PCM at frame's rate to 8kHz encoded for Telnyx if self._params.inbound_encoding == "PCMU": serialized_data = await pcm_to_ulaw( data, frame.sample_rate, 
self._telnyx_sample_rate, self._resampler @@ -92,6 +97,7 @@ class TelnyxFrameSerializer(FrameSerializer): payload_base64 = message["media"]["payload"] payload = base64.b64decode(payload_base64) + # Input: Convert Telnyx's 8kHz encoded audio to PCM at pipeline input rate if self._params.outbound_encoding == "PCMU": deserialized_data = await ulaw_to_pcm( payload,