Merge pull request #1148 from pipecat-ai/mb/fix-twilio-serializer

Fix sample rate handling in Twilio and Telnyx serializers
This commit is contained in:
Mark Backman
2025-02-06 10:25:13 -05:00
committed by GitHub
3 changed files with 26 additions and 8 deletions

View File

@@ -5,6 +5,15 @@ All notable changes to **Pipecat** will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [Unreleased]
### Fixed
- Fixed an issue with the `TwilioFrameSerializer` and `TelnyxFrameSerializer`
where `frame.audio_out_sample_rate` was incorrectly used in place of
`frame.audio_in_sample_rate`, which caused audio input detection to fail when
using different input and output sample rates.
## [0.0.55] - 2025-02-05
### Added

View File

@@ -31,8 +31,8 @@ from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializer
class TelnyxFrameSerializer(FrameSerializer):
class InputParams(BaseModel):
telnyx_sample_rate: Optional[int] = None
sample_rate: Optional[int] = None
telnyx_sample_rate: Optional[int] = None # Default Telnyx rate (8kHz)
sample_rate: Optional[int] = None # Pipeline input rate
inbound_encoding: str = "PCMU"
outbound_encoding: str = "PCMU"
@@ -48,6 +48,9 @@ class TelnyxFrameSerializer(FrameSerializer):
params.inbound_encoding = inbound_encoding
self._params = params
self._telnyx_sample_rate = 0 # Fixed rate for Telnyx (8kHz)
self._sample_rate = 0 # Pipeline input rate
self._resampler = create_default_resampler()
@property
@@ -55,13 +58,15 @@ class TelnyxFrameSerializer(FrameSerializer):
return FrameSerializerType.TEXT
async def setup(self, frame: StartFrame):
# Resolve sample rates: Telnyx side (nominally 8kHz encoded; uses frame.audio_in_sample_rate when not set in params) and pipeline input side (PCM)
self._telnyx_sample_rate = self._params.telnyx_sample_rate or frame.audio_in_sample_rate
self._sample_rate = self._params.sample_rate or frame.audio_out_sample_rate
self._sample_rate = self._params.sample_rate or frame.audio_in_sample_rate
async def serialize(self, frame: Frame) -> str | bytes | None:
if isinstance(frame, AudioRawFrame):
data = frame.audio
# Output: Convert PCM at frame's rate to encoded audio at the configured Telnyx rate (nominally 8kHz)
if self._params.inbound_encoding == "PCMU":
serialized_data = await pcm_to_ulaw(
data, frame.sample_rate, self._telnyx_sample_rate, self._resampler
@@ -92,6 +97,7 @@ class TelnyxFrameSerializer(FrameSerializer):
payload_base64 = message["media"]["payload"]
payload = base64.b64decode(payload_base64)
# Input: Convert Telnyx's 8kHz encoded audio to PCM at pipeline input rate
if self._params.outbound_encoding == "PCMU":
deserialized_data = await ulaw_to_pcm(
payload,

View File

@@ -27,15 +27,15 @@ from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializer
class TwilioFrameSerializer(FrameSerializer):
class InputParams(BaseModel):
twilio_sample_rate: Optional[int] = None
sample_rate: Optional[int] = None
twilio_sample_rate: Optional[int] = None # Default Twilio rate (8kHz)
sample_rate: Optional[int] = None # Pipeline input rate
def __init__(self, stream_sid: str, params: InputParams = InputParams()):
self._stream_sid = stream_sid
self._params = params
self._twilio_sample_rate = 0
self._sample_rate = 0
self._twilio_sample_rate = 0 # Fixed rate for Twilio (8kHz)
self._sample_rate = 0 # Pipeline input rate
self._resampler = create_default_resampler()
@@ -44,8 +44,9 @@ class TwilioFrameSerializer(FrameSerializer):
return FrameSerializerType.TEXT
async def setup(self, frame: StartFrame):
# Resolve sample rates: Twilio side (nominally 8kHz μ-law; uses frame.audio_in_sample_rate when not set in params) and pipeline input side (PCM)
self._twilio_sample_rate = self._params.twilio_sample_rate or frame.audio_in_sample_rate
self._sample_rate = self._params.sample_rate or frame.audio_out_sample_rate
self._sample_rate = self._params.sample_rate or frame.audio_in_sample_rate
async def serialize(self, frame: Frame) -> str | bytes | None:
if isinstance(frame, StartInterruptionFrame):
@@ -54,6 +55,7 @@ class TwilioFrameSerializer(FrameSerializer):
elif isinstance(frame, AudioRawFrame):
data = frame.audio
# Output: Convert PCM at frame's rate to μ-law at the configured Twilio rate (nominally 8kHz)
serialized_data = await pcm_to_ulaw(
data, frame.sample_rate, self._twilio_sample_rate, self._resampler
)
@@ -75,6 +77,7 @@ class TwilioFrameSerializer(FrameSerializer):
payload_base64 = message["media"]["payload"]
payload = base64.b64decode(payload_base64)
# Input: Convert Twilio's μ-law (configured Twilio rate, nominally 8kHz) to PCM at pipeline input rate
deserialized_data = await ulaw_to_pcm(
payload, self._twilio_sample_rate, self._sample_rate, self._resampler
)