Merge pull request #1148 from pipecat-ai/mb/fix-twilio-serializer
Fix sample rate handling in Twilio and Telnyx serializers
This commit is contained in:
@@ -5,6 +5,15 @@ All notable changes to **Pipecat** will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue with the `TwilioFrameSerializer` and `TelnyxFrameSerializer`
|
||||
where `frame.audio_out_sample_rate` was incorrectly used in place of
|
||||
`frame.audio_in_sample_rate`, which caused audio input detection to fail when
|
||||
using different input and output sample rates.
|
||||
|
||||
## [0.0.55] - 2025-02-05
|
||||
|
||||
### Added
|
||||
|
||||
@@ -31,8 +31,8 @@ from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializer
|
||||
|
||||
class TelnyxFrameSerializer(FrameSerializer):
|
||||
class InputParams(BaseModel):
|
||||
telnyx_sample_rate: Optional[int] = None
|
||||
sample_rate: Optional[int] = None
|
||||
telnyx_sample_rate: Optional[int] = None # Telnyx-side rate; when None, setup() falls back to frame.audio_in_sample_rate
|
||||
sample_rate: Optional[int] = None # Pipeline input rate
|
||||
inbound_encoding: str = "PCMU"
|
||||
outbound_encoding: str = "PCMU"
|
||||
|
||||
@@ -48,6 +48,9 @@ class TelnyxFrameSerializer(FrameSerializer):
|
||||
params.inbound_encoding = inbound_encoding
|
||||
self._params = params
|
||||
|
||||
self._telnyx_sample_rate = 0 # Placeholder; the Telnyx-side rate is resolved in setup()
|
||||
self._sample_rate = 0 # Pipeline input rate
|
||||
|
||||
self._resampler = create_default_resampler()
|
||||
|
||||
@property
|
||||
@@ -55,13 +58,15 @@ class TelnyxFrameSerializer(FrameSerializer):
|
||||
return FrameSerializerType.TEXT
|
||||
|
||||
async def setup(self, frame: StartFrame):
|
||||
# Configure rates for input path: Telnyx (8kHz encoded) -> Pipeline (PCM)
|
||||
self._telnyx_sample_rate = self._params.telnyx_sample_rate or frame.audio_in_sample_rate
|
||||
self._sample_rate = self._params.sample_rate or frame.audio_out_sample_rate
|
||||
self._sample_rate = self._params.sample_rate or frame.audio_in_sample_rate
|
||||
|
||||
async def serialize(self, frame: Frame) -> str | bytes | None:
|
||||
if isinstance(frame, AudioRawFrame):
|
||||
data = frame.audio
|
||||
|
||||
# Output: Convert PCM at frame's rate to 8kHz encoded for Telnyx
|
||||
if self._params.inbound_encoding == "PCMU":
|
||||
serialized_data = await pcm_to_ulaw(
|
||||
data, frame.sample_rate, self._telnyx_sample_rate, self._resampler
|
||||
@@ -92,6 +97,7 @@ class TelnyxFrameSerializer(FrameSerializer):
|
||||
payload_base64 = message["media"]["payload"]
|
||||
payload = base64.b64decode(payload_base64)
|
||||
|
||||
# Input: Convert Telnyx's 8kHz encoded audio to PCM at pipeline input rate
|
||||
if self._params.outbound_encoding == "PCMU":
|
||||
deserialized_data = await ulaw_to_pcm(
|
||||
payload,
|
||||
|
||||
@@ -27,15 +27,15 @@ from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializer
|
||||
|
||||
class TwilioFrameSerializer(FrameSerializer):
|
||||
class InputParams(BaseModel):
|
||||
twilio_sample_rate: Optional[int] = None
|
||||
sample_rate: Optional[int] = None
|
||||
twilio_sample_rate: Optional[int] = None # Twilio-side rate; when None, setup() falls back to frame.audio_in_sample_rate
|
||||
sample_rate: Optional[int] = None # Pipeline input rate
|
||||
|
||||
def __init__(self, stream_sid: str, params: InputParams = InputParams()):
|
||||
self._stream_sid = stream_sid
|
||||
self._params = params
|
||||
|
||||
self._twilio_sample_rate = 0
|
||||
self._sample_rate = 0
|
||||
self._twilio_sample_rate = 0 # Placeholder; the Twilio-side rate is resolved in setup()
|
||||
self._sample_rate = 0 # Pipeline input rate
|
||||
|
||||
self._resampler = create_default_resampler()
|
||||
|
||||
@@ -44,8 +44,9 @@ class TwilioFrameSerializer(FrameSerializer):
|
||||
return FrameSerializerType.TEXT
|
||||
|
||||
async def setup(self, frame: StartFrame):
|
||||
# Configure rates for input path: Twilio (8kHz μ-law) -> Pipeline (PCM)
|
||||
self._twilio_sample_rate = self._params.twilio_sample_rate or frame.audio_in_sample_rate
|
||||
self._sample_rate = self._params.sample_rate or frame.audio_out_sample_rate
|
||||
self._sample_rate = self._params.sample_rate or frame.audio_in_sample_rate
|
||||
|
||||
async def serialize(self, frame: Frame) -> str | bytes | None:
|
||||
if isinstance(frame, StartInterruptionFrame):
|
||||
@@ -54,6 +55,7 @@ class TwilioFrameSerializer(FrameSerializer):
|
||||
elif isinstance(frame, AudioRawFrame):
|
||||
data = frame.audio
|
||||
|
||||
# Output: Convert PCM at frame's rate to 8kHz μ-law for Twilio
|
||||
serialized_data = await pcm_to_ulaw(
|
||||
data, frame.sample_rate, self._twilio_sample_rate, self._resampler
|
||||
)
|
||||
@@ -75,6 +77,7 @@ class TwilioFrameSerializer(FrameSerializer):
|
||||
payload_base64 = message["media"]["payload"]
|
||||
payload = base64.b64decode(payload_base64)
|
||||
|
||||
# Input: Convert Twilio's 8kHz μ-law to PCM at pipeline input rate
|
||||
deserialized_data = await ulaw_to_pcm(
|
||||
payload, self._twilio_sample_rate, self._sample_rate, self._resampler
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user