diff --git a/examples/telnyx-chatbot/README.md b/examples/telnyx-chatbot/README.md index ce0178893..e7400940a 100644 --- a/examples/telnyx-chatbot/README.md +++ b/examples/telnyx-chatbot/README.md @@ -76,7 +76,7 @@ This project is a FastAPI-based chatbot that integrates with Telnyx to handle We ``` - In `templates/streams.xml`, replace `` with your ngrok URL (without `https://`) - The final URL should look like: `wss://abc123.ngrok.io/ws` - - The encoding (`bidirectionalCodec`) should be `PCMU` or `PCMA` depending on your needs. Based on selected encoding, set the outbound_encoding in the `TelnyxFrameSerializer` in `src/pipecat/serializers/telnyx.py` + - The encoding (`bidirectionalCodec`) should be `PCMU` or `PCMA` depending on your needs. Based on selected encoding, set the outbound_encoding in `server.py` when the bot is initialized. - The inbound encoding can be controlled from the application configuration for inbound calls and dial/transfer commands for outbound calls. ## Running the Application diff --git a/examples/telnyx-chatbot/bot.py b/examples/telnyx-chatbot/bot.py index 771147e0f..8156256a3 100644 --- a/examples/telnyx-chatbot/bot.py +++ b/examples/telnyx-chatbot/bot.py @@ -34,7 +34,7 @@ logger.remove(0) logger.add(sys.stderr, level="DEBUG") -async def run_bot(websocket_client, stream_id, encoding): +async def run_bot(websocket_client, stream_id, outbound_encoding, inbound_encoding): transport = FastAPIWebsocketTransport( websocket=websocket_client, params=FastAPIWebsocketParams( @@ -43,7 +43,7 @@ async def run_bot(websocket_client, stream_id, encoding): vad_enabled=True, vad_analyzer=SileroVADAnalyzer(), vad_audio_passthrough=True, - serializer=TelnyxFrameSerializer(stream_id, encoding), + serializer=TelnyxFrameSerializer(stream_id, outbound_encoding, inbound_encoding), ), ) @@ -55,9 +55,7 @@ async def run_bot(websocket_client, stream_id, encoding): api_key=os.getenv("ELEVENLABS_API_KEY"), voice_id="CwhRBWXzGAHq8TQ4Fs17", output_format="pcm_24000", - params=ElevenLabsTTSService.InputParams( - language=Language.EN - ) + params=ElevenLabsTTSService.InputParams(language=Language.EN), ) messages = [ diff --git a/examples/telnyx-chatbot/server.py b/examples/telnyx-chatbot/server.py index 27dafdae6..840e1157f 100644 --- a/examples/telnyx-chatbot/server.py +++ b/examples/telnyx-chatbot/server.py @@ -16,13 +16,11 @@ app.add_middleware( allow_headers=["*"], ) - @app.post("/") async def start_call(): print("POST TeXML") return HTMLResponse(content=open("templates/streams.xml").read(), media_type="application/xml") - @app.websocket("/ws") async def websocket_endpoint(websocket: WebSocket): await websocket.accept() @@ -31,10 +29,9 @@ async def websocket_endpoint(websocket: WebSocket): call_data = json.loads(await start_data.__anext__()) print(call_data, flush=True) stream_id = call_data["stream_id"] - encoding = call_data["start"]["media_format"]["encoding"] + outbound_encoding = call_data["start"]["media_format"]["encoding"] print("WebSocket connection accepted") - await run_bot(websocket, stream_id, encoding) - + await run_bot(websocket, stream_id, outbound_encoding, "PCMU") if __name__ == "__main__": uvicorn.run(app, host="0.0.0.0", port=8765) diff --git a/src/pipecat/audio/utils.py b/src/pipecat/audio/utils.py index 8f4807f1c..9aac3a712 100644 --- a/src/pipecat/audio/utils.py +++ b/src/pipecat/audio/utils.py @@ -84,6 +84,7 @@ def ulaw_to_pcm(ulaw_bytes: bytes, in_sample_rate: int, out_sample_rate: int): return out_pcm_bytes + def pcm_to_ulaw(pcm_bytes: bytes, in_sample_rate: int, out_sample_rate: int): # Resample in_pcm_bytes = resample_audio(pcm_bytes, in_sample_rate, out_sample_rate) @@ -93,8 +94,8 @@ def pcm_to_ulaw(pcm_bytes: bytes, in_sample_rate: int, out_sample_rate: int): return ulaw_bytes -def alaw_to_pcm(alaw_bytes: bytes, in_sample_rate: int, out_sample_rate: int) -> bytes: - + +def alaw_to_pcm(alaw_bytes: bytes, in_sample_rate: int, out_sample_rate: int) -> bytes: # Convert a-law to PCM in_pcm_bytes = audioop.alaw2lin(alaw_bytes, 2) @@ -103,6 +104,7 @@ def alaw_to_pcm(alaw_bytes: bytes, in_sample_rate: int, out_sample_rate: int) -> return out_pcm_bytes + def pcm_to_alaw(pcm_bytes: bytes, in_sample_rate: int, out_sample_rate: int): # Resample in_pcm_bytes = resample_audio(pcm_bytes, in_sample_rate, out_sample_rate) @@ -110,4 +112,4 @@ def pcm_to_alaw(pcm_bytes: bytes, in_sample_rate: int, out_sample_rate: int): # Convert PCM to μ-law alaw_bytes = audioop.lin2alaw(in_pcm_bytes, 2) - return alaw_bytes \ No newline at end of file + return alaw_bytes diff --git a/src/pipecat/serializers/telnyx.py b/src/pipecat/serializers/telnyx.py index 415674e69..11111afb4 100644 --- a/src/pipecat/serializers/telnyx.py +++ b/src/pipecat/serializers/telnyx.py @@ -28,9 +28,10 @@ class TelnyxFrameSerializer(FrameSerializer): inbound_encoding: str = "PCMU" outbound_encoding: str = "PCMU" - def __init__(self, stream_id: str, encoding: str, params: InputParams = InputParams()): + def __init__(self, stream_id: str, outbound_encoding: str, inbound_encoding: str, params: InputParams = InputParams()): self._stream_id = stream_id - params.inbound_encoding = encoding + params.outbound_encoding = outbound_encoding + params.inbound_encoding = inbound_encoding self._params = params @property @@ -41,10 +42,14 @@ class TelnyxFrameSerializer(FrameSerializer): if isinstance(frame, AudioRawFrame): data = frame.audio - if self._params.outbound_encoding == "PCMU": - serialized_data = pcm_to_ulaw(data, frame.sample_rate, self._params.telnyx_sample_rate) - elif self._params.outbound_encoding == "PCMA": - serialized_data = pcm_to_alaw(data, frame.sample_rate, self._params.telnyx_sample_rate) + if self._params.inbound_encoding == "PCMU": + serialized_data = pcm_to_ulaw( + data, frame.sample_rate, self._params.telnyx_sample_rate + ) + elif self._params.inbound_encoding == "PCMA": + serialized_data = pcm_to_alaw( + data, frame.sample_rate, self._params.telnyx_sample_rate + ) else: raise ValueError(f"Unsupported encoding: {self._params.encoding}") @@ -67,11 +72,11 @@ class TelnyxFrameSerializer(FrameSerializer): payload_base64 = message["media"]["payload"] payload = base64.b64decode(payload_base64) - if self._params.inbound_encoding == "PCMU": + if self._params.outbound_encoding == "PCMU": deserialized_data = ulaw_to_pcm( payload, self._params.telnyx_sample_rate, self._params.sample_rate ) - elif self._params.inbound_encoding == "PCMA": + elif self._params.outbound_encoding == "PCMA": deserialized_data = alaw_to_pcm( payload, self._params.telnyx_sample_rate, self._params.sample_rate )