Formatting adjusted and the encoding selection moved from TelnyxFrameSerializer to the websocket_endpoint function in server.py

2025-01-30 12:52:30 +01:00
parent 9c22bd8df1
commit b93e4ab9cb
5 changed files with 24 additions and 22 deletions
--- a/examples/telnyx-chatbot/README.md
+++ b/examples/telnyx-chatbot/README.md
@@ -76,7 +76,7 @@ This project is a FastAPI-based chatbot that integrates with Telnyx to handle We
     ```
   - In `templates/streams.xml`, replace `<your server url>` with your ngrok URL (without `https://`)
   - The final URL should look like: `wss://abc123.ngrok.io/ws`
-   - The encoding (`bidirectionalCodec`) should be `PCMU` or `PCMA` depending on your needs. Based on selected encoding, set the outbound_encoding in the `TelnyxFrameSerializer` in `src/pipecat/serializers/telnyx.py`
+   - The encoding (`bidirectionalCodec`) should be `PCMU` or `PCMA` depending on your needs. Based on the selected encoding, set `outbound_encoding` in `server.py` when the bot is initialized.
   - The inbound encoding can be controlled from the application configuration for inbound calls and dial/transfer commands for outbound calls.

 ## Running the Application
--- a/examples/telnyx-chatbot/bot.py
+++ b/examples/telnyx-chatbot/bot.py
@@ -34,7 +34,7 @@ logger.remove(0)
 logger.add(sys.stderr, level="DEBUG")


-async def run_bot(websocket_client, stream_id, encoding):
+async def run_bot(websocket_client, stream_id, outbound_encoding, inbound_encoding):
    transport = FastAPIWebsocketTransport(
        websocket=websocket_client,
        params=FastAPIWebsocketParams(
@@ -43,7 +43,7 @@ async def run_bot(websocket_client, stream_id, encoding):
            vad_enabled=True,
            vad_analyzer=SileroVADAnalyzer(),
            vad_audio_passthrough=True,
-            serializer=TelnyxFrameSerializer(stream_id, encoding),
+            serializer=TelnyxFrameSerializer(stream_id, outbound_encoding, inbound_encoding),
        ),
    )

@@ -55,9 +55,7 @@ async def run_bot(websocket_client, stream_id, encoding):
        api_key=os.getenv("ELEVENLABS_API_KEY"),
        voice_id="CwhRBWXzGAHq8TQ4Fs17",
        output_format="pcm_24000",
-        params=ElevenLabsTTSService.InputParams(
-            language=Language.EN
-        )
+        params=ElevenLabsTTSService.InputParams(language=Language.EN),
    )

    messages = [
--- a/examples/telnyx-chatbot/server.py
+++ b/examples/telnyx-chatbot/server.py
@@ -16,13 +16,11 @@ app.add_middleware(
    allow_headers=["*"],
 )

-
@app.post("/")
 async def start_call():
    print("POST TeXML")
    return HTMLResponse(content=open("templates/streams.xml").read(), media_type="application/xml")

-
@app.websocket("/ws")
 async def websocket_endpoint(websocket: WebSocket):
    await websocket.accept()
@@ -31,10 +29,9 @@ async def websocket_endpoint(websocket: WebSocket):
    call_data = json.loads(await start_data.__anext__())
    print(call_data, flush=True)
    stream_id = call_data["stream_id"]
-    encoding = call_data["start"]["media_format"]["encoding"]
+    outbound_encoding = call_data["start"]["media_format"]["encoding"]
    print("WebSocket connection accepted")
-    await run_bot(websocket, stream_id, encoding)
-
+    await run_bot(websocket, stream_id, outbound_encoding, "PCMU")

 if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8765)
--- a/src/pipecat/audio/utils.py
+++ b/src/pipecat/audio/utils.py
@@ -84,6 +84,7 @@ def ulaw_to_pcm(ulaw_bytes: bytes, in_sample_rate: int, out_sample_rate: int):
    
    return out_pcm_bytes

+
 def pcm_to_ulaw(pcm_bytes: bytes, in_sample_rate: int, out_sample_rate: int):
    # Resample
    in_pcm_bytes = resample_audio(pcm_bytes, in_sample_rate, out_sample_rate)
@@ -93,8 +94,8 @@ def pcm_to_ulaw(pcm_bytes: bytes, in_sample_rate: int, out_sample_rate: int):

    return ulaw_bytes

-def alaw_to_pcm(alaw_bytes: bytes, in_sample_rate: int, out_sample_rate: int) -> bytes:
-        
+
+def alaw_to_pcm(alaw_bytes: bytes, in_sample_rate: int, out_sample_rate: int) -> bytes:     
    # Convert a-law to PCM
    in_pcm_bytes = audioop.alaw2lin(alaw_bytes, 2)

@@ -103,6 +104,7 @@ def alaw_to_pcm(alaw_bytes: bytes, in_sample_rate: int, out_sample_rate: int) ->
    
    return out_pcm_bytes

+
 def pcm_to_alaw(pcm_bytes: bytes, in_sample_rate: int, out_sample_rate: int):
    # Resample
    in_pcm_bytes = resample_audio(pcm_bytes, in_sample_rate, out_sample_rate)
@@ -110,4 +112,4 @@ def pcm_to_alaw(pcm_bytes: bytes, in_sample_rate: int, out_sample_rate: int):
    # Convert PCM to μ-law
    alaw_bytes = audioop.lin2alaw(in_pcm_bytes, 2)

-    return alaw_bytes
+    return alaw_bytes
--- a/src/pipecat/serializers/telnyx.py
+++ b/src/pipecat/serializers/telnyx.py
@@ -28,9 +28,10 @@ class TelnyxFrameSerializer(FrameSerializer):
        inbound_encoding: str = "PCMU"
        outbound_encoding: str = "PCMU"

-    def __init__(self, stream_id: str, encoding: str, params: InputParams = InputParams()):
+    def __init__(self, stream_id: str, outbound_encoding: str, inbound_encoding: str, params: InputParams = InputParams()):
        self._stream_id = stream_id
-        params.inbound_encoding = encoding
+        params.outbound_encoding = outbound_encoding
+        params.inbound_encoding = inbound_encoding  
        self._params = params

    @property
@@ -41,10 +42,14 @@ class TelnyxFrameSerializer(FrameSerializer):
        if isinstance(frame, AudioRawFrame):
            data = frame.audio
            
-            if self._params.outbound_encoding == "PCMU":
-                serialized_data = pcm_to_ulaw(data, frame.sample_rate, self._params.telnyx_sample_rate)
-            elif self._params.outbound_encoding == "PCMA":
-                serialized_data = pcm_to_alaw(data, frame.sample_rate, self._params.telnyx_sample_rate)
+            if self._params.inbound_encoding == "PCMU":
+                serialized_data = pcm_to_ulaw(
+                    data, frame.sample_rate, self._params.telnyx_sample_rate
+                    )
+            elif self._params.inbound_encoding == "PCMA":
+                serialized_data = pcm_to_alaw(
+                    data, frame.sample_rate, self._params.telnyx_sample_rate
+                    )
            else:
                raise ValueError(f"Unsupported encoding: {self._params.encoding}")
            
@@ -67,11 +72,11 @@ class TelnyxFrameSerializer(FrameSerializer):
            payload_base64 = message["media"]["payload"]
            payload = base64.b64decode(payload_base64)

-            if self._params.inbound_encoding == "PCMU":
+            if self._params.outbound_encoding == "PCMU":
                deserialized_data = ulaw_to_pcm(
                    payload, self._params.telnyx_sample_rate, self._params.sample_rate
                )
-            elif self._params.inbound_encoding == "PCMA":
+            elif self._params.outbound_encoding == "PCMA":
                deserialized_data = alaw_to_pcm(
                    payload, self._params.telnyx_sample_rate, self._params.sample_rate
                )