Merge pull request #247 from pipecat-ai/aleix/twilio-updates

some twilio updates
2024-06-21 10:14:40 -07:00
parent e195941aa5 0075dd8386
commit f9ecce739e
16 changed files with 178 additions and 128 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ### Added

+- Added new `FastAPIWebsocketTransport`. This is a new websocket transport that
+  can be integrated with FastAPI websockets.
+
+- Added new `TwilioFrameSerializer`. This is a new serializer that knows how to
+  serialize and deserialize audio frames from Twilio.
+
 - Added Daily transport event: `on_dialout_answered`.  See
  https://reference-python.daily.co/api_reference.html#daily.EventHandler

@@ -16,6 +22,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ### Other

+- Added `twilio-chatbot`. This is an example that shows how to integrate Twilio
+  phone numbers with a Pipecat bot.
+
 - Updated `07f-interruptible-azure.py` to use `AzureLLMService`,
  `AzureSTTService` and `AzureTTSService`.

--- a/examples/README.md
+++ b/examples/README.md
@@ -32,14 +32,15 @@ Next, follow the steps in the README for each demo.

 ## Projects:

-| Project                                      | Description                                                                                                                                | Services                                       |
-| -------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------- |
-| [Simple Chatbot](simple-chatbot)             | Basic voice-driven conversational bot. A good starting point for learning the flow of the framework.                                       | Deepgram, OpenAI, Daily, Daily Prebuilt UI            |
-| [Storytelling Chatbot](storytelling-chatbot) | Stitches together multiple third-party services to create a collaborative storytime experience.                                            | Deepgram, ElevenLabs, Open AI, Fal, Daily, Custom UI  |
-| [Translation Chatbot](translation-chatbot)   | Listens for user speech, then translates that speech to Spanish and speaks the translation back. Demonstrates multi-participant use-cases. | Deepgram, Azure, OpenAI, Daily, Daily Prebuilt UI     |
-| [Moondream Chatbot](moondream-chatbot)       | Demonstrates how to add vision capabilities to GPT4. **Note: works best with a GPU**                                                       | Deepgram, OpenAI, Moondream, Daily, Daily Prebuilt UI |
-| Function-calling Chatbot (TBC)               | A chatbot that can call functions in response to user input.                                                                                | Deepgram, OpenAI, Fireworks, Daily, Daily Prebuilt UI |
-| [Dialin Chatbot](dialin-chatbot)             | A chatbot that connects to an incoming phone call from Daily or Twilio.                                                                                | Deepgram, OpenAI, ElevenLabs, Daily, Twilio |
+| Project                                      | Description                                                                                                                                | Services                                                          |
+|----------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------|
+| [Simple Chatbot](simple-chatbot)             | Basic voice-driven conversational bot. A good starting point for learning the flow of the framework.                                       | Deepgram, ElevenLabs, OpenAI, Daily, Daily Prebuilt UI            |
+| [Storytelling Chatbot](storytelling-chatbot) | Stitches together multiple third-party services to create a collaborative storytime experience.                                            | Deepgram, ElevenLabs, OpenAI, Fal, Daily, Custom UI               |
+| [Translation Chatbot](translation-chatbot)   | Listens for user speech, then translates that speech to Spanish and speaks the translation back. Demonstrates multi-participant use-cases. | Deepgram, Azure, OpenAI, Daily, Daily Prebuilt UI                 |
+| [Moondream Chatbot](moondream-chatbot)       | Demonstrates how to add vision capabilities to GPT4. **Note: works best with a GPU**                                                       | Deepgram, ElevenLabs, OpenAI, Moondream, Daily, Daily Prebuilt UI |
+| [Patient intake](patient-intake)             | A chatbot that can call functions in response to user input.                                                                               | Deepgram, ElevenLabs, OpenAI, Daily, Daily Prebuilt UI            |
+| [Dialin Chatbot](dialin-chatbot)             | A chatbot that connects to an incoming phone call from Daily or Twilio.                                                                    | Deepgram, ElevenLabs, OpenAI, Daily, Twilio                       |
+| [Twilio Chatbot](twilio-chatbot)             | A chatbot that connects to an incoming phone call from Twilio.                                                                             | Deepgram, ElevenLabs, OpenAI, Daily, Twilio                       |

 > [!IMPORTANT]
 > These example projects use Daily as a WebRTC transport and can be joined using their hosted Prebuilt UI.
--- a/examples/twilio-chatbot/README.md
+++ b/examples/twilio-chatbot/README.md
@@ -7,6 +7,7 @@ This project is a FastAPI-based chatbot that integrates with Twilio to handle We
 - [Features](#features)
 - [Requirements](#requirements)
 - [Installation](#installation)
+- [Configure Twilio URLs](#configure-twilio-urls)
 - [Running the Application](#running-the-application)
 - [Usage](#usage)

@@ -38,11 +39,19 @@ This project is a FastAPI-based chatbot that integrates with Twilio to handle We
    ```

 3. **Create .env**:
-    create .env based on .env.example
+    create .env based on env.example

 4. **Install ngrok**:
    Follow the instructions on the [ngrok website](https://ngrok.com/download) to download and install ngrok.

+## Configure Twilio URLs
+
+1. **Update the Twilio Webhook**:
+    Copy the ngrok URL and update your Twilio phone number webhook URL to `http://<ngrok_url>/start_call`.
+
+2. **Update the streams.xml**:
+    Copy the ngrok URL and update templates/streams.xml with `wss://<ngrok_url>/ws`.
+
 ## Running the Application

 ### Using Python
@@ -57,13 +66,6 @@ This project is a FastAPI-based chatbot that integrates with Twilio to handle We
    ```sh
    ngrok http 8765
    ```
-
-3. **Update the Twilio Webhook**:
-    Copy the ngrok URL and update your Twilio phone number webhook URL to `http://<ngrok_url>/start_call`.
-
-3. **Update the streams.xml**:
-    Copy the ngrok URL and update your .xml URL to `wss://<ngrok_url>/ws`.
-
 ### Using Docker

 1. **Build the Docker image**:
@@ -73,22 +75,8 @@ This project is a FastAPI-based chatbot that integrates with Twilio to handle We

 2. **Run the Docker container**:
    ```sh
-    docker build -t twilio-chatbot .                               
    docker run -it --rm -p 8765:8765 twilio-chatbot
    ```
-
-3. **Start ngrok**:
-    In a new terminal, start ngrok to tunnel the local server:
-    ```sh
-    ngrok http 8765
-    ```
-
-4. **Update the Twilio Webhook**:
-    Copy the ngrok URL and update your Twilio phone number webhook URL to `http://<ngrok_url>/start_call`.
-
-5. **Update the streams.xml**:
-    Copy the ngrok URL and update your .xml URL to `wss://<ngrok_url>/ws`.
-
 ## Usage

 To start a call, simply make a call to your Twilio phone number. The webhook URL will direct the call to your FastAPI application, which will handle it accordingly.
--- a/examples/twilio-chatbot/test_bot.py
+++ b/examples/twilio-chatbot/test_bot.py
@@ -2,7 +2,7 @@ import aiohttp
 import os
 import sys

-from pipecat.frames.frames import LLMMessagesFrame, Frame, AudioRawFrame
+from pipecat.frames.frames import EndFrame, LLMMessagesFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -10,8 +10,6 @@ from pipecat.processors.aggregators.llm_response import (
    LLMAssistantResponseAggregator,
    LLMUserResponseAggregator
 )
-from pipecat.processors.frame_processor import FrameProcessor, FrameDirection
-
 from pipecat.services.openai import OpenAILLMService
 from pipecat.services.deepgram import DeepgramSTTService
 from pipecat.services.elevenlabs import ElevenLabsTTSService
@@ -32,10 +30,8 @@ async def run_bot(websocket_client):
        transport = FastAPIWebsocketTransport(
            websocket=websocket_client,
            params=FastAPIWebsocketParams(
-                audio_in_enabled=True,
                audio_out_enabled=True,
                add_wav_header=False,
-                transcription_enabled=False,
                vad_enabled=True,
                vad_analyzer=SileroVADAnalyzer(),
                vad_audio_passthrough=True
@@ -57,7 +53,7 @@ async def run_bot(websocket_client):
        messages = [
            {
                "role": "system",
-                "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
+                "content": "You are a helpful LLM in an audio call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
            },
        ]

@@ -83,6 +79,10 @@ async def run_bot(websocket_client):
                {"role": "system", "content": "Please introduce yourself to the user."})
            await task.queue_frames([LLMMessagesFrame(messages)])

+        @transport.event_handler("on_client_disconnected")
+        async def on_client_disconnected(transport, client):
+            await task.queue_frames([EndFrame()])
+
        runner = PipelineRunner(handle_sigint=False)

        await runner.run(task)
--- a/examples/twilio-chatbot/.env.example
+++ b/examples/twilio-chatbot/.env.example
--- a/examples/twilio-chatbot/server.py
+++ b/examples/twilio-chatbot/server.py
@@ -1,8 +1,10 @@
-from fastapi import FastAPI, WebSocket, WebSocketDisconnect
-from fastapi.middleware.cors import CORSMiddleware
 import uvicorn
+
+from fastapi import FastAPI, WebSocket
+from fastapi.middleware.cors import CORSMiddleware
 from starlette.responses import HTMLResponse
-from test_bot import run_bot
+
+from bot import run_bot

 app = FastAPI()

--- a/examples/twilio-chatbot/templates/streams.xml
+++ b/examples/twilio-chatbot/templates/streams.xml
@@ -4,4 +4,4 @@
    <Stream url="wss://<your server url>/ws"></Stream>
  </Connect>
  <Pause length="40"/>
-</Response>
+</Response>
--- a/linux-py3.10-requirements.txt
+++ b/linux-py3.10-requirements.txt
@@ -81,7 +81,7 @@ fal-client==0.4.0
    # via pipecat-ai (pyproject.toml)
 faster-whisper==1.0.2
    # via pipecat-ai (pyproject.toml)
-filelock==3.15.1
+filelock==3.15.3
    # via
    #   huggingface-hub
    #   pyht
@@ -113,7 +113,7 @@ google-api-core[grpc]==2.19.0
    #   google-ai-generativelanguage
    #   google-api-python-client
    #   google-generativeai
-google-api-python-client==2.133.0
+google-api-python-client==2.134.0
    # via google-generativeai
 google-auth==2.30.0
    # via
@@ -157,7 +157,7 @@ httpx==0.27.0
    #   openpipe
 httpx-sse==0.4.0
    # via fal-client
-huggingface-hub==0.23.3
+huggingface-hub==0.23.4
    # via
    #   faster-whisper
    #   timm
@@ -183,23 +183,23 @@ jsonpatch==1.33
    # via langchain-core
 jsonpointer==3.0.0
    # via jsonpatch
-langchain==0.2.3
+langchain==0.2.5
    # via
    #   langchain-community
    #   pipecat-ai (pyproject.toml)
-langchain-community==0.2.4
+langchain-community==0.2.5
    # via pipecat-ai (pyproject.toml)
-langchain-core==0.2.5
+langchain-core==0.2.9
    # via
    #   langchain
    #   langchain-community
    #   langchain-openai
    #   langchain-text-splitters
-langchain-openai==0.1.8
+langchain-openai==0.1.9
    # via pipecat-ai (pyproject.toml)
 langchain-text-splitters==0.2.1
    # via langchain
-langsmith==0.1.77
+langsmith==0.1.81
    # via
    #   langchain
    #   langchain-community
@@ -273,9 +273,9 @@ openai==1.26.0
    #   pipecat-ai (pyproject.toml)
 openpipe==4.14.0
    # via pipecat-ai (pyproject.toml)
-orjson==3.10.4
+orjson==3.10.5
    # via langsmith
-packaging==23.2
+packaging==24.1
    # via
    #   huggingface-hub
    #   langchain-core
@@ -289,7 +289,7 @@ pillow==10.3.0
    #   torchvision
 pluggy==1.5.0
    # via pytest
-proto-plus==1.23.0
+proto-plus==1.24.0
    # via
    #   google-ai-generativelanguage
    #   google-api-core
@@ -380,7 +380,7 @@ sniffio==1.3.1
    #   openai
 sounddevice==0.4.7
    # via pipecat-ai (pyproject.toml)
-sqlalchemy==2.0.30
+sqlalchemy==2.0.31
    # via
    #   langchain
    #   langchain-community
@@ -388,7 +388,7 @@ sympy==1.12.1
    # via
    #   onnxruntime
    #   torch
-tenacity==8.3.0
+tenacity==8.4.1
    # via
    #   langchain
    #   langchain-community
@@ -442,7 +442,7 @@ typing-inspect==0.9.0
    # via dataclasses-json
 uritemplate==4.1.1
    # via google-api-python-client
-urllib3==2.2.1
+urllib3==2.2.2
    # via requests
 verboselogs==1.7
    # via deepgram-sdk
--- a/macos-py3.10-requirements.txt
+++ b/macos-py3.10-requirements.txt
@@ -81,7 +81,7 @@ fal-client==0.4.0
    # via pipecat-ai (pyproject.toml)
 faster-whisper==1.0.2
    # via pipecat-ai (pyproject.toml)
-filelock==3.15.1
+filelock==3.15.3
    # via
    #   huggingface-hub
    #   pyht
@@ -112,7 +112,7 @@ google-api-core[grpc]==2.19.0
    #   google-ai-generativelanguage
    #   google-api-python-client
    #   google-generativeai
-google-api-python-client==2.133.0
+google-api-python-client==2.134.0
    # via google-generativeai
 google-auth==2.30.0
    # via
@@ -154,7 +154,7 @@ httpx==0.27.0
    #   openpipe
 httpx-sse==0.4.0
    # via fal-client
-huggingface-hub==0.23.3
+huggingface-hub==0.23.4
    # via
    #   faster-whisper
    #   timm
@@ -180,23 +180,23 @@ jsonpatch==1.33
    # via langchain-core
 jsonpointer==3.0.0
    # via jsonpatch
-langchain==0.2.3
+langchain==0.2.5
    # via
    #   langchain-community
    #   pipecat-ai (pyproject.toml)
-langchain-community==0.2.4
+langchain-community==0.2.5
    # via pipecat-ai (pyproject.toml)
-langchain-core==0.2.5
+langchain-core==0.2.9
    # via
    #   langchain
    #   langchain-community
    #   langchain-openai
    #   langchain-text-splitters
-langchain-openai==0.1.8
+langchain-openai==0.1.9
    # via pipecat-ai (pyproject.toml)
 langchain-text-splitters==0.2.1
    # via langchain
-langsmith==0.1.77
+langsmith==0.1.81
    # via
    #   langchain
    #   langchain-community
@@ -239,9 +239,9 @@ openai==1.26.0
    #   pipecat-ai (pyproject.toml)
 openpipe==4.14.0
    # via pipecat-ai (pyproject.toml)
-orjson==3.10.4
+orjson==3.10.5
    # via langsmith
-packaging==23.2
+packaging==24.1
    # via
    #   huggingface-hub
    #   langchain-core
@@ -255,7 +255,7 @@ pillow==10.3.0
    #   torchvision
 pluggy==1.5.0
    # via pytest
-proto-plus==1.23.0
+proto-plus==1.24.0
    # via
    #   google-ai-generativelanguage
    #   google-api-core
@@ -346,7 +346,7 @@ sniffio==1.3.1
    #   openai
 sounddevice==0.4.7
    # via pipecat-ai (pyproject.toml)
-sqlalchemy==2.0.30
+sqlalchemy==2.0.31
    # via
    #   langchain
    #   langchain-community
@@ -354,7 +354,7 @@ sympy==1.12.1
    # via
    #   onnxruntime
    #   torch
-tenacity==8.3.0
+tenacity==8.4.1
    # via
    #   langchain
    #   langchain-community
@@ -406,7 +406,7 @@ typing-inspect==0.9.0
    # via dataclasses-json
 uritemplate==4.1.1
    # via google-api-python-client
-urllib3==2.2.1
+urllib3==2.2.2
    # via requests
 verboselogs==1.7
    # via deepgram-sdk
--- a/src/pipecat/serializers/TwilioFrameSerializer.py
+++ b/src/pipecat/serializers/TwilioFrameSerializer.py
@@ -1,42 +0,0 @@
-import base64
-import json
-
-from pipecat.frames.frames import AudioRawFrame, Frame
-from pipecat.serializers.base_serializer import FrameSerializer
-from pipecat.utils.audio import ulaw_8000_to_pcm_16000, pcm_16000_to_ulaw_8000
-
-
-class TwilioFrameSerializer(FrameSerializer):
-    SERIALIZABLE_TYPES = {
-        AudioRawFrame: "audio",
-    }
-
-    def __init__(self):
-        self.sid = None
-
-
-    def serialize(self, frame: AudioRawFrame) -> dict:
-        data = frame.audio
-
-        serialized_data = pcm_16000_to_ulaw_8000(data)
-        payload = base64.b64encode(serialized_data).decode('utf-8')
-        answer_dict = {"event": "media",
-                       "streamSid": self.sid,
-                       "media": {"payload": payload}}
-
-        return answer_dict
-
-    def deserialize(self, message: bytes) -> AudioRawFrame | None:
-        data = json.loads(message)
-        if not self.sid:
-            self.sid = data['streamSid'] if data.get("streamSid") else None
-
-        if data['event'] != 'media':
-            return None
-        else:
-            payload_base64 = data['media']['payload']
-            payload = base64.b64decode(payload_base64)
-
-            deserialized_data = ulaw_8000_to_pcm_16000(payload)
-            audio_frame = AudioRawFrame(audio=deserialized_data, num_channels=1, sample_rate=16000)
-            return audio_frame
--- a/src/pipecat/serializers/base_serializer.py
+++ b/src/pipecat/serializers/base_serializer.py
@@ -12,9 +12,9 @@ from pipecat.frames.frames import Frame
 class FrameSerializer(ABC):

    @abstractmethod
-    def serialize(self, frame: Frame) -> bytes:
+    def serialize(self, frame: Frame) -> str | bytes | None:
        pass

    @abstractmethod
-    def deserialize(self, data: bytes) -> Frame | None:
+    def deserialize(self, data: str | bytes) -> Frame | None:
        pass
--- a/src/pipecat/serializers/protobuf.py
+++ b/src/pipecat/serializers/protobuf.py
@@ -26,7 +26,7 @@ class ProtobufFrameSerializer(FrameSerializer):
    def __init__(self):
        pass

-    def serialize(self, frame: Frame) -> bytes:
+    def serialize(self, frame: Frame) -> str | bytes | None:
        proto_frame = frame_protos.Frame()
        if type(frame) not in self.SERIALIZABLE_TYPES:
            raise ValueError(
@@ -41,7 +41,7 @@ class ProtobufFrameSerializer(FrameSerializer):
        result = proto_frame.SerializeToString()
        return result

-    def deserialize(self, data: bytes) -> Frame | None:
+    def deserialize(self, data: str | bytes) -> Frame | None:
        """Returns a Frame object from a Frame protobuf. Used to convert frames
        passed over the wire as protobufs to Frame objects used in pipelines
        and frame processors.
--- a/src/pipecat/serializers/twilio.py
+++ b/src/pipecat/serializers/twilio.py
@@ -0,0 +1,55 @@
+#
+# Copyright (c) 2024, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import base64
+import json
+
+from pipecat.frames.frames import AudioRawFrame, Frame
+from pipecat.serializers.base_serializer import FrameSerializer
+from pipecat.utils.audio import ulaw_8000_to_pcm_16000, pcm_16000_to_ulaw_8000
+
+
+class TwilioFrameSerializer(FrameSerializer):
+    SERIALIZABLE_TYPES = {
+        AudioRawFrame: "audio",
+    }
+
+    def __init__(self):
+        self._sid = None
+
+    def serialize(self, frame: Frame) -> str | bytes | None:
+        if not isinstance(frame, AudioRawFrame):
+            return None
+
+        data = frame.audio
+
+        serialized_data = pcm_16000_to_ulaw_8000(data)
+        payload = base64.b64encode(serialized_data).decode("utf-8")
+        answer = {
+            "event": "media",
+            "streamSid": self._sid,
+            "media": {
+                "payload": payload
+            }
+        }
+
+        return json.dumps(answer)
+
+    def deserialize(self, data: str | bytes) -> Frame | None:
+        message = json.loads(data)
+
+        if not self._sid:
+            self._sid = message["streamSid"] if "streamSid" in message else None
+
+        if message["event"] != "media":
+            return None
+        else:
+            payload_base64 = message["media"]["payload"]
+            payload = base64.b64decode(payload_base64)
+
+            deserialized_data = ulaw_8000_to_pcm_16000(payload)
+            audio_frame = AudioRawFrame(audio=deserialized_data, num_channels=1, sample_rate=16000)
+            return audio_frame
--- a/src/pipecat/transports/network/fastapi_websocket.py
+++ b/src/pipecat/transports/network/fastapi_websocket.py
@@ -1,12 +1,18 @@
+#
+# Copyright (c) 2024, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+
 import asyncio
 import io
 import wave
-from fastapi import WebSocket

 from typing import Awaitable, Callable
 from pydantic.main import BaseModel

-from pipecat.serializers.TwilioFrameSerializer import TwilioFrameSerializer
+from pipecat.serializers.twilio import TwilioFrameSerializer
 from pipecat.frames.frames import AudioRawFrame, StartFrame
 from pipecat.processors.frame_processor import FrameProcessor
 from pipecat.serializers.base_serializer import FrameSerializer
@@ -16,6 +22,15 @@ from pipecat.transports.base_transport import BaseTransport, TransportParams

 from loguru import logger

+try:
+    from fastapi import WebSocket
+    from starlette.websockets import WebSocketState
+except ModuleNotFoundError as e:
+    logger.error(f"Exception: {e}")
+    logger.error(
+        "In order to use FastAPI websockets, you need to `pip install pipecat-ai[websocket]`.")
+    raise Exception(f"Missing module: {e}")
+

 class FastAPIWebsocketParams(TransportParams):
    add_wav_header: bool = False
@@ -30,7 +45,12 @@ class FastAPIWebsocketCallbacks(BaseModel):

 class FastAPIWebsocketInputTransport(BaseInputTransport):

-    def __init__(self, websocket: WebSocket, params: FastAPIWebsocketParams, callbacks: FastAPIWebsocketCallbacks, **kwargs):
+    def __init__(
+            self,
+            websocket: WebSocket,
+            params: FastAPIWebsocketParams,
+            callbacks: FastAPIWebsocketCallbacks,
+            **kwargs):
        super().__init__(params, **kwargs)

        self._websocket = websocket
@@ -43,7 +63,8 @@ class FastAPIWebsocketInputTransport(BaseInputTransport):
        self._receive_task = self.get_event_loop().create_task(self._receive_messages())

    async def stop(self):
-        await self._websocket.close()
+        if self._websocket.client_state != WebSocketState.DISCONNECTED:
+            await self._websocket.close()
        await super().stop()

    async def _receive_messages(self):
@@ -58,6 +79,7 @@ class FastAPIWebsocketInputTransport(BaseInputTransport):

        await self._callbacks.on_client_disconnected(self._websocket)

+
 class FastAPIWebsocketOutputTransport(BaseOutputTransport):

    def __init__(self, websocket: WebSocket, params: FastAPIWebsocketParams, **kwargs):
@@ -92,17 +114,23 @@ class FastAPIWebsocketOutputTransport(BaseOutputTransport):
                frame = wav_frame

            payload = self._params.serializer.serialize(frame)
-
-            future = asyncio.run_coroutine_threadsafe(
-                self._websocket.send_json(payload), self.get_event_loop())
-            future.result()
+            if payload:
+                future = asyncio.run_coroutine_threadsafe(
+                    self._websocket.send_text(payload), self.get_event_loop())
+                future.result()

            self._audio_buffer = self._audio_buffer[self._params.audio_frame_size:]


 class FastAPIWebsocketTransport(BaseTransport):

-    def __init__(self, websocket: WebSocket, params: FastAPIWebsocketParams = FastAPIWebsocketParams(), input_name: str | None = None, output_name: str | None = None, loop: asyncio.AbstractEventLoop | None = None):
+    def __init__(
+            self,
+            websocket: WebSocket,
+            params: FastAPIWebsocketParams = FastAPIWebsocketParams(),
+            input_name: str | None = None,
+            output_name: str | None = None,
+            loop: asyncio.AbstractEventLoop | None = None):
        super().__init__(input_name=input_name, output_name=output_name, loop=loop)
        self._params = params

@@ -111,8 +139,10 @@ class FastAPIWebsocketTransport(BaseTransport):
            on_client_disconnected=self._on_client_disconnected
        )

-        self._input = FastAPIWebsocketInputTransport(websocket, self._params, self._callbacks, name=self._input_name)
-        self._output = FastAPIWebsocketOutputTransport(websocket, self._params, name=self._output_name)
+        self._input = FastAPIWebsocketInputTransport(
+            websocket, self._params, self._callbacks, name=self._input_name)
+        self._output = FastAPIWebsocketOutputTransport(
+            websocket, self._params, name=self._output_name)

        # Register supported handlers. The user will only be able to register
        # these handlers.
--- a/src/pipecat/transports/network/websocket_server.py
+++ b/src/pipecat/transports/network/websocket_server.py
@@ -4,11 +4,9 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #

-
 import asyncio
 import io
 import wave
-import websockets

 from typing import Awaitable, Callable
 from pydantic.main import BaseModel
@@ -23,6 +21,13 @@ from pipecat.transports.base_transport import BaseTransport, TransportParams

 from loguru import logger

+try:
+    import websockets
+except ModuleNotFoundError as e:
+    logger.error(f"Exception: {e}")
+    logger.error("In order to use websockets, you need to `pip install pipecat-ai[websocket]`.")
+    raise Exception(f"Missing module: {e}")
+

 class WebsocketServerParams(TransportParams):
    add_wav_header: bool = False
--- a/src/pipecat/utils/audio.py
+++ b/src/pipecat/utils/audio.py
@@ -33,6 +33,7 @@ def calculate_audio_volume(audio: bytes, sample_rate: int) -> float:
 def exp_smoothing(value: float, prev_value: float, factor: float) -> float:
    return prev_value + factor * (value - prev_value)

+
 def ulaw_8000_to_pcm_16000(ulaw_8000_bytes):
    # Convert μ-law to PCM
    pcm_8000_bytes = audioop.ulaw2lin(ulaw_8000_bytes, 2)
@@ -42,6 +43,7 @@ def ulaw_8000_to_pcm_16000(ulaw_8000_bytes):

    return pcm_16000_bytes

+
 def pcm_16000_to_ulaw_8000(pcm_16000_bytes):
    # Resample from 16000 Hz to 8000 Hz
    pcm_8000_bytes = audioop.ratecv(pcm_16000_bytes, 2, 1, 16000, 8000, None)[0]
@@ -49,4 +51,4 @@ def pcm_16000_to_ulaw_8000(pcm_16000_bytes):
    # Convert PCM to μ-law
    ulaw_8000_bytes = audioop.lin2ulaw(pcm_8000_bytes, 2)

-    return ulaw_8000_bytes
+    return ulaw_8000_bytes