diff --git a/examples/foundational/websocket-server/index.html b/examples/foundational/websocket-server/index.html
index 247ab9432..0b51e05e3 100644
--- a/examples/foundational/websocket-server/index.html
+++ b/examples/foundational/websocket-server/index.html
@@ -1,134 +1,197 @@
-
+
- WebSocket Audio Stream
-
+ Pipecat WebSocket Client Example
+
-
- WebSocket Audio Stream
+
+ Pipecat WebSocket Client Example
+ Loading, wait...
-
+
diff --git a/examples/foundational/websocket-server/server.py b/examples/foundational/websocket-server/server.py
index 7deef7572..45ac1352d 100644
--- a/examples/foundational/websocket-server/server.py
+++ b/examples/foundational/websocket-server/server.py
@@ -9,32 +9,37 @@ import asyncio
import os
import sys
-from loguru import logger
-from pipecat.frames.frames import Frame, TextFrame, TranscriptionFrame
+from pipecat.frames.frames import LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineTask
-
-from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
+from pipecat.processors.aggregators.llm_response import LLMAssistantResponseAggregator, LLMUserResponseAggregator
from pipecat.services.elevenlabs import ElevenLabsTTSService
+from pipecat.services.openai import OpenAILLMService
from pipecat.services.whisper import WhisperSTTService
-from pipecat.transports.network.websocket_server import WebsocketServerTransport
+from pipecat.transports.network.websocket_server import WebsocketServerParams, WebsocketServerTransport
+from pipecat.vad.silero import SileroVAD
+
+from loguru import logger
+
+from dotenv import load_dotenv
+load_dotenv(override=True)
logger.remove(0)
-logger.add(sys.stderr, level="DEBUG")
-
-
-class WhisperTranscriber(FrameProcessor):
- async def process_frame(self, frame: Frame, direction: FrameDirection):
- if isinstance(frame, TranscriptionFrame):
- print(f"Transcribed: {frame.text}")
- else:
- await self.push_frame(frame, direction)
+logger.add(sys.stderr, level="TRACE")
async def main():
async with aiohttp.ClientSession() as session:
- transport = WebsocketServerTransport()
+ transport = WebsocketServerTransport(params=WebsocketServerParams(add_wav_header=True))
+
+ vad = SileroVAD(audio_passthrough=True)
+
+ llm = OpenAILLMService(
+ api_key=os.getenv("OPENAI_API_KEY"),
+ model="gpt-4-turbo-preview")
+
+ stt = WhisperSTTService()
tts = ElevenLabsTTSService(
aiohttp_session=session,
@@ -42,19 +47,35 @@ async def main():
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
)
+ messages = [
+ {
+ "role": "system",
+ "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
+ },
+ ]
+
+ tma_in = LLMUserResponseAggregator(messages)
+ tma_out = LLMAssistantResponseAggregator(messages)
+
pipeline = Pipeline([
- transport.input(),
- WhisperSTTService(),
- WhisperTranscriber(),
- tts,
- transport.output(),
+ transport.input(), # Websocket input from client
+ vad, # VAD to detect user speech
+ stt, # Speech-To-Text
+ tma_in, # User responses
+ llm, # LLM
+ tts, # Text-To-Speech
+ transport.output(), # Websocket output to client
+ tma_out # LLM responses
])
task = PipelineTask(pipeline)
@transport.event_handler("on_client_connected")
async def on_client_connected(transport, client):
- await task.queue_frame(TextFrame("Hello there!"))
+ # Kick off the conversation.
+ messages.append(
+ {"role": "system", "content": "Please introduce yourself to the user."})
+ await task.queue_frames([LLMMessagesFrame(messages)])
runner = PipelineRunner()