From 5f45a9d90f0fffddc8a7310eae8dfe088b939c25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Wed, 29 May 2024 16:21:23 -0700 Subject: [PATCH] examples: websocket-server updates --- .../foundational/websocket-server/index.html | 267 +++++++++++------- .../foundational/websocket-server/server.py | 63 +++-- 2 files changed, 207 insertions(+), 123 deletions(-) diff --git a/examples/foundational/websocket-server/index.html b/examples/foundational/websocket-server/index.html index 247ab9432..0b51e05e3 100644 --- a/examples/foundational/websocket-server/index.html +++ b/examples/foundational/websocket-server/index.html @@ -1,134 +1,197 @@ - + - WebSocket Audio Stream - + Pipecat WebSocket Client Example + - -

WebSocket Audio Stream

+ +

Pipecat WebSocket Client Example

+

Loading, wait...

- + diff --git a/examples/foundational/websocket-server/server.py b/examples/foundational/websocket-server/server.py index 7deef7572..45ac1352d 100644 --- a/examples/foundational/websocket-server/server.py +++ b/examples/foundational/websocket-server/server.py @@ -9,32 +9,37 @@ import asyncio import os import sys -from loguru import logger -from pipecat.frames.frames import Frame, TextFrame, TranscriptionFrame +from pipecat.frames.frames import LLMMessagesFrame from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineTask - -from pipecat.processors.frame_processor import FrameDirection, FrameProcessor +from pipecat.processors.aggregators.llm_response import LLMAssistantResponseAggregator, LLMUserResponseAggregator from pipecat.services.elevenlabs import ElevenLabsTTSService +from pipecat.services.openai import OpenAILLMService from pipecat.services.whisper import WhisperSTTService -from pipecat.transports.network.websocket_server import WebsocketServerTransport +from pipecat.transports.network.websocket_server import WebsocketServerParams, WebsocketServerTransport +from pipecat.vad.silero import SileroVAD + +from loguru import logger + +from dotenv import load_dotenv +load_dotenv(override=True) logger.remove(0) -logger.add(sys.stderr, level="DEBUG") - - -class WhisperTranscriber(FrameProcessor): - async def process_frame(self, frame: Frame, direction: FrameDirection): - if isinstance(frame, TranscriptionFrame): - print(f"Transcribed: {frame.text}") - else: - await self.push_frame(frame, direction) +logger.add(sys.stderr, level="TRACE") async def main(): async with aiohttp.ClientSession() as session: - transport = WebsocketServerTransport() + transport = WebsocketServerTransport(params=WebsocketServerParams(add_wav_header=True)) + + vad = SileroVAD(audio_passthrough=True) + + llm = OpenAILLMService( + api_key=os.getenv("OPENAI_API_KEY"), + model="gpt-4-turbo-preview") + + stt = WhisperSTTService() tts = ElevenLabsTTSService( aiohttp_session=session, @@ -42,19 +47,35 @@ async def main(): voice_id=os.getenv("ELEVENLABS_VOICE_ID"), ) + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.", + }, + ] + + tma_in = LLMUserResponseAggregator(messages) + tma_out = LLMAssistantResponseAggregator(messages) + pipeline = Pipeline([ - transport.input(), - WhisperSTTService(), - WhisperTranscriber(), - tts, - transport.output(), + transport.input(), # Websocket input from client + vad, # VAD to detect user speech + stt, # Speech-To-Text + tma_in, # User responses + llm, # LLM + tts, # Text-To-Speech + transport.output(), # Websocket output to client + tma_out # LLM responses ]) task = PipelineTask(pipeline) @transport.event_handler("on_client_connected") async def on_client_connected(transport, client): - await task.queue_frame(TextFrame("Hello there!")) + # Kick off the conversation. + messages.append( + {"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMMessagesFrame(messages)]) runner = PipelineRunner()