diff --git a/examples/foundational/07c-interruptible-deepgram.py b/examples/foundational/07c-interruptible-deepgram.py index 4ca7dfeac..25be49a1c 100644 --- a/examples/foundational/07c-interruptible-deepgram.py +++ b/examples/foundational/07c-interruptible-deepgram.py @@ -15,7 +15,7 @@ from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.llm_response import ( LLMAssistantResponseAggregator, LLMUserResponseAggregator) -from pipecat.services.deepgram import DeepgramTTSService +from pipecat.services.deepgram import DeepgramSTTService, DeepgramTTSService from pipecat.services.openai import OpenAILLMService from pipecat.transports.services.daily import DailyParams, DailyTransport from pipecat.vad.silero import SileroVADAnalyzer @@ -39,23 +39,23 @@ async def main(room_url: str, token): "Respond bot", DailyParams( audio_out_enabled=True, - transcription_enabled=True, vad_enabled=True, - vad_analyzer=SileroVADAnalyzer() + vad_analyzer=SileroVADAnalyzer(), + vad_audio_passthrough=True ) ) + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + tts = DeepgramTTSService( aiohttp_session=session, api_key=os.getenv("DEEPGRAM_API_KEY"), + voice="aura-helios-en" ) llm = OpenAILLMService( - # api_key=os.getenv("OPENAI_API_KEY"), - # model="gpt-4o" - model="meta-llama/Meta-Llama-3-8B-Instruct", - base_url="http://0.0.0.0:8000/v1" - ) + api_key=os.getenv("OPENAI_API_KEY"), + model="gpt-4o") messages = [ { @@ -69,6 +69,7 @@ async def main(room_url: str, token): pipeline = Pipeline([ transport.input(), # Transport user input + stt, # STT tma_in, # User responses llm, # LLM tts, # TTS @@ -76,7 +77,7 @@ async def main(room_url: str, token): tma_out # Assistant spoken responses ]) - task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True, enable_metrics=True)) + task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True)) @transport.event_handler("on_first_participant_joined") async def on_first_participant_joined(transport, participant): diff --git a/examples/foundational/16-gpu-container-local-bot.py b/examples/foundational/16-gpu-container-local-bot.py new file mode 100644 index 000000000..0da208ae4 --- /dev/null +++ b/examples/foundational/16-gpu-container-local-bot.py @@ -0,0 +1,130 @@ +# +# Copyright (c) 2024, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import aiohttp +import os +import sys +import json + +from pipecat.frames.frames import LLMMessagesFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_response import ( + LLMAssistantResponseAggregator, LLMUserResponseAggregator) +from pipecat.services.deepgram import DeepgramTTSService +from pipecat.services.openai import OpenAILLMService +from pipecat.transports.services.daily import DailyParams, DailyTransport, DailyTransportMessageFrame +from pipecat.vad.silero import SileroVADAnalyzer + +from runner import configure + +from loguru import logger + +from dotenv import load_dotenv +load_dotenv(override=True) + +logger.remove(0) +logger.add(sys.stderr, level="DEBUG") + + +async def main(room_url: str, token): + async with aiohttp.ClientSession() as session: + transport = DailyTransport( + room_url, + token, + "Respond bot", + DailyParams( + audio_out_enabled=True, + transcription_enabled=True, + vad_enabled=True, + vad_analyzer=SileroVADAnalyzer() + ) + ) + + tts = DeepgramTTSService( + aiohttp_session=session, + api_key=os.getenv("DEEPGRAM_API_KEY"), + voice="aura-asteria-en", + base_url="http://0.0.0.0:8080/v1/speak" + ) + + llm = OpenAILLMService( + # To use OpenAI + # api_key=os.getenv("OPENAI_API_KEY"), + # model="gpt-4o" + # Or, to use a local vLLM (or similar) api server + model="meta-llama/Meta-Llama-3-8B-Instruct", + base_url="http://0.0.0.0:8000/v1" + ) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.", + }, + ] + + tma_in = LLMUserResponseAggregator(messages) + tma_out = LLMAssistantResponseAggregator(messages) + + pipeline = Pipeline([ + transport.input(), # Transport user input + tma_in, # User responses + llm, # LLM + tts, # TTS + transport.output(), # Transport bot output + tma_out # Assistant spoken responses + ]) + + task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True, enable_metrics=True)) + + # When a participant joins, start transcription for that participant so the + # bot can "hear" and respond to them. + @transport.event_handler("on_participant_joined") + async def on_participant_joined(transport, participant): + transport.capture_participant_transcription(participant["id"]) + + # When the first participant joins, the bot should introduce itself. + @transport.event_handler("on_first_participant_joined") + async def on_first_participant_joined(transport, participant): + messages.append( + {"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMMessagesFrame(messages)]) + + # Handle "latency-ping" messages. The client will send app messages that look like + # this: + # { "latency-ping": { ts: }} + # + # We want to send an immediate pong back to the client from this handler function. + # Also, we will push a frame into the top of the pipeline and send it after the + # + @transport.event_handler("on_app_message") + async def on_app_message(transport, message, sender): + try: + if "latency-ping" in message: + logger.debug(f"Received latency ping app message: {message}") + ts = message["latency-ping"]["ts"] + # Send immediately + transport.output().send_message(DailyTransportMessageFrame( + message={"latency-pong-msg-handler": {"ts": ts}}, + participant_id=sender)) + # And push to the pipeline for the Daily transport.output to send + await tma_in.push_frame( + DailyTransportMessageFrame( + message={"latency-pong-pipeline-delivery": {"ts": ts}}, + participant_id=sender)) + except Exception as e: + logger.debug(f"message handling error: {e} - {message}") + + runner = PipelineRunner() + await runner.run(task) + + +if __name__ == "__main__": + (url, token) = configure() + asyncio.run(main(url, token))