#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

"""Voice bot example that routes synthesized speech through a Tavus video layer.

The pipeline built in ``main`` runs, in order: Daily transport input,
Deepgram STT, a user-message aggregator, an OpenAI LLM, Cartesia TTS, the
Tavus video service, Daily transport output, and an assistant-message
aggregator.
"""

import asyncio
import aiohttp
import os
import sys

from typing import Any, Mapping

from pipecat.frames.frames import LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.llm_response import (
    LLMAssistantResponseAggregator,
    LLMUserResponseAggregator,
)
from pipecat.services.cartesia import CartesiaTTSService
from pipecat.services.openai import OpenAILLMService
from pipecat.services.deepgram import DeepgramSTTService
from pipecat.services.tavus import TavusVideoService
from pipecat.transports.services.daily import DailyParams, DailyTransport
from pipecat.audio.vad.silero import SileroVADAnalyzer

from loguru import logger

from dotenv import load_dotenv

# Values from the .env file take precedence over the existing environment.
load_dotenv(override=True)

# Replace handler 0 with a stderr sink at DEBUG level.
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")


async def main() -> None:
    """Build the Tavus-backed voice pipeline and run it until the task ends.

    Reads ``TAVUS_API_KEY``, ``TAVUS_REPLICA_ID``, ``TAVUS_PERSONA_ID``
    (defaulting to ``"pipecat0"``), ``DEEPGRAM_API_KEY`` and
    ``CARTESIA_API_KEY`` from the environment. No API key is passed to the
    OpenAI service explicitly here.
    """
    async with aiohttp.ClientSession() as session:
        tavus = TavusVideoService(
            api_key=os.getenv("TAVUS_API_KEY"),
            replica_id=os.getenv("TAVUS_REPLICA_ID"),
            persona_id=os.getenv("TAVUS_PERSONA_ID", "pipecat0"),
            session=session,
        )

        # get persona, look up persona_name, set this as the bot name to ignore
        persona_name = await tavus.get_persona_name()
        room_url = await tavus.initialize()

        transport = DailyTransport(
            room_url=room_url,
            token=None,
            bot_name="Pipecat bot",
            params=DailyParams(
                vad_enabled=True,
                vad_analyzer=SileroVADAnalyzer(),
                vad_audio_passthrough=True,
            ),
        )

        stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

        tts = CartesiaTTSService(
            api_key=os.getenv("CARTESIA_API_KEY"),
            voice_id="a167e0f3-df7e-4d52-a9c3-f949145efdab",
        )

        llm = OpenAILLMService(model="gpt-4o-mini")

        # The same list object is handed to both aggregators and is appended
        # to again by the participant-joined handler below.
        messages = [
            {
                "role": "system",
                "content": (
                    "You are a helpful LLM in a WebRTC call. Your goal is to "
                    "demonstrate your capabilities in a succinct way. "
                    "Your output will be converted to audio so don't include "
                    "special characters in your answers. Respond to what the "
                    "user said in a creative and helpful way."
                ),
            },
        ]

        tma_in = LLMUserResponseAggregator(messages)
        tma_out = LLMAssistantResponseAggregator(messages)

        pipeline = Pipeline(
            [
                transport.input(),  # Transport user input
                stt,  # STT
                tma_in,  # User responses
                llm,  # LLM
                tts,  # TTS
                tavus,  # Tavus output layer
                transport.output(),  # Transport bot output
                tma_out,  # Assistant spoken responses
            ]
        )

        task = PipelineTask(
            pipeline,
            PipelineParams(
                allow_interruptions=True,
                enable_metrics=True,
                enable_usage_metrics=True,
                report_only_initial_ttfb=True,
            ),
        )

        @transport.event_handler("on_participant_joined")
        async def on_participant_joined(
            transport: DailyTransport, participant: Mapping[str, Any]
        ) -> None:
            """Mute the Tavus replica, or greet any other participant.

            A participant whose ``info.userName`` equals ``persona_name`` has
            its microphone unsubscribed; every other participant triggers an
            introduction prompt to the LLM.
            """
            user_name = participant.get("info", {}).get("userName", "")

            if user_name == persona_name:
                # Ignore the Tavus replica's microphone
                logger.debug(f"Ignoring {participant['id']}'s microphone")
                await transport.update_subscriptions(
                    participant_settings={
                        participant["id"]: {
                            "media": {"microphone": "unsubscribed"},
                        }
                    }
                )
                return

            # Kick off the conversation.
            messages.append(
                {"role": "system", "content": "Please introduce yourself to the user."}
            )
            await task.queue_frames([LLMMessagesFrame(messages)])

        runner = PipelineRunner()

        await runner.run(task)


if __name__ == "__main__":
    asyncio.run(main())