# # Copyright (c) 2024-2026, Daily # # SPDX-License-Identifier: BSD 2-Clause License # import os from dotenv import load_dotenv from loguru import logger from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.frames.frames import ( BotStartedSpeakingFrame, BotStoppedSpeakingFrame, EndFrame, InterruptionFrame, LLMRunFrame, TTSTextFrame, UserStartedSpeakingFrame, ) from pipecat.observers.base_observer import BaseObserver, FramePushed from pipecat.observers.loggers.debug_log_observer import DebugLogObserver, FrameEndpoint from pipecat.observers.loggers.llm_log_observer import LLMLogObserver from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.llm_context import LLMContext from pipecat.processors.aggregators.llm_response_universal import ( LLMContextAggregatorPair, LLMUserAggregatorParams, ) from pipecat.processors.frame_processor import FrameDirection from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_input import BaseInputTransport from pipecat.transports.base_output import BaseOutputTransport from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) class CustomObserver(BaseObserver): """Observer to log interruptions and bot speaking events to the console. Logs all frame instances of: - InterruptionFrame - BotStartedSpeakingFrame - BotStoppedSpeakingFrame This allows you to see the frame flow from processor to processor through the pipeline for these frames. Log format: [EVENT TYPE]: [source processor] → [destination processor] at [timestamp]s """ async def on_push_frame(self, data: FramePushed): src = data.source dst = data.destination frame = data.frame direction = data.direction timestamp = data.timestamp # Convert timestamp to seconds for readability time_sec = timestamp / 1_000_000_000 # Create direction arrow arrow = "→" if direction == FrameDirection.DOWNSTREAM else "←" if isinstance(frame, InterruptionFrame) and isinstance(src, BaseOutputTransport): logger.info(f"⚡ INTERRUPTION START: {src} {arrow} {dst} at {time_sec:.2f}s") elif isinstance(frame, BotStartedSpeakingFrame): logger.info(f"🤖 BOT START SPEAKING: {src} {arrow} {dst} at {time_sec:.2f}s") elif isinstance(frame, BotStoppedSpeakingFrame): logger.info(f"🤖 BOT STOP SPEAKING: {src} {arrow} {dst} at {time_sec:.2f}s") # We use lambdas to defer transport parameter creation until the transport # type is selected at runtime. transport_params = { "daily": lambda: DailyParams( audio_in_enabled=True, audio_out_enabled=True, ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, ), } async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): logger.info(f"Starting bot") stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) tts = CartesiaTTSService( api_key=os.getenv("CARTESIA_API_KEY"), voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady ) llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) messages = [ { "role": "system", "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", }, ] context = LLMContext(messages) user_aggregator, assistant_aggregator = LLMContextAggregatorPair( context, user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), ) pipeline = Pipeline( [ transport.input(), # Transport user input stt, user_aggregator, # User responses llm, # LLM tts, # TTS transport.output(), # Transport bot output assistant_aggregator, # Assistant spoken responses ] ) task = PipelineTask( pipeline, params=PipelineParams( enable_metrics=True, enable_usage_metrics=True, ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, observers=[ CustomObserver(), LLMLogObserver(), DebugLogObserver( frame_types={ TTSTextFrame: (BaseOutputTransport, FrameEndpoint.SOURCE), UserStartedSpeakingFrame: (BaseInputTransport, FrameEndpoint.SOURCE), EndFrame: None, } ), ], ) @transport.event_handler("on_client_connected") async def on_client_connected(transport, client): logger.info(f"Client connected") # Kick off the conversation. messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") await task.cancel() runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) await runner.run(task) async def bot(runner_args: RunnerArguments): """Main bot entry point compatible with Pipecat Cloud.""" transport = await create_transport(runner_args, transport_params) await run_bot(transport, runner_args) if __name__ == "__main__": from pipecat.runner.run import main main()