Files
pipecat/examples/phone-chatbot/simple_dialout.py
2025-04-16 08:33:34 -04:00

188 lines
6.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#
# Copyright (c) 2024-2025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import argparse
import asyncio
import os
import sys
from call_connection_manager import CallConfigManager
from dotenv import load_dotenv
from loguru import logger
from pipecat.adapters.schemas.function_schema import FunctionSchema
from pipecat.adapters.schemas.tools_schema import ToolsSchema
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndTaskFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.processors.frame_processor import FrameDirection
from pipecat.services.cartesia.tts import CartesiaTTSService
from pipecat.services.llm_service import LLMService
from pipecat.services.openai.llm import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
# Load variables from a local .env file; override=True lets the file take
# precedence over values already present in the process environment.
load_dotenv(override=True)

# Swap loguru's default handler (id 0) for a DEBUG-level stderr sink.
# logger.remove raises ValueError when no handler with that id exists (for
# example when another module already removed it), so tolerate that case
# instead of failing at import time.
try:
    logger.remove(0)
except ValueError:
    pass
logger.add(sys.stderr, level="DEBUG")

# Daily REST API key and endpoint, read once at import time.
daily_api_key = os.getenv("DAILY_API_KEY", "")
daily_api_url = os.getenv("DAILY_API_URL", "https://api.daily.co/v1")
async def main(
    room_url: str,
    token: str,
    body: str,
):
    """Run the simple dial-out bot in a Daily room.

    Builds a Daily transport, a Cartesia TTS service and an OpenAI LLM, wires
    them into a pipeline, registers the transport event handlers and then
    awaits the pipeline runner.

    Args:
        room_url: URL of the Daily room, passed to the Daily transport.
        token: Room token, passed to the Daily transport.
        body: JSON configuration string handed to
            ``CallConfigManager.from_json_string``. A falsy value (``None``
            or ``""``) selects a default ``CallConfigManager()`` instead.
    """
    # ------------ CONFIGURATION AND SETUP ------------

    # Create a config manager using the provided body
    if body:
        call_config_manager = CallConfigManager.from_json_string(body)
    else:
        call_config_manager = CallConfigManager()

    # Get important configuration values
    dialout_settings = call_config_manager.get_dialout_settings()
    test_mode = call_config_manager.is_test_mode()

    # ------------ TRANSPORT SETUP ------------

    transport_params = DailyParams(
        api_url=daily_api_url,
        api_key=daily_api_key,
        audio_in_enabled=True,
        audio_out_enabled=True,
        camera_out_enabled=False,
        vad_enabled=True,
        vad_analyzer=SileroVADAnalyzer(),
        transcription_enabled=True,
    )

    # Initialize transport with Daily
    transport = DailyTransport(
        room_url,
        token,
        "Simple Dial-out Bot",
        transport_params,
    )

    # Initialize TTS
    tts = CartesiaTTSService(
        api_key=os.getenv("CARTESIA_API_KEY", ""),
        voice_id="b7d50908-b17c-442d-ad8d-810c63997ed9",  # Use Helpful Woman voice by default
    )

    # ------------ FUNCTION DEFINITIONS ------------

    async def terminate_call(
        function_name, tool_call_id, args, llm: LLMService, context, result_callback
    ):
        """Function the bot can call to terminate the call.

        Queues an ``EndTaskFrame`` upstream from the LLM service. The other
        parameters are part of the function-call handler signature and are
        not used here.
        """
        await llm.queue_frame(EndTaskFrame(), FrameDirection.UPSTREAM)

    # Define function schemas for tools
    terminate_call_function = FunctionSchema(
        name="terminate_call",
        description="Call this function to terminate the call.",
        properties={},
        required=[],
    )

    # Create tools schema
    tools = ToolsSchema(standard_tools=[terminate_call_function])

    # ------------ LLM AND CONTEXT SETUP ------------

    # Set up the system instruction for the LLM
    system_instruction = """You are Chatbot, a friendly, helpful robot. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by introducing yourself. If the user ends the conversation, **IMMEDIATELY** call the `terminate_call` function. """

    # Initialize LLM
    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))

    # Register functions with the LLM
    llm.register_function("terminate_call", terminate_call)

    # Create system message and initialize messages list
    messages = [call_config_manager.create_system_message(system_instruction)]

    # Initialize LLM context and aggregator
    context = OpenAILLMContext(messages, tools)
    context_aggregator = llm.create_context_aggregator(context)

    # ------------ PIPELINE SETUP ------------

    # Build pipeline
    pipeline = Pipeline(
        [
            transport.input(),  # Transport user input
            context_aggregator.user(),  # User responses
            llm,  # LLM
            tts,  # TTS
            transport.output(),  # Transport bot output
            context_aggregator.assistant(),  # Assistant spoken responses
        ]
    )

    # Create pipeline task
    task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))

    # ------------ EVENT HANDLERS ------------

    @transport.event_handler("on_joined")
    async def on_joined(transport, data):
        # Start dialout if needed (never in test mode)
        if not test_mode and dialout_settings:
            logger.debug("Dialout settings detected; starting dialout")
            await call_config_manager.start_dialout(transport, dialout_settings)

    @transport.event_handler("on_dialout_connected")
    async def on_dialout_connected(transport, data):
        logger.debug(f"Dial-out connected: {data}")

    @transport.event_handler("on_dialout_answered")
    async def on_dialout_answered(transport, data):
        logger.debug(f"Dial-out answered: {data}")
        # Automatically start capturing transcription for the participant
        await transport.capture_participant_transcription(data["sessionId"])
        # The bot will wait to hear the user before the bot speaks

    @transport.event_handler("on_first_participant_joined")
    async def on_first_participant_joined(transport, participant):
        # Only test mode starts capture here; the dial-out path starts it
        # from on_dialout_answered instead.
        if test_mode:
            logger.debug(f"First participant joined: {participant['id']}")
            await transport.capture_participant_transcription(participant["id"])
            # The bot will wait to hear the user before the bot speaks

    @transport.event_handler("on_participant_left")
    async def on_participant_left(transport, participant, reason):
        logger.debug(f"Participant left: {participant}, reason: {reason}")
        # Cancel the pipeline task once a participant leaves
        await task.cancel()

    # ------------ RUN PIPELINE ------------

    if test_mode:
        logger.debug("Running in test mode (can be tested in Daily Prebuilt)")

    # The pipeline runs in both test and dial-out modes
    runner = PipelineRunner()
    await runner.run(task)
if __name__ == "__main__":
    # Command-line entry point: parse the room URL, room token and optional
    # JSON configuration string, then run the bot.
    parser = argparse.ArgumentParser(description="Simple Dial-out Bot")
    parser.add_argument("-u", "--url", type=str, help="Room URL")
    parser.add_argument("-t", "--token", type=str, help="Room Token")
    parser.add_argument("-b", "--body", type=str, help="JSON configuration string")
    args = parser.parse_args()

    # Log the arguments for debugging. The token is a credential, so only
    # record whether one was supplied rather than writing it to the log.
    logger.info(f"Room URL: {args.url}")
    logger.info(f"Token provided: {bool(args.token)}")
    logger.info(f"Body provided: {bool(args.body)}")

    asyncio.run(main(args.url, args.token, args.body))