# Files
# pipecat/examples/foundational/25-conversation-flow.py
# 2024-11-18 12:18:20 -05:00
#
# 186 lines
# 6.9 KiB
# Python

#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import os
import sys
import aiohttp
from dotenv import load_dotenv
from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.flows.manager import FlowManager
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.deepgram import DeepgramSTTService, DeepgramTTSService
from pipecat.services.openai import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
# Load settings from a .env file; override=True lets values from the file
# replace variables already present in the process environment.
load_dotenv(override=True)
# Remove the handler with id 0 (loguru's pre-installed default sink) and
# log to stderr at DEBUG level instead.
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
# Conversation flow: a "start" node that branches to a pizza or a sushi node.
# The small builders below only remove the repeated dictionary boilerplate;
# the resulting structure is a plain nested dict.


def _system_message(content):
    """Return a system-role message dict carrying *content*."""
    return {"role": "system", "content": content}


def _function_tool(name, description, properties=None, required=None):
    """Return one function-tool entry in the shape used throughout this flow.

    ``properties`` maps argument names to their JSON-schema fragments; when
    omitted the function takes no arguments. The ``required`` key is only
    emitted when at least one argument name is given.
    """
    parameters = {"type": "object", "properties": properties or {}}
    if required:
        parameters["required"] = list(required)
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": parameters,
        },
    }


def _say(text):
    """Return a ``tts_say`` action entry that carries *text*."""
    return {"type": "tts_say", "text": text}


flow_config = {
    "initial_node": "start",
    "nodes": {
        # Entry point: the user picks a cuisine via one of two no-argument
        # functions.
        "start": {
            "message": _system_message(
                "You are an order-taking assistant. You must ALWAYS use one of the available functions to progress the conversation. For this step, ask the user if they want pizza or sushi, and wait for them to use a function to choose."
            ),
            "functions": [
                _function_tool("choose_pizza", "User wants to order pizza"),
                _function_tool("choose_sushi", "User wants to order sushi"),
            ],
        },
        # Pizza branch: collect a size from a fixed set of options.
        "choose_pizza": {
            "message": _system_message(
                "The user has chosen pizza. You must now ask them to select a size using the select_pizza_size function. Do not proceed until they use this function. Do not assume any selections have been made."
            ),
            "functions": [
                _function_tool(
                    "select_pizza_size",
                    "Select pizza size",
                    properties={
                        "size": {
                            "type": "string",
                            "enum": ["small", "medium", "large"],
                            "description": "Size of the pizza",
                        }
                    },
                    required=["size"],
                ),
            ],
            "pre_actions": [_say("Ok, let me pull up our pizza menu...")],
        },
        # Sushi branch: collect a roll count between 1 and 10.
        "choose_sushi": {
            "message": _system_message(
                "The user has chosen sushi. Immediately say: 'How many sushi rolls would you like to order?' If they answer provide to the question of how many rolls, use the select_roll_count function."
            ),
            "functions": [
                _function_tool(
                    "select_roll_count",
                    "Select number of sushi rolls",
                    properties={
                        "count": {
                            "type": "integer",
                            "minimum": 1,
                            "maximum": 10,
                            "description": "Number of rolls to order",
                        }
                    },
                    required=["count"],
                ),
            ],
            "pre_actions": [_say("Ok, one moment...")],
        },
    },
}
async def main():
    """Run the pizza-or-sushi ordering bot in a Daily room.

    Builds a speech pipeline (transport input -> Deepgram STT -> user context
    aggregator -> OpenAI LLM -> Deepgram TTS -> transport output -> assistant
    context aggregator), attaches a ``FlowManager`` driven by the module-level
    ``flow_config``, and awaits the pipeline runner.

    Reads ``DEEPGRAM_API_KEY`` and ``OPENAI_API_KEY`` from the environment.
    """
    async with aiohttp.ClientSession() as session:
        # configure() yields a pair; only the room URL is used here.
        room_url, _ = await configure(session)

        transport = DailyTransport(
            room_url,
            None,  # NOTE(review): presumably the meeting token slot, left unset — confirm
            "Respond bot",
            DailyParams(
                audio_out_enabled=True,
                vad_enabled=True,
                vad_analyzer=SileroVADAnalyzer(),
                vad_audio_passthrough=True,
            ),
        )

        # The same Deepgram key serves both speech-to-text and text-to-speech.
        deepgram_api_key = os.getenv("DEEPGRAM_API_KEY")
        stt = DeepgramSTTService(api_key=deepgram_api_key)
        tts = DeepgramTTSService(api_key=deepgram_api_key, voice="aura-helios-en")
        llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4")

        # Get initial tools from whichever node the flow declares as initial,
        # so this stays correct if "initial_node" is ever renamed.
        initial_node = flow_config["initial_node"]
        initial_tools = flow_config["nodes"][initial_node]["functions"]

        # Create initial context
        messages = [
            {
                "role": "system",
                "content": "You are an order-taking assistant. You must ALWAYS use the available functions to progress the conversation. Never assume an order is complete without the proper function calls. Your responses will be converted to audio so avoid special characters.",
            }
        ]

        context = OpenAILLMContext(messages, initial_tools)
        context_aggregator = llm.create_context_aggregator(context)

        pipeline = Pipeline(
            [
                transport.input(),  # Transport user input
                stt,  # STT
                context_aggregator.user(),  # User responses
                llm,  # LLM
                tts,  # TTS
                transport.output(),  # Transport bot output
                context_aggregator.assistant(),  # Assistant spoken responses
            ]
        )

        task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))

        # Initialize flow manager
        flow_manager = FlowManager(flow_config, task, tts)

        # Register functions with LLM service
        await flow_manager.register_functions(llm)

        @transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            # Start transcribing the participant who just joined.
            await transport.capture_participant_transcription(participant["id"])
            # Initialize the flow processor
            await flow_manager.initialize(messages)
            # Kick off the conversation using the context aggregator
            await task.queue_frames([context_aggregator.user().get_context_frame()])

        runner = PipelineRunner()
        await runner.run(task)
# Start the bot only when this file is executed directly as a script.
if __name__ == "__main__":
    asyncio.run(main())