Files
pipecat/examples/foundational/39-aws-nova-sonic.py
2025-05-07 13:52:51 -04:00

121 lines
3.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import os
from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.audio.vad.vad_analyzer import VADParams
from pipecat.frames.frames import LLMMessagesAppendFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.services.aws_nova_sonic import AWSNovaSonicService
from pipecat.transports.base_transport import TransportParams
from pipecat.transports.network.small_webrtc import SmallWebRTCTransport
from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection
# Load environment variables
load_dotenv(override=True)
async def run_bot(webrtc_connection: SmallWebRTCConnection):
logger.info(f"Starting bot")
# Initialize the SmallWebRTCTransport with the connection
transport = SmallWebRTCTransport(
webrtc_connection=webrtc_connection,
params=TransportParams(
audio_in_enabled=True,
audio_in_sample_rate=16000,
audio_out_enabled=True,
camera_in_enabled=False,
vad_enabled=True,
vad_audio_passthrough=True,
# set stop_secs to something roughly similar to the internal setting
# of the Multimodal Live api, just to align events.
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)),
),
)
# Create the AWS Nova Sonic LLM service
# system_instruction = f"""
# You are a helpful AI assistant.
# Your goal is to demonstrate your capabilities in a helpful and engaging way.
# Your output will be converted to audio so don't include special characters in your answers.
# Respond to what the user said in a creative and helpful way.
# """
# TODO: looks like Nova Sonic can't handle new lines?
system_instruction = "You are a friendly assistant. The user and you will engage in a spoken dialog " \
"exchanging the transcripts of a natural real-time conversation. Keep your responses short, " \
"generally two or three sentences for chatty scenarios."
llm = AWSNovaSonicService(
instruction=system_instruction,
secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
region=os.getenv("AWS_REGION"),
)
# Build the pipeline
pipeline = Pipeline(
[
transport.input(),
llm,
transport.output(),
]
)
# Configure the pipeline task
task = PipelineTask(
pipeline,
params=PipelineParams(
allow_interruptions=True,
enable_metrics=True,
enable_usage_metrics=True,
),
)
# Handle client connection event
@transport.event_handler("on_client_connected")
async def on_client_connected(transport, client):
logger.info(f"Client connected")
# Kick off the conversation.
await task.queue_frames(
[
LLMMessagesAppendFrame(
messages=[
{
"role": "user",
"content": f"Greet the user and introduce yourself.",
}
]
)
]
)
# Handle client disconnection events
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
@transport.event_handler("on_client_closed")
async def on_client_closed(transport, client):
logger.info(f"Client closed connection")
await task.cancel()
# Run the pipeline
runner = PipelineRunner(handle_sigint=False)
await runner.run(task)
if __name__ == "__main__":
from run import main
main()