121 lines
3.9 KiB
Python
121 lines
3.9 KiB
Python
#
|
||
# Copyright (c) 2024–2025, Daily
|
||
#
|
||
# SPDX-License-Identifier: BSD 2-Clause License
|
||
#
|
||
|
||
import os
|
||
|
||
from dotenv import load_dotenv
|
||
from loguru import logger
|
||
|
||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||
from pipecat.frames.frames import LLMMessagesAppendFrame
|
||
from pipecat.pipeline.pipeline import Pipeline
|
||
from pipecat.pipeline.runner import PipelineRunner
|
||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||
from pipecat.services.aws_nova_sonic import AWSNovaSonicService
|
||
from pipecat.transports.base_transport import TransportParams
|
||
from pipecat.transports.network.small_webrtc import SmallWebRTCTransport
|
||
from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection
|
||
|
||
# Load environment variables
|
||
# NOTE(review): override=True presumably lets values from the .env file replace
# variables that are already set in the process environment — confirm that this
# precedence is what deployments expect.
load_dotenv(override=True)
|
||
|
||
|
||
async def run_bot(webrtc_connection: SmallWebRTCConnection) -> None:
    """Run a voice bot backed by AWS Nova Sonic on a SmallWebRTC connection.

    Builds a three-stage pipeline (transport input -> Nova Sonic service ->
    transport output), registers the client lifecycle handlers on the
    transport, and then awaits the pipeline runner.

    AWS credentials and region are read from the ``AWS_SECRET_ACCESS_KEY``,
    ``AWS_ACCESS_KEY_ID`` and ``AWS_REGION`` environment variables; missing
    variables are passed through as ``None``.

    Args:
        webrtc_connection: Connection handed to ``SmallWebRTCTransport``.
    """
    logger.info("Starting bot")

    # Initialize the SmallWebRTCTransport with the connection
    transport = SmallWebRTCTransport(
        webrtc_connection=webrtc_connection,
        params=TransportParams(
            audio_in_enabled=True,
            audio_in_sample_rate=16000,
            audio_out_enabled=True,
            camera_in_enabled=False,
            vad_enabled=True,
            vad_audio_passthrough=True,
            # NOTE(review): this comment used to say stop_secs mirrors the
            # internal setting of the "Multimodal Live api" to align events.
            # This bot talks to AWS Nova Sonic, so that wording looks carried
            # over from another example — confirm 0.5 s is the right value.
            vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)),
        ),
    )

    # Create the AWS Nova Sonic LLM service
    # TODO: looks like Nova Sonic can't handle new lines? Until that is
    # confirmed, the instruction is kept as one single-line string (assembled
    # by implicit concatenation) instead of a triple-quoted block.
    system_instruction = (
        "You are a friendly assistant. The user and you will engage in a spoken dialog "
        "exchanging the transcripts of a natural real-time conversation. Keep your responses short, "
        "generally two or three sentences for chatty scenarios."
    )

    llm = AWSNovaSonicService(
        instruction=system_instruction,
        secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
        access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
        region=os.getenv("AWS_REGION"),
    )

    # Build the pipeline
    pipeline = Pipeline(
        [
            transport.input(),
            llm,
            transport.output(),
        ]
    )

    # Configure the pipeline task
    task = PipelineTask(
        pipeline,
        params=PipelineParams(
            allow_interruptions=True,
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
    )

    # Handle client connection event
    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info("Client connected")
        # Kick off the conversation.
        await task.queue_frames(
            [
                LLMMessagesAppendFrame(
                    messages=[
                        {
                            "role": "user",
                            "content": "Greet the user and introduce yourself.",
                        }
                    ]
                )
            ]
        )

    # Handle client disconnection events
    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        # Only logged; the task is cancelled by the "on_client_closed" handler.
        logger.info("Client disconnected")

    @transport.event_handler("on_client_closed")
    async def on_client_closed(transport, client):
        logger.info("Client closed connection")
        await task.cancel()

    # Run the pipeline
    runner = PipelineRunner(handle_sigint=False)
    await runner.run(task)
|
||
|
||
|
||
if __name__ == "__main__":
    # Executed as a script: import the sibling ``run`` module only now and
    # hand control to its ``main()`` entry point.
    import run

    run.main()
|