113 lines
3.7 KiB
Python
113 lines
3.7 KiB
Python
#
|
|
# Copyright (c) 2025, Daily
|
|
#
|
|
# SPDX-License-Identifier: BSD 2-Clause License
|
|
#
|
|
|
|
import os
|
|
import sys
|
|
from typing import Optional
|
|
|
|
from dotenv import load_dotenv
|
|
from fastapi import WebSocket
|
|
from loguru import logger
|
|
|
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
|
from pipecat.pipeline.pipeline import Pipeline
|
|
from pipecat.pipeline.runner import PipelineRunner
|
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
|
from pipecat.serializers.plivo import PlivoFrameSerializer
|
|
from pipecat.services.cartesia import CartesiaTTSService
|
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
|
from pipecat.services.openai import OpenAILLMService
|
|
from pipecat.transports.network.fastapi_websocket import (
|
|
FastAPIWebsocketParams,
|
|
FastAPIWebsocketTransport,
|
|
)
|
|
|
|
load_dotenv()
|
|
logger.remove(0)
|
|
logger.add(sys.stderr, level="DEBUG")
|
|
|
|
|
|
async def run_bot(websocket_client: WebSocket, stream_id: str, call_id: Optional[str]):
|
|
logger.info(f"Starting bot for stream: {stream_id}")
|
|
|
|
serializer = PlivoFrameSerializer(
|
|
stream_id=stream_id,
|
|
call_id=call_id,
|
|
auth_id=os.getenv("PLIVO_AUTH_ID"),
|
|
auth_token=os.getenv("PLIVO_AUTH_TOKEN"),
|
|
)
|
|
|
|
transport = FastAPIWebsocketTransport(
|
|
websocket=websocket_client,
|
|
params=FastAPIWebsocketParams(
|
|
audio_in_enabled=True,
|
|
audio_out_enabled=True,
|
|
add_wav_header=False,
|
|
vad_analyzer=SileroVADAnalyzer(),
|
|
serializer=serializer,
|
|
),
|
|
)
|
|
|
|
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
|
|
|
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
|
|
|
tts = CartesiaTTSService(
|
|
api_key=os.getenv("CARTESIA_API_KEY"),
|
|
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
|
)
|
|
|
|
messages = [
|
|
{
|
|
"role": "system",
|
|
"content": "You are an elementary teacher in an audio call. Your output will be converted to audio so don't include special characters in your answers. Respond to what the student said in a short short sentence.",
|
|
},
|
|
]
|
|
|
|
context = OpenAILLMContext(messages)
|
|
context_aggregator = llm.create_context_aggregator(context)
|
|
|
|
pipeline = Pipeline(
|
|
[
|
|
transport.input(), # Websocket input from client
|
|
stt, # Speech-To-Text
|
|
context_aggregator.user(),
|
|
llm, # LLM
|
|
tts, # Text-To-Speech
|
|
transport.output(), # Websocket output to client
|
|
context_aggregator.assistant(),
|
|
]
|
|
)
|
|
|
|
task = PipelineTask(
|
|
pipeline,
|
|
params=PipelineParams(
|
|
audio_in_sample_rate=8000,
|
|
audio_out_sample_rate=8000,
|
|
enable_metrics=True,
|
|
enable_usage_metrics=True,
|
|
),
|
|
)
|
|
|
|
@transport.event_handler("on_client_connected")
|
|
async def on_client_connected(transport, client):
|
|
# Kick off the conversation.
|
|
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
|
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
|
|
|
@transport.event_handler("on_client_disconnected")
|
|
async def on_client_disconnected(transport, client):
|
|
await task.cancel()
|
|
|
|
# We use `handle_sigint=False` because `uvicorn` is controlling keyboard
|
|
# interruptions. We use `force_gc=True` to force garbage collection after
|
|
# the runner finishes running a task which could be useful for long running
|
|
# applications with multiple clients connecting.
|
|
runner = PipelineRunner(handle_sigint=False, force_gc=True)
|
|
|
|
await runner.run(task)
|