Compare commits

..

6 Commits

Author SHA1 Message Date
Jon Taylor
5bd5d22270 removed space from event handler 2024-06-26 18:30:56 +01:00
Jon Taylor
6ee7932337 added pause to start and new intro prompt 2024-06-26 18:24:14 +01:00
Jon Taylor
c407445dd1 removed header comment from bot runner 2024-06-24 17:35:26 +01:00
Jon Taylor
447f37167e added VAD stop seconds env 2024-06-24 17:34:25 +01:00
Jon Taylor
354c21500e prompt tweaks 2024-06-24 17:28:10 +01:00
Jon Taylor
5728e25b5a added fastbot example 2024-06-24 16:25:36 +01:00
13 changed files with 359 additions and 615 deletions

View File

@@ -1 +0,0 @@
classic-pipeline.py

View File

@@ -1,199 +0,0 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
from loguru import logger
from runner import configure
import argparse
import asyncio
import aiohttp
import os
import sys
from typing import List, Optional
from pydantic import BaseModel, ValidationError
from pipecat.vad.vad_analyzer import VADParams
from pipecat.vad.silero import SileroVADAnalyzer
from pipecat.transports.services.daily import DailyParams, DailyTransport, DailyTransportMessageFrame
from pipecat.services.openai import OpenAILLMService, OpenAILLMContext
from pipecat.services.deepgram import DeepgramSTTService
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.pipeline import Pipeline
from pipecat.processors.logger import FrameLogger
from pipecat.frames.frames import LLMMessagesFrame, EndFrame
from pipecat.processors.aggregators.llm_response import (
LLMAssistantResponseAggregator, LLMUserResponseAggregator
)
from fastbothelpers import (
GreedyLLMAggregator,
ClearableDeepgramTTSService,
VADGate,
AudioVolumeTimer,
TranscriptionTimingLogger
)
from dotenv import load_dotenv
load_dotenv(override=True)
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
class BotSettings(BaseModel):
room_url: str
room_token: str
bot_name: str = "Pipecat"
prompt: Optional[str] = "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Respond to what the user said in a creative and helpful way in a few short sentences."
deepgram_api_key: Optional[str] = None
deepgram_voice: Optional[str] = "aura-asteria-en"
deepgram_base_url: Optional[str] = "https://api.deepgram.com/v1/speak"
openai_api_key: Optional[str] = None
openai_model: Optional[str] = "gpt-4o"
openai_base_url: Optional[str] = None
async def main(settings: BotSettings):
async with aiohttp.ClientSession() as session:
transport = DailyTransport(
settings.room_url,
settings.room_token,
settings.bot_name,
DailyParams(
audio_out_enabled=True,
transcription_enabled=False,
vad_enabled=True,
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.200)),
vad_audio_passthrough=True
)
)
stt = DeepgramSTTService(
api_key=os.getenv("DEEPGRAM_API_KEY"),
**({'url': url} if (url := os.getenv("DEEPGRAM_STT_URL")) else {})
)
tts = ClearableDeepgramTTSService(
name="Voice",
aiohttp_session=session,
api_key=os.getenv("DEEPGRAM_API_KEY"),
voice="aura-asteria-en",
**({'base_url': url} if (url := os.getenv("DEEPGRAM_TTS_BASE_URL")) else {})
)
llm = OpenAILLMService(
name="LLM",
# To use OpenAI
api_key=os.getenv("OPENAI_API_KEY"),
model=os.getenv("OPENAI_MODEL"),
base_url=os.getenv("OPENAI_BASE_URL")
)
messages = [
{
"role": "system",
"content": """You are a helpful assistant in an audio conversation.
Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers.
Respond to what the user said in a creative and helpful way. Be concise in your answers to basic questions. If you are asked to elaborate or tell a story, provide a longer response.
""",
},
]
avt = AudioVolumeTimer()
tl = TranscriptionTimingLogger(avt)
tma_in = LLMUserResponseAggregator(messages)
tma_out = LLMAssistantResponseAggregator(messages)
pipeline = Pipeline([
transport.input(), # Transport user input
avt,
stt,
tl,
tma_in, # User responses
llm, # LLM
tts, # TTS
transport.output(), # Transport bot output
tma_out, # Assistant spoken responses
])
task = PipelineTask(
pipeline,
PipelineParams(
allow_interruptions=True,
enable_metrics=True,
report_only_initial_ttfb=True
))
# When a participant joins, start transcription for that participant so the
# bot can "hear" and respond to them.
# @ transport.event_handler("on_participant_joined")
# async def on_participant_joined(transport, participant):
# transport.capture_participant_transcription(participant["id"])
# When the participant leaves, we exit the bot.
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
# When the first participant joins, the bot should introduce itself.
@ transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
messages.append(
{"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([LLMMessagesFrame(messages)])
# Handle "latency-ping" messages. The client will send app messages that look like
# this:
# { "latency-ping": { ts: <client-side timestamp> }}
#
# We want to send an immediate pong back to the client from this handler function.
# Also, we will push a frame into the top of the pipeline and send it after the
#
@ transport.event_handler("on_app_message")
async def on_app_message(transport, message, sender):
try:
if "latency-ping" in message:
logger.debug(f"Received latency ping app message: {message}")
ts = message["latency-ping"]["ts"]
# Send immediately
transport.output().send_message(DailyTransportMessageFrame(
message={"latency-pong-msg-handler": {"ts": ts}},
participant_id=sender))
# And push to the pipeline for the Daily transport.output to send
await tma_in.push_frame(
DailyTransportMessageFrame(
message={"latency-pong-pipeline-delivery": {"ts": ts}},
participant_id=sender))
except Exception as e:
logger.debug(f"message handling error: {e} - {message}")
runner = PipelineRunner()
await runner.run(task)
# if __name__ == "__main__":
# (url, token) = configure()
# asyncio.run(main(url, token))
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Pipecat Bot")
parser.add_argument("-s", "--settings", type=str, required=True, help="Pipecat bot settings")
args, unknown = parser.parse_known_args()
try:
settings = BotSettings.model_validate_json(args.settings)
asyncio.run(main(settings))
except ValidationError as e:
print(e)

View File

@@ -1,15 +0,0 @@
DAILY_SAMPLE_ROOM_URL=...
DAILY_API_KEY=...
DEEPGRAM_API_KEY=...
DEEPGRAM_VOICE=alpha-asteria-en-v2
# DEEPGRAM_STT_URL=ws://localhost:8080
# DEEPGRAM_TTS_BASE_URL=http://0.0.0.0:8080/v1/speak
OPENAI_API_KEY=...
# OPENAI_API_KEY=
OPENAI_MODEL=gpt-4o
# OPENAI_MODEL=meta-llama/Meta-Llama-3-70B-Instruct
# OPENAI_MODEL=meta-llama/Meta-Llama-3-8B-Instruct
# OPENAI_MODEL=neuralmagic/Meta-Llama-3-70B-Instruct-FP8
# OPENAI_BASE_URL=http://0.0.0.0:8000/v1

View File

@@ -1,158 +0,0 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import argparse
import asyncio
import aiohttp
import sys
from pydantic import BaseModel, ValidationError
from typing import Optional
from pipecat.frames.frames import EndFrame, LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.llm_response import (
LLMAssistantResponseAggregator, LLMUserResponseAggregator)
from pipecat.services.deepgram import DeepgramTTSService
from pipecat.services.openai import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport, DailyTransportMessageFrame
from pipecat.vad.silero import SileroVADAnalyzer
from loguru import logger
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
class BotSettings(BaseModel):
room_url: str
room_token: str
bot_name: str = "Pipecat"
prompt: Optional[str] = "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Respond to what the user said in a creative and helpful way in a few short sentences."
deepgram_api_key: Optional[str] = None
deepgram_voice: Optional[str] = "aura-asteria-en"
deepgram_base_url: Optional[str] = "https://api.deepgram.com/v1/speak"
openai_api_key: Optional[str] = None
openai_model: Optional[str] = "gpt-4o"
openai_base_url: Optional[str] = None
async def main(settings: BotSettings):
async with aiohttp.ClientSession() as session:
transport = DailyTransport(
settings.room_url,
settings.room_token,
settings.bot_name,
DailyParams(
audio_out_enabled=True,
transcription_enabled=True,
vad_enabled=True,
vad_analyzer=SileroVADAnalyzer()
)
)
tts = DeepgramTTSService(
aiohttp_session=session,
api_key=settings.deepgram_api_key,
voice=settings.deepgram_voice,
base_url=settings.deepgram_base_url
)
llm = OpenAILLMService(
api_key=settings.openai_api_key,
model=settings.openai_model,
base_url=settings.openai_base_url
)
messages = [
{
"role": "system",
"content": settings.prompt,
},
]
tma_in = LLMUserResponseAggregator(messages)
tma_out = LLMAssistantResponseAggregator(messages)
pipeline = Pipeline([
transport.input(), # Transport user input
tma_in, # User responses
llm, # LLM
tts, # TTS
transport.output(), # Transport bot output
tma_out, # Assistant spoken responses
])
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True, enable_metrics=True))
# When the first participant joins, the bot should introduce itself.
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
# Kick off the conversation.
messages.append(
{"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frame(LLMMessagesFrame(messages))
# When a participant joins, start transcription for that participant so the
# bot can "hear" and respond to them.
@transport.event_handler("on_participant_joined")
async def on_participant_joined(transport, participant):
transport.capture_participant_transcription(participant["id"])
# When the participant leaves, we exit the bot.
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
# If the call is ended make sure we quit as well.
@transport.event_handler("on_call_state_updated")
async def on_call_state_updated(transport, state):
if state == "left":
await task.queue_frame(EndFrame())
# Handle "latency-ping" messages. The client will send app messages that look like
# this:
# { "latency-ping": { ts: <client-side timestamp> }}
#
# We want to send an immediate pong back to the client from this handler function.
# Also, we will push a frame into the top of the pipeline and send it after the
#
@transport.event_handler("on_app_message")
async def on_app_message(transport, message, sender):
try:
if "latency-ping" in message:
logger.debug(f"Received latency ping app message: {message}")
ts = message["latency-ping"]["ts"]
# Send immediately
transport.output().send_message(DailyTransportMessageFrame(
message={"latency-pong-msg-handler": {"ts": ts}},
participant_id=sender))
# And push to the pipeline for the Daily transport.output to send
await tma_in.push_frame(
DailyTransportMessageFrame(
message={"latency-pong-pipeline-delivery": {"ts": ts}},
participant_id=sender))
except Exception as e:
logger.debug(f"message handling error: {e} - {message}")
runner = PipelineRunner()
await runner.run(task)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Pipecat Bot")
parser.add_argument("-s", "--settings", type=str, required=True, help="Pipecat bot settings")
args, unknown = parser.parse_known_args()
try:
settings = BotSettings.model_validate_json(args.settings)
asyncio.run(main(settings))
except ValidationError as e:
print(e)

View File

@@ -1,58 +0,0 @@
import argparse
import os
import time
import urllib
import requests
def configure():
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
parser.add_argument(
"-u",
"--url",
type=str,
required=False,
help="URL of the Daily room to join")
parser.add_argument(
"-k",
"--apikey",
type=str,
required=False,
help="Daily API Key (needed to create an owner token for the room)",
)
args, unknown = parser.parse_known_args()
url = args.url or os.getenv("DAILY_SAMPLE_ROOM_URL")
key = args.apikey or os.getenv("DAILY_API_KEY")
if not url:
raise Exception(
"No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL.")
if not key:
raise Exception("No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")
# Create a meeting token for the given room with an expiration 1 hour in
# the future.
room_name: str = urllib.parse.urlparse(url).path[1:]
expiration: float = time.time() + 60 * 60
res: requests.Response = requests.post(
f"https://api.daily.co/v1/meeting-tokens",
headers={
"Authorization": f"Bearer {key}"},
json={
"properties": {
"room_name": room_name,
"is_owner": True,
"exp": expiration}},
)
if res.status_code != 200:
raise Exception(
f"Failed to create meeting token: {res.status_code} {res.text}")
token: str = res.json()["token"]
return (url, token)

View File

@@ -1,165 +0,0 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
from loguru import logger
from runner import configure
import asyncio
import aiohttp
import os
import sys
from typing import List
from pipecat.vad.vad_analyzer import VADParams
from pipecat.vad.silero import SileroVADAnalyzer
from pipecat.transports.services.daily import DailyParams, DailyTransport, DailyTransportMessageFrame
from pipecat.services.openai import OpenAILLMService, OpenAILLMContext
from pipecat.services.deepgram import DeepgramSTTService
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.pipeline import Pipeline
from pipecat.processors.logger import FrameLogger
from pipecat.frames.frames import LLMMessagesFrame
from fastbothelpers import (
GreedyLLMAggregator,
ClearableDeepgramTTSService,
VADGate,
AudioVolumeTimer,
TranscriptionTimingLogger
)
from dotenv import load_dotenv
load_dotenv(override=True)
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
async def main(room_url: str, token):
async with aiohttp.ClientSession() as session:
transport = DailyTransport(
room_url,
token,
"Respond bot",
DailyParams(
audio_out_enabled=True,
transcription_enabled=False,
vad_enabled=True,
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.400)),
vad_audio_passthrough=True
)
)
stt = DeepgramSTTService(
api_key=os.getenv("DEEPGRAM_API_KEY"),
**({'url': url} if (url := os.getenv("DEEPGRAM_STT_URL")) else {})
)
tts = ClearableDeepgramTTSService(
name="Voice",
aiohttp_session=session,
api_key=os.getenv("DEEPGRAM_API_KEY"),
voice="aura-asteria-en",
**({'base_url': url} if (url := os.getenv("DEEPGRAM_TTS_BASE_URL")) else {})
)
llm = OpenAILLMService(
name="LLM",
# To use OpenAI
api_key=os.getenv("OPENAI_API_KEY"),
model=os.getenv("OPENAI_MODEL"),
base_url=os.getenv("OPENAI_BASE_URL")
)
messages = [
{
"role": "system",
"content": """You are a helpful assistant in an audio conversation.
Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers.
Respond to what the user said in a creative and helpful way. Be concise in your answers to basic questions. If you are asked to elaborate or tell a story, provide a longer response.
""",
},
]
ctx = OpenAILLMContext()
greedy = GreedyLLMAggregator(name="greedy", context=ctx)
gate = VADGate(name="gate", vad_analyzer=transport.input().vad_analyzer(), context=ctx)
avt = AudioVolumeTimer()
tl = TranscriptionTimingLogger(avt)
pipeline = Pipeline([
transport.input(), # Transport user input
avt,
stt,
tl,
greedy,
llm, # LLM
tts, # TTS
gate,
transport.output(), # Transport bot output
# FrameLogger()
])
task = PipelineTask(
pipeline,
PipelineParams(
allow_interruptions=True,
enable_metrics=True,
report_only_initial_ttfb=True
))
# When a participant joins, start transcription for that participant so the
# bot can "hear" and respond to them.
# @ transport.event_handler("on_participant_joined")
# async def on_participant_joined(transport, participant):
# transport.capture_participant_transcription(participant["id"])
# When the first participant joins, the bot should introduce itself.
@ transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
messages.append(
{"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([LLMMessagesFrame(messages)])
# Handle "latency-ping" messages. The client will send app messages that look like
# this:
# { "latency-ping": { ts: <client-side timestamp> }}
#
# We want to send an immediate pong back to the client from this handler function.
# Also, we will push a frame into the top of the pipeline and send it after the
#
@ transport.event_handler("on_app_message")
async def on_app_message(transport, message, sender):
try:
if "latency-ping" in message:
logger.debug(f"Received latency ping app message: {message}")
ts = message["latency-ping"]["ts"]
# Send immediately
transport.output().send_message(DailyTransportMessageFrame(
message={"latency-pong-msg-handler": {"ts": ts}},
participant_id=sender))
# And push to the pipeline for the Daily transport.output to send
await tma_in.push_frame(
DailyTransportMessageFrame(
message={"latency-pong-pipeline-delivery": {"ts": ts}},
participant_id=sender))
except Exception as e:
logger.debug(f"message handling error: {e} - {message}")
runner = PipelineRunner()
await runner.run(task)
if __name__ == "__main__":
(url, token) = configure()
asyncio.run(main(url, token))

165
examples/fast-chatbot/.gitignore vendored Normal file
View File

@@ -0,0 +1,165 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
runpod.toml
# custom script to recursively upgrade items in requirements.py
upgrade_requirements.py
.DS_Store

View File

View File

@@ -0,0 +1,164 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
from loguru import logger
import argparse
import asyncio
import aiohttp
import os
import sys
import time
from typing import Optional
from pydantic import BaseModel, ValidationError
from pipecat.vad.vad_analyzer import VADParams
from pipecat.vad.silero import SileroVADAnalyzer
from pipecat.transports.services.daily import DailyParams, DailyTransport
from pipecat.services.openai import OpenAILLMService
from pipecat.services.deepgram import DeepgramSTTService
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.pipeline import Pipeline
from pipecat.frames.frames import LLMMessagesFrame, EndFrame
from pipecat.processors.aggregators.llm_response import (
LLMAssistantResponseAggregator, LLMUserResponseAggregator
)
from helpers import (
ClearableDeepgramTTSService,
AudioVolumeTimer,
TranscriptionTimingLogger
)
from dotenv import load_dotenv
load_dotenv(override=True)
logger.remove(0)
logger.add(sys.stderr, level=os.getenv("LOG_LEVEL", "DEBUG"))
class BotSettings(BaseModel):
room_url: str
room_token: str
bot_name: str = "Pipecat"
prompt: Optional[str] = "You are a helpful assistant."
deepgram_api_key: Optional[str] = os.getenv("DEEPGRAM_API_KEY", None)
deepgram_voice: Optional[str] = os.getenv("DEEPGRAM_VOICE", "aura-asteria-en")
deepgram_tts_base_url: Optional[str] = os.getenv(
"DEEPGRAM_TTS_BASE_URL", "https://api.deepgram.com/v1/speak")
deepgram_stt_base_url: Optional[str] = os.getenv(
"DEEPGRAM_STT_BASE_URL", "https://api.deepgram.com/v1/speak")
openai_api_key: Optional[str] = os.getenv("OPENAI_API_KEY", None),
openai_model: Optional[str] = os.getenv("OPENAI_MODEL", None),
openai_base_url: Optional[str] = os.getenv("OPENAI_BASE_URL", None)
vad_stop_secs: Optional[float] = os.getenv("VAD_STOP_SECS", 0.200)
async def main(settings: BotSettings):
async with aiohttp.ClientSession() as session:
transport = DailyTransport(
settings.room_url,
settings.room_token,
settings.bot_name,
DailyParams(
audio_out_enabled=True,
transcription_enabled=False,
vad_enabled=True,
vad_analyzer=SileroVADAnalyzer(params=VADParams(
stop_secs=settings.vad_stop_secs
)),
vad_audio_passthrough=True
)
)
stt = DeepgramSTTService(
name="STT",
api_key=settings.deepgram_api_key,
url=settings.deepgram_stt_base_url
)
tts = ClearableDeepgramTTSService(
name="Voice",
aiohttp_session=session,
api_key=settings.deepgram_api_key,
voice=settings.deepgram_voice,
**({'base_url': url} if (url := settings.deepgram_tts_base_url) else {})
)
llm = OpenAILLMService(
name="LLM",
api_key=settings.openai_api_key,
model=settings.openai_model,
base_url=settings.openai_base_url,
)
messages = [
{
"role": "system",
"content": settings.prompt,
},
]
avt = AudioVolumeTimer()
tl = TranscriptionTimingLogger(avt)
tma_in = LLMUserResponseAggregator(messages)
tma_out = LLMAssistantResponseAggregator(messages)
pipeline = Pipeline([
transport.input(), # Transport user input
avt, # Audio volume timer
stt, # Speech-to-text
tl, # Transcription timing logger
tma_in, # User responses
llm, # LLM
tts, # TTS
transport.output(), # Transport bot output
tma_out, # Assistant spoken responses
])
task = PipelineTask(
pipeline,
PipelineParams(
allow_interruptions=True,
enable_metrics=True,
report_only_initial_ttfb=True
))
# When the participant leaves, we exit the bot.
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
# When the first participant joins, the bot should introduce itself.
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
# Provide some air whilst tracks subscribe
time.sleep(2)
messages.append(
{
"role": "system",
"content": "Briefly introduce yourself by saying 'hello, I'm FastBot, how can I help you today?'"})
await task.queue_frames([LLMMessagesFrame(messages)])
runner = PipelineRunner()
await runner.run(task)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Pipecat Bot")
parser.add_argument("-s", "--settings", type=str, required=True, help="Pipecat bot settings")
args, unknown = parser.parse_known_args()
try:
settings = BotSettings.model_validate_json(args.settings)
asyncio.run(main(settings))
except ValidationError as e:
print(e)

View File

@@ -1,11 +1,9 @@
"""
bot_runner.py
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
HTTP service that listens for incoming calls from either Daily or Twilio,
provisioning a room and starting a Pipecat bot in response.
Refer to README for more information.
"""
import os
import argparse
import subprocess
@@ -13,12 +11,11 @@ import subprocess
from pydantic import BaseModel, ValidationError
from typing import Optional
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper, DailyRoomObject, DailyRoomProperties, DailyRoomSipParams, DailyRoomParams
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper, DailyRoomObject, DailyRoomProperties, DailyRoomParams
from fastapi import FastAPI, Request, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, PlainTextResponse
from twilio.twiml.voice_response import VoiceResponse
from fastapi.responses import JSONResponse
from bot import BotSettings
@@ -37,9 +34,10 @@ daily_rest_helper = DailyRESTHelper(
class RunnerSettings(BaseModel):
prompt: Optional[str] = "You are a helpful assistant."
prompt: Optional[
str] = "You are a fast, low-latency chatbot. Your goal is to demonstrate voice-driven AI capabilities at human-like speeds. When introducing yourself briefly mention your goal is to showcase speed and conversational flow. The technology powering you is Daily for transport, Cerebrium for GPU hosting, Llama 3 (8-B version) LLM, and Deepgram for speech-to-text and text-to-speech. You are hosted on the east coast of the United States. Respond to what the user said in a creative and helpful way, but keep responses short and legible. Ensure responses contain only words. Check again that you have not included special characters other than '?' or '!'."
deepgram_voice: Optional[str] = os.getenv("DEEPGRAM_VOICE")
openai_model: Optional[str] = os.getenv("OPENAI_MODEL")
openai_model: Optional[str] = os.getenv("OPENAI_MODEL", "gpt-4o")
openai_api_key: Optional[str] = os.getenv("OPENAI_API_KEY")
test: Optional[bool] = None
@@ -107,18 +105,14 @@ async def start_bot(request: Request) -> JSONResponse:
status_code=500, detail=f"Failed to get token for room: {room_url}")
# Spawn a new agent, and join the user session
# Note: this is mostly for demonstration purposes (refer to 'deployment' in README)
try:
bot_settings = BotSettings(
room_url=room.url,
room_token=token,
prompt=runner_settings.prompt,
deepgram_voice=runner_settings.deepgram_voice,
deepgram_api_key=os.getenv("DEEPGRAM_API_KEY"),
# deepgram_base_url="http://0.0.0.0:8080/v1/speak",
openai_model=runner_settings.openai_model,
openai_api_key=runner_settings.openai_api_key,
# openai_base_url="http://0.0.0.0:8000/v1",
)
bot_settings_str = bot_settings.model_dump_json(exclude_none=True)

View File

@@ -0,0 +1,12 @@
DAILY_SAMPLE_ROOM_URL= #optional: use the same room each time, or create a new one if unset
DAILY_API_KEY=
DAILY_API_URL=
DEEPGRAM_API_KEY=
DEEPGRAM_VOICE=
DEEPGRAM_STT_URL=
DEEPGRAM_TTS_BASE_URL=
OPENAI_API_KEY=
OPENAI_MODEL=
OPENAI_BASE_URL=

View File

@@ -15,14 +15,13 @@ from pipecat.frames.frames import (
TranscriptionFrame,
TextFrame,
StartInterruptionFrame,
StopInterruptionFrame,
LLMFullResponseStartFrame,
TTSStoppedFrame,
MetricsFrame
)
from pipecat.vad.vad_analyzer import VADAnalyzer, VADState
from pipecat.services.deepgram import DeepgramTTSService, DeepgramSTTService
from pipecat.services.deepgram import DeepgramTTSService
from pipecat.services.openai import OpenAILLMContext, OpenAILLMContextFrame
@@ -251,7 +250,7 @@ class AudioVolumeTimer(FrameProcessor):
raise ValueError(f"Expected 1 channel, got {frame.num_channels}")
# Unpack audio data into 16-bit integers
fmt = f"{len(frame.audio)//2}h"
fmt = f"{len(frame.audio) // 2}h"
audio_samples = struct.unpack(fmt, frame.audio)
# Calculate RMS

View File

@@ -0,0 +1,6 @@
pipecat-ai[daily,openai,silero,deepgram]
fastapi
uvicorn
requests
python-dotenv
loguru