Add studypal

This commit is contained in:
Yash Narayan
2024-08-19 16:58:29 -07:00
parent 90479fff95
commit 980265ca97
5 changed files with 251 additions and 0 deletions

View File

@@ -0,0 +1,5 @@
DAILY_SAMPLE_ROOM_URL= # Your Daily room URL, e.g. https://YOURDOMAIN.daily.co/YOURROOM (setup instructions: https://docs.pipecat.ai/quickstart#preparing-your-environment)
DAILY_API_KEY= # Create here: https://dashboard.daily.co/developers
OPENAI_API_KEY= # Create here: https://platform.openai.com/docs/overview
CARTESIA_API_KEY= # Create here: https://play.cartesia.ai/console
CARTESIA_VOICE_ID= # Find here: https://play.cartesia.ai/

View File

@@ -0,0 +1,12 @@
# studypal
### Have a conversation about any article on the web
studypal is a fast conversational AI built using [Daily](https://www.daily.co/) for real-time media transport, [OpenAI](https://platform.openai.com/docs/overview) for the language model, and [Cartesia](https://cartesia.ai) for text-to-speech. Everything is orchestrated together (VAD -> STT -> LLM -> TTS) using [Pipecat](https://www.pipecat.ai/).
## Setup
1. Clone the repository
2. Copy `.env.example` to a `.env` file and add API keys
3. Install the required packages: `pip install -r requirements.txt`
4. Run `python3 studypal.py` from your command line and, when prompted, enter the URL of the article you want to discuss.
5. While the app is running, open the room URL you set in `DAILY_SAMPLE_ROOM_URL` (`https://<yourdomain>.daily.co/<yourroom>`) in your browser and talk to studypal!

View File

@@ -0,0 +1,16 @@
aiohttp==3.9.5
beautifulsoup4==4.12.2
PyPDF2==3.0.1
tiktoken==0.7.0
pipecat==0.3.0
pipecat-ai==0.0.39
python-dotenv==1.0.1
loguru==0.7.2
requests==2.32.3
pydantic==2.8.2
httpx==0.27.0
openai==1.27.0
websockets==12.0
daily-python==0.10.1
torch==2.2.2
torchaudio==2.2.2

View File

@@ -0,0 +1,61 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import aiohttp
import argparse
import os
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
async def configure(aiohttp_session: aiohttp.ClientSession):
    """Return the Daily room URL and meeting token for this run.

    Convenience wrapper around ``configure_with_args`` that drops the parsed
    command-line arguments from its result.

    Args:
        aiohttp_session: Session object forwarded unchanged to
            ``configure_with_args``.

    Returns:
        A ``(url, token)`` tuple.
    """
    room_url, meeting_token, _parsed_args = await configure_with_args(aiohttp_session)
    return (room_url, meeting_token)
async def configure_with_args(
    aiohttp_session: aiohttp.ClientSession,
    parser: argparse.ArgumentParser | None = None,
):
    """Resolve the Daily room URL and API key, then create a meeting token.

    The room URL and API key are taken from the ``-u/--url`` and
    ``-k/--apikey`` command-line options, falling back to the
    ``DAILY_SAMPLE_ROOM_URL`` and ``DAILY_API_KEY`` environment variables.

    Args:
        aiohttp_session: Session supplied by the caller. It is accepted for
            interface compatibility; this function's body does not use it.
        parser: Optional argument parser to extend with the Daily options.
            A new parser is created when none is given.

    Returns:
        A ``(url, token, args)`` tuple: the resolved room URL, the value
        returned by ``DailyRESTHelper.get_token``, and the parsed
        command-line namespace.

    Raises:
        Exception: If no room URL or no API key could be determined.
    """
    if not parser:
        parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")

    # Register both optional string options from one table.
    daily_options = (
        (("-u", "--url"), "URL of the Daily room to join"),
        (("-k", "--apikey"), "Daily API Key (needed to create an owner token for the room)"),
    )
    for flags, help_text in daily_options:
        parser.add_argument(*flags, type=str, required=False, help=help_text)

    # Unknown arguments are tolerated so callers can add their own options.
    cli_args, _ = parser.parse_known_args()

    # Command-line values take precedence over the environment.
    room_url = cli_args.url or os.getenv("DAILY_SAMPLE_ROOM_URL")
    api_key = cli_args.apikey or os.getenv("DAILY_API_KEY")

    if not room_url:
        raise Exception(
            "No Daily room specified. use the -u/--url option from the command line, "
            "or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL."
        )

    if not api_key:
        raise Exception(
            "No Daily API key specified. use the -k/--apikey option from the command line, "
            "or set DAILY_API_KEY in your environment to specify a Daily API key, "
            "available from https://dashboard.daily.co/developers."
        )

    rest_helper = DailyRESTHelper(
        daily_api_key=api_key,
        daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
    )

    # Create a meeting token for the given room with an expiration 1 hour in
    # the future.
    token_lifetime: float = 60 * 60
    token = rest_helper.get_token(room_url, token_lifetime)

    return (room_url, token, cli_args)

View File

@@ -0,0 +1,157 @@
import aiohttp
import asyncio
import os
import sys
import requests
import io
from bs4 import BeautifulSoup
from PyPDF2 import PdfReader
import tiktoken
from pipecat.frames.frames import LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.llm_response import (
LLMAssistantResponseAggregator, LLMUserResponseAggregator)
from pipecat.services.cartesia import CartesiaTTSService
from pipecat.services.openai import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
from pipecat.vad.silero import SileroVADAnalyzer
from runner import configure
from loguru import logger
from dotenv import load_dotenv
# Load settings from a .env file before anything below reads the environment.
# NOTE(review): override=True presumably lets .env values replace variables
# that are already set in the process environment -- confirm against the
# python-dotenv documentation.
load_dotenv(override=True)
# Imported after load_dotenv() -- presumably so the client created below can
# see OPENAI_API_KEY from the .env file (verify).
from openai import OpenAI
# NOTE(review): `client` is not referenced anywhere else in this file; the
# pipeline in main() talks to OpenAI through OpenAILLMService instead.
client = OpenAI()
# Run this script directly from your command line.
# This project was adapted from https://github.com/pipecat-ai/pipecat/blob/main/examples/foundational/07d-interruptible-cartesia.py
# Replace the logger handler with id 0 by a stderr sink that emits DEBUG and
# above.
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
# Count number of tokens used in model and truncate the content
def truncate_content(content, model_name, max_tokens=10000):
    """Trim ``content`` so it fits within a token budget for ``model_name``.

    Args:
        content: Text to measure and, if necessary, shorten.
        model_name: Model identifier passed to
            ``tiktoken.encoding_for_model`` to select the tokenizer.
        max_tokens: Maximum number of tokens to keep. Defaults to 10000,
            the limit this function previously hard-coded.

    Returns:
        ``content`` unchanged when it encodes to at most ``max_tokens``
        tokens; otherwise the text decoded from the first ``max_tokens``
        tokens.
    """
    encoding = tiktoken.encoding_for_model(model_name)
    # Scraped articles are untrusted text and may literally contain strings
    # such as "<|endoftext|>". With the default settings tiktoken raises
    # ValueError on those; disallowed_special=() encodes them as plain text.
    tokens = encoding.encode(content, disallowed_special=())
    if len(tokens) <= max_tokens:
        return content
    return encoding.decode(tokens[:max_tokens])
# Main function to extract content from url
def get_article_content(url):
    """Fetch the text of the article at ``url``.

    URLs containing ``arxiv.org`` are handled by ``get_arxiv_content``;
    every other URL is handled by ``get_wikipedia_content``.

    Args:
        url: Address of the article to fetch.

    Returns:
        Whatever the selected helper returns for ``url``.
    """
    extractor = get_arxiv_content if 'arxiv.org' in url else get_wikipedia_content
    return extractor(url)
# Helper function to extract content from Wikipedia url (this is technically agnostic to URL type but will work best with Wikipedia articles)
def get_wikipedia_content(url, timeout=30):
    """Fetch a page and return the text of its ``mw-parser-output`` element.

    The page is downloaded with ``requests`` and parsed with BeautifulSoup.
    Only the ``<div class="mw-parser-output">`` element is extracted, so
    pages without that element yield the failure message.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled server.

    Returns:
        The text of the article container, or the string
        ``"Failed to extract Wikipedia article content."`` when the response
        status is not 200 or the container is missing.
    """
    failure_message = "Failed to extract Wikipedia article content."

    response = requests.get(url, timeout=timeout)
    # Mirror get_arxiv_content: do not try to parse an error page as an article.
    if response.status_code != 200:
        return failure_message

    soup = BeautifulSoup(response.content, 'html.parser')
    content = soup.find('div', {'class': 'mw-parser-output'})
    if content is None:
        return failure_message
    return content.get_text()
# Helper function to extract content from arXiv url
def get_arxiv_content(url, timeout=30):
    """Download an arXiv paper as PDF and return its extracted text.

    An abstract URL (containing ``/abs/``) is rewritten to the matching
    ``/pdf/`` URL, and ``.pdf`` is appended when the URL does not already
    end with it.

    Args:
        url: arXiv abstract or PDF URL.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled server.

    Returns:
        The concatenated text of every page, or the string
        ``"Failed to download arXiv PDF."`` when the response status is
        not 200.
    """
    if '/abs/' in url:
        url = url.replace('/abs/', '/pdf/')
    if not url.endswith('.pdf'):
        url += '.pdf'

    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return "Failed to download arXiv PDF."

    pdf_reader = PdfReader(io.BytesIO(response.content))
    # Join once instead of growing a string page by page.
    return "".join(page.extract_text() for page in pdf_reader.pages)
# This is the main function that handles STT -> LLM -> TTS
async def main():
    """Run the studypal voice bot for a single article.

    Prompts on stdin for an article URL, fetches the article text and
    truncates it to the token budget, then builds and runs a pipeline:
    Daily transport input -> user aggregator -> OpenAI LLM -> Cartesia TTS
    -> assistant aggregator -> Daily transport output.

    Environment variables read here: ``CARTESIA_API_KEY``,
    ``CARTESIA_VOICE_ID`` (optional) and ``OPENAI_API_KEY``. The Daily room
    and token come from ``configure``.
    """
    # Voice used when CARTESIA_VOICE_ID is unset or empty (British Narration Lady).
    default_voice_id = "4d2fd738-3b3d-4368-957a-bb4805275bd9"

    url = input("Enter the URL of the article you would like to talk about: ")
    article_content = get_article_content(url)
    article_content = truncate_content(article_content, model_name="gpt-4o-mini")

    async with aiohttp.ClientSession() as session:
        (room_url, token) = await configure(session)

        transport = DailyTransport(
            room_url,
            token,
            "studypal",
            DailyParams(
                audio_out_sample_rate=44100,
                audio_out_enabled=True,
                transcription_enabled=True,
                vad_enabled=True,
                vad_analyzer=SileroVADAnalyzer(),
            ),
        )

        tts = CartesiaTTSService(
            api_key=os.getenv("CARTESIA_API_KEY"),
            # .env.example documents CARTESIA_VOICE_ID, so honor it here.
            # `or` also covers the empty value left by the template.
            voice_id=os.getenv("CARTESIA_VOICE_ID") or default_voice_id,
            sample_rate=44100,
        )

        llm = OpenAILLMService(
            api_key=os.getenv("OPENAI_API_KEY"),
            model="gpt-4o-mini",
        )

        messages = [
            {
                "role": "system",
                "content": f"""You are an AI study partner. You have been given the following article content:
{article_content}
Your task is to help the user understand and learn from this article in 2 sentences. THESE RESPONSES SHOULD BE ONLY MAX 2 SENTENCES. THIS INSTRUCTION IS VERY IMPORTANT. RESPONSES SHOULDN'T BE LONG.
""",
            },
        ]

        # Both aggregators are given the same `messages` list.
        tma_in = LLMUserResponseAggregator(messages)
        tma_out = LLMAssistantResponseAggregator(messages)

        pipeline = Pipeline([
            transport.input(),
            tma_in,
            llm,
            tts,
            tma_out,
            transport.output(),
        ])

        task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True, enable_metrics=True))

        @transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            # Capture the first participant's transcription, then queue the
            # message list (with the greeting appended) to start the conversation.
            transport.capture_participant_transcription(participant["id"])
            messages.append(
                {"role": "system", "content": "Hello! I'm ready to discuss the article with you. What would you like to learn about?"})
            await task.queue_frames([LLMMessagesFrame(messages)])

        runner = PipelineRunner()
        await runner.run(task)
# Script entry point: when this file is executed directly, run the async
# main() coroutine with asyncio.run().
if __name__ == "__main__":
    asyncio.run(main())