From 980265ca97a0866778da216d600dd8df42e37dfb Mon Sep 17 00:00:00 2001 From: Yash Narayan Date: Mon, 19 Aug 2024 16:58:29 -0700 Subject: [PATCH] Add studypal --- examples/studypal/.env.example | 5 + examples/studypal/README.md | 12 +++ examples/studypal/requirements.txt | 16 +++ examples/studypal/runner.py | 61 +++++++++++ examples/studypal/studypal.py | 157 +++++++++++++++++++++++++++++ 5 files changed, 251 insertions(+) create mode 100644 examples/studypal/.env.example create mode 100644 examples/studypal/README.md create mode 100644 examples/studypal/requirements.txt create mode 100644 examples/studypal/runner.py create mode 100644 examples/studypal/studypal.py diff --git a/examples/studypal/.env.example b/examples/studypal/.env.example new file mode 100644 index 000000000..69245a3d6 --- /dev/null +++ b/examples/studypal/.env.example @@ -0,0 +1,5 @@ +DAILY_SAMPLE_ROOM_URL= # Follow instructions here and put your https://YOURDOMAIN.daily.co/YOURROOM (Instructions: https://docs.pipecat.ai/quickstart#preparing-your-environment) +DAILY_API_KEY= # Create here: https://dashboard.daily.co/developers +OPENAI_API_KEY= # Create here: https://platform.openai.com/docs/overview +CARTESIA_API_KEY= # Create here: https://play.cartesia.ai/console +CARTESIA_VOICE_ID= # Find here: https://play.cartesia.ai/ \ No newline at end of file diff --git a/examples/studypal/README.md b/examples/studypal/README.md new file mode 100644 index 000000000..3e06ce190 --- /dev/null +++ b/examples/studypal/README.md @@ -0,0 +1,12 @@ +# studypal +### Have a conversation about any article on the web + +studypal is a fast conversational AI built using [Daily](https://www.daily.co/) for real-time media transport and [Cartesia](https://cartesia.ai) for text-to-speech. Everything is orchestrated together (VAD -> STT -> LLM -> TTS) using [Pipecat](https://www.pipecat.ai/). + +## Setup + +1. Clone the repository +2. Copy `.env.example` to a `.env` file and add API keys +3. 
Install the required packages: `pip install -r requirements.txt` +4. Run `python3 studypal.py` from your command line. +5. While the app is running, go to the `https://YOURDOMAIN.daily.co/YOURROOM` set in `DAILY_SAMPLE_ROOM_URL` and talk to studypal! diff --git a/examples/studypal/requirements.txt b/examples/studypal/requirements.txt new file mode 100644 index 000000000..b9c8f42d8 --- /dev/null +++ b/examples/studypal/requirements.txt @@ -0,0 +1,16 @@ +aiohttp==3.9.5 +beautifulsoup4==4.12.2 +PyPDF2==3.0.1 +tiktoken==0.7.0 +pipecat==0.3.0 +pipecat-ai==0.0.39 +python-dotenv==1.0.1 +loguru==0.7.2 +requests==2.32.3 +pydantic==2.8.2 +httpx==0.27.0 +openai==1.27.0 +websockets==12.0 +daily-python==0.10.1 +torch==2.2.2 +torchaudio==2.2.2 \ No newline at end of file diff --git a/examples/studypal/runner.py b/examples/studypal/runner.py new file mode 100644 index 000000000..949e46b59 --- /dev/null +++ b/examples/studypal/runner.py @@ -0,0 +1,61 @@ +# +# Copyright (c) 2024, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import aiohttp +import argparse +import os + +from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper + + +async def configure(aiohttp_session: aiohttp.ClientSession): + (url, token, _) = await configure_with_args(aiohttp_session) + return (url, token) + + +async def configure_with_args( + aiohttp_session: aiohttp.ClientSession, + parser: argparse.ArgumentParser | None = None): + if not parser: + parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample") + parser.add_argument( + "-u", + "--url", + type=str, + required=False, + help="URL of the Daily room to join") + parser.add_argument( + "-k", + "--apikey", + type=str, + required=False, + help="Daily API Key (needed to create an owner token for the room)", + ) + + args, unknown = parser.parse_known_args() + + url = args.url or os.getenv("DAILY_SAMPLE_ROOM_URL") + key = args.apikey or os.getenv("DAILY_API_KEY") + + if not url: + raise Exception( + "No Daily room specified. 
use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL.") + + if not key: + raise Exception("No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.") + + daily_rest_helper = DailyRESTHelper( + daily_api_key=key, + daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"), + ) + + # Create a meeting token for the given room with an expiration 1 hour in + # the future. + expiry_time: float = 60 * 60 + + token = daily_rest_helper.get_token(url, expiry_time) + + return (url, token, args) \ No newline at end of file diff --git a/examples/studypal/studypal.py b/examples/studypal/studypal.py new file mode 100644 index 000000000..78c4a2654 --- /dev/null +++ b/examples/studypal/studypal.py @@ -0,0 +1,157 @@ +import aiohttp +import asyncio +import os +import sys +import requests +import io +from bs4 import BeautifulSoup +from PyPDF2 import PdfReader +import tiktoken + +from pipecat.frames.frames import LLMMessagesFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_response import ( + LLMAssistantResponseAggregator, LLMUserResponseAggregator) +from pipecat.services.cartesia import CartesiaTTSService +from pipecat.services.openai import OpenAILLMService +from pipecat.transports.services.daily import DailyParams, DailyTransport +from pipecat.vad.silero import SileroVADAnalyzer + +from runner import configure + +from loguru import logger + +from dotenv import load_dotenv +load_dotenv(override=True) + +from openai import OpenAI +client = OpenAI() + +# Run this script directly from your command line. 
# This project was adapted from https://github.com/pipecat-ai/pipecat/blob/main/examples/foundational/07d-interruptible-cartesia.py

logger.remove(0)
logger.add(sys.stderr, level="DEBUG")

# Upper bound, in seconds, on a single article download so that a stalled
# server cannot hang the bot before it ever joins the room.
_REQUEST_TIMEOUT_SECONDS = 30


# Count number of tokens used in model and truncate the content
def truncate_content(content, model_name, max_tokens=10000):
    """Return *content* cut down to at most *max_tokens* model tokens.

    Args:
        content: Article text to be embedded in the system prompt.
        model_name: Model name used to look up the matching tiktoken encoding.
        max_tokens: Token budget for the article text. Defaults to 10000.

    Returns:
        ``content`` unchanged when it fits in the budget, otherwise the text
        decoded from its first ``max_tokens`` tokens.
    """
    encoding = tiktoken.encoding_for_model(model_name)
    # Articles are untrusted text. By default tiktoken refuses input that
    # contains special-token markers such as "<|endoftext|>"; passing an empty
    # disallowed set encodes them as ordinary characters instead of raising.
    tokens = encoding.encode(content, disallowed_special=())

    if len(tokens) <= max_tokens:
        return content
    return encoding.decode(tokens[:max_tokens])


# Main function to extract content from url
def get_article_content(url):
    """Fetch the text of the article at *url*.

    URLs containing ``arxiv.org`` go to the arXiv PDF extractor; every other
    URL goes to the HTML extractor.
    """
    if 'arxiv.org' in url:
        return get_arxiv_content(url)
    return get_wikipedia_content(url)


# Helper function to extract content from Wikipedia url (this is technically agnostic to URL type but will work best with Wikipedia articles)
def get_wikipedia_content(url):
    """Return the text inside the page's ``mw-parser-output`` div.

    Returns:
        The div's text, or a fixed failure message when the page has no such
        div.
    """
    response = requests.get(url, timeout=_REQUEST_TIMEOUT_SECONDS)
    soup = BeautifulSoup(response.content, 'html.parser')

    content = soup.find('div', {'class': 'mw-parser-output'})
    if content is None:
        return "Failed to extract Wikipedia article content."
    return content.get_text()


# Helper function to extract content from arXiv url
def get_arxiv_content(url):
    """Download an arXiv paper as PDF and return its extracted text.

    An ``/abs/`` URL is rewritten to the matching ``/pdf/`` URL, and ``.pdf``
    is appended when missing.

    Returns:
        The concatenated text of all pages, or a fixed failure message when
        the download does not answer with HTTP 200.
    """
    if '/abs/' in url:
        url = url.replace('/abs/', '/pdf/')
    if not url.endswith('.pdf'):
        url += '.pdf'

    response = requests.get(url, timeout=_REQUEST_TIMEOUT_SECONDS)
    if response.status_code != 200:
        return "Failed to download arXiv PDF."

    pdf_reader = PdfReader(io.BytesIO(response.content))
    # A single join avoids repeated string concatenation; `or ""` keeps a page
    # that yields no text from breaking the join.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
# This is the main function that handles STT -> LLM -> TTS
async def main():
    """Run the studypal voice bot for one user-supplied article.

    Prompts for an article URL, downloads and truncates its text, then joins
    the configured Daily room and runs a pipeline of transport input, user
    aggregator, OpenAI LLM, Cartesia TTS, assistant aggregator and transport
    output until the runner finishes.
    """
    # Pasted URLs often carry stray whitespace or a trailing newline.
    url = input("Enter the URL of the article you would like to talk about: ").strip()
    article_content = get_article_content(url)
    article_content = truncate_content(article_content, model_name="gpt-4o-mini")

    async with aiohttp.ClientSession() as session:
        (room_url, token) = await configure(session)

        transport = DailyTransport(
            room_url,
            token,
            "studypal",
            DailyParams(
                audio_out_sample_rate=44100,
                audio_out_enabled=True,
                transcription_enabled=True,
                vad_enabled=True,
                vad_analyzer=SileroVADAnalyzer()
            )
        )

        # Honor the CARTESIA_VOICE_ID entry that .env.example advertises.
        # An unset or blank value falls back to the original built-in voice.
        # NOTE(review): an unfilled template line ("KEY= # comment") may be
        # loaded as the comment text itself, so a value starting with "#" is
        # treated as unset too -- confirm against python-dotenv's parser.
        voice_id = (os.getenv("CARTESIA_VOICE_ID") or "").strip()
        if not voice_id or voice_id.startswith("#"):
            voice_id = "4d2fd738-3b3d-4368-957a-bb4805275bd9"  # British Narration Lady: 4d2fd738-3b3d-4368-957a-bb4805275bd9

        tts = CartesiaTTSService(
            api_key=os.getenv("CARTESIA_API_KEY"),
            voice_id=voice_id,
            sample_rate=44100,
        )

        llm = OpenAILLMService(
            api_key=os.getenv("OPENAI_API_KEY"),
            model="gpt-4o-mini")

        # Shared conversation history: both aggregators and the join handler
        # below hold a reference to this same list.
        messages = [
            {
                "role": "system",
                "content": f"""You are an AI study partner. You have been given the following article content:

{article_content}

Your task is to help the user understand and learn from this article in 2 sentences. THESE RESPONSES SHOULD BE ONLY MAX 2 SENTENCES. THIS INSTRUCTION IS VERY IMPORTANT. RESPONSES SHOULDN'T BE LONG.
""",
            },
        ]

        tma_in = LLMUserResponseAggregator(messages)
        tma_out = LLMAssistantResponseAggregator(messages)

        pipeline = Pipeline([
            transport.input(),
            tma_in,
            llm,
            tts,
            tma_out,
            transport.output(),
        ])

        task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True, enable_metrics=True))

        @transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            # Start transcribing the first participant, then queue the message
            # history so the bot speaks first.
            transport.capture_participant_transcription(participant["id"])
            messages.append(
                {"role": "system", "content": "Hello! I'm ready to discuss the article with you. What would you like to learn about?"})
            await task.queue_frames([LLMMessagesFrame(messages)])

        runner = PipelineRunner()

        await runner.run(task)


if __name__ == "__main__":
    asyncio.run(main())