Add studypal

2024-08-19 16:58:29 -07:00
parent 90479fff95
commit 980265ca97
5 changed files with 251 additions and 0 deletions
--- a/examples/studypal/.env.example
+++ b/examples/studypal/.env.example
@@ -0,0 +1,5 @@
+DAILY_SAMPLE_ROOM_URL= # Follow instructions here and put your https://YOURDOMAIN.daily.co/YOURROOM (Instructions: https://docs.pipecat.ai/quickstart#preparing-your-environment)
+DAILY_API_KEY= # Create here: https://dashboard.daily.co/developers 
+OPENAI_API_KEY= # Create here: https://platform.openai.com/docs/overview
+CARTESIA_API_KEY= # Create here: https://play.cartesia.ai/console
+CARTESIA_VOICE_ID= # Find here: https://play.cartesia.ai/  
--- a/examples/studypal/README.md
+++ b/examples/studypal/README.md
@@ -0,0 +1,12 @@
+# studypal
+### Have a conversation about any article on the web
+
+studypal is a fast conversational AI built using [Daily](https://www.daily.co/) for real-time media transport and [Cartesia](https://cartesia.ai) for text-to-speech. Everything is orchestrated (VAD -> STT -> LLM -> TTS) using [Pipecat](https://www.pipecat.ai/).
+
+## Setup
+
+1. Clone the repository
+2. Copy `.env.example` to a `.env` file and add API keys 
+3. Install the required packages: `pip install -r requirements.txt` 
+4. Run `python3 studypal.py` from your command line and, when prompted, enter the URL of the article you want to discuss.
+5. While the app is running, open the room URL you set in `DAILY_SAMPLE_ROOM_URL` (`https://<yourdomain>.daily.co/<yourroom>`) and talk to studypal!
--- a/examples/studypal/requirements.txt
+++ b/examples/studypal/requirements.txt
@@ -0,0 +1,16 @@
+aiohttp==3.9.5
+beautifulsoup4==4.12.2
+PyPDF2==3.0.1
+tiktoken==0.7.0
+pipecat==0.3.0
+pipecat-ai==0.0.39
+python-dotenv==1.0.1
+loguru==0.7.2
+requests==2.32.3
+pydantic==2.8.2
+httpx==0.27.0
+openai==1.27.0
+websockets==12.0
+daily-python==0.10.1
+torch==2.2.2
+torchaudio==2.2.2
--- a/examples/studypal/runner.py
+++ b/examples/studypal/runner.py
@@ -0,0 +1,61 @@
+#
+# Copyright (c) 2024, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import aiohttp
+import argparse
+import os
+
+from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
+
+
+async def configure(aiohttp_session: aiohttp.ClientSession):
+    """Return `(url, token)` from `configure_with_args`, dropping the parsed CLI args."""
+    return (await configure_with_args(aiohttp_session))[:2]
+
+
+async def configure_with_args(
+        aiohttp_session: aiohttp.ClientSession,
+        parser: argparse.ArgumentParser | None = None):
+    """Resolve the Daily room URL and API key, then request a meeting token.
+
+    The URL and key come from -u/--url and -k/--apikey when given, otherwise
+    from DAILY_SAMPLE_ROOM_URL and DAILY_API_KEY. Both options are added to
+    `parser` (a default one is built when none is passed); arguments it does
+    not recognise are ignored. `aiohttp_session` is not used by this function.
+
+    Returns:
+        A `(url, token, args)` tuple, `args` being the parsed namespace.
+
+    Raises:
+        Exception: if no room URL or no API key could be found.
+    """
+    if not parser:
+        parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
+    parser.add_argument(
+        "-u", "--url", type=str, required=False, help="URL of the Daily room to join")
+    parser.add_argument(
+        "-k", "--apikey", type=str, required=False,
+        help="Daily API Key (needed to create an owner token for the room)")
+
+    # parse_known_args() leaves room for options owned by the calling script.
+    args, _ = parser.parse_known_args()
+    url = args.url or os.getenv("DAILY_SAMPLE_ROOM_URL")
+    if not url:
+        raise Exception(
+            "No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL.")
+
+    key = args.apikey or os.getenv("DAILY_API_KEY")
+    if not key:
+        raise Exception("No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")
+
+    # The REST endpoint can be overridden through DAILY_API_URL.
+    rest_helper = DailyRESTHelper(
+        daily_api_key=key, daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"))
+
+    # Meeting token for the given room, expiring 1 hour in the future.
+    one_hour: float = 60 * 60
+    token = rest_helper.get_token(url, one_hour)
+    return (url, token, args)
--- a/examples/studypal/studypal.py
+++ b/examples/studypal/studypal.py
@@ -0,0 +1,157 @@
+import aiohttp
+import asyncio
+import os
+import sys
+import requests
+import io
+from bs4 import BeautifulSoup
+from PyPDF2 import PdfReader
+import tiktoken
+
+from pipecat.frames.frames import LLMMessagesFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineParams, PipelineTask
+from pipecat.processors.aggregators.llm_response import (
+    LLMAssistantResponseAggregator, LLMUserResponseAggregator)
+from pipecat.services.cartesia import CartesiaTTSService
+from pipecat.services.openai import OpenAILLMService
+from pipecat.transports.services.daily import DailyParams, DailyTransport
+from pipecat.vad.silero import SileroVADAnalyzer
+
+from runner import configure
+
+from loguru import logger
+
+from dotenv import load_dotenv
+load_dotenv(override=True)
+
+from openai import OpenAI
+client = OpenAI()
+
+# Run this script directly from your command line. 
+# This project was adapted from https://github.com/pipecat-ai/pipecat/blob/main/examples/foundational/07d-interruptible-cartesia.py
+
+logger.remove(0)  # remove handler id 0 (presumably loguru's default sink) before adding our own
+logger.add(sys.stderr, level="DEBUG")  # log to stderr at DEBUG level
+
+
+def truncate_content(content, model_name, max_tokens=10000):
+    """Return `content` limited to `max_tokens` tokens of `model_name`'s encoding.
+
+    Text that already fits is returned unchanged; otherwise only its first
+    `max_tokens` tokens are decoded back into a string.
+    """
+    encoding = tiktoken.encoding_for_model(model_name)
+    tokens = encoding.encode(content)
+    # Slicing token ids (not characters) keeps the cut aligned to the budget.
+    return content if len(tokens) <= max_tokens else encoding.decode(tokens[:max_tokens])
+
+def get_article_content(url):
+    """Return the text of the article at `url`.
+
+    URLs containing 'arxiv.org' are read as arXiv PDFs; all others as HTML.
+    """
+    return get_arxiv_content(url) if 'arxiv.org' in url else get_wikipedia_content(url)
+
+def get_wikipedia_content(url, timeout=30):
+    """Return the text inside the page's `mw-parser-output` div.
+
+    Any URL is accepted, but a failure message is returned (not raised) when
+    the response is not OK or the div is missing. `timeout` is in seconds;
+    without one `requests` would wait on a stalled server indefinitely.
+    """
+    response = requests.get(url, timeout=timeout)
+    soup = BeautifulSoup(response.content if response.ok else b'', 'html.parser')
+    content = soup.find('div', {'class': 'mw-parser-output'})
+    return content.get_text() if content else "Failed to extract Wikipedia article content."
+
+def get_arxiv_content(url, timeout=30):
+    """Download an arXiv paper as PDF and return the text of all its pages.
+
+    An abstract URL ('/abs/') is rewritten to its '/pdf/' counterpart and a
+    '.pdf' suffix is appended when missing. Any status other than 200 yields
+    a failure message string instead of an exception. `timeout` is in
+    seconds; without one `requests` would wait on a stalled server forever.
+    """
+    url = url.replace('/abs/', '/pdf/')  # no-op when the URL has no '/abs/'
+    if not url.endswith('.pdf'):
+        url += '.pdf'
+
+    response = requests.get(url, timeout=timeout)
+    if response.status_code != 200:
+        return "Failed to download arXiv PDF."
+    # Join once rather than growing a string page by page.
+    return "".join(page.extract_text() for page in PdfReader(io.BytesIO(response.content)).pages)
+
+async def main():
+    """Run the studypal voice bot for one article.
+
+    Prompts for an article URL, downloads and truncates its text, then joins the
+    Daily room returned by configure() and runs the STT -> LLM -> TTS pipeline
+    through a PipelineRunner.
+    """
+    url = input("Enter the URL of the article you would like to talk about: ")
+    article_content = truncate_content(get_article_content(url), model_name="gpt-4o-mini")
+
+    async with aiohttp.ClientSession() as session:
+        (room_url, token) = await configure(session)
+
+        # The transport's audio output and the TTS service use the same sample rate.
+        sample_rate = 44100
+        transport = DailyTransport(
+            room_url,
+            token,
+            "studypal",
+            DailyParams(
+                audio_out_sample_rate=sample_rate,
+                audio_out_enabled=True,
+                transcription_enabled=True,
+                vad_enabled=True,
+                vad_analyzer=SileroVADAnalyzer()
+            )
+        )
+        tts = CartesiaTTSService(
+            api_key=os.getenv("CARTESIA_API_KEY"),
+            voice_id="4d2fd738-3b3d-4368-957a-bb4805275bd9",  # British Narration Lady
+            sample_rate=sample_rate,
+        )
+        llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o-mini")
+
+        # The article text travels in the system prompt; both aggregators are
+        # given this same list object.
+        messages = [
+            {
+                "role": "system",
+                "content": f"""You are an AI study partner. You have been given the following article content:
+
+{article_content}
+
+Your task is to help the user understand and learn from this article in 2 sentences. THESE RESPONSES SHOULD BE ONLY MAX 2 SENTENCES. THIS INSTRUCTION IS VERY IMPORTANT. RESPONSES SHOULDN'T BE LONG.
+""",
+            },
+        ]
+        user_aggregator = LLMUserResponseAggregator(messages)
+        assistant_aggregator = LLMAssistantResponseAggregator(messages)
+        pipeline = Pipeline([
+            transport.input(),
+            user_aggregator,
+            llm,
+            tts,
+            assistant_aggregator,
+            transport.output(),
+        ])
+        task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True, enable_metrics=True))
+
+        @transport.event_handler("on_first_participant_joined")
+        async def on_first_participant_joined(transport, participant):
+            # Capture the first participant's transcription, then queue the opening messages.
+            transport.capture_participant_transcription(participant["id"])
+            messages.append(
+                {"role": "system", "content": "Hello! I'm ready to discuss the article with you. What would you like to learn about?"})
+            await task.queue_frames([LLMMessagesFrame(messages)])
+
+        await PipelineRunner().run(task)
+
+if __name__ == "__main__":
+    asyncio.run(main())