pipecat cloud

more cleanup
added api route
2025-03-05 21:30:42 +00:00 · 2025-02-06 01:31:05 +00:00 · 2025-02-06 01:20:35 +00:00 · 2025-02-05 20:33:06 +00:00 · 2025-02-05 16:06:27 +00:00
12 changed files with 2249 additions and 217 deletions
--- a/examples/foundational/14e-function-calling-gemini.py
+++ b/examples/foundational/14e-function-calling-gemini.py
@@ -63,7 +63,7 @@ async def main():
        )

        llm = GoogleLLMService(
-            model="gemini-1.5-flash-latest",
+            model="gemini-2.0-flash-exp",
            # model="gemini-exp-1114",
            api_key=os.getenv("GOOGLE_API_KEY"),
        )
--- a/examples/storytelling-chatbot/Dockerfile
+++ b/examples/storytelling-chatbot/Dockerfile
@@ -1,54 +1,11 @@
-FROM python:3.11-slim-bookworm
+FROM pipecatai/cloud-base:latest

-ARG DEBIAN_FRONTEND=noninteractive
-ARG USE_PERSISTENT_DATA
-ENV PYTHONUNBUFFERED=1
-ENV NODE_MAJOR=20
-
-# Expose FastAPI port
-ENV FAST_API_PORT=7860
-EXPOSE 7860
-
-# Install system dependencies
-RUN apt-get update && apt-get install --no-install-recommends -y \
-    build-essential \
-    git \
-    ffmpeg \
-    google-perftools \
-    ca-certificates curl gnupg \
-    && apt-get clean && rm -rf /var/lib/apt/lists/*
-
-# Install Node.js
-RUN mkdir -p /etc/apt/keyrings 
-RUN curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg
-RUN echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_${NODE_MAJOR}.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list > /dev/null
-RUN apt-get update && apt-get install nodejs -y
-
-# Set up a new user named "user" with user ID 1000
-RUN useradd -m -u 1000 user
-
-# Set home to the user's home directory
-ENV HOME=/home/user \
-    PATH=/home/user/.local/bin:$PATH \
-    PYTHONPATH=$HOME/app \
-    PYTHONUNBUFFERED=1
-
-# Switch to the "user" user
-USER user
-
-# Set the working directory to the user's home directory
-WORKDIR $HOME/app
-
-# Install Python dependencies
 COPY ./requirements.txt requirements.txt
-RUN pip3 install --no-cache-dir --upgrade -r requirements.txt

-# Copy everything else
-COPY --chown=user ./src/ src/
+RUN pip install --no-cache-dir --upgrade -r requirements.txt

-# Copy frontend app and build
-COPY --chown=user ./frontend/ frontend/
-RUN cd frontend && npm install && npm run build
-
-# Start the FastAPI server
-CMD python3 src/bot_runner.py --port ${FAST_API_PORT}
+COPY ./src/bot.py bot.py
+COPY ./src/processors.py processors.py
+COPY ./src/prompts.py prompts.py
+COPY ./src/assets assets
+COPY ./src/utils utils
--- a/examples/storytelling-chatbot/frontend/app/api/route.ts
+++ b/examples/storytelling-chatbot/frontend/app/api/route.ts
@@ -0,0 +1,50 @@
+// [POST] /api
+
+/**
+ * Proxies the client's start request to the bot start endpoint.
+ *
+ * The JSON body of the incoming request is forwarded unchanged to
+ * `BOT_START_URL` (default `http://localhost:7860`), with a `PCC_API_KEY`
+ * bearer token attached server-side when that key is set. The upstream
+ * status code and JSON body are relayed back to the caller.
+ *
+ * @param request - Incoming request whose body must be valid JSON.
+ * @returns The upstream JSON and status; 400 if the request body is not
+ *   JSON; 502 if the upstream is unreachable or does not reply with JSON.
+ */
+export async function POST(request: Request): Promise<Response> {
+  let params: unknown;
+  try {
+    params = await request.json();
+  } catch {
+    return Response.json({ error: "Request body must be valid JSON" }, { status: 400 });
+  }
+
+  // `||` rather than `??`: an empty `BOT_START_URL=` entry must also fall back.
+  const url = process.env.BOT_START_URL || "http://localhost:7860";
+
+  const headers: Record<string, string> = { "Content-Type": "application/json" };
+  // Attach credentials only when configured, instead of sending "Bearer undefined".
+  if (process.env.PCC_API_KEY) {
+    headers.Authorization = `Bearer ${process.env.PCC_API_KEY}`;
+  }
+
+  try {
+    const upstream = await fetch(url, {
+      method: "POST",
+      headers,
+      body: JSON.stringify(params),
+    });
+    // Payloads are intentionally not logged: the reply can carry a Daily meeting token.
+    const payload: unknown = await upstream.json();
+    return Response.json(payload, { status: upstream.status });
+  } catch (error: unknown) {
+    console.error("POST /api: bot start request failed:", error);
+    return Response.json({ error: "Bot start request failed" }, { status: 502 });
+  }
+}
+
+/** Returns a fixed greeting; the incoming request is not inspected. */
+export async function GET(request: Request): Promise<Response> {
+  return Response.json({ message: "Hello World" });
+}
--- a/examples/storytelling-chatbot/frontend/components/App.tsx
+++ b/examples/storytelling-chatbot/frontend/components/App.tsx
@@ -27,22 +27,26 @@ export default function Call() {

    // Create a new room for the story session
    try {
-      const response = await fetch("/", {
+      console.log("POSTing to /api")
+      const response = await fetch("/api", {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
        },
+        body: JSON.stringify({
+          "createDailyRoom": true
+        })
      });

-      const { room_url, token } = await response.json();
-
+      const {dailyRoom, dailyToken} = await response.json();
+      console.log({dailyRoom, dailyToken})
      // Keep a reference to the room url for later
-      setRoom(room_url);
+      setRoom(dailyRoom);

      // Join the WebRTC session
      await daily.join({
-        url: room_url,
-        token,
+        url: dailyRoom,
+        token: dailyToken,
        videoSource: false,
        startAudioOff: true,
      });
@@ -54,6 +58,7 @@ export default function Call() {

      setState("started");
    } catch (error) {
+      console.log("caught error:", error)
      setState("error");
    }
  }
--- a/examples/storytelling-chatbot/frontend/example.env.local
+++ b/examples/storytelling-chatbot/frontend/example.env.local
@@ -0,0 +1,3 @@
+SITE_URL=
+PCC_API_KEY=
+BOT_START_URL=
--- a/examples/storytelling-chatbot/frontend/next.config.mjs
+++ b/examples/storytelling-chatbot/frontend/next.config.mjs
@@ -1,15 +0,0 @@
-/** @type {import('next').NextConfig} */
-const nextConfig = {
-  output: "export",
-
-  async rewrites() {
-    return [
-      {
-        source: "/:path*",
-        destination: "http://localhost:7860/:path*",
-      },
-    ];
-  },
-};
-
-export default nextConfig;
--- a/examples/storytelling-chatbot/frontend/package-lock.json
+++ b/examples/storytelling-chatbot/frontend/package-lock.json
--- a/examples/storytelling-chatbot/frontend/package.json
+++ b/examples/storytelling-chatbot/frontend/package.json
@@ -9,7 +9,7 @@
    "lint": "next lint"
  },
  "dependencies": {
-    "@daily-co/daily-js": "^0.62.0",
+    "@daily-co/daily-js": "^0.74.0",
    "@daily-co/daily-react": "^0.18.0",
    "@radix-ui/react-select": "^2.1.2",
    "@radix-ui/react-slot": "^1.0.2",
@@ -33,6 +33,7 @@
    "eslint-config-next": "14.1.4",
    "postcss": "^8.4.47",
    "tailwindcss": "^3.4.13",
-    "typescript": "^5.6.2"
+    "typescript": "^5.6.2",
+    "vercel": "^41.0.1"
  }
 }
--- a/examples/storytelling-chatbot/requirements.txt
+++ b/examples/storytelling-chatbot/requirements.txt
@@ -2,5 +2,5 @@ async_timeout
 fastapi
 uvicorn
 python-dotenv
-e "../..[daily,silero,openai,fal,cartesia,google]"
-e "../../../python-genai"
+pipecat-ai[daily,silero,openai,fal,cartesia,google]~=0.0.55
+pipecatcloud @ git+https://github.com/daily-co/pipecat-cloud@main
--- a/examples/storytelling-chatbot/src/bot.py
+++ b/examples/storytelling-chatbot/src/bot.py
@@ -12,19 +12,22 @@ import sys
 import aiohttp
 from dotenv import load_dotenv
 from loguru import logger
-from processors import StoryImageProcessor, StoryProcessor
+from processors import StoryBreakReinsertProcessor, StoryImageProcessor, StoryProcessor
 from prompts import CUE_USER_TURN, LLM_BASE_PROMPT
 from utils.helpers import load_images, load_sounds

 from pipecat.audio.vad.silero import SileroVADAnalyzer
 from pipecat.frames.frames import EndFrame
+from pipecat.pipeline.parallel_pipeline import ParallelPipeline
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.sync_parallel_pipeline import SyncParallelPipeline
 from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
-from pipecat.services.elevenlabs import ElevenLabsTTSService
+from pipecat.processors.logger import FrameLogger
+from pipecat.services.elevenlabs import ElevenLabsHttpTTSService, ElevenLabsTTSService
 from pipecat.services.fal import FalImageGenService
-from pipecat.services.google import GoogleLLMService
+from pipecat.services.google import GoogleImageGenService, GoogleLLMService
 from pipecat.transports.services.daily import (
    DailyParams,
    DailyTransport,
@@ -63,13 +66,20 @@ async def main(room_url, token=None):

        # -------------- Services --------------- #

-        llm_service = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY"))
-
-        tts_service = ElevenLabsTTSService(
-            api_key=os.getenv("ELEVENLABS_API_KEY"), voice_id=os.getenv("ELEVENLABS_VOICE_ID")
+        llm_service = GoogleLLMService(
+            api_key=os.getenv("GOOGLE_API_KEY"),
+            model="gemini-2.0-flash-exp",
        )

-        image_gen = GoogleImageGenService(api_key=os.getenv("GOOGLE_API_KEY"))
+        tts_service = ElevenLabsHttpTTSService(
+            aiohttp_session=session,
+            api_key=os.getenv("ELEVENLABS_API_KEY"),
+            voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
+        )
+
+        image_gen = GoogleImageGenService(
+            api_key=os.getenv("GOOGLE_API_KEY"),  # model="imagen-3.0-fast-generate-001"
+        )

        # --------------- Setup ----------------- #

@@ -99,13 +109,14 @@ async def main(room_url, token=None):
                image_processor,
                tts_service,
                transport.output(),
+                StoryBreakReinsertProcessor(),
                context_aggregator.assistant(),
            ]
        )

        main_task = PipelineTask(
-            main_pipeline,
-            PipelineParams(
+            pipeline=main_pipeline,
+            params=PipelineParams(
                allow_interruptions=True,
                enable_metrics=True,
                enable_usage_metrics=True,
@@ -121,7 +132,6 @@ async def main(room_url, token=None):
                    images["book1"],
                    context_aggregator.user().get_context_frame(),
                    DailyTransportMessageFrame(CUE_USER_TURN),
-                    # sounds["listening"],
                    images["book2"],
                ]
            )
@@ -140,6 +150,10 @@ async def main(room_url, token=None):
        await runner.run(main_task)


+async def bot(data, daily_room, daily_token):  # `data` is accepted but not used here
+    await main(daily_room, daily_token)  # forwarded as main's (room_url, token)
+
+
 if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Daily Storyteller Bot")
    parser.add_argument("-u", type=str, help="Room URL")
--- a/examples/storytelling-chatbot/src/bot_runner.py
+++ b/examples/storytelling-chatbot/src/bot_runner.py
@@ -127,8 +127,8 @@ async def start_bot(request: Request) -> JSONResponse:

    return JSONResponse(
        {
-            "room_url": room.url,
-            "token": user_token,
+            "dailyRoom": room.url,
+            "dailyToken": user_token,
        }
    )

--- a/examples/storytelling-chatbot/src/processors.py
+++ b/examples/storytelling-chatbot/src/processors.py
@@ -44,6 +44,15 @@ class StoryPromptFrame(TextFrame):
    pass


+class StoryBreakFrame(Frame):
+    """Marker frame signalling that a [break] tag should be reinserted into the story text.
+    Carries no text of its own and does not inherit from TextFrame, to avoid TTS processing.
+    """
+
+    def __init__(self):
+        super().__init__()
+
+
 # ------------ Frame Processors ----------- #


@@ -62,7 +71,10 @@ class StoryImageProcessor(FrameProcessor):
        super().__init__()
        self._image_gen_service = image_gen_service
        # Create a new LLM service to use a different system prompt, etc
-        self._llm_service = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY"))
+        self._llm_service = GoogleLLMService(
+            api_key=os.getenv("GOOGLE_API_KEY"),
+            model="gemini-2.0-flash-exp",
+        )

        self.pages = []
        self.image_descriptions = []
@@ -188,8 +200,25 @@ class StoryProcessor(FrameProcessor):
                if len(before_break) > 2:
                    self._story.append(before_break)
                    await self.push_frame(StoryPageFrame(before_break))
-                    # await self.push_frame(sounds["ding"])
+                    await self.push_frame(StoryBreakFrame())
                    await self.push_frame(DailyTransportMessageFrame(CUE_ASSISTANT_TURN))

                # Keep the remainder (if any) in the buffer
                self._text = parts[1].strip() if len(parts) > 1 else ""
+
+
+class StoryBreakReinsertProcessor(FrameProcessor):
+    """Re-inserts [break] tags into story text before it reaches the assistant context aggregator.
+
+    Each StoryBreakFrame (which TTS does not process) is replaced by a TextFrame(" [break]");
+    every other frame is forwarded unchanged in the direction it was travelling.
+    """
+
+    async def process_frame(self, frame: Frame, direction: FrameDirection):
+        await super().process_frame(frame, direction)
+
+        if isinstance(frame, StoryBreakFrame):
+            # Swap the marker for a TextFrame carrying the [break] tag
+            await self.push_frame(TextFrame(" [break]"))
+        else:
+            await self.push_frame(frame, direction)  # keep upstream frames going upstream
Author	SHA1	Message	Date
Chad Bailey	06beec1826	pipecat cloud	2025-03-05 21:30:42 +00:00
Chad Bailey	38c62a7db3	more cleanup	2025-02-06 01:31:05 +00:00
Chad Bailey	fb0a1548ab	added api route	2025-02-06 01:20:35 +00:00
Chad Bailey	5f9e24791e	cleanup	2025-02-05 20:33:06 +00:00
Chad Bailey	9e64724618	works, but still needs a parallel pipeline	2025-02-05 16:06:27 +00:00