Compare commits

...

5 Commits

Author SHA1 Message Date
Chad Bailey
06beec1826 pipecat cloud 2025-03-05 21:30:42 +00:00
Chad Bailey
38c62a7db3 more cleanup 2025-02-06 01:31:05 +00:00
Chad Bailey
fb0a1548ab added api route 2025-02-06 01:20:35 +00:00
Chad Bailey
5f9e24791e cleanup 2025-02-05 20:33:06 +00:00
Chad Bailey
9e64724618 works, but still needs a parallel pipeline 2025-02-05 16:06:27 +00:00
12 changed files with 2249 additions and 217 deletions

View File

@@ -63,7 +63,7 @@ async def main():
)
llm = GoogleLLMService(
model="gemini-1.5-flash-latest",
model="gemini-2.0-flash-exp",
# model="gemini-exp-1114",
api_key=os.getenv("GOOGLE_API_KEY"),
)

View File

@@ -1,54 +1,11 @@
FROM python:3.11-slim-bookworm
FROM pipecatai/cloud-base:latest
ARG DEBIAN_FRONTEND=noninteractive
ARG USE_PERSISTENT_DATA
ENV PYTHONUNBUFFERED=1
ENV NODE_MAJOR=20
# Expose FastAPI port
ENV FAST_API_PORT=7860
EXPOSE 7860
# Install system dependencies
RUN apt-get update && apt-get install --no-install-recommends -y \
build-essential \
git \
ffmpeg \
google-perftools \
ca-certificates curl gnupg \
&& apt-get clean && rm -rf /var/lib/apt/lists/*
# Install Node.js
RUN mkdir -p /etc/apt/keyrings
RUN curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg
RUN echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_${NODE_MAJOR}.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list > /dev/null
RUN apt-get update && apt-get install nodejs -y
# Set up a new user named "user" with user ID 1000
RUN useradd -m -u 1000 user
# Set home to the user's home directory
ENV HOME=/home/user \
PATH=/home/user/.local/bin:$PATH \
PYTHONPATH=$HOME/app \
PYTHONUNBUFFERED=1
# Switch to the "user" user
USER user
# Set the working directory to the user's home directory
WORKDIR $HOME/app
# Install Python dependencies
COPY ./requirements.txt requirements.txt
RUN pip3 install --no-cache-dir --upgrade -r requirements.txt
# Copy everything else
COPY --chown=user ./src/ src/
RUN pip install --no-cache-dir --upgrade -r requirements.txt
# Copy frontend app and build
COPY --chown=user ./frontend/ frontend/
RUN cd frontend && npm install && npm run build
# Start the FastAPI server
CMD python3 src/bot_runner.py --port ${FAST_API_PORT}
COPY ./src/bot.py bot.py
COPY ./src/processors.py processors.py
COPY ./src/prompts.py prompts.py
COPY ./src/assets assets
COPY ./src/utils utils

View File

@@ -0,0 +1,27 @@
// [POST] /api
/**
 * Proxy a bot-start request to the service configured in BOT_START_URL.
 *
 * The JSON body of the incoming request is forwarded unchanged. The upstream
 * JSON body is relayed back to the caller, together with the upstream status
 * code whenever that status is not 200.
 *
 * Failure handling:
 *  - 400 when the incoming body is not valid JSON.
 *  - 502 when the upstream service cannot be reached or does not answer with JSON.
 *
 * @param request Incoming request whose JSON body is forwarded as-is.
 * @returns A JSON response mirroring the upstream reply, or an error payload.
 */
export async function POST(request: Request) {
  let params: unknown;
  try {
    params = await request.json();
  } catch {
    return Response.json(
      { error: "Request body must be valid JSON" },
      { status: 400 }
    );
  }
  console.log("in POST, params is ", params);

  const url = process.env.BOT_START_URL || "http://localhost:7860";

  // Only attach the bearer token when a key is configured; otherwise the
  // header would literally read "Bearer undefined".
  const headers: Record<string, string> = {
    "Content-Type": "application/json",
  };
  if (process.env.PCC_API_KEY) {
    headers.Authorization = `Bearer ${process.env.PCC_API_KEY}`;
  }

  let upstream: Response;
  try {
    upstream = await fetch(url, {
      method: "POST",
      headers,
      body: JSON.stringify(params),
    });
  } catch (error) {
    // Network-level failure (connection refused, DNS, ...): report a gateway error
    // instead of letting the exception escape the route handler.
    console.error("bot start request failed:", error);
    return Response.json(
      { error: "Unable to reach the bot start service" },
      { status: 502 }
    );
  }

  let payload: unknown;
  try {
    payload = await upstream.json();
  } catch {
    return Response.json(
      { error: "Bot start service returned a non-JSON response" },
      { status: 502 }
    );
  }

  if (upstream.status !== 200) {
    return Response.json(payload, { status: upstream.status });
  }

  console.log({ res: payload });
  return Response.json(payload);
}
/**
 * GET handler defined alongside POST: always answers with a fixed JSON greeting.
 *
 * @param request Incoming request; it is not inspected.
 * @returns A JSON response with the body `{ message: "Hello World" }`.
 */
export async function GET(request: Request) {
  const greeting = { message: "Hello World" };
  return Response.json(greeting);
}

View File

@@ -27,22 +27,26 @@ export default function Call() {
// Create a new room for the story session
try {
const response = await fetch("/", {
console.log("POSTing to /api")
const response = await fetch("/api", {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({
"createDailyRoom": true
})
});
const { room_url, token } = await response.json();
const {dailyRoom, dailyToken} = await response.json();
console.log({dailyRoom, dailyToken})
// Keep a reference to the room url for later
setRoom(room_url);
setRoom(dailyRoom);
// Join the WebRTC session
await daily.join({
url: room_url,
token,
url: dailyRoom,
token: dailyToken,
videoSource: false,
startAudioOff: true,
});
@@ -54,6 +58,7 @@ export default function Call() {
setState("started");
} catch (error) {
console.log("caught error:", error)
setState("error");
}
}

View File

@@ -0,0 +1,3 @@
SITE_URL=
PCC_API_KEY=
BOT_START_URL=

View File

@@ -1,15 +0,0 @@
// Next.js configuration object; it is this module's default export.
/** @type {import('next').NextConfig} */
const nextConfig = {
// NOTE(review): Next.js documents rewrites as unsupported when `output` is
// "export" — confirm whether the rewrite below has any effect outside `next dev`.
output: "export",
// Map every request path onto the same path at http://localhost:7860.
async rewrites() {
return [
{
source: "/:path*",
destination: "http://localhost:7860/:path*",
},
];
},
};
export default nextConfig;

File diff suppressed because it is too large Load Diff

View File

@@ -9,7 +9,7 @@
"lint": "next lint"
},
"dependencies": {
"@daily-co/daily-js": "^0.62.0",
"@daily-co/daily-js": "^0.74.0",
"@daily-co/daily-react": "^0.18.0",
"@radix-ui/react-select": "^2.1.2",
"@radix-ui/react-slot": "^1.0.2",
@@ -33,6 +33,7 @@
"eslint-config-next": "14.1.4",
"postcss": "^8.4.47",
"tailwindcss": "^3.4.13",
"typescript": "^5.6.2"
"typescript": "^5.6.2",
"vercel": "^41.0.1"
}
}

View File

@@ -2,5 +2,5 @@ async_timeout
fastapi
uvicorn
python-dotenv
-e "../..[daily,silero,openai,fal,cartesia,google]"
-e "../../../python-genai"
pipecat-ai[daily,silero,openai,fal,cartesia,google]~=0.0.55
pipecatcloud @ git+https://github.com/daily-co/pipecat-cloud@main

View File

@@ -12,19 +12,22 @@ import sys
import aiohttp
from dotenv import load_dotenv
from loguru import logger
from processors import StoryImageProcessor, StoryProcessor
from processors import StoryBreakReinsertProcessor, StoryImageProcessor, StoryProcessor
from prompts import CUE_USER_TURN, LLM_BASE_PROMPT
from utils.helpers import load_images, load_sounds
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame
from pipecat.pipeline.parallel_pipeline import ParallelPipeline
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.sync_parallel_pipeline import SyncParallelPipeline
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.elevenlabs import ElevenLabsTTSService
from pipecat.processors.logger import FrameLogger
from pipecat.services.elevenlabs import ElevenLabsHttpTTSService, ElevenLabsTTSService
from pipecat.services.fal import FalImageGenService
from pipecat.services.google import GoogleLLMService
from pipecat.services.google import GoogleImageGenService, GoogleLLMService
from pipecat.transports.services.daily import (
DailyParams,
DailyTransport,
@@ -63,13 +66,20 @@ async def main(room_url, token=None):
# -------------- Services --------------- #
llm_service = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY"))
tts_service = ElevenLabsTTSService(
api_key=os.getenv("ELEVENLABS_API_KEY"), voice_id=os.getenv("ELEVENLABS_VOICE_ID")
llm_service = GoogleLLMService(
api_key=os.getenv("GOOGLE_API_KEY"),
model="gemini-2.0-flash-exp",
)
image_gen = GoogleImageGenService(api_key=os.getenv("GOOGLE_API_KEY"))
tts_service = ElevenLabsHttpTTSService(
aiohttp_session=session,
api_key=os.getenv("ELEVENLABS_API_KEY"),
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
)
image_gen = GoogleImageGenService(
api_key=os.getenv("GOOGLE_API_KEY"), # model="imagen-3.0-fast-generate-001"
)
# --------------- Setup ----------------- #
@@ -99,13 +109,14 @@ async def main(room_url, token=None):
image_processor,
tts_service,
transport.output(),
StoryBreakReinsertProcessor(),
context_aggregator.assistant(),
]
)
main_task = PipelineTask(
main_pipeline,
PipelineParams(
pipeline=main_pipeline,
params=PipelineParams(
allow_interruptions=True,
enable_metrics=True,
enable_usage_metrics=True,
@@ -121,7 +132,6 @@ async def main(room_url, token=None):
images["book1"],
context_aggregator.user().get_context_frame(),
DailyTransportMessageFrame(CUE_USER_TURN),
# sounds["listening"],
images["book2"],
]
)
@@ -140,6 +150,10 @@ async def main(room_url, token=None):
await runner.run(main_task)
async def bot(data, daily_room, daily_token):
    """Run the storyteller bot for the given Daily room by delegating to ``main``.

    Args:
        data: Accepted as part of the signature; not read here.
        daily_room: Forwarded to ``main`` as its first argument (the room URL).
        daily_token: Forwarded to ``main`` as its second argument (the token).
    """
    room_url, token = daily_room, daily_token
    await main(room_url, token)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Daily Storyteller Bot")
parser.add_argument("-u", type=str, help="Room URL")

View File

@@ -127,8 +127,8 @@ async def start_bot(request: Request) -> JSONResponse:
return JSONResponse(
{
"room_url": room.url,
"token": user_token,
"dailyRoom": room.url,
"dailyToken": user_token,
}
)

View File

@@ -44,6 +44,15 @@ class StoryPromptFrame(TextFrame):
pass
class StoryBreakFrame(Frame):
    """Marker frame indicating where a [break] tag needs to be reinserted.

    The frame carries no payload of its own; its position in the frame stream
    is the information. It does not inherit from TextFrame to avoid TTS
    processing.
    """
# ------------ Frame Processors ----------- #
@@ -62,7 +71,10 @@ class StoryImageProcessor(FrameProcessor):
super().__init__()
self._image_gen_service = image_gen_service
# Create a new LLM service to use a different system prompt, etc
self._llm_service = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY"))
self._llm_service = GoogleLLMService(
api_key=os.getenv("GOOGLE_API_KEY"),
model="gemini-2.0-flash-exp",
)
self.pages = []
self.image_descriptions = []
@@ -188,8 +200,25 @@ class StoryProcessor(FrameProcessor):
if len(before_break) > 2:
self._story.append(before_break)
await self.push_frame(StoryPageFrame(before_break))
# await self.push_frame(sounds["ding"])
await self.push_frame(StoryBreakFrame())
await self.push_frame(DailyTransportMessageFrame(CUE_ASSISTANT_TURN))
# Keep the remainder (if any) in the buffer
self._text = parts[1].strip() if len(parts) > 1 else ""
class StoryBreakReinsertProcessor(FrameProcessor):
    """Re-insert [break] tags into story text before it reaches the assistant context aggregator.

    This processor looks for StoryBreakFrames (which aren't processed by TTS) and
    replaces each one with a TextFrame containing a [break] tag for the context
    aggregator. Every other frame is forwarded untouched.
    """

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Replace StoryBreakFrames with a " [break]" TextFrame; pass other frames through.

        Args:
            frame: The frame to inspect.
            direction: The direction the frame is travelling in; preserved for
                every frame that is passed through.
        """
        await super().process_frame(frame, direction)

        if isinstance(frame, StoryBreakFrame):
            # Swap the marker for the literal tag text.
            await self.push_frame(TextFrame(" [break]"))
        else:
            # Forward with the original direction so that frames travelling
            # upstream are not turned around and sent downstream again.
            await self.push_frame(frame, direction)