Compare commits
54 Commits
aleix/queu
...
aleix/clea
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3933ba57b8 | ||
|
|
a51af35024 | ||
|
|
119fd5ba7d | ||
|
|
0718a812bd | ||
|
|
3814501b48 | ||
|
|
7a5205dbda | ||
|
|
04c02c9a20 | ||
|
|
0ff7195a83 | ||
|
|
3b91aa013a | ||
|
|
50f6235edb | ||
|
|
6f4d94f91b | ||
|
|
83a4c7d443 | ||
|
|
8171fec925 | ||
|
|
175f352ea7 | ||
|
|
5290161ac4 | ||
|
|
8762019ed7 | ||
|
|
61a59fa158 | ||
|
|
55eea20c8e | ||
|
|
9a621f0c54 | ||
|
|
55fc24e933 | ||
|
|
b14608f09b | ||
|
|
4a25c57337 | ||
|
|
f800e35ccb | ||
|
|
12d49a9b9d | ||
|
|
b25b251a44 | ||
|
|
64b2a75a94 | ||
|
|
b33a60f3a5 | ||
|
|
d22dbb1a6d | ||
|
|
983199a6cd | ||
|
|
133d7ee33a | ||
|
|
0bd888afc7 | ||
|
|
537bd1c58d | ||
|
|
5ef519fe2c | ||
|
|
20498fb47f | ||
|
|
b57dfb3b5d | ||
|
|
0355ed4aa1 | ||
|
|
1e76cc7bdc | ||
|
|
18c0374126 | ||
|
|
7072fba7e7 | ||
|
|
3d702a5c39 | ||
|
|
f31efa42c9 | ||
|
|
74b369ff20 | ||
|
|
46eed0a59a | ||
|
|
9643296e29 | ||
|
|
c83c5b5a34 | ||
|
|
277e2d7fc0 | ||
|
|
56ca7360ae | ||
|
|
d5ab3251f0 | ||
|
|
915c284420 | ||
|
|
40154824e8 | ||
|
|
8cda4512ad | ||
|
|
fc90bdc638 | ||
|
|
5a88165a26 | ||
|
|
3466842cd4 |
60
CHANGELOG.md
@@ -5,6 +5,66 @@ All notable changes to **Pipecat** will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
### Added
|
||||
|
||||
- Added support for a new TTS service, `MiniMaxHttpTTSService`.
|
||||
(see https://www.minimax.io/audio)
|
||||
|
||||
- A new function `FrameProcessor.setup()` has been added to allow setting up
|
||||
frame processors before receiving a `StartFrame`. This is what's happening
|
||||
internally: `FrameProcessor.setup()` is called, `StartFrame` is pushed from
|
||||
the beginning of the pipeline, your regular pipeline operations, `EndFrame` or
|
||||
`CancelFrame` are pushed from the beginning of the pipeline and finally
|
||||
`FrameProcessor.cleanup()` is called.
|
||||
|
||||
- Added support for OpenTelemetry tracing in Pipecat. This initial
|
||||
implementation includes:
|
||||
|
||||
- A `setup_tracing` method where you can specify your OpenTelemetry exporter
|
||||
- Service decorators for STT (`@traced_stt`), LLM (`@traced_llm`), and TTS
|
||||
(`@traced_tts`) which trace the execution and collect properties and
|
||||
metrics (TTFB, token usage, character counts, etc.)
|
||||
- Class decorators that provide execution tracking; these are generic and can
|
||||
be used for service tracking as needed
|
||||
- Spans that help track traces on a per conversations and turn basis:
|
||||
|
||||
```
|
||||
conversation-uuid
|
||||
├── turn-1
|
||||
│ ├── stt_deepgramsttservice
|
||||
│ ├── llm_openaillmservice
|
||||
│ └── tts_cartesiattsservice
|
||||
...
|
||||
└── turn-n
|
||||
└── ...
|
||||
```
|
||||
|
||||
By default, Pipecat has implemented service decorators to trace execution of
|
||||
STT, LLM, and TTS services. You can enable tracing by setting `enable_tracing`
|
||||
to `True` in the PipelineTask.
|
||||
|
||||
- Added `TurnTrackingObserver`, which tracks the start and end of a user/bot
|
||||
turn pair and emits events `on_turn_started` and `on_turn_stopped`
|
||||
corresponding to the start and end of a turn, respectively.
|
||||
|
||||
- Allow passing observers to `run_test()` while running unit tests.
|
||||
|
||||
### Changed
|
||||
|
||||
- `GoogleLLMService` has been updated to use `google-genai` instead of the
|
||||
deprecated `google-generativeai`.
|
||||
|
||||
### Other
|
||||
|
||||
- Added an `open-telemetry-tracing` example, showing how to setup tracing. The
|
||||
example also includes Jaeger as an open source OpenTelemetry client to review
|
||||
traces from the example runs.
|
||||
|
||||
- Added foundational example `29-turn-tracking-observer.py` to show how to use
|
||||
the `TurnTrackingObserver.
|
||||
|
||||
## [0.0.67] - 2025-05-07
|
||||
|
||||
### Added
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
"@daily-co/daily-js": "0.74.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"vite": "^6.0.9"
|
||||
"vite": "^6.3.5"
|
||||
}
|
||||
},
|
||||
"node_modules/@babel/runtime": {
|
||||
@@ -999,9 +999,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/vite": {
|
||||
"version": "6.3.3",
|
||||
"resolved": "https://registry.npmjs.org/vite/-/vite-6.3.3.tgz",
|
||||
"integrity": "sha512-5nXH+QsELbFKhsEfWLkHrvgRpTdGJzqOZ+utSdmPTvwHmvU6ITTm3xx+mRusihkcI8GeC7lCDyn3kDtiki9scw==",
|
||||
"version": "6.3.5",
|
||||
"resolved": "https://registry.npmjs.org/vite/-/vite-6.3.5.tgz",
|
||||
"integrity": "sha512-cZn6NDFE7wdTpINgs++ZJ4N49W2vRp8LCKrn3Ob1kYNtOo21vfDoaV5GzBfLU4MovSAB8uNRm4jgzVQZ+mBzPQ==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"esbuild": "^0.25.0",
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
"license": "ISC",
|
||||
"description": "",
|
||||
"devDependencies": {
|
||||
"vite": "^6.0.9"
|
||||
"vite": "^6.3.5"
|
||||
},
|
||||
"dependencies": {
|
||||
"@daily-co/daily-js": "0.74.0"
|
||||
|
||||
3
examples/deployment/modal-example/.gitignore
vendored
@@ -1,3 +1,6 @@
|
||||
# Modal clone
|
||||
modal-examples
|
||||
|
||||
# Python
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
|
||||
@@ -1,24 +1,44 @@
|
||||
# Deploying Pipecat to Modal.com
|
||||
|
||||
Barebones deployment example for [modal.com](https://www.modal.com)
|
||||
Deployment example for [modal.com](https://www.modal.com). This example demonstrates how to deploy a FastAPI webapp to Modal with an RTVI compatible `/connect` endpoint that launches a Pipecat pipeline in a separate Modal container and returns a room/token for the client to join. This example also supports providing a parameter to the `/connect` endpoint for specifying which Pipecat pipeline to launch; openai, gemini, or vllm. The vllm pipeline points to a self-hosted OpenAI compatible LLM, using a llama model (neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w4a16), deployed to Modal.
|
||||
|
||||
1. Install dependencies
|
||||
|
||||
```bash
|
||||
python -m venv venv
|
||||
source venv/bin/active # or OS equivalent
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
2. Setup .env
|
||||
# Running this Example
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Setup a Modal account and install it on your machine if you have not already, following their easy 3-steps in their [Getting Started Guide](https://modal.com/docs/guide#getting-started)
|
||||
|
||||
## Deploy a self-serve LLM
|
||||
|
||||
1. Follow the Modal Guide and example for [Deploying an OpenAI-compatible LLM service with vLLM](https://modal.com/docs/examples/vllm_inference).
|
||||
|
||||
The TLDR, though, is to simply do the following from within this directory:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/modal-labs/modal-examples
|
||||
cd modal-examples
|
||||
modal deploy 06_gpu_and_ml/llm-serving/vllm_inference.py
|
||||
```
|
||||
|
||||
2. Jot down the endpoint from the previous step to use in the bot_vllm file mentioned below. It will look something like: `https://<Modal workspace>--example-vllm-openai-compatible-serve.modal.run`
|
||||
|
||||
**Note:** This Modal example is their [initial getting started example](https://modal.com/docs/examples/vllm_inference) with a Llama-3.1 model. By default, it will tear down the container after 15 minutes of inactivity and can take 5-10 minutes to re-start, during which time it is unusable. So for the purposes of just getting started and this example, we recommend visiting the `/docs` endpoint (`https://<Modal workspace>--example-vllm-openai-compatible-serve.modal.run/docs`) for your deployed llm in a browser to trigger the cold start. Then wait for the page to load, indicating its ready before trying to connect your client.
|
||||
|
||||
## Deploy FastAPI App and Pipecat pipeline to Modal
|
||||
|
||||
1. Setup environment variables
|
||||
|
||||
```bash
|
||||
cd server
|
||||
cp env.example .env
|
||||
# Modify .env to provide your service API Keys
|
||||
```
|
||||
|
||||
Alternatively, you can configure your Modal app to use [secrets](https://modal.com/docs/guide/secrets)
|
||||
|
||||
3. Test the app locally
|
||||
1. Update the `modal_url` in `server/src/bot_vllm.py` to point to the url produced from the self-serve llm deploy, mentioned above.
|
||||
|
||||
2. From within the `server` directory, test the app locally:
|
||||
|
||||
```bash
|
||||
modal serve app.py
|
||||
@@ -30,8 +50,34 @@ modal serve app.py
|
||||
modal deploy app.py
|
||||
```
|
||||
|
||||
## Configuration options
|
||||
5. Jot down the endpoint from the previous step to use in the client's app.js file mentioned its README. It will look something like: `https://<Modal workspace>--pipecat-modal-fastapi-app.modal.run`
|
||||
|
||||
This app sets some sensible defaults for reducing cold starts, such as `minkeep_warm=1`, which will keep at least 1 warm instance ready for your bot function.
|
||||
## Launch and Talk to your Bots running on Modal
|
||||
|
||||
It has been configured to only allow a concurrency of 1 (`max_inputs=1`) as each user will require their own running function.
|
||||
## Option 1: Direct Link
|
||||
|
||||
Simply click on the url displayed after running the server or deploy step to launch an agent and be redirected to a Daily room to talk with the launched bot. This will use the OpenAI pipeline.
|
||||
|
||||
## Option 2: Connect via an RTVI Client
|
||||
|
||||
Follow the instructions provided in the [client folder's README](client/javascript/README.md) for building and running a custom client that connects to your Modal endpoint. The provided client provides a dropdown for choosing which bot pipeline to run.
|
||||
|
||||
# Navigating your llm, server, and Pipecat logs
|
||||
|
||||
In your [Modal dashboard](https://modal.com/apps), you should have two Apps listed under Live Apps:
|
||||
|
||||
1. `example-vllm-openai-compatible`: This App contains the containers and logs used to run your self-hosted LLM. There will be just one App Function listed: `serve`. Click on this function to view logs for your LLM.
|
||||
2. `pipecat-modal`: This App contains the containers and logs used to run your `connect` endpoints and Pipecat pipelines. It will list two App Functions:
|
||||
1. `fastapi_app`: This function is running the endpoints that your client will interact with and initiate starting a new pipeline (`/`, `/connect`, `/status`). Click on this function to see logs for each endpoint hit.
|
||||
2. `bot_runner`: This function handles launching and running a bot pipeline. Click on this function to get a list of all pipeline runs and access each run's logs.
|
||||
|
||||
## Diagram of Deployment
|
||||
|
||||

|
||||
|
||||
# Modal + Pipecat Tips
|
||||
|
||||
- In most other Pipecat examples, we use Popen to launch the pipeline process from the /connect endpoint. In this example, we instead use a Modal function with its own Modal image defined. This change ensures that each run of the Pipeline happens in a isolated, customizable container.
|
||||
- For the FastAPI and most common Pipecat Pipeline containers, a default debian_slim CPU-only should be all that's required to run. GPU containers are needed for self-hosted services.
|
||||
- To minimize cold starts of the pipeline and reduce latency for users, set `min_containers=1` on the Modal Function that launches the pipeline to ensure at least one warm instance of your function is always available.
|
||||
- For next steps on running a self-hosted llm and reducing latency, check out all of [Modal's LLM examples](https://modal.com/docs/examples/vllm_inference).
|
||||
|
||||
@@ -1,80 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import os
|
||||
|
||||
import aiohttp
|
||||
import modal
|
||||
from bot import _voice_bot_process
|
||||
from fastapi import HTTPException
|
||||
from fastapi.responses import JSONResponse
|
||||
from loguru import logger
|
||||
|
||||
MAX_SESSION_TIME = 15 * 60 # 15 minutes
|
||||
|
||||
app = modal.App("pipecat-modal")
|
||||
|
||||
|
||||
image = modal.Image.debian_slim(python_version="3.12").pip_install_from_requirements(
|
||||
"requirements.txt"
|
||||
)
|
||||
|
||||
|
||||
@app.function(
|
||||
image=image,
|
||||
cpu=1.0,
|
||||
secrets=[modal.Secret.from_dotenv()],
|
||||
keep_warm=1,
|
||||
enable_memory_snapshot=True,
|
||||
max_inputs=1, # Do not reuse instances across requests
|
||||
retries=0,
|
||||
)
|
||||
def launch_bot_process(room_url: str, token: str):
|
||||
_voice_bot_process(room_url, token)
|
||||
|
||||
|
||||
@app.function(
|
||||
image=image,
|
||||
secrets=[modal.Secret.from_dotenv()],
|
||||
)
|
||||
@modal.web_endpoint(method="POST")
|
||||
async def start():
|
||||
from pipecat.transports.services.helpers.daily_rest import (
|
||||
DailyRESTHelper,
|
||||
DailyRoomParams,
|
||||
)
|
||||
|
||||
logger.info("Request received")
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
daily_rest_helper = DailyRESTHelper(
|
||||
daily_api_key=os.getenv("DAILY_API_KEY", ""),
|
||||
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
|
||||
aiohttp_session=session,
|
||||
)
|
||||
|
||||
# Create new Daily room
|
||||
room = await daily_rest_helper.create_room(DailyRoomParams())
|
||||
if not room.url:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail="Unable to create room",
|
||||
)
|
||||
logger.info(f"Created room: {room.url}")
|
||||
|
||||
# Create bot token for room
|
||||
token = await daily_rest_helper.get_token(room.url, MAX_SESSION_TIME)
|
||||
if not token:
|
||||
raise HTTPException(status_code=500, detail=f"Failed to get token for room: {room.url}")
|
||||
|
||||
logger.info(f"Bot token created: {token}")
|
||||
|
||||
# Spawn a new bot process
|
||||
launch_bot_process.spawn(room_url=room.url, token=token)
|
||||
|
||||
# Return room URL to the user to join
|
||||
# Note: in production, you would want to return a token to the user
|
||||
return JSONResponse(content={"room_url": room.url, token: token})
|
||||
@@ -1,95 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str, token: str):
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"bot",
|
||||
DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY", ""), voice_id="71a7ad14-091c-4e8e-a314-022ece01c121"
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
context_aggregator.user(),
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
context_aggregator.assistant(),
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
def _voice_bot_process(room_url: str, token: str):
|
||||
asyncio.run(main(room_url, token))
|
||||
1
examples/deployment/modal-example/client/javascript/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
node_modules
|
||||
@@ -0,0 +1,29 @@
|
||||
# JavaScript Implementation
|
||||
|
||||
Basic implementation using the [Pipecat JavaScript SDK](https://docs.pipecat.ai/client/js/introduction).
|
||||
|
||||
## Setup
|
||||
|
||||
1. Deploy the Modal server. See the main [README](../../README).
|
||||
|
||||
2. Navigate to the `client/javascript` directory:
|
||||
|
||||
```bash
|
||||
cd client/javascript
|
||||
```
|
||||
|
||||
3. Modify the baseUrl in src/app.js to point to your deployed Modal endpoint
|
||||
|
||||
4. Install dependencies:
|
||||
|
||||
```bash
|
||||
npm install
|
||||
```
|
||||
|
||||
5. Run the client app:
|
||||
|
||||
```
|
||||
npm run dev
|
||||
```
|
||||
|
||||
6. Visit http://localhost:5173 in your browser.
|
||||
@@ -0,0 +1,49 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>AI Chatbot</title>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<div class="container">
|
||||
<div class="status-bar">
|
||||
<div class="status">
|
||||
Status: <span id="connection-status">Disconnected</span>
|
||||
</div>
|
||||
<div class="controls">
|
||||
<select id="bot-selector">
|
||||
<option value="openai">OpenAI</option>
|
||||
<option value="gemini">Gemini</option>
|
||||
<option value="vllm">Llama</option>
|
||||
</select>
|
||||
<button id="connect-btn">Connect</button>
|
||||
<button id="disconnect-btn" disabled>Disconnect</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="main-content">
|
||||
<div class="bot-container">
|
||||
<div id="bot-video-container"></div>
|
||||
<audio id="bot-audio" autoplay></audio>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="device-bar">
|
||||
<div class="device-controls">
|
||||
<select id="device-selector"></select>
|
||||
<button id="mic-toggle-btn">Mute Mic</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="debug-panel">
|
||||
<h3>Debug Info</h3>
|
||||
<div id="debug-log"></div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script type="module" src="/src/app.js"></script>
|
||||
<link rel="stylesheet" href="/src/style.css" />
|
||||
</body>
|
||||
</html>
|
||||
1191
examples/deployment/modal-example/client/javascript/package-lock.json
generated
Normal file
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"name": "client",
|
||||
"version": "1.0.0",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
"dev": "vite",
|
||||
"build": "vite build",
|
||||
"preview": "vite preview"
|
||||
},
|
||||
"keywords": [],
|
||||
"author": "",
|
||||
"license": "ISC",
|
||||
"description": "",
|
||||
"devDependencies": {
|
||||
"vite": "^6.3.5"
|
||||
},
|
||||
"dependencies": {
|
||||
"@pipecat-ai/client-js": "^0.3.5",
|
||||
"@pipecat-ai/daily-transport": "^0.3.10"
|
||||
}
|
||||
}
|
||||
381
examples/deployment/modal-example/client/javascript/src/app.js
Normal file
@@ -0,0 +1,381 @@
|
||||
/**
|
||||
* Copyright (c) 2024–2025, Daily
|
||||
*
|
||||
* SPDX-License-Identifier: BSD 2-Clause License
|
||||
*/
|
||||
|
||||
/**
|
||||
* RTVI Client Implementation
|
||||
*
|
||||
* This client connects to an RTVI-compatible bot server using WebRTC (via Daily).
|
||||
* It handles audio/video streaming and manages the connection lifecycle.
|
||||
*
|
||||
* Requirements:
|
||||
* - A running RTVI bot server (defaults to http://localhost:7860)
|
||||
* - The server must implement the /connect endpoint that returns Daily.co room credentials
|
||||
* - Browser with WebRTC support
|
||||
*/
|
||||
|
||||
import { RTVIClient, RTVIEvent } from '@pipecat-ai/client-js';
|
||||
import { DailyTransport } from '@pipecat-ai/daily-transport';
|
||||
|
||||
/**
|
||||
* ChatbotClient handles the connection and media management for a real-time
|
||||
* voice and video interaction with an AI bot.
|
||||
*/
|
||||
class ChatbotClient {
|
||||
constructor() {
|
||||
// Initialize client state
|
||||
this.rtviClient = null;
|
||||
this.setupDOMElements();
|
||||
this.initializeClientAndTransport();
|
||||
this.setupEventListeners();
|
||||
}
|
||||
|
||||
/**
|
||||
* Set up references to DOM elements and create necessary media elements
|
||||
*/
|
||||
setupDOMElements() {
|
||||
// Get references to UI control elements
|
||||
this.connectBtn = document.getElementById('connect-btn');
|
||||
this.disconnectBtn = document.getElementById('disconnect-btn');
|
||||
this.statusSpan = document.getElementById('connection-status');
|
||||
this.debugLog = document.getElementById('debug-log');
|
||||
this.botVideoContainer = document.getElementById('bot-video-container');
|
||||
this.deviceSelector = document.getElementById('device-selector');
|
||||
|
||||
// Create an audio element for bot's voice output
|
||||
this.botAudio = document.createElement('audio');
|
||||
this.botAudio.autoplay = true;
|
||||
this.botAudio.playsInline = true;
|
||||
document.body.appendChild(this.botAudio);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set up event listeners for connect/disconnect buttons
|
||||
*/
|
||||
setupEventListeners() {
|
||||
this.connectBtn.addEventListener('click', () => this.connect());
|
||||
this.disconnectBtn.addEventListener('click', () => this.disconnect());
|
||||
|
||||
// Populate device selector
|
||||
this.rtviClient.getAllMics().then((mics) => {
|
||||
console.log('Available mics:', mics);
|
||||
mics.forEach((device) => {
|
||||
const option = document.createElement('option');
|
||||
option.value = device.deviceId;
|
||||
option.textContent = device.label || `Microphone ${device.deviceId}`;
|
||||
this.deviceSelector.appendChild(option);
|
||||
});
|
||||
});
|
||||
this.deviceSelector.addEventListener('change', (event) => {
|
||||
const selectedDeviceId = event.target.value;
|
||||
console.log('Selected device ID:', selectedDeviceId);
|
||||
this.rtviClient.updateMic(selectedDeviceId);
|
||||
});
|
||||
|
||||
// Handle mic mute/unmute toggle
|
||||
const micToggleBtn = document.getElementById('mic-toggle-btn');
|
||||
|
||||
micToggleBtn.addEventListener('click', () => {
|
||||
let micEnabled = this.rtviClient.isMicEnabled;
|
||||
micToggleBtn.textContent = micEnabled ? 'Unmute Mic' : 'Mute Mic';
|
||||
this.rtviClient.enableMic(!micEnabled);
|
||||
// Add logic to mute/unmute the mic
|
||||
if (micEnabled) {
|
||||
console.log('Mic muted');
|
||||
// Add code to mute the mic
|
||||
} else {
|
||||
console.log('Mic unmuted');
|
||||
// Add code to unmute the mic
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Set up the RTVI client and Daily transport
|
||||
*/
|
||||
async initializeClientAndTransport() {
|
||||
// Initialize the RTVI client with a DailyTransport and our configuration
|
||||
this.rtviClient = new RTVIClient({
|
||||
transport: new DailyTransport(),
|
||||
params: {
|
||||
// REPLACE WITH YOUR MODAL URL ENDPOINT
|
||||
baseUrl:
|
||||
'https://<Modal workspace>--pipecat-modal-bot-launcher.modal.run',
|
||||
endpoints: {
|
||||
connect: '/connect',
|
||||
},
|
||||
requestData: {
|
||||
bot_name: 'openai',
|
||||
},
|
||||
},
|
||||
enableMic: true, // Enable microphone for user input
|
||||
enableCam: false,
|
||||
callbacks: {
|
||||
// Handle connection state changes
|
||||
onConnected: () => {
|
||||
this.updateStatus('Connected');
|
||||
this.connectBtn.disabled = true;
|
||||
this.disconnectBtn.disabled = false;
|
||||
this.log('Client connected');
|
||||
},
|
||||
onDisconnected: () => {
|
||||
this.updateStatus('Disconnected');
|
||||
this.connectBtn.disabled = false;
|
||||
this.disconnectBtn.disabled = true;
|
||||
this.log('Client disconnected');
|
||||
},
|
||||
// Handle transport state changes
|
||||
onTransportStateChanged: (state) => {
|
||||
this.updateStatus(`Transport: ${state}`);
|
||||
this.log(`Transport state changed: ${state}`);
|
||||
if (state === 'connecting') {
|
||||
window.startTime = Date.now();
|
||||
}
|
||||
if (state === 'ready') {
|
||||
this.setupMediaTracks();
|
||||
console.warn('TIME TO BOT READY:', Date.now() - window.startTime);
|
||||
}
|
||||
},
|
||||
// Handle bot connection events
|
||||
onBotConnected: (participant) => {
|
||||
this.log(`Bot connected: ${JSON.stringify(participant)}`);
|
||||
},
|
||||
onBotDisconnected: (participant) => {
|
||||
this.log(`Bot disconnected: ${JSON.stringify(participant)}`);
|
||||
},
|
||||
onBotReady: (data) => {
|
||||
this.log(`Bot ready: ${JSON.stringify(data)}`);
|
||||
this.setupMediaTracks();
|
||||
},
|
||||
// Transcript events
|
||||
onUserTranscript: (data) => {
|
||||
// Only log final transcripts
|
||||
if (data.final) {
|
||||
this.log(`User: ${data.text}`);
|
||||
}
|
||||
},
|
||||
onBotTranscript: (data) => {
|
||||
this.log(`Bot: ${data.text}`);
|
||||
},
|
||||
// Error handling
|
||||
onMessageError: (error) => {
|
||||
console.log('Message error:', error);
|
||||
},
|
||||
onMicUpdated: (data) => {
|
||||
console.log('Mic updated:', data);
|
||||
this.deviceSelector.value = data.deviceId;
|
||||
},
|
||||
onError: (error) => {
|
||||
console.log('Error:', JSON.stringify(error));
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
// Set up listeners for media track events
|
||||
this.setupTrackListeners();
|
||||
|
||||
await this.rtviClient.initDevices();
|
||||
window.client = this.rtviClient;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a timestamped message to the debug log
|
||||
*/
|
||||
log(message) {
|
||||
const entry = document.createElement('div');
|
||||
entry.textContent = `${new Date().toISOString()} - ${message}`;
|
||||
|
||||
// Add styling based on message type
|
||||
if (message.startsWith('User: ')) {
|
||||
entry.style.color = '#2196F3'; // blue for user
|
||||
} else if (message.startsWith('Bot: ')) {
|
||||
entry.style.color = '#4CAF50'; // green for bot
|
||||
}
|
||||
|
||||
this.debugLog.appendChild(entry);
|
||||
this.debugLog.scrollTop = this.debugLog.scrollHeight;
|
||||
console.log(message);
|
||||
}
|
||||
|
||||
/**
|
||||
* Update the connection status display
|
||||
*/
|
||||
updateStatus(status) {
|
||||
this.statusSpan.textContent = status;
|
||||
this.log(`Status: ${status}`);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check for available media tracks and set them up if present
|
||||
* This is called when the bot is ready or when the transport state changes to ready
|
||||
*/
|
||||
setupMediaTracks() {
|
||||
if (!this.rtviClient) return;
|
||||
|
||||
// Get current tracks from the client
|
||||
const tracks = this.rtviClient.tracks();
|
||||
|
||||
// Set up any available bot tracks
|
||||
if (tracks.bot?.audio) {
|
||||
this.setupAudioTrack(tracks.bot.audio);
|
||||
}
|
||||
if (tracks.bot?.video) {
|
||||
this.setupVideoTrack(tracks.bot.video);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Set up listeners for track events (start/stop)
|
||||
* This handles new tracks being added during the session
|
||||
*/
|
||||
setupTrackListeners() {
|
||||
if (!this.rtviClient) return;
|
||||
|
||||
// Listen for new tracks starting
|
||||
this.rtviClient.on(RTVIEvent.TrackStarted, (track, participant) => {
|
||||
// Only handle non-local (bot) tracks
|
||||
if (!participant?.local) {
|
||||
if (track.kind === 'audio') {
|
||||
this.setupAudioTrack(track);
|
||||
} else if (track.kind === 'video') {
|
||||
this.setupVideoTrack(track);
|
||||
}
|
||||
this.log(
|
||||
`Track started event: ${track.kind} from ${
|
||||
participant?.name || 'unknown'
|
||||
}`
|
||||
);
|
||||
} else {
|
||||
this.log('Local mic unmuted');
|
||||
}
|
||||
});
|
||||
|
||||
// Listen for tracks stopping
|
||||
this.rtviClient.on(RTVIEvent.TrackStopped, (track, participant) => {
|
||||
if (participant.local) {
|
||||
this.log('Local mic muted');
|
||||
return;
|
||||
}
|
||||
this.log(
|
||||
`Track stopped event: ${track.kind} from ${
|
||||
participant?.name || 'unknown'
|
||||
}`
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Set up an audio track for playback
|
||||
* Handles both initial setup and track updates
|
||||
*/
|
||||
setupAudioTrack(track) {
|
||||
this.log('Setting up audio track');
|
||||
// Check if we're already playing this track
|
||||
if (this.botAudio.srcObject) {
|
||||
const oldTrack = this.botAudio.srcObject.getAudioTracks()[0];
|
||||
if (oldTrack?.id === track.id) return;
|
||||
}
|
||||
// Create a new MediaStream with the track and set it as the audio source
|
||||
this.botAudio.srcObject = new MediaStream([track]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set up a video track for display
|
||||
* Handles both initial setup and track updates
|
||||
*/
|
||||
setupVideoTrack(track) {
|
||||
this.log('Setting up video track');
|
||||
const videoEl = document.createElement('video');
|
||||
videoEl.autoplay = true;
|
||||
videoEl.playsInline = true;
|
||||
videoEl.muted = true;
|
||||
videoEl.style.width = '100%';
|
||||
videoEl.style.height = '100%';
|
||||
videoEl.style.objectFit = 'cover';
|
||||
|
||||
// Check if we're already displaying this track
|
||||
if (this.botVideoContainer.querySelector('video')?.srcObject) {
|
||||
const oldTrack = this.botVideoContainer
|
||||
.querySelector('video')
|
||||
.srcObject.getVideoTracks()[0];
|
||||
if (oldTrack?.id === track.id) return;
|
||||
}
|
||||
|
||||
// Create a new MediaStream with the track and set it as the video source
|
||||
videoEl.srcObject = new MediaStream([track]);
|
||||
this.botVideoContainer.innerHTML = '';
|
||||
this.botVideoContainer.appendChild(videoEl);
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize and connect to the bot
|
||||
* This sets up the RTVI client, initializes devices, and establishes the connection
|
||||
*/
|
||||
async connect() {
|
||||
try {
|
||||
const botSelector = document.getElementById('bot-selector');
|
||||
const selectedBot = botSelector.value;
|
||||
this.rtviClient.params.requestData.bot_name = selectedBot;
|
||||
|
||||
// Initialize audio/video devices
|
||||
this.log('Initializing devices...');
|
||||
await this.rtviClient.initDevices();
|
||||
|
||||
// Connect to the bot
|
||||
this.log(`Connecting to bot: ${selectedBot}`);
|
||||
await this.rtviClient.connect();
|
||||
|
||||
this.log('Connection complete');
|
||||
} catch (error) {
|
||||
// Handle any errors during connection
|
||||
console.error('Connection error:', error);
|
||||
this.log(`Error connecting: ${JSON.stringify(error.message)}`);
|
||||
this.log(`Error stack: ${error.stack}`);
|
||||
this.updateStatus('Error');
|
||||
|
||||
// Clean up if there's an error
|
||||
if (this.rtviClient) {
|
||||
try {
|
||||
await this.rtviClient.disconnect();
|
||||
} catch (disconnectError) {
|
||||
this.log(`Error during disconnect: ${disconnectError.message}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Disconnect from the bot and clean up media resources
|
||||
*/
|
||||
async disconnect() {
|
||||
if (this.rtviClient) {
|
||||
try {
|
||||
// Disconnect the RTVI client
|
||||
await this.rtviClient.disconnect();
|
||||
|
||||
// Clean up audio
|
||||
if (this.botAudio.srcObject) {
|
||||
this.botAudio.srcObject.getTracks().forEach((track) => track.stop());
|
||||
this.botAudio.srcObject = null;
|
||||
}
|
||||
|
||||
// Clean up video
|
||||
if (this.botVideoContainer.querySelector('video')?.srcObject) {
|
||||
const video = this.botVideoContainer.querySelector('video');
|
||||
video.srcObject.getTracks().forEach((track) => track.stop());
|
||||
video.srcObject = null;
|
||||
}
|
||||
this.botVideoContainer.innerHTML = '';
|
||||
} catch (error) {
|
||||
this.log(`Error disconnecting: ${error.message}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize the client when the page loads
|
||||
window.addEventListener('DOMContentLoaded', () => {
|
||||
new ChatbotClient();
|
||||
});
|
||||
@@ -0,0 +1,135 @@
|
||||
body {
|
||||
margin: 0;
|
||||
padding: 20px;
|
||||
font-family: Arial, sans-serif;
|
||||
background-color: #f0f0f0;
|
||||
}
|
||||
|
||||
.container {
|
||||
max-width: 1200px;
|
||||
margin: 0 auto;
|
||||
}
|
||||
|
||||
.status-bar,
|
||||
.device-bar {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
padding: 10px;
|
||||
background-color: #fff;
|
||||
border-radius: 8px;
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
|
||||
.controls,
|
||||
.device-controls {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 10px; /* Adds spacing between elements */
|
||||
}
|
||||
|
||||
.device-controls {
|
||||
margin-left: auto;
|
||||
}
|
||||
|
||||
.controls button,
|
||||
.device-controls button {
|
||||
padding: 8px 16px;
|
||||
margin-left: 10px;
|
||||
border: none;
|
||||
border-radius: 4px;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
#bot-selector,
|
||||
#device-selector {
|
||||
padding: 8px 16px;
|
||||
padding-right: 40px;
|
||||
border: none;
|
||||
border-radius: 4px;
|
||||
background-color: #6c757d; /* Gray background */
|
||||
color: white; /* White text */
|
||||
cursor: pointer;
|
||||
appearance: none; /* Removes default browser styling for dropdowns */
|
||||
background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24' fill='white'%3E%3Cpath d='M7 10l5 5 5-5z'/%3E%3C/svg%3E"); /* Custom arrow */
|
||||
background-repeat: no-repeat;
|
||||
background-position: right 8px center; /* Position the arrow */
|
||||
}
|
||||
|
||||
#bot-selector:focus,
|
||||
#device-selector:focus {
|
||||
outline: none;
|
||||
box-shadow: 0 0 4px rgba(0, 0, 0, 0.3); /* Add a subtle focus effect */
|
||||
}
|
||||
|
||||
#connect-btn {
|
||||
background-color: #4caf50;
|
||||
color: white;
|
||||
}
|
||||
|
||||
#disconnect-btn {
|
||||
background-color: #f44336;
|
||||
color: white;
|
||||
}
|
||||
|
||||
#mic-toggle-btn {
|
||||
}
|
||||
|
||||
button:disabled {
|
||||
opacity: 0.5;
|
||||
cursor: not-allowed;
|
||||
}
|
||||
|
||||
.main-content {
|
||||
background-color: #fff;
|
||||
border-radius: 8px;
|
||||
padding: 20px;
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
|
||||
.bot-container {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
#bot-video-container {
|
||||
width: 640px;
|
||||
height: 360px;
|
||||
background-color: #e0e0e0;
|
||||
border-radius: 8px;
|
||||
margin: 20px auto;
|
||||
overflow: hidden;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
}
|
||||
|
||||
#bot-video-container video {
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
object-fit: cover;
|
||||
}
|
||||
|
||||
.debug-panel {
|
||||
background-color: #fff;
|
||||
border-radius: 8px;
|
||||
padding: 20px;
|
||||
}
|
||||
|
||||
.debug-panel h3 {
|
||||
margin: 0 0 10px 0;
|
||||
font-size: 16px;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
#debug-log {
|
||||
height: 200px;
|
||||
overflow-y: auto;
|
||||
background-color: #f8f8f8;
|
||||
padding: 10px;
|
||||
border-radius: 4px;
|
||||
font-family: monospace;
|
||||
font-size: 12px;
|
||||
line-height: 1.4;
|
||||
}
|
||||
BIN
examples/deployment/modal-example/diagram.jpg
Normal file
|
After Width: | Height: | Size: 1.4 MiB |
@@ -1,3 +0,0 @@
|
||||
DAILY_API_KEY=
|
||||
OPENAI_API_KEY=
|
||||
CARTESIA_API_KEY=
|
||||
@@ -1,4 +0,0 @@
|
||||
python-dotenv==1.0.1
|
||||
modal==0.71.3
|
||||
pipecat-ai[daily,silero,cartesia,openai]
|
||||
fastapi==0.115.6
|
||||
307
examples/deployment/modal-example/server/app.py
Normal file
@@ -0,0 +1,307 @@
|
||||
"""modal_example.
|
||||
|
||||
This module shows a simple example of how to deploy a bot using Modal and FastAPI.
|
||||
|
||||
It includes:
|
||||
- FastAPI endpoints for starting agents and checking bot statuses.
|
||||
- Dynamic loading of bot implementations.
|
||||
- Use of a Daily transport for bot communication.
|
||||
"""
|
||||
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import importlib
|
||||
import os
|
||||
from contextlib import asynccontextmanager
|
||||
from typing import Any, Dict, Literal
|
||||
|
||||
import aiohttp
|
||||
import modal
|
||||
from fastapi import APIRouter, FastAPI, HTTPException
|
||||
from fastapi.responses import JSONResponse, RedirectResponse
|
||||
from pydantic import BaseModel
|
||||
|
||||
# container specifications for the FastAPI web server
|
||||
web_image = (
|
||||
modal.Image.debian_slim(python_version="3.13")
|
||||
.pip_install_from_requirements("requirements.txt")
|
||||
.pip_install("pipecat-ai[daily]")
|
||||
.add_local_dir("src", remote_path="/root/src")
|
||||
)
|
||||
|
||||
# container specifications for the Pipecat pipeline
|
||||
bot_image = (
|
||||
modal.Image.debian_slim(python_version="3.13")
|
||||
.apt_install("ffmpeg")
|
||||
.pip_install_from_requirements("requirements.txt")
|
||||
.pip_install("pipecat-ai[daily,elevenlabs,openai,silero,google]")
|
||||
.add_local_dir("src", remote_path="/root/src")
|
||||
)
|
||||
|
||||
app = modal.App("pipecat-modal", secrets=[modal.Secret.from_dotenv()])
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
bot_jobs = {}
|
||||
daily_helpers = {}
|
||||
|
||||
# Names of all supported bot implementations
|
||||
# These correspond to the bot files in the src directory
|
||||
BotName = Literal["openai", "gemini", "vllm"]
|
||||
|
||||
|
||||
def cleanup():
|
||||
"""Cleanup function to terminate all bot processes.
|
||||
|
||||
Called during server shutdown.
|
||||
"""
|
||||
for entry in bot_jobs.values():
|
||||
func = modal.FunctionCall.from_id(entry[0])
|
||||
if func:
|
||||
func.cancel()
|
||||
|
||||
|
||||
def get_bot_file(bot_name: BotName) -> str:
|
||||
"""Retrieve the bot file name corresponding to the provided bot_name.
|
||||
|
||||
Args:
|
||||
bot_name (BotName): The name of the bot (e.g., 'openai', 'gemini', 'vllm').
|
||||
|
||||
Returns:
|
||||
str: The file name corresponding to the bot implementation.
|
||||
|
||||
Raises:
|
||||
ValueError: If the bot name is invalid or not supported.
|
||||
"""
|
||||
# bot_implementation = os.getenv("BOT_IMPLEMENTATION", "openai").lower().strip()
|
||||
bot_implementation = bot_name.lower().strip()
|
||||
if not bot_implementation:
|
||||
bot_implementation = "openai"
|
||||
if bot_implementation not in ["openai", "gemini", "vllm"]:
|
||||
raise ValueError(
|
||||
f"Invalid BOT_IMPLEMENTATION: {bot_implementation}. Must be 'openai' or 'gemini' or 'vllm'"
|
||||
)
|
||||
|
||||
return f"bot_{bot_implementation}"
|
||||
|
||||
|
||||
def get_runner(path: str, bot_file: str) -> callable:
|
||||
"""Dynamically import the run_bot function based on the bot name.
|
||||
|
||||
Args:
|
||||
path (str): The path to the bot files (e.g., 'src').
|
||||
bot_file (str): The file name of the bot implementation (e.g., 'openai', 'gemini', 'vllm').
|
||||
|
||||
Returns:
|
||||
function: The run_bot function from the specified bot module.
|
||||
|
||||
Raises:
|
||||
ImportError: If the specified bot module or run_bot function is not found.
|
||||
"""
|
||||
try:
|
||||
# Dynamically construct the module name
|
||||
module_name = f"{path}.{bot_file}"
|
||||
# Import the module
|
||||
module = importlib.import_module(module_name)
|
||||
# Get the run_bot function from the module
|
||||
return getattr(module, "run_bot")
|
||||
except (ImportError, AttributeError) as e:
|
||||
raise ImportError(f"Failed to import run_bot from {module_name}: {e}")
|
||||
|
||||
|
||||
async def create_room_and_token() -> tuple[str, str]:
|
||||
"""Create a Daily room and generate an authentication token.
|
||||
|
||||
This function checks for existing room URL and token in the environment variables.
|
||||
If not found, it creates a new room using the Daily API and generates a token for it.
|
||||
|
||||
Returns:
|
||||
tuple[str, str]: A tuple containing the room URL and the authentication token.
|
||||
|
||||
Raises:
|
||||
HTTPException: If room creation or token generation fails.
|
||||
"""
|
||||
from pipecat.transports.services.helpers.daily_rest import DailyRoomParams
|
||||
|
||||
room_url = os.getenv("DAILY_SAMPLE_ROOM_URL", None)
|
||||
token = os.getenv("DAILY_SAMPLE_ROOM_TOKEN", None)
|
||||
if not room_url:
|
||||
room = await daily_helpers["rest"].create_room(DailyRoomParams())
|
||||
if not room.url:
|
||||
raise HTTPException(status_code=500, detail="Failed to create room")
|
||||
room_url = room.url
|
||||
|
||||
token = await daily_helpers["rest"].get_token(room_url)
|
||||
if not token:
|
||||
raise HTTPException(status_code=500, detail=f"Failed to get token for room: {room_url}")
|
||||
|
||||
return room_url, token
|
||||
|
||||
|
||||
@app.function(image=bot_image, min_containers=1)
|
||||
async def bot_runner(room_url, token, bot_name: BotName = "openai"):
|
||||
"""Launch the provided bot process, providing the given room URL and token for the bot to join.
|
||||
|
||||
Args:
|
||||
room_url (str): The URL of the Daily room where the bot and client will communicate.
|
||||
token (str): The authentication token for the room.
|
||||
bot_name (BotName): The name of the bot implementation to use. Defaults to "openai".
|
||||
|
||||
Raises:
|
||||
HTTPException: If the bot pipeline fails to start.
|
||||
"""
|
||||
try:
|
||||
path = "src"
|
||||
bot_file = get_bot_file(bot_name)
|
||||
run_bot = get_runner(path, bot_file)
|
||||
|
||||
print(f"Starting bot process: {bot_file} -u {room_url} -t {token}")
|
||||
await run_bot(room_url, token)
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Failed to start bot pipeline: {e}")
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
"""FastAPI lifespan manager that handles startup and shutdown tasks.
|
||||
|
||||
- Creates aiohttp session
|
||||
- Initializes Daily API helper
|
||||
- Cleans up resources on shutdown
|
||||
"""
|
||||
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
|
||||
|
||||
aiohttp_session = aiohttp.ClientSession()
|
||||
daily_helpers["rest"] = DailyRESTHelper(
|
||||
daily_api_key=os.getenv("DAILY_API_KEY", ""),
|
||||
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
|
||||
aiohttp_session=aiohttp_session,
|
||||
)
|
||||
yield
|
||||
await aiohttp_session.close()
|
||||
cleanup()
|
||||
|
||||
|
||||
class ConnectData(BaseModel):
|
||||
"""Data provided by client to specify the bot pipeline.
|
||||
|
||||
Attributes:
|
||||
bot_name (BotName): The name of the bot to connect to. Defaults to "openai".
|
||||
"""
|
||||
|
||||
bot_name: BotName = "openai"
|
||||
|
||||
|
||||
async def start(data: ConnectData):
|
||||
"""Internal method to start a bot agent and return the room URL and token.
|
||||
|
||||
Args:
|
||||
data (ConnectData): The data containing the bot name to use.
|
||||
|
||||
Returns:
|
||||
tuple[str, str]: A tuple containing the room URL and token.
|
||||
"""
|
||||
room_url, token = await create_room_and_token()
|
||||
launch_bot_func = modal.Function.from_name("pipecat-modal", "bot_runner")
|
||||
function_id = launch_bot_func.spawn(room_url, token, data.bot_name)
|
||||
bot_jobs[function_id] = (function_id, room_url)
|
||||
|
||||
return room_url, token
|
||||
|
||||
|
||||
@router.get("/")
|
||||
async def start_agent():
|
||||
"""A user endpoint for launching a bot agent and redirecting to the created room URL.
|
||||
|
||||
This function retrieves the bot implementation from the environment,
|
||||
starts the bot agent, and redirects the user to the room URL to
|
||||
interact with the bot through a Daily Prebuilt Interface.
|
||||
|
||||
Returns:
|
||||
RedirectResponse: A response that redirects to the room URL.
|
||||
"""
|
||||
bot_name = os.getenv("BOT_IMPLEMENTATION", "openai").lower().strip()
|
||||
print(f"Starting bot: {bot_name}")
|
||||
room_url, token = await start(ConnectData(bot_name=bot_name))
|
||||
|
||||
return RedirectResponse(room_url)
|
||||
|
||||
|
||||
@router.post("/connect")
|
||||
async def rtvi_connect(data: ConnectData) -> Dict[Any, Any]:
|
||||
"""A user endpoint for launching a bot agent and retrieving the room/token credentials.
|
||||
|
||||
This function retrieves the bot implementation from the request, if provided,
|
||||
starts the bot agent, and returns the room URL and token for the bot. This allows the
|
||||
client to then connect to the bot using their own RTVI interface.
|
||||
|
||||
Args:
|
||||
data (ConnectData): Optional. The data containing the bot name to use.
|
||||
|
||||
Returns:
|
||||
Dict[Any, Any]: A dictionary containing the room URL and token.
|
||||
"""
|
||||
print(f"Starting bot: {data.bot_name}")
|
||||
if data is None or not data.bot_name:
|
||||
data.bot_name = os.getenv("BOT_IMPLEMENTATION", "openai").lower().strip()
|
||||
room_url, token = await start(data)
|
||||
|
||||
return {"room_url": room_url, "token": token}
|
||||
|
||||
|
||||
@router.get("/status/{fid}")
|
||||
def get_status(fid: str):
|
||||
"""Retrieve the status of a bot process by its function ID.
|
||||
|
||||
Args:
|
||||
fid (str): The function ID of the bot process.
|
||||
|
||||
Returns:
|
||||
JSONResponse: A JSON response containing the bot's status and result code.
|
||||
|
||||
Raises:
|
||||
HTTPException: If the bot process with the given ID is not found.
|
||||
"""
|
||||
func = modal.FunctionCall.from_id(fid)
|
||||
if not func:
|
||||
raise HTTPException(status_code=404, detail=f"Bot with process id: {fid} not found")
|
||||
|
||||
try:
|
||||
result = func.get(timeout=0)
|
||||
return JSONResponse({"bot_id": fid, "status": "finished", "code": result})
|
||||
except modal.exception.OutputExpiredError:
|
||||
return JSONResponse({"bot_id": fid, "status": "finished", "code": 404})
|
||||
except TimeoutError:
|
||||
return JSONResponse({"bot_id": fid, "status": "running", "code": 202})
|
||||
|
||||
|
||||
@app.function(image=web_image, min_containers=1)
|
||||
@modal.concurrent(max_inputs=1)
|
||||
@modal.asgi_app()
|
||||
def fastapi_app():
|
||||
"""Create and configure the FastAPI application.
|
||||
|
||||
This function initializes the FastAPI app with middleware, routes, and lifespan management.
|
||||
It is decorated to be used as a Modal ASGI app.
|
||||
"""
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
|
||||
# Initialize FastAPI app
|
||||
web_app = FastAPI(lifespan=lifespan)
|
||||
|
||||
web_app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
# Include the endpoints from endpoints.py
|
||||
web_app.include_router(router)
|
||||
|
||||
return web_app
|
||||
14
examples/deployment/modal-example/server/env.example
Normal file
@@ -0,0 +1,14 @@
|
||||
DAILY_API_KEY=
|
||||
|
||||
# determines which bot file to default to: 'openai', 'gemini', or 'vllm'
|
||||
BOT_IMPLEMENTATION=openai
|
||||
|
||||
# needed for the openai bot pipeline
|
||||
OPENAI_API_KEY=
|
||||
ELEVENLABS_API_KEY=
|
||||
|
||||
# needed for the gemini live bot pipeline
|
||||
GOOGLE_API_KEY=
|
||||
|
||||
# needed if you modified the API Key for your self-hosted LLM
|
||||
VLLM_API_KEY=
|
||||
@@ -0,0 +1,2 @@
|
||||
python-dotenv==1.0.1
|
||||
modal==0.71.3
|
||||
BIN
examples/deployment/modal-example/server/src/assets/robot01.png
Normal file
|
After Width: | Height: | Size: 759 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot010.png
Normal file
|
After Width: | Height: | Size: 884 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot011.png
Normal file
|
After Width: | Height: | Size: 876 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot012.png
Normal file
|
After Width: | Height: | Size: 881 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot013.png
Normal file
|
After Width: | Height: | Size: 866 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot014.png
Normal file
|
After Width: | Height: | Size: 874 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot015.png
Normal file
|
After Width: | Height: | Size: 882 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot016.png
Normal file
|
After Width: | Height: | Size: 885 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot017.png
Normal file
|
After Width: | Height: | Size: 888 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot018.png
Normal file
|
After Width: | Height: | Size: 890 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot019.png
Normal file
|
After Width: | Height: | Size: 898 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot02.png
Normal file
|
After Width: | Height: | Size: 836 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot020.png
Normal file
|
After Width: | Height: | Size: 903 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot021.png
Normal file
|
After Width: | Height: | Size: 908 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot022.png
Normal file
|
After Width: | Height: | Size: 908 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot023.png
Normal file
|
After Width: | Height: | Size: 905 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot024.png
Normal file
|
After Width: | Height: | Size: 903 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot025.png
Normal file
|
After Width: | Height: | Size: 866 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot03.png
Normal file
|
After Width: | Height: | Size: 849 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot04.png
Normal file
|
After Width: | Height: | Size: 866 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot05.png
Normal file
|
After Width: | Height: | Size: 866 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot06.png
Normal file
|
After Width: | Height: | Size: 864 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot07.png
Normal file
|
After Width: | Height: | Size: 858 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot08.png
Normal file
|
After Width: | Height: | Size: 875 KiB |
BIN
examples/deployment/modal-example/server/src/assets/robot09.png
Normal file
|
After Width: | Height: | Size: 881 KiB |
198
examples/deployment/modal-example/server/src/bot_gemini.py
Normal file
@@ -0,0 +1,198 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Gemini Bot Implementation.
|
||||
|
||||
This module implements a chatbot using Google's Gemini Multimodal Live model.
|
||||
It includes:
|
||||
- Real-time audio/video interaction through Daily
|
||||
- Animated robot avatar
|
||||
- Speech-to-speech model
|
||||
|
||||
The bot runs as part of a pipeline that processes audio/video frames and manages
|
||||
the conversation flow using Gemini's streaming capabilities.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from PIL import Image
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import (
|
||||
BotStartedSpeakingFrame,
|
||||
BotStoppedSpeakingFrame,
|
||||
Frame,
|
||||
OutputImageRawFrame,
|
||||
SpriteFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.processors.frameworks.rtvi import RTVIConfig, RTVIObserver, RTVIProcessor
|
||||
from pipecat.services.gemini_multimodal_live.gemini import GeminiMultimodalLiveLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
try:
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
except ValueError:
|
||||
# Handle the case where logger is already initialized
|
||||
pass
|
||||
|
||||
sprites = []
|
||||
script_dir = os.path.dirname(__file__)
|
||||
|
||||
for i in range(1, 26):
|
||||
# Build the full path to the image file
|
||||
full_path = os.path.join(script_dir, f"assets/robot0{i}.png")
|
||||
# Get the filename without the extension to use as the dictionary key
|
||||
# Open the image and convert it to bytes
|
||||
with Image.open(full_path) as img:
|
||||
sprites.append(OutputImageRawFrame(image=img.tobytes(), size=img.size, format=img.format))
|
||||
|
||||
# Create a smooth animation by adding reversed frames
|
||||
flipped = sprites[::-1]
|
||||
sprites.extend(flipped)
|
||||
|
||||
# Define static and animated states
|
||||
quiet_frame = sprites[0] # Static frame for when bot is listening
|
||||
talking_frame = SpriteFrame(images=sprites) # Animation sequence for when bot is talking
|
||||
|
||||
|
||||
class TalkingAnimation(FrameProcessor):
|
||||
"""Manages the bot's visual animation states.
|
||||
|
||||
Switches between static (listening) and animated (talking) states based on
|
||||
the bot's current speaking status.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self._is_talking = False
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
"""Process incoming frames and update animation state.
|
||||
|
||||
Args:
|
||||
frame: The incoming frame to process
|
||||
direction: The direction of frame flow in the pipeline
|
||||
"""
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
# Switch to talking animation when bot starts speaking
|
||||
if isinstance(frame, BotStartedSpeakingFrame):
|
||||
if not self._is_talking:
|
||||
await self.push_frame(talking_frame)
|
||||
self._is_talking = True
|
||||
# Return to static frame when bot stops speaking
|
||||
elif isinstance(frame, BotStoppedSpeakingFrame):
|
||||
await self.push_frame(quiet_frame)
|
||||
self._is_talking = False
|
||||
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def run_bot(room_url: str, token: str):
|
||||
"""Main bot execution function.
|
||||
|
||||
Sets up and runs the bot pipeline including:
|
||||
- Daily video transport with specific audio parameters
|
||||
- Gemini Live multimodal model integration
|
||||
- Voice activity detection
|
||||
- Animation processing
|
||||
- RTVI event handling
|
||||
"""
|
||||
# Set up Daily transport with specific audio/video parameters for Gemini
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Chatbot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_width=1024,
|
||||
camera_out_height=576,
|
||||
vad_enabled=True,
|
||||
vad_audio_passthrough=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)),
|
||||
),
|
||||
)
|
||||
|
||||
# Initialize the Gemini Multimodal Live model
|
||||
llm = GeminiMultimodalLiveLLMService(
|
||||
api_key=os.getenv("GOOGLE_API_KEY"),
|
||||
voice_id="Puck", # Aoede, Charon, Fenrir, Kore, Puck
|
||||
transcribe_user_audio=True,
|
||||
)
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "You are Chatbot, a friendly, helpful robot. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by introducing yourself.",
|
||||
},
|
||||
]
|
||||
|
||||
# Set up conversation context and management
|
||||
# The context_aggregator will automatically collect conversation context
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
ta = TalkingAnimation()
|
||||
|
||||
#
|
||||
# RTVI events for Pipecat client UI
|
||||
#
|
||||
rtvi = RTVIProcessor(config=RTVIConfig(config=[]))
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
rtvi,
|
||||
context_aggregator.user(),
|
||||
llm,
|
||||
ta,
|
||||
transport.output(),
|
||||
context_aggregator.assistant(),
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
observers=[RTVIObserver(rtvi)],
|
||||
)
|
||||
await task.queue_frame(quiet_frame)
|
||||
|
||||
@rtvi.event_handler("on_client_ready")
|
||||
async def on_client_ready(rtvi):
|
||||
await rtvi.set_bot_ready()
|
||||
# Kick off the conversation
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
print(f"Participant left: {participant}")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
226
examples/deployment/modal-example/server/src/bot_openai.py
Normal file
@@ -0,0 +1,226 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""OpenAI Bot Implementation.
|
||||
|
||||
This module implements a chatbot using OpenAI's GPT-4 model for natural language
|
||||
processing. It includes:
|
||||
- Real-time audio/video interaction through Daily
|
||||
- Animated robot avatar
|
||||
- Text-to-speech using ElevenLabs
|
||||
- Support for both English and Spanish
|
||||
|
||||
The bot runs as part of a pipeline that processes audio/video frames and manages
|
||||
the conversation flow.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from PIL import Image
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import (
|
||||
BotStartedSpeakingFrame,
|
||||
BotStoppedSpeakingFrame,
|
||||
Frame,
|
||||
OutputImageRawFrame,
|
||||
SpriteFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.processors.frameworks.rtvi import RTVIConfig, RTVIObserver, RTVIProcessor
|
||||
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
try:
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
except ValueError:
|
||||
# Handle the case where logger is already initialized
|
||||
pass
|
||||
|
||||
sprites = []
|
||||
script_dir = os.path.dirname(__file__)
|
||||
|
||||
# Load sequential animation frames
|
||||
for i in range(1, 26):
|
||||
# Build the full path to the image file
|
||||
full_path = os.path.join(script_dir, f"assets/robot0{i}.png")
|
||||
# Get the filename without the extension to use as the dictionary key
|
||||
# Open the image and convert it to bytes
|
||||
with Image.open(full_path) as img:
|
||||
sprites.append(OutputImageRawFrame(image=img.tobytes(), size=img.size, format=img.format))
|
||||
|
||||
# Create a smooth animation by adding reversed frames
|
||||
flipped = sprites[::-1]
|
||||
sprites.extend(flipped)
|
||||
|
||||
# Define static and animated states
|
||||
quiet_frame = sprites[0] # Static frame for when bot is listening
|
||||
talking_frame = SpriteFrame(images=sprites) # Animation sequence for when bot is talking
|
||||
|
||||
|
||||
class TalkingAnimation(FrameProcessor):
|
||||
"""Manages the bot's visual animation states.
|
||||
|
||||
Switches between static (listening) and animated (talking) states based on
|
||||
the bot's current speaking status.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self._is_talking = False
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
"""Process incoming frames and update animation state.
|
||||
|
||||
Args:
|
||||
frame: The incoming frame to process
|
||||
direction: The direction of frame flow in the pipeline
|
||||
"""
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
# Switch to talking animation when bot starts speaking
|
||||
if isinstance(frame, BotStartedSpeakingFrame):
|
||||
if not self._is_talking:
|
||||
await self.push_frame(talking_frame)
|
||||
self._is_talking = True
|
||||
# Return to static frame when bot stops speaking
|
||||
elif isinstance(frame, BotStoppedSpeakingFrame):
|
||||
await self.push_frame(quiet_frame)
|
||||
self._is_talking = False
|
||||
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def run_bot(room_url: str, token: str):
|
||||
"""Main bot execution function.
|
||||
|
||||
Sets up and runs the bot pipeline including:
|
||||
- Daily video transport
|
||||
- Speech-to-text and text-to-speech services
|
||||
- Language model integration
|
||||
- Animation processing
|
||||
- RTVI event handling
|
||||
"""
|
||||
# Set up Daily transport with video/audio parameters
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Chatbot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_width=1024,
|
||||
camera_out_height=576,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
transcription_enabled=True,
|
||||
#
|
||||
# Spanish
|
||||
#
|
||||
# transcription_settings=DailyTranscriptionSettings(
|
||||
# language="es",
|
||||
# tier="nova",
|
||||
# model="2-general"
|
||||
# )
|
||||
),
|
||||
)
|
||||
|
||||
# Initialize text-to-speech service
|
||||
tts = ElevenLabsTTSService(
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
#
|
||||
# English
|
||||
#
|
||||
voice_id="SAz9YHcvj6GT2YYXdXww",
|
||||
#
|
||||
# Spanish
|
||||
#
|
||||
# model="eleven_multilingual_v2",
|
||||
# voice_id="gD1IexrzCvsXPHUuT0s3",
|
||||
)
|
||||
|
||||
# Initialize LLM service
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
#
|
||||
# English
|
||||
#
|
||||
"content": "You are an incessant one-upper. Start by asking the user how their day is going.",
|
||||
#
|
||||
# Spanish
|
||||
#
|
||||
# "content": "Eres Chatbot, un amigable y útil robot. Tu objetivo es demostrar tus capacidades de una manera breve. Tus respuestas se convertiran a audio así que nunca no debes incluir caracteres especiales. Contesta a lo que el usuario pregunte de una manera creativa, útil y breve. Empieza por presentarte a ti mismo.",
|
||||
},
|
||||
]
|
||||
|
||||
# Set up conversation context and management
|
||||
# The context_aggregator will automatically collect conversation context
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
ta = TalkingAnimation()
|
||||
|
||||
#
|
||||
# RTVI events for Pipecat client UI
|
||||
#
|
||||
rtvi = RTVIProcessor(config=RTVIConfig(config=[]))
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
rtvi,
|
||||
context_aggregator.user(),
|
||||
llm,
|
||||
tts,
|
||||
ta,
|
||||
transport.output(),
|
||||
context_aggregator.assistant(),
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
observers=[RTVIObserver(rtvi)],
|
||||
)
|
||||
await task.queue_frame(quiet_frame)
|
||||
|
||||
@rtvi.event_handler("on_client_ready")
|
||||
async def on_client_ready(rtvi):
|
||||
await rtvi.set_bot_ready()
|
||||
# Kick off the conversation
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
print(f"Participant left: {participant}")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
239
examples/deployment/modal-example/server/src/bot_vllm.py
Normal file
@@ -0,0 +1,239 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""OpenAI Bot Implementation.
|
||||
|
||||
This module implements a chatbot using OpenAI's GPT-4 model for natural language
|
||||
processing. It includes:
|
||||
- Real-time audio/video interaction through Daily
|
||||
- Animated robot avatar
|
||||
- Text-to-speech using ElevenLabs
|
||||
- Support for both English and Spanish
|
||||
|
||||
The bot runs as part of a pipeline that processes audio/video frames and manages
|
||||
the conversation flow.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from typing import List
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from openai.types.chat import ChatCompletionMessageParam
|
||||
from PIL import Image
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import (
|
||||
BotStartedSpeakingFrame,
|
||||
BotStoppedSpeakingFrame,
|
||||
Frame,
|
||||
OutputImageRawFrame,
|
||||
SpriteFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.processors.frameworks.rtvi import RTVIConfig, RTVIObserver, RTVIProcessor
|
||||
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
try:
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
except ValueError:
|
||||
# Handle the case where logger is already initialized
|
||||
pass
|
||||
|
||||
# REPLACE WITH YOUR MODAL URL ENDPOINT
|
||||
modal_url = "https://<Modal workspace>--example-vllm-openai-compatible-serve.modal.run"
|
||||
api_key = os.getenv("VLLM_API_KEY", "super-secret-key")
|
||||
|
||||
|
||||
sprites = []
|
||||
script_dir = os.path.dirname(__file__)
|
||||
|
||||
# Load sequential animation frames
|
||||
for i in range(1, 26):
|
||||
# Build the full path to the image file
|
||||
full_path = os.path.join(script_dir, f"assets/robot0{i}.png")
|
||||
# Get the filename without the extension to use as the dictionary key
|
||||
# Open the image and convert it to bytes
|
||||
with Image.open(full_path) as img:
|
||||
sprites.append(OutputImageRawFrame(image=img.tobytes(), size=img.size, format=img.format))
|
||||
|
||||
# Create a smooth animation by adding reversed frames
|
||||
flipped = sprites[::-1]
|
||||
sprites.extend(flipped)
|
||||
|
||||
# Define static and animated states
|
||||
quiet_frame = sprites[0] # Static frame for when bot is listening
|
||||
talking_frame = SpriteFrame(images=sprites) # Animation sequence for when bot is talking
|
||||
|
||||
|
||||
class TalkingAnimation(FrameProcessor):
|
||||
"""Manages the bot's visual animation states.
|
||||
|
||||
Switches between static (listening) and animated (talking) states based on
|
||||
the bot's current speaking status.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self._is_talking = False
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
"""Process incoming frames and update animation state.
|
||||
|
||||
Args:
|
||||
frame: The incoming frame to process
|
||||
direction: The direction of frame flow in the pipeline
|
||||
"""
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
# Switch to talking animation when bot starts speaking
|
||||
if isinstance(frame, BotStartedSpeakingFrame):
|
||||
if not self._is_talking:
|
||||
await self.push_frame(talking_frame)
|
||||
self._is_talking = True
|
||||
# Return to static frame when bot stops speaking
|
||||
elif isinstance(frame, BotStoppedSpeakingFrame):
|
||||
await self.push_frame(quiet_frame)
|
||||
self._is_talking = False
|
||||
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def run_bot(room_url: str, token: str):
|
||||
"""Main bot execution function.
|
||||
|
||||
Sets up and runs the bot pipeline including:
|
||||
- Daily video transport
|
||||
- Speech-to-text and text-to-speech services
|
||||
- Language model integration
|
||||
- Animation processing
|
||||
- RTVI event handling
|
||||
"""
|
||||
# Set up Daily transport with video/audio parameters
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Chatbot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_width=1024,
|
||||
camera_out_height=576,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
transcription_enabled=True,
|
||||
#
|
||||
# Spanish
|
||||
#
|
||||
# transcription_settings=DailyTranscriptionSettings(
|
||||
# language="es",
|
||||
# tier="nova",
|
||||
# model="2-general"
|
||||
# )
|
||||
),
|
||||
)
|
||||
|
||||
# Initialize text-to-speech service
|
||||
tts = ElevenLabsTTSService(
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
#
|
||||
# English
|
||||
#
|
||||
voice_id="D38z5RcWu1voky8WS1ja",
|
||||
#
|
||||
# Spanish
|
||||
#
|
||||
# model="eleven_multilingual_v2",
|
||||
# voice_id="gD1IexrzCvsXPHUuT0s3",
|
||||
)
|
||||
|
||||
# Initialize LLM service
|
||||
llm = OpenAILLMService(
|
||||
# To use OpenAI
|
||||
api_key=api_key,
|
||||
# Or, to use a local vLLM (or similar) api server
|
||||
model="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
|
||||
base_url=f"{modal_url}/v1",
|
||||
)
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
#
|
||||
# English
|
||||
#
|
||||
"content": "You are a salesman for Modal, the cloud-native serverless Python computing platform.",
|
||||
#
|
||||
# Spanish
|
||||
#
|
||||
# "content": "Eres Chatbot, un amigable y útil robot. Tu objetivo es demostrar tus capacidades de una manera breve. Tus respuestas se convertiran a audio así que nunca no debes incluir caracteres especiales. Contesta a lo que el usuario pregunte de una manera creativa, útil y breve. Empieza por presentarte a ti mismo.",
|
||||
},
|
||||
]
|
||||
|
||||
# Set up conversation context and management
|
||||
# The context_aggregator will automatically collect conversation context
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
ta = TalkingAnimation()
|
||||
|
||||
#
|
||||
# RTVI events for Pipecat client UI
|
||||
#
|
||||
rtvi = RTVIProcessor(config=RTVIConfig(config=[]))
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
rtvi,
|
||||
context_aggregator.user(),
|
||||
llm,
|
||||
tts,
|
||||
ta,
|
||||
transport.output(),
|
||||
context_aggregator.assistant(),
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
observers=[RTVIObserver(rtvi)],
|
||||
)
|
||||
await task.queue_frame(quiet_frame)
|
||||
|
||||
@rtvi.event_handler("on_client_ready")
|
||||
async def on_client_ready(rtvi):
|
||||
await rtvi.set_bot_ready()
|
||||
# Kick off the conversation
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
print(f"Participant left: {participant}")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
84
examples/deployment/modal-example/server/src/runner.py
Normal file
@@ -0,0 +1,84 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import importlib
|
||||
import os
|
||||
|
||||
|
||||
def get_bot_file(arg_bot: str | None) -> str:
|
||||
bot_implementation = arg_bot or os.getenv("BOT_IMPLEMENTATION", "openai").lower().strip()
|
||||
if not bot_implementation:
|
||||
bot_implementation = "openai"
|
||||
if bot_implementation not in ["openai", "gemini", "vllm"]:
|
||||
raise ValueError(
|
||||
f"Invalid BOT_IMPLEMENTATION: {bot_implementation}. Must be 'openai' or 'gemini'"
|
||||
)
|
||||
return f"bot_{bot_implementation}"
|
||||
|
||||
|
||||
def get_runner(bot_file: str):
|
||||
"""Dynamically import the run_bot function based on the bot name.
|
||||
|
||||
Args:
|
||||
bot_name (str): The name of the bot implementation (e.g., 'openai', 'gemini').
|
||||
|
||||
Returns:
|
||||
function: The run_bot function from the specified bot module.
|
||||
|
||||
Raises:
|
||||
ImportError: If the specified bot module or run_bot function is not found.
|
||||
"""
|
||||
try:
|
||||
# Dynamically construct the module name
|
||||
module_name = f"{bot_file}"
|
||||
# Import the module
|
||||
module = importlib.import_module(module_name)
|
||||
# Get the run_bot function from the module
|
||||
return getattr(module, "run_bot")
|
||||
except (ImportError, AttributeError) as e:
|
||||
raise ImportError(f"Failed to import run_bot from {module_name}: {e}")
|
||||
|
||||
|
||||
def main():
|
||||
"""Parse the args to launch the appropriate bot using the given room/token."""
|
||||
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
|
||||
parser.add_argument(
|
||||
"-u", "--url", type=str, required=False, help="URL of the Daily room to join"
|
||||
)
|
||||
parser.add_argument(
|
||||
"-t",
|
||||
"--token",
|
||||
type=str,
|
||||
required=False,
|
||||
help="Daily room token",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-b",
|
||||
"--bot",
|
||||
type=str,
|
||||
required=False,
|
||||
help="Bot runner to use (e.g., openai, gemini)",
|
||||
)
|
||||
|
||||
args, unknown = parser.parse_known_args()
|
||||
|
||||
url = args.url or os.getenv("DAILY_SAMPLE_ROOM_URL")
|
||||
token = args.token or os.getenv("DAILY_SAMPLE_ROOM_TOKEN")
|
||||
bot_file = get_bot_file(args.bot)
|
||||
|
||||
if not url:
|
||||
raise Exception(
|
||||
"No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL."
|
||||
)
|
||||
|
||||
run_bot = get_runner(bot_file)
|
||||
asyncio.run(run_bot(url, token))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -100,7 +100,28 @@ phone numbers with valid values for your use case.
|
||||
|
||||
### Dialin Request
|
||||
|
||||
The server will receive a request when a call is received from Daily.
|
||||
The server will receive a request when a call is received from Daily.
|
||||
The payload that the webhook received is as follows:
|
||||
```json
|
||||
{
|
||||
// for dial-in from webhook
|
||||
"To": "+14152251493",
|
||||
"From": "+14158483432",
|
||||
"callId": "string-contains-uuid",
|
||||
"callDomain": "string-contains-uuid",
|
||||
"sipHeaders": {
|
||||
"X-My-Custom-Header": "value",
|
||||
"x-caller": "+1234567890",
|
||||
"x-called": "+1987654321",
|
||||
},
|
||||
}
|
||||
```
|
||||
The `To`, `From`, `callId`, `callDomain` fields are converted to
|
||||
`snake_case` and mapped to `dialin_settings`. In addition, `sipHeader`
|
||||
contains any custom SIP headers received by Daily on the SIP
|
||||
interconnect address (`sip_uri`). These are headers sent from
|
||||
Twilio or other external SIP platforms, for example, to send the
|
||||
caller's phone number.
|
||||
|
||||
### Dialout Request
|
||||
|
||||
@@ -158,6 +179,7 @@ curl -X POST http://localhost:3000/api/dial \
|
||||
"From": "+1987654321",
|
||||
"callId": "call-uuid-123",
|
||||
"callDomain": "domain-uuid-456",
|
||||
"sipHeader": {},
|
||||
"dialout_settings": [
|
||||
{
|
||||
"phoneNumber": "+1234567890",
|
||||
|
||||
@@ -39,6 +39,11 @@ class RoomRequest(BaseModel):
|
||||
None, description="A flag to perform voicemail or answeing-machine detection"
|
||||
)
|
||||
call_transfer: Optional[Dict[str, Any]] = Field(None, description="to initiate a call transfer")
|
||||
sipHeaders: Optional[Dict[str, Any]] = Field(
|
||||
None,
|
||||
alias="sip_headers",
|
||||
description="Custom SIP headers received from the external SIP provider",
|
||||
)
|
||||
|
||||
class Config:
|
||||
populate_by_name = True
|
||||
@@ -57,6 +62,14 @@ class RoomRequest(BaseModel):
|
||||
"callDomain": "string-contains-uuid"
|
||||
These need to be remapped to dialin_settings
|
||||
|
||||
In addition, we may receive in the body that can be
|
||||
sent to the bot as a custom field, sip_headers
|
||||
"sipHeaders": {
|
||||
"X-My-Custom-Header": "value",
|
||||
"x-caller": "+14158483432",
|
||||
"x-called": "+14152251493",
|
||||
},
|
||||
|
||||
"dialout_settings": [
|
||||
{"phoneNumber": "+14158483432", "callerId": "+14152251493"},
|
||||
{"sipUri": "sip:username@sip.hostname"}
|
||||
@@ -157,6 +170,7 @@ async def dial(request: RoomRequest, raw_request: Request):
|
||||
"dialout_settings": request.dialout_settings,
|
||||
"voicemail_detection": request.voicemail_detection,
|
||||
"call_transfer": request.call_transfer,
|
||||
"sip_headers": request.sipHeaders, # passing the SIP headers to the bot
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@@ -65,6 +65,7 @@ export default async function handler(req, res) {
|
||||
From,
|
||||
callId,
|
||||
callDomain,
|
||||
sipHeaders,
|
||||
dialout_settings,
|
||||
voicemail_detection,
|
||||
call_transfer
|
||||
@@ -117,6 +118,7 @@ export default async function handler(req, res) {
|
||||
dialout_settings,
|
||||
voicemail_detection,
|
||||
call_transfer,
|
||||
sip_headers: sipHeaders,
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
111
examples/foundational/04c-transports-daily-audio-source.py
Normal file
@@ -0,0 +1,111 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from daily_runner import configure
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService, Language, LiveOptions
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_in_passthrough=False,
|
||||
audio_out_enabled=True,
|
||||
audio_out_sample_rate=16000,
|
||||
transcription_enabled=False,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
stt = DeepgramSTTService(
|
||||
api_key=os.getenv("DEEPGRAM_API_KEY"),
|
||||
live_options=LiveOptions(language=Language.EN),
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt,
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
await transport.capture_participant_audio(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
119
examples/foundational/29-turn-tracking-observer.py
Normal file
@@ -0,0 +1,119 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.network.small_webrtc import SmallWebRTCTransport
|
||||
from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
transport = SmallWebRTCTransport(
|
||||
webrtc_connection=webrtc_connection,
|
||||
params=TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt,
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
turn_observer = task.turn_tracking_observer
|
||||
if turn_observer:
|
||||
|
||||
@turn_observer.event_handler("on_turn_started")
|
||||
async def on_turn_started(observer, turn_number):
|
||||
logger.info(f"🔄 Turn {turn_number} started")
|
||||
|
||||
@turn_observer.event_handler("on_turn_ended")
|
||||
async def on_turn_ended(observer, turn_number, duration, was_interrupted):
|
||||
if was_interrupted:
|
||||
logger.info(f"🔄 Turn {turn_number} interrupted after {duration:.2f}s")
|
||||
else:
|
||||
logger.info(f"🏁 Turn {turn_number} completed in {duration:.2f}s")
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
|
||||
@transport.event_handler("on_client_closed")
|
||||
async def on_client_closed(transport, client):
|
||||
logger.info(f"Client closed connection")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=False)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from run import main
|
||||
|
||||
main()
|
||||
@@ -11,18 +11,17 @@ from pathlib import Path
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from openai import audio
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import Frame
|
||||
from pipecat.observers.base_observer import BaseObserver, FramePushed
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.google.llm import GoogleLLMService, LLMSearchResponseFrame
|
||||
from pipecat.services.llm_service import LLMService
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.network.small_webrtc import SmallWebRTCTransport
|
||||
from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection
|
||||
@@ -33,7 +32,7 @@ load_dotenv(override=True)
|
||||
|
||||
|
||||
# Function handlers for the LLM
|
||||
search_tool = {"google_search_retrieval": {}}
|
||||
search_tool = {"google_search": {}}
|
||||
tools = [search_tool]
|
||||
|
||||
system_instruction = """
|
||||
@@ -50,14 +49,22 @@ Start each interaction by asking the user about which place they would like to k
|
||||
"""
|
||||
|
||||
|
||||
class LLMSearchLoggerProcessor(FrameProcessor):
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
class LLMSearchLoggerObserver(BaseObserver):
|
||||
async def on_push_frame(self, data: FramePushed):
|
||||
src = data.source
|
||||
dst = data.destination
|
||||
frame = data.frame
|
||||
timestamp = data.timestamp
|
||||
|
||||
if not isinstance(src, LLMService) and not isinstance(dst, LLMService):
|
||||
return
|
||||
|
||||
time_sec = timestamp / 1_000_000_000
|
||||
|
||||
arrow = "→"
|
||||
|
||||
if isinstance(frame, LLMSearchResponseFrame):
|
||||
print(f"LLMSearchLoggerProcessor: {frame}")
|
||||
|
||||
await self.push_frame(frame)
|
||||
logger.debug(f"🧠 {arrow} {dst} LLM SEARCH RESPONSE FRAME: {frame} at {time_sec:.2f}s")
|
||||
|
||||
|
||||
async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace):
|
||||
@@ -84,7 +91,6 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac
|
||||
api_key=os.getenv("GOOGLE_API_KEY"),
|
||||
system_instruction=system_instruction,
|
||||
tools=tools,
|
||||
model="gemini-1.5-flash-002",
|
||||
)
|
||||
|
||||
context = OpenAILLMContext(
|
||||
@@ -97,22 +103,23 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac
|
||||
)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
llm_search_logger = LLMSearchLoggerProcessor()
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
context_aggregator.user(),
|
||||
llm,
|
||||
llm_search_logger,
|
||||
tts,
|
||||
transport.output(),
|
||||
context_aggregator.assistant(),
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(allow_interruptions=True),
|
||||
observers=[LLMSearchLoggerObserver()],
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import io
|
||||
import os
|
||||
@@ -80,7 +81,7 @@ class UrlToImageProcessor(FrameProcessor):
|
||||
logger.error(error_msg)
|
||||
|
||||
|
||||
async def run_bot(webrtc_connection: SmallWebRTCConnection):
|
||||
async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
transport = SmallWebRTCTransport(
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
|
||||
@@ -28,7 +29,7 @@ from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
async def run_bot(webrtc_connection: SmallWebRTCConnection):
|
||||
async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
transport = SmallWebRTCTransport(
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import io
|
||||
import os
|
||||
@@ -81,7 +82,7 @@ class UrlToImageProcessor(FrameProcessor):
|
||||
logger.error(error_msg)
|
||||
|
||||
|
||||
async def run_bot(webrtc_connection: SmallWebRTCConnection):
|
||||
async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
transport = SmallWebRTCTransport(
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
"@types/node": "^22.13.1",
|
||||
"@vitejs/plugin-react-swc": "^3.7.2",
|
||||
"typescript": "^5.7.3",
|
||||
"vite": "^6.0.2"
|
||||
"vite": "^6.3.5"
|
||||
}
|
||||
},
|
||||
"node_modules/@babel/runtime": {
|
||||
@@ -1370,9 +1370,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/vite": {
|
||||
"version": "6.3.3",
|
||||
"resolved": "https://registry.npmjs.org/vite/-/vite-6.3.3.tgz",
|
||||
"integrity": "sha512-5nXH+QsELbFKhsEfWLkHrvgRpTdGJzqOZ+utSdmPTvwHmvU6ITTm3xx+mRusihkcI8GeC7lCDyn3kDtiki9scw==",
|
||||
"version": "6.3.5",
|
||||
"resolved": "https://registry.npmjs.org/vite/-/vite-6.3.5.tgz",
|
||||
"integrity": "sha512-cZn6NDFE7wdTpINgs++ZJ4N49W2vRp8LCKrn3Ob1kYNtOo21vfDoaV5GzBfLU4MovSAB8uNRm4jgzVQZ+mBzPQ==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"esbuild": "^0.25.0",
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
"@types/node": "^22.13.1",
|
||||
"@vitejs/plugin-react-swc": "^3.7.2",
|
||||
"typescript": "^5.7.3",
|
||||
"vite": "^6.0.2"
|
||||
"vite": "^6.3.5"
|
||||
},
|
||||
"dependencies": {
|
||||
"@pipecat-ai/client-js": "^0.3.5",
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
"@pipecat-ai/daily-transport": "^0.3.8"
|
||||
},
|
||||
"devDependencies": {
|
||||
"vite": "^6.0.9"
|
||||
"vite": "^6.3.5"
|
||||
}
|
||||
},
|
||||
"node_modules/@babel/runtime": {
|
||||
@@ -1114,9 +1114,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/vite": {
|
||||
"version": "6.3.3",
|
||||
"resolved": "https://registry.npmjs.org/vite/-/vite-6.3.3.tgz",
|
||||
"integrity": "sha512-5nXH+QsELbFKhsEfWLkHrvgRpTdGJzqOZ+utSdmPTvwHmvU6ITTm3xx+mRusihkcI8GeC7lCDyn3kDtiki9scw==",
|
||||
"version": "6.3.5",
|
||||
"resolved": "https://registry.npmjs.org/vite/-/vite-6.3.5.tgz",
|
||||
"integrity": "sha512-cZn6NDFE7wdTpINgs++ZJ4N49W2vRp8LCKrn3Ob1kYNtOo21vfDoaV5GzBfLU4MovSAB8uNRm4jgzVQZ+mBzPQ==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"esbuild": "^0.25.0",
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
"license": "ISC",
|
||||
"description": "",
|
||||
"devDependencies": {
|
||||
"vite": "^6.0.9"
|
||||
"vite": "^6.3.5"
|
||||
},
|
||||
"dependencies": {
|
||||
"@pipecat-ai/client-js": "^0.3.5",
|
||||
|
||||
@@ -102,9 +102,9 @@ async def main():
|
||||
|
||||
llm = GoogleLLMService(
|
||||
api_key=os.getenv("GOOGLE_API_KEY"),
|
||||
model="gemini-1.5-flash-002",
|
||||
system_instruction=system_instruction,
|
||||
tools=tools,
|
||||
model="gemini-1.5-flash",
|
||||
)
|
||||
|
||||
context = OpenAILLMContext(
|
||||
@@ -153,7 +153,6 @@ async def main():
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
logger.debug("First participant joined: {}", participant["id"])
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
|
||||
169
examples/open-telemetry-tracing/README.md
Normal file
@@ -0,0 +1,169 @@
|
||||
# OpenTelemetry Tracing for Pipecat
|
||||
|
||||
This demo showcases OpenTelemetry tracing integration for Pipecat services, allowing you to visualize service calls, performance metrics, and dependencies in a Jaeger dashboard.
|
||||
|
||||
## Features
|
||||
|
||||
- **Hierarchical Tracing**: Track entire conversations, turns, and service calls
|
||||
- **Service Tracing**: Detailed spans for TTS, STT, and LLM services with rich context
|
||||
- **TTFB Metrics**: Capture Time To First Byte metrics for latency analysis
|
||||
- **Usage Statistics**: Track character counts for TTS and token usage for LLMs
|
||||
- **Flexible Exporters**: Use Jaeger, Zipkin, or any OpenTelemetry-compatible backend
|
||||
|
||||
## Trace Structure
|
||||
|
||||
Traces are organized hierarchically:
|
||||
|
||||
```
|
||||
Conversation (conversation-uuid)
|
||||
├── turn-1
|
||||
│ ├── stt_deepgramsttservice
|
||||
│ ├── llm_openaillmservice
|
||||
│ └── tts_cartesiattsservice
|
||||
└── turn-2
|
||||
├── stt_deepgramsttservice
|
||||
├── llm_openaillmservice
|
||||
└── tts_cartesiattsservice
|
||||
turn-N
|
||||
└── ...
|
||||
```
|
||||
|
||||
This organization helps you track conversation-to-conversation and turn-to-turn.
|
||||
|
||||
## Setup Instructions
|
||||
|
||||
### 1. Start the Jaeger Container
|
||||
|
||||
Run Jaeger in Docker to collect and visualize traces:
|
||||
|
||||
```bash
|
||||
docker run -d --name jaeger \
|
||||
-e COLLECTOR_ZIPKIN_HOST_PORT=:9411 \
|
||||
-p 16686:16686 \
|
||||
-p 4317:4317 \
|
||||
-p 4318:4318 \
|
||||
jaegertracing/all-in-one:latest
|
||||
```
|
||||
|
||||
### 2. Environment Configuration
|
||||
|
||||
Create a `.env` file with your API keys and enable tracing:
|
||||
|
||||
```
|
||||
ENABLE_TRACING=true
|
||||
OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317 # Point to your preferred backend
|
||||
# OTEL_CONSOLE_EXPORT=true # Set to any value for debug output to console
|
||||
|
||||
# Service API keys
|
||||
DEEPGRAM_API_KEY=your_key_here
|
||||
CARTESIA_API_KEY=your_key_here
|
||||
OPENAI_API_KEY=your_key_here
|
||||
```
|
||||
|
||||
### 3. Configure Your Pipeline Task
|
||||
|
||||
Enable tracing in your Pipecat application:
|
||||
|
||||
```python
|
||||
# Initialize OpenTelemetry with your chosen exporter
|
||||
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
|
||||
|
||||
exporter = OTLPSpanExporter(
|
||||
endpoint="http://localhost:4317", # Jaeger OTLP endpoint
|
||||
insecure=True,
|
||||
)
|
||||
|
||||
setup_tracing(
|
||||
service_name="pipecat-demo",
|
||||
exporter=exporter,
|
||||
console_export=os.getenv("OTEL_CONSOLE_EXPORT", "false").lower() == "true",
|
||||
)
|
||||
|
||||
# Enable tracing in your PipelineTask
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True, # Required for some service metrics
|
||||
),
|
||||
enable_tracing=True, # Enables both turn and conversation tracing
|
||||
conversation_id="customer-123", # Optional - will auto-generate if not provided
|
||||
)
|
||||
```
|
||||
|
||||
### 4. Exporter Options
|
||||
|
||||
While this demo uses Jaeger, you can configure any OpenTelemetry-compatible exporter:
|
||||
|
||||
#### Jaeger (Default for the demo)
|
||||
|
||||
```python
|
||||
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
|
||||
|
||||
exporter = OTLPSpanExporter(
|
||||
endpoint="http://localhost:4317", # Jaeger OTLP endpoint
|
||||
insecure=True,
|
||||
)
|
||||
```
|
||||
|
||||
#### Cloud Providers
|
||||
|
||||
Many cloud providers offer OpenTelemetry-compatible observability services:
|
||||
|
||||
- AWS X-Ray
|
||||
- Google Cloud Trace
|
||||
- Azure Monitor
|
||||
- Datadog APM
|
||||
|
||||
See the OpenTelemetry documentation for specific exporter configurations:
|
||||
https://opentelemetry.io/ecosystem/vendors/
|
||||
|
||||
### 5. Install Dependencies
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
### 6. Run the Demo
|
||||
|
||||
```bash
|
||||
python bot.py
|
||||
```
|
||||
|
||||
### 7. View Traces in Jaeger
|
||||
|
||||
Open your browser to [http://localhost:16686](http://localhost:16686) and select the "pipecat-demo" service to view traces.
|
||||
|
||||
## Understanding the Traces
|
||||
|
||||
- **Conversation Spans**: The top-level span representing an entire conversation
|
||||
- **Turn Spans**: Child spans of conversations that represent each turn in the dialog
|
||||
- **Service Spans**: Detailed service operations nested under turns
|
||||
- **Service Attributes**: Each service includes rich context about its operation:
|
||||
- **TTS**: Voice ID, character count, service type
|
||||
- **STT**: Transcription text, language, model
|
||||
- **LLM**: Messages, tokens used, model, service configuration
|
||||
- **Metrics**: Performance data like `metrics.ttfb_ms` and processing durations
|
||||
|
||||
## How It Works
|
||||
|
||||
The tracing system consists of:
|
||||
|
||||
1. **TurnTrackingObserver**: Detects conversation turns
|
||||
2. **TurnTraceObserver**: Creates spans for turns and conversations
|
||||
3. **Service Decorators**: `@traced_tts`, `@traced_stt`, `@traced_llm` for service-specific tracing
|
||||
4. **Context Providers**: Share context between different parts of the pipeline
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
- **No Traces in Jaeger**: Ensure the Docker container is running and the OTLP endpoint is correct
|
||||
- **Debugging Traces**: Set `OTEL_CONSOLE_EXPORT=true` to print traces to the console for debugging
|
||||
- **Missing Metrics**: Check that `enable_metrics=True` in PipelineParams
|
||||
- **Connection Errors**: Verify network connectivity to the Jaeger container
|
||||
- **Exporter Issues**: Try the Console exporter (`OTEL_CONSOLE_EXPORT=true`) to verify tracing works
|
||||
- **Other Backends**: If using a different backend, ensure you've configured the correct exporter and endpoint
|
||||
|
||||
## References
|
||||
|
||||
- [OpenTelemetry Python Documentation](https://opentelemetry-python.readthedocs.io/)
|
||||
- [Jaeger Documentation](https://www.jaegertracing.io/docs/latest/)
|
||||
159
examples/open-telemetry-tracing/bot.py
Normal file
@@ -0,0 +1,159 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.llm_service import FunctionCallParams
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.network.small_webrtc import SmallWebRTCTransport
|
||||
from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection
|
||||
from pipecat.utils.tracing.setup import setup_tracing
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
IS_TRACING_ENABLED = bool(os.getenv("ENABLE_TRACING"))
|
||||
|
||||
# Initialize tracing if enabled
|
||||
if IS_TRACING_ENABLED:
|
||||
# Create the exporter
|
||||
otlp_exporter = OTLPSpanExporter(
|
||||
endpoint=os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4317"),
|
||||
insecure=True,
|
||||
)
|
||||
|
||||
# Set up tracing with the exporter
|
||||
setup_tracing(
|
||||
service_name="pipecat-demo",
|
||||
exporter=otlp_exporter,
|
||||
console_export=bool(os.getenv("OTEL_CONSOLE_EXPORT")),
|
||||
)
|
||||
logger.info("OpenTelemetry tracing initialized")
|
||||
|
||||
|
||||
async def fetch_weather_from_api(params: FunctionCallParams):
|
||||
await params.llm.push_frame(TTSSpeakFrame("Let me check on that."))
|
||||
await params.result_callback({"conditions": "nice", "temperature": "75"})
|
||||
|
||||
|
||||
async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
transport = SmallWebRTCTransport(
|
||||
webrtc_connection=webrtc_connection,
|
||||
params=TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"), params=OpenAILLMService.InputParams(temperature=0.5)
|
||||
)
|
||||
|
||||
# You can also register a function_name of None to get all functions
|
||||
# sent to the same callback with an additional function_name parameter.
|
||||
llm.register_function("get_current_weather", fetch_weather_from_api)
|
||||
|
||||
weather_function = FunctionSchema(
|
||||
name="get_current_weather",
|
||||
description="Get the current weather",
|
||||
properties={
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the user's location.",
|
||||
},
|
||||
},
|
||||
required=["location", "format"],
|
||||
)
|
||||
tools = ToolsSchema(standard_tools=[weather_function])
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages, tools)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
context_aggregator.user(),
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
context_aggregator.assistant(),
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
enable_tracing=IS_TRACING_ENABLED,
|
||||
# Optionally, add a conversation ID to track the conversation
|
||||
# conversation_id="8df26cc1-6db0-4a7a-9930-1e037c8f1fa2",
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
|
||||
@transport.event_handler("on_client_closed")
|
||||
async def on_client_closed(transport, client):
|
||||
logger.info(f"Client closed connection")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=False)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from run import main
|
||||
|
||||
main()
|
||||
10
examples/open-telemetry-tracing/env.example
Normal file
@@ -0,0 +1,10 @@
|
||||
DEEPGRAM_API_KEY=your_deepgram_key
|
||||
CARTESIA_API_KEY=your_cartesia_key
|
||||
OPENAI_API_KEY=your_openai_key
|
||||
|
||||
# Set to any value to enable tracing
|
||||
ENABLE_TRACING=true
|
||||
# OTLP endpoint (defaults to localhost:4317 if not set)
|
||||
OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317
|
||||
# Set to any value to enable console output for debugging
|
||||
# OTEL_CONSOLE_EXPORT=true
|
||||
6
examples/open-telemetry-tracing/requirements.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
fastapi
|
||||
uvicorn
|
||||
python-dotenv
|
||||
pipecat-ai[webrtc,silero,cartesia,deepgram,openai,tracing]
|
||||
pipecat-ai-small-webrtc-prebuilt
|
||||
opentelemetry-exporter-otlp-proto-grpc
|
||||
205
examples/open-telemetry-tracing/run.py
Normal file
@@ -0,0 +1,205 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import importlib.util
|
||||
import os
|
||||
import sys
|
||||
from contextlib import asynccontextmanager
|
||||
from inspect import iscoroutinefunction, signature
|
||||
from typing import Any, Callable, Dict, Optional, Tuple
|
||||
|
||||
import uvicorn
|
||||
from dotenv import load_dotenv
|
||||
from fastapi import BackgroundTasks, FastAPI
|
||||
from fastapi.responses import RedirectResponse
|
||||
from loguru import logger
|
||||
from pipecat_ai_small_webrtc_prebuilt.frontend import SmallWebRTCPrebuiltUI
|
||||
|
||||
from pipecat.transports.network.webrtc_connection import IceServer, SmallWebRTCConnection
|
||||
|
||||
# Load environment variables
|
||||
load_dotenv(override=True)
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
# Store connections by pc_id
|
||||
pcs_map: Dict[str, SmallWebRTCConnection] = {}
|
||||
|
||||
ice_servers = [
|
||||
IceServer(
|
||||
urls="stun:stun.l.google.com:19302",
|
||||
)
|
||||
]
|
||||
|
||||
# Mount the frontend at /
|
||||
app.mount("/client", SmallWebRTCPrebuiltUI)
|
||||
|
||||
# Store program arguments
|
||||
args: argparse.Namespace = argparse.Namespace()
|
||||
|
||||
# Store the bot module and function info
|
||||
bot_module: Any = None
|
||||
run_bot_func: Optional[Callable] = None
|
||||
is_webrtc_bot: bool = True
|
||||
|
||||
|
||||
def import_bot_file(file_path: str) -> Tuple[Any, Callable, bool]:
|
||||
"""Dynamically import the bot file and determine how to run it.
|
||||
|
||||
Returns:
|
||||
tuple: (module, run_function, is_webrtc_bot)
|
||||
- module: The imported module
|
||||
- run_function: Either run_bot or main function
|
||||
- is_webrtc_bot: True if run_bot function exists and accepts a WebRTC connection
|
||||
"""
|
||||
if not os.path.exists(file_path):
|
||||
raise FileNotFoundError(f"Bot file not found: {file_path}")
|
||||
|
||||
# Extract module name without extension
|
||||
module_name = os.path.splitext(os.path.basename(file_path))[0]
|
||||
|
||||
# Load the module
|
||||
spec = importlib.util.spec_from_file_location(module_name, file_path)
|
||||
if not spec or not spec.loader:
|
||||
raise ImportError(f"Could not load spec for {file_path}")
|
||||
|
||||
module = importlib.util.module_from_spec(spec)
|
||||
sys.modules[module_name] = module
|
||||
spec.loader.exec_module(module)
|
||||
|
||||
# Check for run_bot function first
|
||||
if hasattr(module, "run_bot"):
|
||||
run_func = module.run_bot
|
||||
# Check if the function accepts a WebRTC connection
|
||||
sig = signature(run_func)
|
||||
is_webrtc = len(sig.parameters) > 0
|
||||
return module, run_func, is_webrtc
|
||||
|
||||
# Fall back to main function
|
||||
if hasattr(module, "main") and iscoroutinefunction(module.main):
|
||||
return module, module.main, False
|
||||
|
||||
raise AttributeError(f"No run_bot or async main function found in {file_path}")
|
||||
|
||||
|
||||
@app.get("/", include_in_schema=False)
|
||||
async def root_redirect():
|
||||
return RedirectResponse(url="/client/")
|
||||
|
||||
|
||||
@app.post("/api/offer")
|
||||
async def offer(request: dict, background_tasks: BackgroundTasks):
|
||||
global run_bot_func, is_webrtc_bot
|
||||
|
||||
if not run_bot_func:
|
||||
raise RuntimeError("No bot file has been loaded")
|
||||
|
||||
if not is_webrtc_bot:
|
||||
return {
|
||||
"error": "This bot doesn't support WebRTC connections, it's running in standalone mode"
|
||||
}
|
||||
|
||||
pc_id = request.get("pc_id")
|
||||
|
||||
if pc_id and pc_id in pcs_map:
|
||||
pipecat_connection = pcs_map[pc_id]
|
||||
logger.info(f"Reusing existing connection for pc_id: {pc_id}")
|
||||
await pipecat_connection.renegotiate(
|
||||
sdp=request["sdp"], type=request["type"], restart_pc=request.get("restart_pc", False)
|
||||
)
|
||||
else:
|
||||
pipecat_connection = SmallWebRTCConnection(ice_servers)
|
||||
await pipecat_connection.initialize(sdp=request["sdp"], type=request["type"])
|
||||
|
||||
@pipecat_connection.event_handler("closed")
|
||||
async def handle_disconnected(webrtc_connection: SmallWebRTCConnection):
|
||||
logger.info(f"Discarding peer connection for pc_id: {webrtc_connection.pc_id}")
|
||||
pcs_map.pop(webrtc_connection.pc_id, None)
|
||||
|
||||
# We've already checked that run_bot_func exists
|
||||
assert run_bot_func is not None
|
||||
background_tasks.add_task(run_bot_func, pipecat_connection, args)
|
||||
|
||||
answer = pipecat_connection.get_answer()
|
||||
# Updating the peer connection inside the map
|
||||
pcs_map[answer["pc_id"]] = pipecat_connection
|
||||
|
||||
return answer
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
yield # Run app
|
||||
coros = [pc.close() for pc in pcs_map.values()]
|
||||
await asyncio.gather(*coros)
|
||||
pcs_map.clear()
|
||||
|
||||
|
||||
async def run_standalone_bot() -> None:
|
||||
"""Run a standalone bot that doesn't require WebRTC"""
|
||||
global run_bot_func
|
||||
if run_bot_func is not None:
|
||||
await run_bot_func()
|
||||
else:
|
||||
raise RuntimeError("No bot function available to run")
|
||||
|
||||
|
||||
def main(parser: Optional[argparse.ArgumentParser] = None):
|
||||
global args
|
||||
|
||||
if not parser:
|
||||
parser = argparse.ArgumentParser(description="Pipecat Bot Runner")
|
||||
parser.add_argument("bot_file", nargs="?", help="Path to the bot file", default=None)
|
||||
parser.add_argument(
|
||||
"--host", default="localhost", help="Host for HTTP server (default: localhost)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--port", type=int, default=7860, help="Port for HTTP server (default: 7860)"
|
||||
)
|
||||
parser.add_argument("--verbose", "-v", action="count", default=0)
|
||||
args = parser.parse_args()
|
||||
|
||||
logger.remove(0)
|
||||
if args.verbose:
|
||||
logger.add(sys.stderr, level="TRACE")
|
||||
else:
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
# Infer the bot file from the caller if not provided explicitly
|
||||
bot_file = args.bot_file
|
||||
if bot_file is None:
|
||||
# Get the __file__ of the script that called main()
|
||||
import inspect
|
||||
|
||||
caller_frame = inspect.stack()[1]
|
||||
caller_globals = caller_frame.frame.f_globals
|
||||
bot_file = caller_globals.get("__file__")
|
||||
|
||||
if not bot_file:
|
||||
print("❌ Could not determine the bot file. Pass it explicitly to main().")
|
||||
sys.exit(1)
|
||||
|
||||
# Import the bot file
|
||||
try:
|
||||
global run_bot_func, bot_module, is_webrtc_bot
|
||||
bot_module, run_bot_func, is_webrtc_bot = import_bot_file(bot_file)
|
||||
logger.info(f"Successfully loaded bot from {bot_file}")
|
||||
|
||||
if is_webrtc_bot:
|
||||
logger.info("Detected WebRTC-compatible bot, starting web server...")
|
||||
uvicorn.run(app, host=args.host, port=args.port)
|
||||
else:
|
||||
logger.info("Detected standalone bot, running directly...")
|
||||
asyncio.run(run_standalone_bot())
|
||||
except Exception as e:
|
||||
logger.error(f"Error loading bot file: {e}")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -16,7 +16,7 @@
|
||||
"@types/node": "^22.13.1",
|
||||
"@vitejs/plugin-react-swc": "^3.7.2",
|
||||
"typescript": "^5.7.3",
|
||||
"vite": "^6.0.2"
|
||||
"vite": "^6.3.5"
|
||||
}
|
||||
},
|
||||
"node_modules/@babel/runtime": {
|
||||
@@ -1371,9 +1371,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/vite": {
|
||||
"version": "6.3.3",
|
||||
"resolved": "https://registry.npmjs.org/vite/-/vite-6.3.3.tgz",
|
||||
"integrity": "sha512-5nXH+QsELbFKhsEfWLkHrvgRpTdGJzqOZ+utSdmPTvwHmvU6ITTm3xx+mRusihkcI8GeC7lCDyn3kDtiki9scw==",
|
||||
"version": "6.3.5",
|
||||
"resolved": "https://registry.npmjs.org/vite/-/vite-6.3.5.tgz",
|
||||
"integrity": "sha512-cZn6NDFE7wdTpINgs++ZJ4N49W2vRp8LCKrn3Ob1kYNtOo21vfDoaV5GzBfLU4MovSAB8uNRm4jgzVQZ+mBzPQ==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"esbuild": "^0.25.0",
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
"@types/node": "^22.13.1",
|
||||
"@vitejs/plugin-react-swc": "^3.7.2",
|
||||
"typescript": "^5.7.3",
|
||||
"vite": "^6.0.2"
|
||||
"vite": "^6.3.5"
|
||||
},
|
||||
"dependencies": {
|
||||
"@pipecat-ai/client-js": "^0.3.2",
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
"@pipecat-ai/daily-transport": "^0.3.8"
|
||||
},
|
||||
"devDependencies": {
|
||||
"vite": "^6.0.9"
|
||||
"vite": "^6.3.5"
|
||||
}
|
||||
},
|
||||
"node_modules/@babel/runtime": {
|
||||
@@ -1114,9 +1114,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/vite": {
|
||||
"version": "6.3.3",
|
||||
"resolved": "https://registry.npmjs.org/vite/-/vite-6.3.3.tgz",
|
||||
"integrity": "sha512-5nXH+QsELbFKhsEfWLkHrvgRpTdGJzqOZ+utSdmPTvwHmvU6ITTm3xx+mRusihkcI8GeC7lCDyn3kDtiki9scw==",
|
||||
"version": "6.3.5",
|
||||
"resolved": "https://registry.npmjs.org/vite/-/vite-6.3.5.tgz",
|
||||
"integrity": "sha512-cZn6NDFE7wdTpINgs++ZJ4N49W2vRp8LCKrn3Ob1kYNtOo21vfDoaV5GzBfLU4MovSAB8uNRm4jgzVQZ+mBzPQ==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"esbuild": "^0.25.0",
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
"license": "ISC",
|
||||
"description": "",
|
||||
"devDependencies": {
|
||||
"vite": "^6.0.9"
|
||||
"vite": "^6.3.5"
|
||||
},
|
||||
"dependencies": {
|
||||
"@pipecat-ai/client-js": "^0.3.5",
|
||||
|
||||
5
examples/simple-chatbot/client/react-native/.gitignore
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
ios
|
||||
android
|
||||
node_modules
|
||||
.expo
|
||||
.env
|
||||
1
examples/simple-chatbot/client/react-native/.nvmrc
Normal file
@@ -0,0 +1 @@
|
||||
22.14
|
||||
21
examples/simple-chatbot/client/react-native/App.js
vendored
Normal file
@@ -0,0 +1,21 @@
|
||||
// Disabling the logs from react-native-webrtc
|
||||
import debug from 'debug';
|
||||
debug.disable('rn-webrtc:*');
|
||||
|
||||
// Ignoring the warnings from react-native-background-timer while they don't fix this issue:
|
||||
// https://github.com/ocetnik/react-native-background-timer/issues/366
|
||||
import { LogBox } from 'react-native';
|
||||
LogBox.ignoreLogs([
|
||||
"`new NativeEventEmitter()` was called with a non-null argument without the required `addListener` method.",
|
||||
"`new NativeEventEmitter()` was called with a non-null argument without the required `removeListeners` method."
|
||||
]);
|
||||
|
||||
// Enable debug logs
|
||||
/*window.localStorage = window.localStorage || {};
|
||||
window.localStorage.debug = '*';
|
||||
window.localStorage.getItem = (itemName) => {
|
||||
console.log('Requesting the localStorage item ', itemName);
|
||||
return window.localStorage[itemName];
|
||||
};*/
|
||||
|
||||
export { default } from './src/App';
|
||||
89
examples/simple-chatbot/client/react-native/README.md
Normal file
@@ -0,0 +1,89 @@
|
||||
# React Native implementation
|
||||
|
||||
Basic implementation using the [Pipecat RN SDK](https://docs.pipecat.ai/client/react-native/introduction).
|
||||
|
||||
## Usage
|
||||
|
||||
### Expo requirements
|
||||
|
||||
This project cannot be used with an [Expo Go](https://docs.expo.dev/workflow/expo-go/) app because [it requires custom native code](https://docs.expo.io/workflow/customizing/).
|
||||
|
||||
When a project requires custom native code or a config plugin, we need to transition from using [Expo Go](https://docs.expo.dev/workflow/expo-go/)
|
||||
to a [development build](https://docs.expo.dev/development/introduction/).
|
||||
|
||||
More details about the custom native code used by this demo can be found in [rn-daily-js-expo-config-plugin](https://github.com/daily-co/rn-daily-js-expo-config-plugin).
|
||||
|
||||
### Building remotely
|
||||
|
||||
If you do not have experience with Xcode and Android Studio builds or do not have them installed locally on your computer, you will need to follow [this guide from Expo to use EAS Build](https://docs.expo.dev/development/create-development-builds/#create-and-install-eas-build).
|
||||
|
||||
### Building locally
|
||||
|
||||
You will need to have installed locally on your computer:
|
||||
- [Xcode](https://developer.apple.com/xcode/) to build for iOS;
|
||||
- [Android Studio](https://developer.android.com/studio) to build for Android;
|
||||
|
||||
#### Install the demo dependencies
|
||||
|
||||
```bash
|
||||
# Use the version of node specified in .nvmrc
|
||||
nvm i
|
||||
|
||||
# Install dependencies
|
||||
yarn install
|
||||
|
||||
# Before a native app can be compiled, the native source code must be generated.
|
||||
npx expo prebuild
|
||||
```
|
||||
|
||||
#### Running on Android
|
||||
|
||||
After plugging in an Android device [configured for debugging](https://developer.android.com/studio/debug/dev-options), run the following command:
|
||||
|
||||
```
|
||||
npm run android
|
||||
```
|
||||
|
||||
#### Running on iOS
|
||||
|
||||
First, you'll need to do a one-time setup. This is required to build to a physical device.
|
||||
|
||||
If you're familiar with Xcode, open `ios/RNSimpleChatbot.xcworkspace` and, in the target settings, provide a development team registered with Apple.
|
||||
|
||||
If you're newer to Xcode, here are some more detailed instructions to get you started.
|
||||
|
||||
First, open the project in Xcode. Make sure to specifically select `RNSimpleChatbot.xcworkspace` from `/ios`. The `/ios` directory will have been generated by running `npx expo prebuild` as instructed above. This is also a good time to plug in your iOS device to be sure the following steps are successful.
|
||||
|
||||
From the main menu, select `Settings` and then `Accounts`. Click the `+` sign to add an account (e.g. an Apple ID).
|
||||
|
||||

|
||||
|
||||
Once an account is added, perform the following steps:
|
||||
|
||||
1. Close `Settings`.
|
||||
1. Select the folder icon in the top left corner.
|
||||
1. Select `RNSimpleChatbot` from the side panel
|
||||
1. Navigate to `Signing & Capabilities` in the top nav bar.
|
||||
1. Open the "Team" dropdown
|
||||
1. Select the account added in the previous step.
|
||||
|
||||
The "Signing Certificate" section should update accordingly with your account information.
|
||||
|
||||

|
||||
|
||||
**Troubleshooting common errors:**
|
||||
|
||||
- If you see the error `Change your bundle identifier to a unique string to try again`, update the "Bundle Identifier" input in `Signing & Capabilities` to make it unique. This should resolve the error.
|
||||
|
||||
- If you see an error that says `Xcode was unable to launch because it has an invalid code signature, inadequate entitlements or its profile has not been explicitly trusted by the user`, you may need to update the settings on your iPhone to enable the required permissions as follows:
|
||||
|
||||
1. Open `Settings` on your iPhone
|
||||
1. Select `General`, then `Device Management`
|
||||
1. Click `Trust` for DailyPlayground
|
||||
|
||||
- You may also be prompted to enter you login keychain password. Be sure to click `Always trust` to avoid the prompt showing multiple times.
|
||||
|
||||
After, run the following command:
|
||||
```
|
||||
npm run ios
|
||||
```
|
||||
75
examples/simple-chatbot/client/react-native/app.json
Normal file
@@ -0,0 +1,75 @@
|
||||
{
|
||||
"expo": {
|
||||
"name": "RN Simple Chatbot",
|
||||
"slug": "simple-chatbot-demo",
|
||||
"newArchEnabled": false,
|
||||
"version": "1.0.0",
|
||||
"orientation": "portrait",
|
||||
"icon": "./assets/images/pipecat.png",
|
||||
"userInterfaceStyle": "light",
|
||||
"splash": {
|
||||
"image": "./assets/images/splash.png",
|
||||
"resizeMode": "contain",
|
||||
"backgroundColor": "#ffffff"
|
||||
},
|
||||
"updates": {
|
||||
"fallbackToCacheTimeout": 0
|
||||
},
|
||||
"assetBundlePatterns": [
|
||||
"**/*"
|
||||
],
|
||||
"ios": {
|
||||
"supportsTablet": true,
|
||||
"bitcode": false,
|
||||
"bundleIdentifier": "co.daily.SimpleChatbot",
|
||||
"infoPlist": {
|
||||
"UIBackgroundModes": [
|
||||
"voip"
|
||||
]
|
||||
}
|
||||
},
|
||||
"android": {
|
||||
"adaptiveIcon": {
|
||||
"foregroundImage": "./assets/images/pipecat.png",
|
||||
"backgroundColor": "#FFFFFF"
|
||||
},
|
||||
"package": "co.daily.SimpleChatbot",
|
||||
"permissions": [
|
||||
"android.permission.ACCESS_NETWORK_STATE",
|
||||
"android.permission.BLUETOOTH",
|
||||
"android.permission.CAMERA",
|
||||
"android.permission.INTERNET",
|
||||
"android.permission.MODIFY_AUDIO_SETTINGS",
|
||||
"android.permission.RECORD_AUDIO",
|
||||
"android.permission.SYSTEM_ALERT_WINDOW",
|
||||
"android.permission.WAKE_LOCK",
|
||||
"android.permission.FOREGROUND_SERVICE",
|
||||
"android.permission.FOREGROUND_SERVICE_CAMERA",
|
||||
"android.permission.FOREGROUND_SERVICE_MICROPHONE",
|
||||
"android.permission.FOREGROUND_SERVICE_MEDIA_PROJECTION",
|
||||
"android.permission.POST_NOTIFICATIONS"
|
||||
]
|
||||
},
|
||||
"web": {
|
||||
"favicon": "./assets/images/pipecat.png"
|
||||
},
|
||||
"plugins": [
|
||||
"@config-plugins/react-native-webrtc",
|
||||
"@daily-co/config-plugin-rn-daily-js",
|
||||
[
|
||||
"expo-build-properties",
|
||||
{
|
||||
"android": {
|
||||
"minSdkVersion": 24,
|
||||
"compileSdkVersion": 35,
|
||||
"targetSdkVersion": 35,
|
||||
"buildToolsVersion": "35.0.0"
|
||||
},
|
||||
"ios": {
|
||||
"deploymentTarget": "15.1"
|
||||
}
|
||||
}
|
||||
]
|
||||
]
|
||||
}
|
||||
}
|
||||
|
After Width: | Height: | Size: 3.7 KiB |
|
After Width: | Height: | Size: 29 KiB |
|
After Width: | Height: | Size: 46 KiB |
@@ -0,0 +1,6 @@
|
||||
module.exports = function(api) {
|
||||
api.cache(true);
|
||||
return {
|
||||
presets: ['babel-preset-expo'],
|
||||
};
|
||||
};
|
||||
|
After Width: | Height: | Size: 177 KiB |
|
After Width: | Height: | Size: 213 KiB |
1
examples/simple-chatbot/client/react-native/env.example
Normal file
@@ -0,0 +1 @@
|
||||
EXPO_SIMPLE_CHATBOT_SERVER=http://$YOUR_IP:7860
|
||||
@@ -0,0 +1,2 @@
|
||||
const { getDefaultConfig } = require('expo/metro-config');
|
||||
module.exports = getDefaultConfig(__dirname);
|
||||
44
examples/simple-chatbot/client/react-native/package.json
Normal file
@@ -0,0 +1,44 @@
|
||||
{
|
||||
"name": "simple-chatbot-demo",
|
||||
"version": "1.0.0",
|
||||
"scripts": {
|
||||
"start": "expo start --dev-client",
|
||||
"android": "expo run:android --device",
|
||||
"ios": "expo run:ios --device",
|
||||
"web": "expo start --web",
|
||||
"update": "(cd ../rtvi-client-react-native-daily; yarn build); cp -R ../rtvi-client-react-native-daily/lib/* ./node_modules/react-native-realtime-ai-daily/lib/;"
|
||||
},
|
||||
"dependencies": {
|
||||
"@config-plugins/react-native-webrtc": "^10.0.0",
|
||||
"@daily-co/config-plugin-rn-daily-js": "0.0.7",
|
||||
"@daily-co/react-native-daily-js": "^0.76.0",
|
||||
"@daily-co/react-native-webrtc": "^118.0.3-daily.2",
|
||||
"@react-native-async-storage/async-storage": "1.24.0",
|
||||
"@react-navigation/native": "^7.0.14",
|
||||
"@react-navigation/stack": "^7.1.1",
|
||||
"expo": "^53.0.7",
|
||||
"expo-build-properties": "~0.14.6",
|
||||
"expo-dev-client": "~5.1.8",
|
||||
"expo-splash-screen": "~0.30.8",
|
||||
"expo-status-bar": "~2.2.3",
|
||||
"react": "19.0.0",
|
||||
"react-native": "0.79.2",
|
||||
"react-native-background-timer": "^2.4.1",
|
||||
"react-native-gesture-handler": "^2.25.0",
|
||||
"react-native-get-random-values": "^1.11.0",
|
||||
"@pipecat-ai/react-native-daily-transport": "^0.3.5",
|
||||
"react-native-safe-area-context": "^5.4.0",
|
||||
"react-native-screens": "^4.10.0",
|
||||
"react-native-toast-message": "^2.3.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@babel/core": "^7.27.1",
|
||||
"@types/react-native": "^0.73.0",
|
||||
"typescript": "~5.8.3"
|
||||
},
|
||||
"private": true,
|
||||
"resolutions": {
|
||||
"@daily-co/react-native-webrtc/debug": "^4.0.0",
|
||||
"@daily-co/react-native-webrtc/@types/react-native": "^0.73.0"
|
||||
}
|
||||
}
|
||||
34
examples/simple-chatbot/client/react-native/src/App.tsx
Normal file
@@ -0,0 +1,34 @@
|
||||
import React from "react"
|
||||
|
||||
import { NavigationContainer } from '@react-navigation/native';
|
||||
import { createStackNavigator } from '@react-navigation/stack';
|
||||
import PreJoinView from './views/PreJoinView';
|
||||
import MeetingView from './views/MeetingView';
|
||||
import { VoiceClientProvider } from './context/VoiceClientContext';
|
||||
import Toast from 'react-native-toast-message';
|
||||
|
||||
import { useVoiceClientNavigation } from './hooks/useVoiceClientNavigation';
|
||||
|
||||
const Stack = createStackNavigator();
|
||||
|
||||
const NavigationManager: React.FC = () => {
|
||||
useVoiceClientNavigation(); // This hook now controls the navigation based on the connection state.
|
||||
return null; // This component doesn't render anything but manages navigation.
|
||||
};
|
||||
|
||||
const App: React.FC = () => {
|
||||
return (
|
||||
<VoiceClientProvider>
|
||||
<NavigationContainer>
|
||||
<Stack.Navigator initialRouteName="Prejoin">
|
||||
<Stack.Screen name="Prejoin" component={PreJoinView} options={{ headerShown: false }}/>
|
||||
<Stack.Screen name="Meeting" component={MeetingView} options={{ headerShown: false }}/>
|
||||
</Stack.Navigator>
|
||||
<NavigationManager />
|
||||
<Toast />
|
||||
</NavigationContainer>
|
||||
</VoiceClientProvider>
|
||||
);
|
||||
};
|
||||
|
||||
export default App;
|
||||
@@ -0,0 +1,94 @@
|
||||
import React, { useState, useMemo } from 'react';
|
||||
import { View, StyleSheet, LayoutChangeEvent, ViewStyle } from 'react-native';
|
||||
import { MaterialIcons } from '@expo/vector-icons';
|
||||
import Colors from '../theme/Colors';
|
||||
import { useVoiceClient } from '../context/VoiceClientContext';
|
||||
|
||||
interface MicrophoneViewProps {
|
||||
style?: ViewStyle; // Optional additional styles for the button container
|
||||
}
|
||||
|
||||
const MicrophoneView: React.FC<MicrophoneViewProps> = ({ style }) => {
|
||||
const { isMicEnabled, localAudioLevel: audioLevel } = useVoiceClient();
|
||||
const [dimensions, setDimensions] = useState({ width: 0, height: 0 });
|
||||
|
||||
const onLayout = (event: LayoutChangeEvent) => {
|
||||
const { width, height } = event.nativeEvent.layout;
|
||||
setDimensions({ width, height });
|
||||
};
|
||||
|
||||
const { width } = dimensions;
|
||||
|
||||
const circleSize = useMemo(() => width * 0.9, [width]);
|
||||
const innerCircleSize = useMemo(() => width * 0.82, [width]);
|
||||
const audioCircleSize = useMemo(() => audioLevel * width * 0.95, [audioLevel, width]);
|
||||
|
||||
return (
|
||||
<View style={[styles.container, style]} onLayout={onLayout}>
|
||||
<View
|
||||
style={[
|
||||
styles.outerCircle,
|
||||
{ width: circleSize, height: circleSize, borderRadius: circleSize / 2 },
|
||||
]}
|
||||
>
|
||||
<View
|
||||
style={[
|
||||
styles.innerCircle,
|
||||
{
|
||||
backgroundColor: !isMicEnabled ? Colors.disabledMic : Colors.backgroundCircle,
|
||||
width: innerCircleSize,
|
||||
height: innerCircleSize,
|
||||
borderRadius: innerCircleSize / 2,
|
||||
},
|
||||
]}
|
||||
/>
|
||||
|
||||
{isMicEnabled && (
|
||||
<View
|
||||
style={[
|
||||
styles.audioCircle,
|
||||
{
|
||||
width: audioCircleSize,
|
||||
height: audioCircleSize,
|
||||
borderRadius: audioCircleSize / 2,
|
||||
},
|
||||
]}
|
||||
/>
|
||||
)}
|
||||
|
||||
<MaterialIcons
|
||||
name={!isMicEnabled ? "mic-off" : "mic"}
|
||||
size={width * 0.2}
|
||||
color="white"
|
||||
style={styles.micIcon}
|
||||
/>
|
||||
</View>
|
||||
</View>
|
||||
);
|
||||
};
|
||||
|
||||
const styles = StyleSheet.create({
|
||||
container: {
|
||||
justifyContent: 'center',
|
||||
alignItems: 'center',
|
||||
} as ViewStyle,
|
||||
outerCircle: {
|
||||
borderWidth: 1,
|
||||
borderColor: Colors.buttonsBorder,
|
||||
justifyContent: 'center',
|
||||
alignItems: 'center',
|
||||
} as ViewStyle,
|
||||
innerCircle: {
|
||||
position: 'absolute',
|
||||
} as ViewStyle,
|
||||
audioCircle: {
|
||||
position: 'absolute',
|
||||
backgroundColor: Colors.micVolume,
|
||||
opacity: 0.5,
|
||||
} as ViewStyle,
|
||||
micIcon: {
|
||||
position: 'absolute',
|
||||
},
|
||||
});
|
||||
|
||||
export default MicrophoneView;
|
||||
@@ -0,0 +1,128 @@
|
||||
import React, { useEffect, useState } from 'react';
|
||||
import { LayoutChangeEvent, StyleSheet, Text, View, ViewStyle } from 'react-native';
|
||||
import { MaterialIcons } from '@expo/vector-icons';
|
||||
import Colors from '../theme/Colors';
|
||||
import { useVoiceClient } from '../context/VoiceClientContext';
|
||||
|
||||
const dotCount = 5;
|
||||
|
||||
const WaveformView: React.FC = () => {
|
||||
const [audioLevels, setAudioLevels] = useState(Array(dotCount).fill(0));
|
||||
const [dimensions, setDimensions] = useState({ width: 0, height: 0 });
|
||||
|
||||
const { currentState: voiceClientStatus, botReady: isBotReady, remoteAudioLevel: audioLevel } = useVoiceClient();
|
||||
|
||||
const onLayout = (event: LayoutChangeEvent) => {
|
||||
const { width, height } = event.nativeEvent.layout;
|
||||
setDimensions({ width, height });
|
||||
};
|
||||
|
||||
useEffect(() => {
|
||||
setAudioLevels((prevLevels) => [...prevLevels.slice(1), audioLevel]);
|
||||
}, [audioLevel]);
|
||||
|
||||
const { width, height } = dimensions;
|
||||
const circleSize = width * 0.9;
|
||||
const innerCircleSize = width * 0.82;
|
||||
const barWidth = (width * 0.5) / dotCount;
|
||||
|
||||
return (
|
||||
<View style={styles.container} onLayout={onLayout}>
|
||||
<View style={[styles.outerCircle, { width: circleSize, height: circleSize, borderRadius: circleSize / 2 }]}>
|
||||
<View
|
||||
style={[
|
||||
styles.innerCircle,
|
||||
{
|
||||
backgroundColor: isBotReady ? Colors.backgroundCircle : Colors.backgroundCircleNotConnected,
|
||||
width: innerCircleSize,
|
||||
height: innerCircleSize,
|
||||
borderRadius: innerCircleSize / 2,
|
||||
},
|
||||
]}
|
||||
>
|
||||
{isBotReady ? (
|
||||
audioLevel > 0 ? (
|
||||
<View style={[styles.waveformContainer, { width: width * 0.5, height: width * 0.5 }]}>
|
||||
{audioLevels.map((level, index) => (
|
||||
<View
|
||||
key={index}
|
||||
style={[
|
||||
styles.waveformBar,
|
||||
{
|
||||
width: barWidth - 10, // Subtract some margin
|
||||
height: level * height,
|
||||
},
|
||||
]}
|
||||
/>
|
||||
))}
|
||||
</View>
|
||||
) : (
|
||||
<View style={[styles.dotContainer, { width: width * 0.5, height: height * 0.5 }]}>
|
||||
{Array(dotCount)
|
||||
.fill(0)
|
||||
.map((_, index) => (
|
||||
<View key={index} style={[styles.dot, { width: height * 0.1, height: height * 0.1 }]} />
|
||||
))}
|
||||
</View>
|
||||
)
|
||||
) : (
|
||||
<View style={styles.notReadyContainer}>
|
||||
<MaterialIcons name="hourglass-empty" size={32} color="white" />
|
||||
<Text style={styles.voiceClientStatusText}>{voiceClientStatus}</Text>
|
||||
</View>
|
||||
)}
|
||||
</View>
|
||||
</View>
|
||||
</View>
|
||||
);
|
||||
};
|
||||
|
||||
const styles = StyleSheet.create({
|
||||
container: {
|
||||
justifyContent: 'center',
|
||||
alignItems: 'center',
|
||||
width: "100%",
|
||||
} as ViewStyle,
|
||||
outerCircle: {
|
||||
borderWidth: 1,
|
||||
borderColor: 'gray',
|
||||
justifyContent: 'center',
|
||||
alignItems: 'center',
|
||||
} as ViewStyle,
|
||||
innerCircle: {
|
||||
justifyContent: 'center',
|
||||
alignItems: 'center',
|
||||
position: 'relative',
|
||||
} as ViewStyle,
|
||||
waveformContainer: {
|
||||
flexDirection: 'row',
|
||||
justifyContent: 'space-between',
|
||||
alignItems: 'center',
|
||||
} as ViewStyle,
|
||||
waveformBar: {
|
||||
backgroundColor: 'white',
|
||||
maxHeight: '100%',
|
||||
borderRadius: 12,
|
||||
} as ViewStyle,
|
||||
dotContainer: {
|
||||
flexDirection: 'row',
|
||||
justifyContent: 'space-between',
|
||||
alignItems: 'center',
|
||||
} as ViewStyle,
|
||||
dot: {
|
||||
backgroundColor: 'white',
|
||||
borderRadius: 50,
|
||||
} as ViewStyle,
|
||||
notReadyContainer: {
|
||||
justifyContent: 'center',
|
||||
alignItems: 'center',
|
||||
} as ViewStyle,
|
||||
voiceClientStatusText: {
|
||||
color: 'white',
|
||||
marginTop: 10,
|
||||
fontSize: 16,
|
||||
fontWeight: 'bold',
|
||||
} as ViewStyle,
|
||||
});
|
||||
|
||||
export default WaveformView;
|
||||
@@ -0,0 +1,229 @@
|
||||
import React, { createContext, useState, useContext, ReactNode, useCallback, useMemo, useRef, useEffect } from 'react'
|
||||
import Toast from 'react-native-toast-message'
|
||||
import { RNDailyTransport } from '@pipecat-ai/react-native-daily-transport'
|
||||
import { RTVIClient, TransportState, RTVIMessage, Participant } from '@pipecat-ai/client-js'
|
||||
import { MediaStreamTrack } from '@daily-co/react-native-webrtc'
|
||||
import { SettingsManager } from '../settings/SettingsManager';
|
||||
|
||||
interface VoiceClientContextProps {
|
||||
voiceClient: RTVIClient | null
|
||||
inCall: boolean
|
||||
currentState: string
|
||||
botReady: boolean
|
||||
localAudioLevel: number
|
||||
remoteAudioLevel: number
|
||||
isMicEnabled: boolean
|
||||
isCamEnabled: boolean
|
||||
videoTrack?: MediaStreamTrack
|
||||
timerCountDown: number
|
||||
// methods
|
||||
start: (url: string) => Promise<void>
|
||||
leave: () => void
|
||||
toggleMicInput: () => void
|
||||
toggleCamInput: () => void
|
||||
}
|
||||
|
||||
export const VoiceClientContext = createContext<VoiceClientContextProps | undefined>(undefined)
|
||||
|
||||
interface VoiceClientProviderProps {
|
||||
children: ReactNode
|
||||
}
|
||||
|
||||
export const VoiceClientProvider: React.FC<VoiceClientProviderProps> = ({ children }) => {
|
||||
const [voiceClient, setVoiceClient] = useState<RTVIClient | null>(null)
|
||||
const [inCall, setInCall] = useState<boolean>(false)
|
||||
const [currentState, setCurrentState] = useState<TransportState>("disconnected")
|
||||
const [botReady, setBotReady] = useState<boolean>(false)
|
||||
const [isMicEnabled, setIsMicEnabled] = useState<boolean>(false)
|
||||
const [isCamEnabled, setIsCamEnabled] = useState<boolean>(false)
|
||||
const [videoTrack, setVideoTrack] = useState<MediaStreamTrack>()
|
||||
const [localAudioLevel, setLocalAudioLevel] = useState<number>(0)
|
||||
const [remoteAudioLevel, setRemoteAudioLevel] = useState<number>(0)
|
||||
const [timerCountDown, setTimerCountDown] = useState<number>(0)
|
||||
|
||||
const botSpeakingRef = useRef(false)
|
||||
let meetingTimer: NodeJS.Timeout | null
|
||||
|
||||
const createVoiceClient = useCallback((url: string): RTVIClient => {
|
||||
return new RTVIClient({
|
||||
transport: new RNDailyTransport(),
|
||||
params: {
|
||||
baseUrl: url,
|
||||
endpoints: {
|
||||
connect: "/connect"
|
||||
}
|
||||
},
|
||||
enableMic: true,
|
||||
enableCam: false
|
||||
})
|
||||
}, [])
|
||||
|
||||
const handleError = useCallback((error: any) => {
|
||||
console.log("Error occurred:", error)
|
||||
const errorMessage = error.message || error.data?.error || "An unexpected error occurred"
|
||||
Toast.show({
|
||||
type: 'error',
|
||||
text1: errorMessage,
|
||||
})
|
||||
}, [])
|
||||
|
||||
const setupListeners = useCallback((voiceClient: RTVIClient): void => {
|
||||
const inCallStates = new Set(["authenticating", "connecting", "connected", "ready"])
|
||||
|
||||
voiceClient
|
||||
.on("transportStateChanged", (state: TransportState) => {
|
||||
setCurrentState(voiceClient.state)
|
||||
setInCall(inCallStates.has(state))
|
||||
})
|
||||
.on("error", (error: RTVIMessage) => {
|
||||
handleError(error)
|
||||
})
|
||||
.on("botReady", () => {
|
||||
setBotReady(true)
|
||||
let expirationTime = voiceClient.transportExpiry
|
||||
if (expirationTime) {
|
||||
startTimer(expirationTime)
|
||||
}
|
||||
})
|
||||
.on("disconnected", () => {
|
||||
setBotReady(false)
|
||||
stopTimer()
|
||||
setIsMicEnabled(false)
|
||||
setIsCamEnabled(false)
|
||||
})
|
||||
.on("localAudioLevel", (level: number) => {
|
||||
setLocalAudioLevel(level)
|
||||
})
|
||||
.on("remoteAudioLevel", (level: number) => {
|
||||
if (botSpeakingRef.current) {
|
||||
setRemoteAudioLevel(level)
|
||||
}
|
||||
})
|
||||
.on("userStartedSpeaking", () => {
|
||||
// nothing to do here
|
||||
})
|
||||
.on("userStoppedSpeaking", () => {
|
||||
// nothing to do here
|
||||
})
|
||||
.on("botStartedSpeaking", () => {
|
||||
botSpeakingRef.current = true
|
||||
})
|
||||
.on("botStoppedSpeaking", () => {
|
||||
botSpeakingRef.current = false
|
||||
setRemoteAudioLevel(0)
|
||||
})
|
||||
.on("connected", () => {
|
||||
setIsMicEnabled(voiceClient.isMicEnabled)
|
||||
setIsCamEnabled(voiceClient.isCamEnabled)
|
||||
})
|
||||
.on("trackStarted", (track: MediaStreamTrack, p?: Participant) => {
|
||||
if (p?.local && track.kind === 'video'){
|
||||
setVideoTrack(track)
|
||||
}
|
||||
})
|
||||
}, [handleError])
|
||||
|
||||
const start = useCallback(async (url: string): Promise<void> => {
|
||||
const client = createVoiceClient(url)
|
||||
setVoiceClient(client)
|
||||
setupListeners(client)
|
||||
try {
|
||||
await client.connect()
|
||||
// updating the preferences
|
||||
const newSettings = await SettingsManager.getSettings();
|
||||
newSettings.backendURL = url
|
||||
await SettingsManager.updateSettings(newSettings)
|
||||
} catch (error) {
|
||||
handleError(error)
|
||||
}
|
||||
}, [createVoiceClient, setupListeners, handleError])
|
||||
|
||||
const leave = useCallback(async (): Promise<void> => {
|
||||
if (voiceClient) {
|
||||
await voiceClient.disconnect()
|
||||
setVoiceClient(null)
|
||||
}
|
||||
}, [voiceClient])
|
||||
|
||||
const toggleMicInput = useCallback(async (): Promise<void> => {
|
||||
if (voiceClient) {
|
||||
try {
|
||||
let enableMic = !isMicEnabled
|
||||
voiceClient.enableMic(enableMic)
|
||||
setIsMicEnabled(enableMic)
|
||||
} catch (e) {
|
||||
handleError(e)
|
||||
}
|
||||
}
|
||||
}, [voiceClient, isMicEnabled])
|
||||
|
||||
const toggleCamInput = useCallback(async (): Promise<void> => {
|
||||
if (voiceClient) {
|
||||
try {
|
||||
let enableCam = !isCamEnabled
|
||||
voiceClient.enableCam(enableCam)
|
||||
setIsCamEnabled(enableCam)
|
||||
} catch (e) {
|
||||
handleError(e)
|
||||
}
|
||||
}
|
||||
}, [voiceClient, isCamEnabled])
|
||||
|
||||
const startTimer = (expirationTime: number): void => {
|
||||
const currentTime = Math.floor(Date.now() / 1000)
|
||||
const leftTime = expirationTime - currentTime
|
||||
setTimerCountDown(leftTime)
|
||||
meetingTimer = setInterval(() => {
|
||||
setTimerCountDown((prevCountDown) => {
|
||||
return prevCountDown - 1
|
||||
})
|
||||
}, 1000)
|
||||
}
|
||||
|
||||
const stopTimer = (): void => {
|
||||
if (meetingTimer) {
|
||||
clearInterval(meetingTimer)
|
||||
meetingTimer = null
|
||||
}
|
||||
setTimerCountDown(0)
|
||||
}
|
||||
|
||||
useEffect(() => {
|
||||
return () => {
|
||||
if (voiceClient) {
|
||||
voiceClient.removeAllListeners() // Cleanup on unmount
|
||||
}
|
||||
}
|
||||
}, [voiceClient])
|
||||
|
||||
const contextValue = useMemo(() => ({
|
||||
voiceClient,
|
||||
inCall,
|
||||
currentState,
|
||||
botReady,
|
||||
isMicEnabled,
|
||||
isCamEnabled,
|
||||
localAudioLevel,
|
||||
remoteAudioLevel,
|
||||
videoTrack,
|
||||
timerCountDown,
|
||||
start,
|
||||
leave,
|
||||
toggleMicInput,
|
||||
toggleCamInput
|
||||
}), [voiceClient, inCall, currentState, botReady, isMicEnabled, isCamEnabled, localAudioLevel, remoteAudioLevel, videoTrack, timerCountDown, start, leave, toggleMicInput, toggleCamInput])
|
||||
|
||||
return (
|
||||
<VoiceClientContext.Provider value={contextValue}>
|
||||
{children}
|
||||
</VoiceClientContext.Provider>
|
||||
)
|
||||
}
|
||||
|
||||
export const useVoiceClient = (): VoiceClientContextProps => {
|
||||
const context = useContext(VoiceClientContext)
|
||||
if (!context) {
|
||||
throw new Error('useVoiceClient must be used within a VoiceClientProvider')
|
||||
}
|
||||
return context
|
||||
}
|
||||
@@ -0,0 +1,22 @@
|
||||
import { useEffect } from 'react';
|
||||
import { useNavigation, NavigationProp } from '@react-navigation/native';
|
||||
import { useVoiceClient } from '../context/VoiceClientContext';
|
||||
|
||||
export type RootStackParamList = {
|
||||
Meeting: undefined;
|
||||
Prejoin: undefined;
|
||||
};
|
||||
|
||||
export const useVoiceClientNavigation = () => {
|
||||
const navigation = useNavigation<NavigationProp<RootStackParamList>>();
|
||||
const { inCall } = useVoiceClient();
|
||||
|
||||
useEffect(() => {
|
||||
if (inCall) {
|
||||
navigation.navigate('Meeting');
|
||||
} else {
|
||||
navigation.navigate('Prejoin');
|
||||
}
|
||||
}, [inCall, navigation]);
|
||||
|
||||
};
|
||||
@@ -0,0 +1,42 @@
|
||||
import AsyncStorage from '@react-native-async-storage/async-storage';
|
||||
|
||||
export interface SettingsManager {
|
||||
enableCam: boolean;
|
||||
enableMic: boolean;
|
||||
backendURL: string;
|
||||
}
|
||||
|
||||
// Define the settings object
|
||||
const defaultSettings: SettingsManager = {
|
||||
enableCam: false,
|
||||
enableMic: true,
|
||||
backendURL: process.env.EXPO_SIMPLE_CHATBOT_SERVER || "",
|
||||
};
|
||||
|
||||
export class SettingsManager {
|
||||
private static preferencesKey = 'settingsPreference';
|
||||
|
||||
static async getSettings(): Promise<SettingsManager> {
|
||||
try {
|
||||
const data = await AsyncStorage.getItem(this.preferencesKey);
|
||||
if (data !== null) {
|
||||
return JSON.parse(data) as SettingsManager;
|
||||
} else {
|
||||
return defaultSettings;
|
||||
}
|
||||
} catch (error) {
|
||||
console.error("Failed to load settings:", error);
|
||||
return defaultSettings;
|
||||
}
|
||||
}
|
||||
|
||||
static async updateSettings(settings: SettingsManager): Promise<void> {
|
||||
try {
|
||||
const data = JSON.stringify(settings);
|
||||
await AsyncStorage.setItem(this.preferencesKey, data);
|
||||
} catch (error) {
|
||||
console.error("Failed to save settings:", error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,7 @@
|
||||
export const Images = {
|
||||
dailyBot: require('../../assets/images/pipecat.png'),
|
||||
};
|
||||
|
||||
export const Icons = {
|
||||
vision: require('../../assets/icons/vision.png'),
|
||||
};
|
||||
@@ -0,0 +1,27 @@
|
||||
type ColorsType = {
|
||||
white: string;
|
||||
black: string;
|
||||
backgroundCircle: string;
|
||||
backgroundCircleNotConnected: string;
|
||||
backgroundApp: string;
|
||||
buttonsBorder: string;
|
||||
micVolume: string;
|
||||
timer: string;
|
||||
disabledMic: string;
|
||||
disabledVision: string;
|
||||
};
|
||||
|
||||
const Colors: ColorsType = {
|
||||
white: '#ffffff',
|
||||
black: '#000000',
|
||||
backgroundCircle: '#374151',
|
||||
backgroundCircleNotConnected: '#D1D5DB',
|
||||
backgroundApp: '#F9FAFB',
|
||||
buttonsBorder: '#E5E7EB',
|
||||
micVolume: '#86EFAC',
|
||||
timer: '#E5E7EB',
|
||||
disabledMic: '#ee6b6e',
|
||||
disabledVision: '#BBF7D0',
|
||||
};
|
||||
|
||||
export default Colors;
|
||||
@@ -0,0 +1,62 @@
|
||||
import React from 'react';
|
||||
import { TouchableOpacity, Text, StyleSheet, ViewStyle, TextStyle, GestureResponderEvent } from 'react-native';
|
||||
import { MaterialIcons } from '@expo/vector-icons';
|
||||
|
||||
interface CustomButtonProps {
|
||||
title: string;
|
||||
onPress: (event: GestureResponderEvent) => void;
|
||||
backgroundColor?: string; // Optional prop for background color
|
||||
textColor?: string; // Optional prop for text color
|
||||
style?: ViewStyle; // Optional additional styles for the button container
|
||||
textStyle?: TextStyle; // Optional additional styles for the text
|
||||
iconName?: string; // Optional prop for the icon name from MaterialIcons
|
||||
iconPosition?: 'left' | 'right'; // Optional prop to control icon position
|
||||
iconSize?: number; // Optional prop for icon size
|
||||
iconColor?: string; // Optional prop for icon color
|
||||
}
|
||||
|
||||
const CustomButton: React.FC<CustomButtonProps> = ({
|
||||
title,
|
||||
onPress,
|
||||
backgroundColor = 'black',
|
||||
textColor = 'white',
|
||||
style,
|
||||
textStyle,
|
||||
iconName,
|
||||
iconPosition = 'left',
|
||||
iconSize = 24,
|
||||
iconColor = 'white',
|
||||
}) => {
|
||||
return (
|
||||
<TouchableOpacity
|
||||
onPress={onPress}
|
||||
style={[styles.button, { backgroundColor }, style]}>
|
||||
{iconName && iconPosition === 'left' && (
|
||||
<MaterialIcons name={iconName as keyof typeof MaterialIcons.glyphMap} size={iconSize} color={iconColor} style={styles.icon} />
|
||||
)}
|
||||
<Text style={[styles.text, { color: textColor }, textStyle]}>{title}</Text>
|
||||
{iconName && iconPosition === 'right' && (
|
||||
<MaterialIcons name={iconName as keyof typeof MaterialIcons.glyphMap} size={iconSize} color={iconColor} style={styles.icon} />
|
||||
)}
|
||||
</TouchableOpacity>
|
||||
);
|
||||
};
|
||||
|
||||
const styles = StyleSheet.create({
|
||||
button: {
|
||||
padding: 12,
|
||||
borderRadius: 8,
|
||||
alignItems: 'center',
|
||||
justifyContent: 'center',
|
||||
flexDirection: 'row', // Ensures icon and text are aligned in a row
|
||||
},
|
||||
text: {
|
||||
fontSize: 16,
|
||||
fontWeight: 'bold',
|
||||
},
|
||||
icon: {
|
||||
marginHorizontal: 5, // Adds space between the icon and text
|
||||
},
|
||||
});
|
||||
|
||||
export default CustomButton;
|
||||
@@ -0,0 +1,139 @@
|
||||
import {
|
||||
View,
|
||||
StyleSheet,
|
||||
Text,
|
||||
Image,
|
||||
TouchableOpacity,
|
||||
} from 'react-native';
|
||||
|
||||
import React from "react"
|
||||
|
||||
import { useVoiceClient } from '../context/VoiceClientContext';
|
||||
|
||||
import { Images } from '../theme/Assets';
|
||||
import { MaterialIcons } from '@expo/vector-icons';
|
||||
|
||||
import WaveformView from '../components/WaveformView';
|
||||
import MicrophoneView from '../components/MicrophoneView';
|
||||
import { SafeAreaView } from 'react-native-safe-area-context';
|
||||
import Colors from '../theme/Colors';
|
||||
import CustomButton from '../theme/CustomButton';
|
||||
|
||||
const MeetingView: React.FC = () => {
|
||||
|
||||
const { leave, toggleMicInput, toggleCamInput, timerCountDown } = useVoiceClient();
|
||||
|
||||
const timerString = (count: number): string => {
|
||||
const hours = Math.floor(count / 3600);
|
||||
const minutes = Math.floor((count % 3600) / 60);
|
||||
const seconds = count % 60;
|
||||
return `${String(hours).padStart(2, '0')}:${String(minutes).padStart(2, '0')}:${String(seconds).padStart(2, '0')}`;
|
||||
};
|
||||
|
||||
return (
|
||||
<SafeAreaView style={styles.safeArea}>
|
||||
<View style={styles.container}>
|
||||
<View style={styles.header}>
|
||||
<Image source={Images.dailyBot} style={styles.botImage} />
|
||||
<View style={styles.timerContainer}>
|
||||
<MaterialIcons name="timelapse" size={24} color="black" />
|
||||
<Text style={styles.timerText}>{timerString(timerCountDown)}</Text>
|
||||
</View>
|
||||
</View>
|
||||
|
||||
<View style={styles.mainPanel}>
|
||||
<WaveformView/>
|
||||
<View style={styles.bottomControls}>
|
||||
<TouchableOpacity onPress={toggleMicInput}>
|
||||
<MicrophoneView
|
||||
style={styles.microphone}
|
||||
/>
|
||||
</TouchableOpacity>
|
||||
</View>
|
||||
</View>
|
||||
|
||||
{/* Bottom Panel */}
|
||||
<View style={styles.bottomPanel}>
|
||||
<CustomButton
|
||||
title="End"
|
||||
iconName={"exit-to-app"}
|
||||
onPress={leave}
|
||||
backgroundColor={Colors.black}
|
||||
/>
|
||||
</View>
|
||||
</View>
|
||||
</SafeAreaView>
|
||||
);
|
||||
};
|
||||
|
||||
const styles = StyleSheet.create({
|
||||
safeArea: {
|
||||
flex: 1,
|
||||
width: "100%",
|
||||
backgroundColor: Colors.backgroundApp,
|
||||
},
|
||||
container: {
|
||||
flex: 1,
|
||||
padding: 20,
|
||||
},
|
||||
header: {
|
||||
flexDirection: 'row',
|
||||
alignItems: 'center',
|
||||
justifyContent: 'space-between',
|
||||
paddingBottom: 10,
|
||||
},
|
||||
botImage: {
|
||||
width: 48,
|
||||
height: 48,
|
||||
},
|
||||
timerContainer: {
|
||||
flexDirection: 'row',
|
||||
alignItems: 'center',
|
||||
backgroundColor: Colors.timer,
|
||||
padding: 10,
|
||||
borderRadius: 12,
|
||||
},
|
||||
timerText: {
|
||||
color: 'black',
|
||||
fontWeight: '500',
|
||||
fontSize: 18,
|
||||
marginLeft: 5,
|
||||
},
|
||||
mainPanel: {
|
||||
flex: 1,
|
||||
justifyContent: 'center',
|
||||
alignItems: 'center',
|
||||
},
|
||||
bottomControls: {
|
||||
flexDirection: 'row',
|
||||
justifyContent: 'center',
|
||||
alignItems: 'center',
|
||||
width: '100%',
|
||||
paddingBottom: 20,
|
||||
},
|
||||
microphone: {
|
||||
width: 160,
|
||||
height: 160,
|
||||
},
|
||||
camera: {
|
||||
width: 120,
|
||||
height: 120,
|
||||
},
|
||||
bottomPanel: {
|
||||
paddingVertical: 10,
|
||||
},
|
||||
endButton: {
|
||||
flexDirection: 'row',
|
||||
alignItems: 'center',
|
||||
justifyContent: 'center',
|
||||
backgroundColor: 'black',
|
||||
borderRadius: 12,
|
||||
padding: 10,
|
||||
},
|
||||
endText: {
|
||||
marginLeft: 5,
|
||||
color: 'white',
|
||||
},
|
||||
});
|
||||
|
||||
export default MeetingView;
|
||||
@@ -0,0 +1,82 @@
|
||||
import {
|
||||
View,
|
||||
StyleSheet,
|
||||
Text,
|
||||
TextInput,
|
||||
Image
|
||||
} from "react-native"
|
||||
|
||||
import React, { useEffect, useState } from 'react';
|
||||
|
||||
import { useVoiceClient } from '../context/VoiceClientContext';
|
||||
|
||||
import Colors from '../theme/Colors';
|
||||
import { Images } from '../theme/Assets';
|
||||
import CustomButton from '../theme/CustomButton';
|
||||
import { SettingsManager } from '../settings/SettingsManager';
|
||||
|
||||
const styles = StyleSheet.create({
|
||||
container: {
|
||||
flex: 1,
|
||||
padding: 20,
|
||||
backgroundColor: Colors.backgroundApp,
|
||||
justifyContent: 'center',
|
||||
alignItems: 'center',
|
||||
},
|
||||
image: {
|
||||
width: 64,
|
||||
height: 64,
|
||||
marginBottom: 20,
|
||||
},
|
||||
header: {
|
||||
fontSize: 18,
|
||||
fontWeight: 'bold',
|
||||
marginBottom: 20,
|
||||
},
|
||||
textInput: {
|
||||
width: '100%',
|
||||
padding: 10,
|
||||
borderColor: Colors.buttonsBorder,
|
||||
backgroundColor: Colors.white,
|
||||
borderWidth: 1,
|
||||
borderRadius: 5,
|
||||
marginBottom: 10,
|
||||
},
|
||||
lastTextInput: {
|
||||
marginBottom: 20,
|
||||
},
|
||||
});
|
||||
|
||||
const PreJoinView: React.FC = () => {
|
||||
const { start } = useVoiceClient();
|
||||
|
||||
const [backendURL, setBackendURL] = useState<string>('')
|
||||
|
||||
useEffect(() => {
|
||||
const loadSettings = async () => {
|
||||
const loadedSettings = await SettingsManager.getSettings();
|
||||
setBackendURL(loadedSettings.backendURL)
|
||||
};
|
||||
loadSettings();
|
||||
}, []);
|
||||
|
||||
return (
|
||||
<View style={styles.container}>
|
||||
<Image source={Images.dailyBot} style={styles.image} />
|
||||
<Text style={styles.header}>Connect to Pipecat.</Text>
|
||||
<TextInput
|
||||
placeholder="Server URL"
|
||||
value={backendURL}
|
||||
onChangeText={setBackendURL}
|
||||
style={[styles.textInput, styles.lastTextInput]}
|
||||
/>
|
||||
<CustomButton
|
||||
title="Connect"
|
||||
onPress={() => start(backendURL)}
|
||||
backgroundColor={Colors.backgroundCircle}
|
||||
/>
|
||||
</View>
|
||||
)
|
||||
};
|
||||
|
||||
export default PreJoinView;
|
||||