Compare commits

...

54 Commits

Author SHA1 Message Date
Aleix Conchillo Flaqué
3933ba57b8 MiniMaxHttpTTSService: renamed from Hailuo and move to services/minimax/tts.py 2025-05-15 12:07:37 -07:00
Aleix Conchillo Flaqué
a51af35024 Merge pull request #1814 from pipecat-ai/aleix/examples-dependabot-05142025
examples: updates for dependabot 05/14/2025
2025-05-15 11:38:45 -07:00
Mark Backman
119fd5ba7d Merge pull request #1025 from fatwang2/main
added hailuo tts service
2025-05-15 14:29:24 -04:00
Aleix Conchillo Flaqué
0718a812bd examples: updates for dependabot 05/14/2025 2025-05-14 22:51:08 -07:00
Mark Backman
3814501b48 Merge pull request #1811 from pipecat-ai/mb/dont-require-tracing-dep
Fix: Resolve an issue where tracing imports were required
2025-05-14 12:35:47 -04:00
Mark Backman
7a5205dbda Fix: Resolve an issue where tracing imports were required 2025-05-14 12:29:08 -04:00
Varun Singh
04c02c9a20 Merge pull request #1810 from pipecat-ai/vr000m-receiving-custom-sip-headers
added handling for sipHeaders
2025-05-13 23:02:14 -07:00
Varun Singh
0ff7195a83 Update README.md
updating docs
2025-05-13 19:08:43 -04:00
Varun Singh
3b91aa013a added handling for sipHeaders 2025-05-13 16:00:05 -07:00
Mark Backman
50f6235edb Add support for OpenTelemetry tracing (#1729)
* Also added TurnTrackingObserver, TurnTraceObserver, foundational 29, open-telemetry-example
2025-05-13 17:18:11 -04:00
Aleix Conchillo Flaqué
6f4d94f91b Merge pull request #1800 from pipecat-ai/aleix/frame-processors-setup
introduce frame processors setup
2025-05-13 13:18:06 -07:00
Aleix Conchillo Flaqué
83a4c7d443 RTVIProcessor: remove unused code 2025-05-13 11:26:37 -07:00
Aleix Conchillo Flaqué
8171fec925 SmallWebRTCConnection: complain if av package not found 2025-05-13 11:26:37 -07:00
Aleix Conchillo Flaqué
175f352ea7 add FrameProcessor.setup() to setup processors before StartFrame 2025-05-13 11:26:35 -07:00
Filipi da Silva Fuchter
5290161ac4 Merge pull request #1746 from pipecat-ai/simple_chatbot-react-native
Simple chatbot: React Native client
2025-05-13 10:48:09 -03:00
Filipi Fuchter
8762019ed7 Not setting the local audio level when the user stopped speaking. 2025-05-13 10:46:30 -03:00
Filipi Fuchter
61a59fa158 Fixing useNavigation typescript warning. 2025-05-13 10:36:39 -03:00
Filipi Fuchter
55eea20c8e Renaming expo environment variable 2025-05-13 10:32:27 -03:00
kompfner
9a621f0c54 Merge pull request #1805 from pipecat-ai/pk/aws-nova-sonic-aggregate-user-transcription-text
AWS Nova Sonic service - aggregate user transcription text; it was fr…
2025-05-13 09:13:58 -04:00
Paul Kompfner
55fc24e933 AWS Nova Sonic service - aggregate user transcription text; it was fragmented across many conversation history messages before 2025-05-13 09:13:28 -04:00
Filipi da Silva Fuchter
b14608f09b Merge pull request #1799 from pipecat-ai/daily_audio_source
Using audio source for capturing Daily's participant audio
2025-05-13 08:15:10 -03:00
Mark Backman
4a25c57337 Merge pull request #1806 from pipecat-ai/aleix/run-test-observers
tests: allow passing observers to run_test()
2025-05-12 22:10:44 -04:00
Aleix Conchillo Flaqué
f800e35ccb tests: allow passing observers to run_test() 2025-05-12 17:53:02 -07:00
Vanessa Pyne
12d49a9b9d Merge pull request #1801 from pipecat-ai/vp-fix-typo
update examples
2025-05-12 15:33:56 -05:00
vipyne
b25b251a44 update examples 2025-05-12 14:07:17 -05:00
Mattie Ruth
64b2a75a94 Update Modal App: (#1755)
* Update Modal App:

Updated Modal App to include:

1. Latest Modal API usage
2. Ability to launch different Pipecat pipelines, much like the
   simple chatbot example
3. Ability to choose which pipeline is launched via the
   /connect endpoint
4. Added a pipeline option for connecting to a self-hosted LLM
   on Modal
5. Improved READMEs
6. Added a web client for interacting with the Modal deployment

tmp

* Update README
2025-05-12 12:45:43 -05:00
Aleix Conchillo Flaqué
b33a60f3a5 Merge pull request #1793 from pipecat-ai/khk/deepgram-async-fix
Fix Deepgram TTS streaming
2025-05-12 09:59:46 -07:00
Filipi Fuchter
d22dbb1a6d Fixing ruff format. 2025-05-12 10:36:21 -03:00
Filipi Fuchter
983199a6cd New example capturing the audio from the participant using the custom audio source. 2025-05-12 10:18:43 -03:00
Filipi Fuchter
133d7ee33a Fixing the default audio source for capture_participant_audio 2025-05-12 10:16:32 -03:00
Mark Backman
0bd888afc7 Merge pull request #1796 from nikp06/patch-1
Wrong deprecation warning when importing ai_services.py
2025-05-12 09:12:48 -04:00
nikp06
537bd1c58d Update ai_services.py
fix: correct deprecation warning format in ai_services module
2025-05-12 12:01:13 +02:00
Kwindla Hultman Kramer
5ef519fe2c Fix Deepgram TTS to use stream_raw() 2025-05-11 15:40:31 -07:00
Mark Backman
20498fb47f Merge pull request #1790 from AngeloGiacco/angelo/fix-api-key
[elevenlabs tts ] fix api key
2025-05-10 19:16:27 -04:00
Angelo Giacco
b57dfb3b5d fix lint 2025-05-10 16:36:26 +01:00
Angelo Giacco
0355ed4aa1 move api key to ws header 2025-05-10 16:34:01 +01:00
Angelo Giacco
1e76cc7bdc fix: elevenlabs api key 2025-05-10 16:09:20 +01:00
Vanessa Pyne
18c0374126 Merge pull request #1785 from pipecat-ai/vp-small-filenmae-change
39-aws-nova-sonic.py -> 40-aws-nova-sonic.py
2025-05-09 12:19:09 -05:00
Aleix Conchillo Flaqué
7072fba7e7 Merge pull request #1780 from pipecat-ai/aleix/deprecate-google-generativeai
GoogleLLMService: deprecate google-generativeai
2025-05-09 09:18:30 -07:00
Aleix Conchillo Flaqué
3d702a5c39 minor examples cleanup 2025-05-09 09:16:10 -07:00
Aleix Conchillo Flaqué
f31efa42c9 GoogleLLMService: deprecate google-generativeai 2025-05-09 09:14:43 -07:00
vipyne
74b369ff20 39-aws-nova-sonic.py -> 40-aws-nova-sonic.py 2025-05-09 08:30:59 -05:00
Filipi Fuchter
46eed0a59a Bumping to use the latest version of @pipecat-ai/react-native-daily-transport, and removing code not needed. 2025-05-08 18:18:00 -03:00
kompfner
9643296e29 Merge pull request #1779 from pipecat-ai/pk/aws-nova-sonic-missing-params-export
Add missing `Params` export to AWS Nova Sonic module
2025-05-08 16:04:38 -04:00
Paul Kompfner
c83c5b5a34 Add missing Params export to AWS Nova Sonic module 2025-05-08 15:23:25 -04:00
Filipi Fuchter
277e2d7fc0 Merge branch 'main' into simple_chatbot-react-native 2025-05-08 09:03:16 -03:00
Filipi Fuchter
56ca7360ae Fixing versions 2025-05-05 19:11:59 -03:00
Filipi Fuchter
d5ab3251f0 Bumping the dependencies, updating readme, adding .gitignore. 2025-05-05 18:43:04 -03:00
Filipi Fuchter
915c284420 Fixing readme 2025-05-05 18:32:04 -03:00
Filipi Fuchter
40154824e8 Creating a RN example for simple-chatbot 2025-05-05 18:17:39 -03:00
fatwang2
8cda4512ad Merge branch 'pipecat-ai:main' into main 2025-02-06 10:50:25 +08:00
fatwang2
fc90bdc638 changed to HailuoHttpTTSService 2025-01-19 09:43:48 +08:00
fatwang2
5a88165a26 Merge branch 'pipecat-ai:main' into main 2025-01-19 09:40:08 +08:00
fatwang2
3466842cd4 add hailuo tts service 2025-01-17 12:46:05 +08:00
167 changed files with 12671 additions and 560 deletions

View File

@@ -5,6 +5,66 @@ All notable changes to **Pipecat** will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [Unreleased]
### Added
- Added support for a new TTS service, `MiniMaxHttpTTSService`.
(see https://www.minimax.io/audio)
- A new function `FrameProcessor.setup()` has been added to allow setting up
frame processors before receiving a `StartFrame`. This is what's happening
internally: `FrameProcessor.setup()` is called, `StartFrame` is pushed from
the beginning of the pipeline, your regular pipeline operations, `EndFrame` or
`CancelFrame` are pushed from the beginning of the pipeline and finally
`FrameProcessor.cleanup()` is called.
- Added support for OpenTelemetry tracing in Pipecat. This initial
implementation includes:
- A `setup_tracing` method where you can specify your OpenTelemetry exporter
- Service decorators for STT (`@traced_stt`), LLM (`@traced_llm`), and TTS
(`@traced_tts`) which trace the execution and collect properties and
metrics (TTFB, token usage, character counts, etc.)
- Class decorators that provide execution tracking; these are generic and can
be used for service tracking as needed
- Spans that help track traces on a per conversations and turn basis:
```
conversation-uuid
├── turn-1
│ ├── stt_deepgramsttservice
│ ├── llm_openaillmservice
│ └── tts_cartesiattsservice
...
└── turn-n
└── ...
```
By default, Pipecat has implemented service decorators to trace execution of
STT, LLM, and TTS services. You can enable tracing by setting `enable_tracing`
to `True` in the PipelineTask.
- Added `TurnTrackingObserver`, which tracks the start and end of a user/bot
turn pair and emits events `on_turn_started` and `on_turn_stopped`
corresponding to the start and end of a turn, respectively.
- Allow passing observers to `run_test()` while running unit tests.
### Changed
- `GoogleLLMService` has been updated to use `google-genai` instead of the
deprecated `google-generativeai`.
### Other
- Added an `open-telemetry-tracing` example, showing how to setup tracing. The
example also includes Jaeger as an open source OpenTelemetry client to review
traces from the example runs.
- Added foundational example `29-turn-tracking-observer.py` to show how to use
the `TurnTrackingObserver.
## [0.0.67] - 2025-05-07
### Added

View File

@@ -12,7 +12,7 @@
"@daily-co/daily-js": "0.74.0"
},
"devDependencies": {
"vite": "^6.0.9"
"vite": "^6.3.5"
}
},
"node_modules/@babel/runtime": {
@@ -999,9 +999,9 @@
}
},
"node_modules/vite": {
"version": "6.3.3",
"resolved": "https://registry.npmjs.org/vite/-/vite-6.3.3.tgz",
"integrity": "sha512-5nXH+QsELbFKhsEfWLkHrvgRpTdGJzqOZ+utSdmPTvwHmvU6ITTm3xx+mRusihkcI8GeC7lCDyn3kDtiki9scw==",
"version": "6.3.5",
"resolved": "https://registry.npmjs.org/vite/-/vite-6.3.5.tgz",
"integrity": "sha512-cZn6NDFE7wdTpINgs++ZJ4N49W2vRp8LCKrn3Ob1kYNtOo21vfDoaV5GzBfLU4MovSAB8uNRm4jgzVQZ+mBzPQ==",
"dev": true,
"dependencies": {
"esbuild": "^0.25.0",

View File

@@ -12,7 +12,7 @@
"license": "ISC",
"description": "",
"devDependencies": {
"vite": "^6.0.9"
"vite": "^6.3.5"
},
"dependencies": {
"@daily-co/daily-js": "0.74.0"

View File

@@ -1,3 +1,6 @@
# Modal clone
modal-examples
# Python
__pycache__/
*.py[cod]

View File

@@ -1,24 +1,44 @@
# Deploying Pipecat to Modal.com
Barebones deployment example for [modal.com](https://www.modal.com)
Deployment example for [modal.com](https://www.modal.com). This example demonstrates how to deploy a FastAPI webapp to Modal with an RTVI compatible `/connect` endpoint that launches a Pipecat pipeline in a separate Modal container and returns a room/token for the client to join. This example also supports providing a parameter to the `/connect` endpoint for specifying which Pipecat pipeline to launch; openai, gemini, or vllm. The vllm pipeline points to a self-hosted OpenAI compatible LLM, using a llama model (neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w4a16), deployed to Modal.
1. Install dependencies
```bash
python -m venv venv
source venv/bin/active # or OS equivalent
pip install -r requirements.txt
```
2. Setup .env
# Running this Example
## Prerequisites
Setup a Modal account and install it on your machine if you have not already, following their easy 3-steps in their [Getting Started Guide](https://modal.com/docs/guide#getting-started)
## Deploy a self-serve LLM
1. Follow the Modal Guide and example for [Deploying an OpenAI-compatible LLM service with vLLM](https://modal.com/docs/examples/vllm_inference).
The TLDR, though, is to simply do the following from within this directory:
```bash
git clone https://github.com/modal-labs/modal-examples
cd modal-examples
modal deploy 06_gpu_and_ml/llm-serving/vllm_inference.py
```
2. Jot down the endpoint from the previous step to use in the bot_vllm file mentioned below. It will look something like: `https://<Modal workspace>--example-vllm-openai-compatible-serve.modal.run`
**Note:** This Modal example is their [initial getting started example](https://modal.com/docs/examples/vllm_inference) with a Llama-3.1 model. By default, it will tear down the container after 15 minutes of inactivity and can take 5-10 minutes to re-start, during which time it is unusable. So for the purposes of just getting started and this example, we recommend visiting the `/docs` endpoint (`https://<Modal workspace>--example-vllm-openai-compatible-serve.modal.run/docs`) for your deployed llm in a browser to trigger the cold start. Then wait for the page to load, indicating its ready before trying to connect your client.
## Deploy FastAPI App and Pipecat pipeline to Modal
1. Setup environment variables
```bash
cd server
cp env.example .env
# Modify .env to provide your service API Keys
```
Alternatively, you can configure your Modal app to use [secrets](https://modal.com/docs/guide/secrets)
3. Test the app locally
1. Update the `modal_url` in `server/src/bot_vllm.py` to point to the url produced from the self-serve llm deploy, mentioned above.
2. From within the `server` directory, test the app locally:
```bash
modal serve app.py
@@ -30,8 +50,34 @@ modal serve app.py
modal deploy app.py
```
## Configuration options
5. Jot down the endpoint from the previous step to use in the client's app.js file mentioned its README. It will look something like: `https://<Modal workspace>--pipecat-modal-fastapi-app.modal.run`
This app sets some sensible defaults for reducing cold starts, such as `minkeep_warm=1`, which will keep at least 1 warm instance ready for your bot function.
## Launch and Talk to your Bots running on Modal
It has been configured to only allow a concurrency of 1 (`max_inputs=1`) as each user will require their own running function.
## Option 1: Direct Link
Simply click on the url displayed after running the server or deploy step to launch an agent and be redirected to a Daily room to talk with the launched bot. This will use the OpenAI pipeline.
## Option 2: Connect via an RTVI Client
Follow the instructions provided in the [client folder's README](client/javascript/README.md) for building and running a custom client that connects to your Modal endpoint. The provided client provides a dropdown for choosing which bot pipeline to run.
# Navigating your llm, server, and Pipecat logs
In your [Modal dashboard](https://modal.com/apps), you should have two Apps listed under Live Apps:
1. `example-vllm-openai-compatible`: This App contains the containers and logs used to run your self-hosted LLM. There will be just one App Function listed: `serve`. Click on this function to view logs for your LLM.
2. `pipecat-modal`: This App contains the containers and logs used to run your `connect` endpoints and Pipecat pipelines. It will list two App Functions:
1. `fastapi_app`: This function is running the endpoints that your client will interact with and initiate starting a new pipeline (`/`, `/connect`, `/status`). Click on this function to see logs for each endpoint hit.
2. `bot_runner`: This function handles launching and running a bot pipeline. Click on this function to get a list of all pipeline runs and access each run's logs.
## Diagram of Deployment
![](diagram.jpg)
# Modal + Pipecat Tips
- In most other Pipecat examples, we use Popen to launch the pipeline process from the /connect endpoint. In this example, we instead use a Modal function with its own Modal image defined. This change ensures that each run of the Pipeline happens in a isolated, customizable container.
- For the FastAPI and most common Pipecat Pipeline containers, a default debian_slim CPU-only should be all that's required to run. GPU containers are needed for self-hosted services.
- To minimize cold starts of the pipeline and reduce latency for users, set `min_containers=1` on the Modal Function that launches the pipeline to ensure at least one warm instance of your function is always available.
- For next steps on running a self-hosted llm and reducing latency, check out all of [Modal's LLM examples](https://modal.com/docs/examples/vllm_inference).

View File

@@ -1,80 +0,0 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import os
import aiohttp
import modal
from bot import _voice_bot_process
from fastapi import HTTPException
from fastapi.responses import JSONResponse
from loguru import logger
MAX_SESSION_TIME = 15 * 60 # 15 minutes
app = modal.App("pipecat-modal")
image = modal.Image.debian_slim(python_version="3.12").pip_install_from_requirements(
"requirements.txt"
)
@app.function(
image=image,
cpu=1.0,
secrets=[modal.Secret.from_dotenv()],
keep_warm=1,
enable_memory_snapshot=True,
max_inputs=1, # Do not reuse instances across requests
retries=0,
)
def launch_bot_process(room_url: str, token: str):
_voice_bot_process(room_url, token)
@app.function(
image=image,
secrets=[modal.Secret.from_dotenv()],
)
@modal.web_endpoint(method="POST")
async def start():
from pipecat.transports.services.helpers.daily_rest import (
DailyRESTHelper,
DailyRoomParams,
)
logger.info("Request received")
async with aiohttp.ClientSession() as session:
daily_rest_helper = DailyRESTHelper(
daily_api_key=os.getenv("DAILY_API_KEY", ""),
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
aiohttp_session=session,
)
# Create new Daily room
room = await daily_rest_helper.create_room(DailyRoomParams())
if not room.url:
raise HTTPException(
status_code=500,
detail="Unable to create room",
)
logger.info(f"Created room: {room.url}")
# Create bot token for room
token = await daily_rest_helper.get_token(room.url, MAX_SESSION_TIME)
if not token:
raise HTTPException(status_code=500, detail=f"Failed to get token for room: {room.url}")
logger.info(f"Bot token created: {token}")
# Spawn a new bot process
launch_bot_process.spawn(room_url=room.url, token=token)
# Return room URL to the user to join
# Note: in production, you would want to return a token to the user
return JSONResponse(content={"room_url": room.url, token: token})

View File

@@ -1,95 +0,0 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import os
import sys
from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.cartesia.tts import CartesiaTTSService
from pipecat.services.openai.llm import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
load_dotenv(override=True)
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
async def main(room_url: str, token: str):
transport = DailyTransport(
room_url,
token,
"bot",
DailyParams(
audio_in_enabled=True,
audio_out_enabled=True,
transcription_enabled=True,
vad_analyzer=SileroVADAnalyzer(),
),
)
tts = CartesiaTTSService(
api_key=os.getenv("CARTESIA_API_KEY", ""), voice_id="71a7ad14-091c-4e8e-a314-022ece01c121"
)
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
messages = [
{
"role": "system",
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
},
]
context = OpenAILLMContext(messages)
context_aggregator = llm.create_context_aggregator(context)
pipeline = Pipeline(
[
transport.input(),
context_aggregator.user(),
llm,
tts,
transport.output(),
context_aggregator.assistant(),
]
)
task = PipelineTask(
pipeline,
params=PipelineParams(
allow_interruptions=True,
enable_metrics=True,
enable_usage_metrics=True,
report_only_initial_ttfb=True,
),
)
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
await transport.capture_participant_transcription(participant["id"])
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.cancel()
runner = PipelineRunner()
await runner.run(task)
def _voice_bot_process(room_url: str, token: str):
asyncio.run(main(room_url, token))

View File

@@ -0,0 +1 @@
node_modules

View File

@@ -0,0 +1,29 @@
# JavaScript Implementation
Basic implementation using the [Pipecat JavaScript SDK](https://docs.pipecat.ai/client/js/introduction).
## Setup
1. Deploy the Modal server. See the main [README](../../README).
2. Navigate to the `client/javascript` directory:
```bash
cd client/javascript
```
3. Modify the baseUrl in src/app.js to point to your deployed Modal endpoint
4. Install dependencies:
```bash
npm install
```
5. Run the client app:
```
npm run dev
```
6. Visit http://localhost:5173 in your browser.

View File

@@ -0,0 +1,49 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>AI Chatbot</title>
</head>
<body>
<div class="container">
<div class="status-bar">
<div class="status">
Status: <span id="connection-status">Disconnected</span>
</div>
<div class="controls">
<select id="bot-selector">
<option value="openai">OpenAI</option>
<option value="gemini">Gemini</option>
<option value="vllm">Llama</option>
</select>
<button id="connect-btn">Connect</button>
<button id="disconnect-btn" disabled>Disconnect</button>
</div>
</div>
<div class="main-content">
<div class="bot-container">
<div id="bot-video-container"></div>
<audio id="bot-audio" autoplay></audio>
</div>
</div>
<div class="device-bar">
<div class="device-controls">
<select id="device-selector"></select>
<button id="mic-toggle-btn">Mute Mic</button>
</div>
</div>
<div class="debug-panel">
<h3>Debug Info</h3>
<div id="debug-log"></div>
</div>
</div>
<script type="module" src="/src/app.js"></script>
<link rel="stylesheet" href="/src/style.css" />
</body>
</html>

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,21 @@
{
"name": "client",
"version": "1.0.0",
"main": "index.js",
"scripts": {
"dev": "vite",
"build": "vite build",
"preview": "vite preview"
},
"keywords": [],
"author": "",
"license": "ISC",
"description": "",
"devDependencies": {
"vite": "^6.3.5"
},
"dependencies": {
"@pipecat-ai/client-js": "^0.3.5",
"@pipecat-ai/daily-transport": "^0.3.10"
}
}

View File

@@ -0,0 +1,381 @@
/**
* Copyright (c) 20242025, Daily
*
* SPDX-License-Identifier: BSD 2-Clause License
*/
/**
* RTVI Client Implementation
*
* This client connects to an RTVI-compatible bot server using WebRTC (via Daily).
* It handles audio/video streaming and manages the connection lifecycle.
*
* Requirements:
* - A running RTVI bot server (defaults to http://localhost:7860)
* - The server must implement the /connect endpoint that returns Daily.co room credentials
* - Browser with WebRTC support
*/
import { RTVIClient, RTVIEvent } from '@pipecat-ai/client-js';
import { DailyTransport } from '@pipecat-ai/daily-transport';
/**
* ChatbotClient handles the connection and media management for a real-time
* voice and video interaction with an AI bot.
*/
class ChatbotClient {
constructor() {
// Initialize client state
this.rtviClient = null;
this.setupDOMElements();
this.initializeClientAndTransport();
this.setupEventListeners();
}
/**
* Set up references to DOM elements and create necessary media elements
*/
setupDOMElements() {
// Get references to UI control elements
this.connectBtn = document.getElementById('connect-btn');
this.disconnectBtn = document.getElementById('disconnect-btn');
this.statusSpan = document.getElementById('connection-status');
this.debugLog = document.getElementById('debug-log');
this.botVideoContainer = document.getElementById('bot-video-container');
this.deviceSelector = document.getElementById('device-selector');
// Create an audio element for bot's voice output
this.botAudio = document.createElement('audio');
this.botAudio.autoplay = true;
this.botAudio.playsInline = true;
document.body.appendChild(this.botAudio);
}
/**
* Set up event listeners for connect/disconnect buttons
*/
setupEventListeners() {
this.connectBtn.addEventListener('click', () => this.connect());
this.disconnectBtn.addEventListener('click', () => this.disconnect());
// Populate device selector
this.rtviClient.getAllMics().then((mics) => {
console.log('Available mics:', mics);
mics.forEach((device) => {
const option = document.createElement('option');
option.value = device.deviceId;
option.textContent = device.label || `Microphone ${device.deviceId}`;
this.deviceSelector.appendChild(option);
});
});
this.deviceSelector.addEventListener('change', (event) => {
const selectedDeviceId = event.target.value;
console.log('Selected device ID:', selectedDeviceId);
this.rtviClient.updateMic(selectedDeviceId);
});
// Handle mic mute/unmute toggle
const micToggleBtn = document.getElementById('mic-toggle-btn');
micToggleBtn.addEventListener('click', () => {
let micEnabled = this.rtviClient.isMicEnabled;
micToggleBtn.textContent = micEnabled ? 'Unmute Mic' : 'Mute Mic';
this.rtviClient.enableMic(!micEnabled);
// Add logic to mute/unmute the mic
if (micEnabled) {
console.log('Mic muted');
// Add code to mute the mic
} else {
console.log('Mic unmuted');
// Add code to unmute the mic
}
});
}
/**
* Set up the RTVI client and Daily transport
*/
async initializeClientAndTransport() {
// Initialize the RTVI client with a DailyTransport and our configuration
this.rtviClient = new RTVIClient({
transport: new DailyTransport(),
params: {
// REPLACE WITH YOUR MODAL URL ENDPOINT
baseUrl:
'https://<Modal workspace>--pipecat-modal-bot-launcher.modal.run',
endpoints: {
connect: '/connect',
},
requestData: {
bot_name: 'openai',
},
},
enableMic: true, // Enable microphone for user input
enableCam: false,
callbacks: {
// Handle connection state changes
onConnected: () => {
this.updateStatus('Connected');
this.connectBtn.disabled = true;
this.disconnectBtn.disabled = false;
this.log('Client connected');
},
onDisconnected: () => {
this.updateStatus('Disconnected');
this.connectBtn.disabled = false;
this.disconnectBtn.disabled = true;
this.log('Client disconnected');
},
// Handle transport state changes
onTransportStateChanged: (state) => {
this.updateStatus(`Transport: ${state}`);
this.log(`Transport state changed: ${state}`);
if (state === 'connecting') {
window.startTime = Date.now();
}
if (state === 'ready') {
this.setupMediaTracks();
console.warn('TIME TO BOT READY:', Date.now() - window.startTime);
}
},
// Handle bot connection events
onBotConnected: (participant) => {
this.log(`Bot connected: ${JSON.stringify(participant)}`);
},
onBotDisconnected: (participant) => {
this.log(`Bot disconnected: ${JSON.stringify(participant)}`);
},
onBotReady: (data) => {
this.log(`Bot ready: ${JSON.stringify(data)}`);
this.setupMediaTracks();
},
// Transcript events
onUserTranscript: (data) => {
// Only log final transcripts
if (data.final) {
this.log(`User: ${data.text}`);
}
},
onBotTranscript: (data) => {
this.log(`Bot: ${data.text}`);
},
// Error handling
onMessageError: (error) => {
console.log('Message error:', error);
},
onMicUpdated: (data) => {
console.log('Mic updated:', data);
this.deviceSelector.value = data.deviceId;
},
onError: (error) => {
console.log('Error:', JSON.stringify(error));
},
},
});
// Set up listeners for media track events
this.setupTrackListeners();
await this.rtviClient.initDevices();
window.client = this.rtviClient;
}
/**
* Add a timestamped message to the debug log
*/
log(message) {
const entry = document.createElement('div');
entry.textContent = `${new Date().toISOString()} - ${message}`;
// Add styling based on message type
if (message.startsWith('User: ')) {
entry.style.color = '#2196F3'; // blue for user
} else if (message.startsWith('Bot: ')) {
entry.style.color = '#4CAF50'; // green for bot
}
this.debugLog.appendChild(entry);
this.debugLog.scrollTop = this.debugLog.scrollHeight;
console.log(message);
}
/**
* Update the connection status display
*/
updateStatus(status) {
this.statusSpan.textContent = status;
this.log(`Status: ${status}`);
}
/**
* Check for available media tracks and set them up if present
* This is called when the bot is ready or when the transport state changes to ready
*/
setupMediaTracks() {
if (!this.rtviClient) return;
// Get current tracks from the client
const tracks = this.rtviClient.tracks();
// Set up any available bot tracks
if (tracks.bot?.audio) {
this.setupAudioTrack(tracks.bot.audio);
}
if (tracks.bot?.video) {
this.setupVideoTrack(tracks.bot.video);
}
}
/**
* Set up listeners for track events (start/stop)
* This handles new tracks being added during the session
*/
setupTrackListeners() {
if (!this.rtviClient) return;
// Listen for new tracks starting
this.rtviClient.on(RTVIEvent.TrackStarted, (track, participant) => {
// Only handle non-local (bot) tracks
if (!participant?.local) {
if (track.kind === 'audio') {
this.setupAudioTrack(track);
} else if (track.kind === 'video') {
this.setupVideoTrack(track);
}
this.log(
`Track started event: ${track.kind} from ${
participant?.name || 'unknown'
}`
);
} else {
this.log('Local mic unmuted');
}
});
// Listen for tracks stopping
this.rtviClient.on(RTVIEvent.TrackStopped, (track, participant) => {
if (participant.local) {
this.log('Local mic muted');
return;
}
this.log(
`Track stopped event: ${track.kind} from ${
participant?.name || 'unknown'
}`
);
});
}
/**
* Set up an audio track for playback
* Handles both initial setup and track updates
*/
setupAudioTrack(track) {
this.log('Setting up audio track');
// Check if we're already playing this track
if (this.botAudio.srcObject) {
const oldTrack = this.botAudio.srcObject.getAudioTracks()[0];
if (oldTrack?.id === track.id) return;
}
// Create a new MediaStream with the track and set it as the audio source
this.botAudio.srcObject = new MediaStream([track]);
}
/**
* Set up a video track for display
* Handles both initial setup and track updates
*/
setupVideoTrack(track) {
this.log('Setting up video track');
const videoEl = document.createElement('video');
videoEl.autoplay = true;
videoEl.playsInline = true;
videoEl.muted = true;
videoEl.style.width = '100%';
videoEl.style.height = '100%';
videoEl.style.objectFit = 'cover';
// Check if we're already displaying this track
if (this.botVideoContainer.querySelector('video')?.srcObject) {
const oldTrack = this.botVideoContainer
.querySelector('video')
.srcObject.getVideoTracks()[0];
if (oldTrack?.id === track.id) return;
}
// Create a new MediaStream with the track and set it as the video source
videoEl.srcObject = new MediaStream([track]);
this.botVideoContainer.innerHTML = '';
this.botVideoContainer.appendChild(videoEl);
}
/**
* Initialize and connect to the bot
* This sets up the RTVI client, initializes devices, and establishes the connection
*/
async connect() {
try {
const botSelector = document.getElementById('bot-selector');
const selectedBot = botSelector.value;
this.rtviClient.params.requestData.bot_name = selectedBot;
// Initialize audio/video devices
this.log('Initializing devices...');
await this.rtviClient.initDevices();
// Connect to the bot
this.log(`Connecting to bot: ${selectedBot}`);
await this.rtviClient.connect();
this.log('Connection complete');
} catch (error) {
// Handle any errors during connection
console.error('Connection error:', error);
this.log(`Error connecting: ${JSON.stringify(error.message)}`);
this.log(`Error stack: ${error.stack}`);
this.updateStatus('Error');
// Clean up if there's an error
if (this.rtviClient) {
try {
await this.rtviClient.disconnect();
} catch (disconnectError) {
this.log(`Error during disconnect: ${disconnectError.message}`);
}
}
}
}
/**
* Disconnect from the bot and clean up media resources
*/
async disconnect() {
if (this.rtviClient) {
try {
// Disconnect the RTVI client
await this.rtviClient.disconnect();
// Clean up audio
if (this.botAudio.srcObject) {
this.botAudio.srcObject.getTracks().forEach((track) => track.stop());
this.botAudio.srcObject = null;
}
// Clean up video
if (this.botVideoContainer.querySelector('video')?.srcObject) {
const video = this.botVideoContainer.querySelector('video');
video.srcObject.getTracks().forEach((track) => track.stop());
video.srcObject = null;
}
this.botVideoContainer.innerHTML = '';
} catch (error) {
this.log(`Error disconnecting: ${error.message}`);
}
}
}
}
// Initialize the client when the page loads
window.addEventListener('DOMContentLoaded', () => {
new ChatbotClient();
});

View File

@@ -0,0 +1,135 @@
body {
margin: 0;
padding: 20px;
font-family: Arial, sans-serif;
background-color: #f0f0f0;
}
.container {
max-width: 1200px;
margin: 0 auto;
}
.status-bar,
.device-bar {
display: flex;
justify-content: space-between;
align-items: center;
padding: 10px;
background-color: #fff;
border-radius: 8px;
margin-bottom: 20px;
}
.controls,
.device-controls {
display: flex;
align-items: center;
gap: 10px; /* Adds spacing between elements */
}
.device-controls {
margin-left: auto;
}
.controls button,
.device-controls button {
padding: 8px 16px;
margin-left: 10px;
border: none;
border-radius: 4px;
cursor: pointer;
}
#bot-selector,
#device-selector {
padding: 8px 16px;
padding-right: 40px;
border: none;
border-radius: 4px;
background-color: #6c757d; /* Gray background */
color: white; /* White text */
cursor: pointer;
appearance: none; /* Removes default browser styling for dropdowns */
background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24' fill='white'%3E%3Cpath d='M7 10l5 5 5-5z'/%3E%3C/svg%3E"); /* Custom arrow */
background-repeat: no-repeat;
background-position: right 8px center; /* Position the arrow */
}
#bot-selector:focus,
#device-selector:focus {
outline: none;
box-shadow: 0 0 4px rgba(0, 0, 0, 0.3); /* Add a subtle focus effect */
}
#connect-btn {
background-color: #4caf50;
color: white;
}
#disconnect-btn {
background-color: #f44336;
color: white;
}
#mic-toggle-btn {
}
button:disabled {
opacity: 0.5;
cursor: not-allowed;
}
.main-content {
background-color: #fff;
border-radius: 8px;
padding: 20px;
margin-bottom: 20px;
}
.bot-container {
display: flex;
flex-direction: column;
align-items: center;
}
#bot-video-container {
width: 640px;
height: 360px;
background-color: #e0e0e0;
border-radius: 8px;
margin: 20px auto;
overflow: hidden;
display: flex;
align-items: center;
justify-content: center;
}
#bot-video-container video {
width: 100%;
height: 100%;
object-fit: cover;
}
.debug-panel {
background-color: #fff;
border-radius: 8px;
padding: 20px;
}
.debug-panel h3 {
margin: 0 0 10px 0;
font-size: 16px;
font-weight: bold;
}
#debug-log {
height: 200px;
overflow-y: auto;
background-color: #f8f8f8;
padding: 10px;
border-radius: 4px;
font-family: monospace;
font-size: 12px;
line-height: 1.4;
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 MiB

View File

@@ -1,3 +0,0 @@
DAILY_API_KEY=
OPENAI_API_KEY=
CARTESIA_API_KEY=

View File

@@ -1,4 +0,0 @@
python-dotenv==1.0.1
modal==0.71.3
pipecat-ai[daily,silero,cartesia,openai]
fastapi==0.115.6

View File

@@ -0,0 +1,307 @@
"""modal_example.
This module shows a simple example of how to deploy a bot using Modal and FastAPI.
It includes:
- FastAPI endpoints for starting agents and checking bot statuses.
- Dynamic loading of bot implementations.
- Use of a Daily transport for bot communication.
"""
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import importlib
import os
from contextlib import asynccontextmanager
from typing import Any, Dict, Literal
import aiohttp
import modal
from fastapi import APIRouter, FastAPI, HTTPException
from fastapi.responses import JSONResponse, RedirectResponse
from pydantic import BaseModel
# container specifications for the FastAPI web server
web_image = (
modal.Image.debian_slim(python_version="3.13")
.pip_install_from_requirements("requirements.txt")
.pip_install("pipecat-ai[daily]")
.add_local_dir("src", remote_path="/root/src")
)
# container specifications for the Pipecat pipeline
bot_image = (
modal.Image.debian_slim(python_version="3.13")
.apt_install("ffmpeg")
.pip_install_from_requirements("requirements.txt")
.pip_install("pipecat-ai[daily,elevenlabs,openai,silero,google]")
.add_local_dir("src", remote_path="/root/src")
)
app = modal.App("pipecat-modal", secrets=[modal.Secret.from_dotenv()])
router = APIRouter()
bot_jobs = {}
daily_helpers = {}
# Names of all supported bot implementations
# These correspond to the bot files in the src directory
BotName = Literal["openai", "gemini", "vllm"]
def cleanup():
"""Cleanup function to terminate all bot processes.
Called during server shutdown.
"""
for entry in bot_jobs.values():
func = modal.FunctionCall.from_id(entry[0])
if func:
func.cancel()
def get_bot_file(bot_name: BotName) -> str:
"""Retrieve the bot file name corresponding to the provided bot_name.
Args:
bot_name (BotName): The name of the bot (e.g., 'openai', 'gemini', 'vllm').
Returns:
str: The file name corresponding to the bot implementation.
Raises:
ValueError: If the bot name is invalid or not supported.
"""
# bot_implementation = os.getenv("BOT_IMPLEMENTATION", "openai").lower().strip()
bot_implementation = bot_name.lower().strip()
if not bot_implementation:
bot_implementation = "openai"
if bot_implementation not in ["openai", "gemini", "vllm"]:
raise ValueError(
f"Invalid BOT_IMPLEMENTATION: {bot_implementation}. Must be 'openai' or 'gemini' or 'vllm'"
)
return f"bot_{bot_implementation}"
def get_runner(path: str, bot_file: str) -> callable:
"""Dynamically import the run_bot function based on the bot name.
Args:
path (str): The path to the bot files (e.g., 'src').
bot_file (str): The file name of the bot implementation (e.g., 'openai', 'gemini', 'vllm').
Returns:
function: The run_bot function from the specified bot module.
Raises:
ImportError: If the specified bot module or run_bot function is not found.
"""
try:
# Dynamically construct the module name
module_name = f"{path}.{bot_file}"
# Import the module
module = importlib.import_module(module_name)
# Get the run_bot function from the module
return getattr(module, "run_bot")
except (ImportError, AttributeError) as e:
raise ImportError(f"Failed to import run_bot from {module_name}: {e}")
async def create_room_and_token() -> tuple[str, str]:
"""Create a Daily room and generate an authentication token.
This function checks for existing room URL and token in the environment variables.
If not found, it creates a new room using the Daily API and generates a token for it.
Returns:
tuple[str, str]: A tuple containing the room URL and the authentication token.
Raises:
HTTPException: If room creation or token generation fails.
"""
from pipecat.transports.services.helpers.daily_rest import DailyRoomParams
room_url = os.getenv("DAILY_SAMPLE_ROOM_URL", None)
token = os.getenv("DAILY_SAMPLE_ROOM_TOKEN", None)
if not room_url:
room = await daily_helpers["rest"].create_room(DailyRoomParams())
if not room.url:
raise HTTPException(status_code=500, detail="Failed to create room")
room_url = room.url
token = await daily_helpers["rest"].get_token(room_url)
if not token:
raise HTTPException(status_code=500, detail=f"Failed to get token for room: {room_url}")
return room_url, token
@app.function(image=bot_image, min_containers=1)
async def bot_runner(room_url, token, bot_name: BotName = "openai"):
"""Launch the provided bot process, providing the given room URL and token for the bot to join.
Args:
room_url (str): The URL of the Daily room where the bot and client will communicate.
token (str): The authentication token for the room.
bot_name (BotName): The name of the bot implementation to use. Defaults to "openai".
Raises:
HTTPException: If the bot pipeline fails to start.
"""
try:
path = "src"
bot_file = get_bot_file(bot_name)
run_bot = get_runner(path, bot_file)
print(f"Starting bot process: {bot_file} -u {room_url} -t {token}")
await run_bot(room_url, token)
except Exception as e:
raise HTTPException(status_code=500, detail=f"Failed to start bot pipeline: {e}")
@asynccontextmanager
async def lifespan(app: FastAPI):
"""FastAPI lifespan manager that handles startup and shutdown tasks.
- Creates aiohttp session
- Initializes Daily API helper
- Cleans up resources on shutdown
"""
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
aiohttp_session = aiohttp.ClientSession()
daily_helpers["rest"] = DailyRESTHelper(
daily_api_key=os.getenv("DAILY_API_KEY", ""),
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
aiohttp_session=aiohttp_session,
)
yield
await aiohttp_session.close()
cleanup()
class ConnectData(BaseModel):
"""Data provided by client to specify the bot pipeline.
Attributes:
bot_name (BotName): The name of the bot to connect to. Defaults to "openai".
"""
bot_name: BotName = "openai"
async def start(data: ConnectData):
"""Internal method to start a bot agent and return the room URL and token.
Args:
data (ConnectData): The data containing the bot name to use.
Returns:
tuple[str, str]: A tuple containing the room URL and token.
"""
room_url, token = await create_room_and_token()
launch_bot_func = modal.Function.from_name("pipecat-modal", "bot_runner")
function_id = launch_bot_func.spawn(room_url, token, data.bot_name)
bot_jobs[function_id] = (function_id, room_url)
return room_url, token
@router.get("/")
async def start_agent():
"""A user endpoint for launching a bot agent and redirecting to the created room URL.
This function retrieves the bot implementation from the environment,
starts the bot agent, and redirects the user to the room URL to
interact with the bot through a Daily Prebuilt Interface.
Returns:
RedirectResponse: A response that redirects to the room URL.
"""
bot_name = os.getenv("BOT_IMPLEMENTATION", "openai").lower().strip()
print(f"Starting bot: {bot_name}")
room_url, token = await start(ConnectData(bot_name=bot_name))
return RedirectResponse(room_url)
@router.post("/connect")
async def rtvi_connect(data: ConnectData) -> Dict[Any, Any]:
"""A user endpoint for launching a bot agent and retrieving the room/token credentials.
This function retrieves the bot implementation from the request, if provided,
starts the bot agent, and returns the room URL and token for the bot. This allows the
client to then connect to the bot using their own RTVI interface.
Args:
data (ConnectData): Optional. The data containing the bot name to use.
Returns:
Dict[Any, Any]: A dictionary containing the room URL and token.
"""
print(f"Starting bot: {data.bot_name}")
if data is None or not data.bot_name:
data.bot_name = os.getenv("BOT_IMPLEMENTATION", "openai").lower().strip()
room_url, token = await start(data)
return {"room_url": room_url, "token": token}
@router.get("/status/{fid}")
def get_status(fid: str):
"""Retrieve the status of a bot process by its function ID.
Args:
fid (str): The function ID of the bot process.
Returns:
JSONResponse: A JSON response containing the bot's status and result code.
Raises:
HTTPException: If the bot process with the given ID is not found.
"""
func = modal.FunctionCall.from_id(fid)
if not func:
raise HTTPException(status_code=404, detail=f"Bot with process id: {fid} not found")
try:
result = func.get(timeout=0)
return JSONResponse({"bot_id": fid, "status": "finished", "code": result})
except modal.exception.OutputExpiredError:
return JSONResponse({"bot_id": fid, "status": "finished", "code": 404})
except TimeoutError:
return JSONResponse({"bot_id": fid, "status": "running", "code": 202})
@app.function(image=web_image, min_containers=1)
@modal.concurrent(max_inputs=1)
@modal.asgi_app()
def fastapi_app():
"""Create and configure the FastAPI application.
This function initializes the FastAPI app with middleware, routes, and lifespan management.
It is decorated to be used as a Modal ASGI app.
"""
from fastapi.middleware.cors import CORSMiddleware
# Initialize FastAPI app
web_app = FastAPI(lifespan=lifespan)
web_app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# Include the endpoints from endpoints.py
web_app.include_router(router)
return web_app

View File

@@ -0,0 +1,14 @@
DAILY_API_KEY=
# determines which bot file to default to: 'openai', 'gemini', or 'vllm'
BOT_IMPLEMENTATION=openai
# needed for the openai bot pipeline
OPENAI_API_KEY=
ELEVENLABS_API_KEY=
# needed for the gemini live bot pipeline
GOOGLE_API_KEY=
# needed if you modified the API Key for your self-hosted LLM
VLLM_API_KEY=

View File

@@ -0,0 +1,2 @@
python-dotenv==1.0.1
modal==0.71.3

Binary file not shown.

After

Width:  |  Height:  |  Size: 759 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 884 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 876 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 881 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 866 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 874 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 882 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 885 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 888 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 890 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 898 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 836 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 903 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 908 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 908 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 905 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 903 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 866 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 849 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 866 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 866 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 864 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 858 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 875 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 881 KiB

View File

@@ -0,0 +1,198 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
"""Gemini Bot Implementation.
This module implements a chatbot using Google's Gemini Multimodal Live model.
It includes:
- Real-time audio/video interaction through Daily
- Animated robot avatar
- Speech-to-speech model
The bot runs as part of a pipeline that processes audio/video frames and manages
the conversation flow using Gemini's streaming capabilities.
"""
import os
import sys
from dotenv import load_dotenv
from loguru import logger
from PIL import Image
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.audio.vad.vad_analyzer import VADParams
from pipecat.frames.frames import (
BotStartedSpeakingFrame,
BotStoppedSpeakingFrame,
Frame,
OutputImageRawFrame,
SpriteFrame,
)
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.processors.frameworks.rtvi import RTVIConfig, RTVIObserver, RTVIProcessor
from pipecat.services.gemini_multimodal_live.gemini import GeminiMultimodalLiveLLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
load_dotenv(override=True)
try:
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
except ValueError:
# Handle the case where logger is already initialized
pass
sprites = []
script_dir = os.path.dirname(__file__)
for i in range(1, 26):
# Build the full path to the image file
full_path = os.path.join(script_dir, f"assets/robot0{i}.png")
# Get the filename without the extension to use as the dictionary key
# Open the image and convert it to bytes
with Image.open(full_path) as img:
sprites.append(OutputImageRawFrame(image=img.tobytes(), size=img.size, format=img.format))
# Create a smooth animation by adding reversed frames
flipped = sprites[::-1]
sprites.extend(flipped)
# Define static and animated states
quiet_frame = sprites[0] # Static frame for when bot is listening
talking_frame = SpriteFrame(images=sprites) # Animation sequence for when bot is talking
class TalkingAnimation(FrameProcessor):
"""Manages the bot's visual animation states.
Switches between static (listening) and animated (talking) states based on
the bot's current speaking status.
"""
def __init__(self):
super().__init__()
self._is_talking = False
async def process_frame(self, frame: Frame, direction: FrameDirection):
"""Process incoming frames and update animation state.
Args:
frame: The incoming frame to process
direction: The direction of frame flow in the pipeline
"""
await super().process_frame(frame, direction)
# Switch to talking animation when bot starts speaking
if isinstance(frame, BotStartedSpeakingFrame):
if not self._is_talking:
await self.push_frame(talking_frame)
self._is_talking = True
# Return to static frame when bot stops speaking
elif isinstance(frame, BotStoppedSpeakingFrame):
await self.push_frame(quiet_frame)
self._is_talking = False
await self.push_frame(frame, direction)
async def run_bot(room_url: str, token: str):
"""Main bot execution function.
Sets up and runs the bot pipeline including:
- Daily video transport with specific audio parameters
- Gemini Live multimodal model integration
- Voice activity detection
- Animation processing
- RTVI event handling
"""
# Set up Daily transport with specific audio/video parameters for Gemini
transport = DailyTransport(
room_url,
token,
"Chatbot",
DailyParams(
audio_out_enabled=True,
camera_out_enabled=True,
camera_out_width=1024,
camera_out_height=576,
vad_enabled=True,
vad_audio_passthrough=True,
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)),
),
)
# Initialize the Gemini Multimodal Live model
llm = GeminiMultimodalLiveLLMService(
api_key=os.getenv("GOOGLE_API_KEY"),
voice_id="Puck", # Aoede, Charon, Fenrir, Kore, Puck
transcribe_user_audio=True,
)
messages = [
{
"role": "user",
"content": "You are Chatbot, a friendly, helpful robot. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by introducing yourself.",
},
]
# Set up conversation context and management
# The context_aggregator will automatically collect conversation context
context = OpenAILLMContext(messages)
context_aggregator = llm.create_context_aggregator(context)
ta = TalkingAnimation()
#
# RTVI events for Pipecat client UI
#
rtvi = RTVIProcessor(config=RTVIConfig(config=[]))
pipeline = Pipeline(
[
transport.input(),
rtvi,
context_aggregator.user(),
llm,
ta,
transport.output(),
context_aggregator.assistant(),
]
)
task = PipelineTask(
pipeline,
params=PipelineParams(
allow_interruptions=True,
enable_metrics=True,
enable_usage_metrics=True,
),
observers=[RTVIObserver(rtvi)],
)
await task.queue_frame(quiet_frame)
@rtvi.event_handler("on_client_ready")
async def on_client_ready(rtvi):
await rtvi.set_bot_ready()
# Kick off the conversation
await task.queue_frames([context_aggregator.user().get_context_frame()])
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
await transport.capture_participant_transcription(participant["id"])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
print(f"Participant left: {participant}")
await task.cancel()
runner = PipelineRunner()
await runner.run(task)

View File

@@ -0,0 +1,226 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
"""OpenAI Bot Implementation.
This module implements a chatbot using OpenAI's GPT-4 model for natural language
processing. It includes:
- Real-time audio/video interaction through Daily
- Animated robot avatar
- Text-to-speech using ElevenLabs
- Support for both English and Spanish
The bot runs as part of a pipeline that processes audio/video frames and manages
the conversation flow.
"""
import os
import sys
from dotenv import load_dotenv
from loguru import logger
from PIL import Image
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import (
BotStartedSpeakingFrame,
BotStoppedSpeakingFrame,
Frame,
OutputImageRawFrame,
SpriteFrame,
)
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.processors.frameworks.rtvi import RTVIConfig, RTVIObserver, RTVIProcessor
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
from pipecat.services.openai.llm import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
load_dotenv(override=True)
try:
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
except ValueError:
# Handle the case where logger is already initialized
pass
sprites = []
script_dir = os.path.dirname(__file__)
# Load sequential animation frames
for i in range(1, 26):
# Build the full path to the image file
full_path = os.path.join(script_dir, f"assets/robot0{i}.png")
# Get the filename without the extension to use as the dictionary key
# Open the image and convert it to bytes
with Image.open(full_path) as img:
sprites.append(OutputImageRawFrame(image=img.tobytes(), size=img.size, format=img.format))
# Create a smooth animation by adding reversed frames
flipped = sprites[::-1]
sprites.extend(flipped)
# Define static and animated states
quiet_frame = sprites[0] # Static frame for when bot is listening
talking_frame = SpriteFrame(images=sprites) # Animation sequence for when bot is talking
class TalkingAnimation(FrameProcessor):
"""Manages the bot's visual animation states.
Switches between static (listening) and animated (talking) states based on
the bot's current speaking status.
"""
def __init__(self):
super().__init__()
self._is_talking = False
async def process_frame(self, frame: Frame, direction: FrameDirection):
"""Process incoming frames and update animation state.
Args:
frame: The incoming frame to process
direction: The direction of frame flow in the pipeline
"""
await super().process_frame(frame, direction)
# Switch to talking animation when bot starts speaking
if isinstance(frame, BotStartedSpeakingFrame):
if not self._is_talking:
await self.push_frame(talking_frame)
self._is_talking = True
# Return to static frame when bot stops speaking
elif isinstance(frame, BotStoppedSpeakingFrame):
await self.push_frame(quiet_frame)
self._is_talking = False
await self.push_frame(frame, direction)
async def run_bot(room_url: str, token: str):
"""Main bot execution function.
Sets up and runs the bot pipeline including:
- Daily video transport
- Speech-to-text and text-to-speech services
- Language model integration
- Animation processing
- RTVI event handling
"""
# Set up Daily transport with video/audio parameters
transport = DailyTransport(
room_url,
token,
"Chatbot",
DailyParams(
audio_out_enabled=True,
camera_out_enabled=True,
camera_out_width=1024,
camera_out_height=576,
vad_enabled=True,
vad_analyzer=SileroVADAnalyzer(),
transcription_enabled=True,
#
# Spanish
#
# transcription_settings=DailyTranscriptionSettings(
# language="es",
# tier="nova",
# model="2-general"
# )
),
)
# Initialize text-to-speech service
tts = ElevenLabsTTSService(
api_key=os.getenv("ELEVENLABS_API_KEY"),
#
# English
#
voice_id="SAz9YHcvj6GT2YYXdXww",
#
# Spanish
#
# model="eleven_multilingual_v2",
# voice_id="gD1IexrzCvsXPHUuT0s3",
)
# Initialize LLM service
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
messages = [
{
"role": "system",
#
# English
#
"content": "You are an incessant one-upper. Start by asking the user how their day is going.",
#
# Spanish
#
# "content": "Eres Chatbot, un amigable y útil robot. Tu objetivo es demostrar tus capacidades de una manera breve. Tus respuestas se convertiran a audio así que nunca no debes incluir caracteres especiales. Contesta a lo que el usuario pregunte de una manera creativa, útil y breve. Empieza por presentarte a ti mismo.",
},
]
# Set up conversation context and management
# The context_aggregator will automatically collect conversation context
context = OpenAILLMContext(messages)
context_aggregator = llm.create_context_aggregator(context)
ta = TalkingAnimation()
#
# RTVI events for Pipecat client UI
#
rtvi = RTVIProcessor(config=RTVIConfig(config=[]))
pipeline = Pipeline(
[
transport.input(),
rtvi,
context_aggregator.user(),
llm,
tts,
ta,
transport.output(),
context_aggregator.assistant(),
]
)
task = PipelineTask(
pipeline,
params=PipelineParams(
allow_interruptions=True,
enable_metrics=True,
enable_usage_metrics=True,
),
observers=[RTVIObserver(rtvi)],
)
await task.queue_frame(quiet_frame)
@rtvi.event_handler("on_client_ready")
async def on_client_ready(rtvi):
await rtvi.set_bot_ready()
# Kick off the conversation
await task.queue_frames([context_aggregator.user().get_context_frame()])
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
await transport.capture_participant_transcription(participant["id"])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
print(f"Participant left: {participant}")
await task.cancel()
runner = PipelineRunner()
await runner.run(task)

View File

@@ -0,0 +1,239 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
"""OpenAI Bot Implementation.
This module implements a chatbot using OpenAI's GPT-4 model for natural language
processing. It includes:
- Real-time audio/video interaction through Daily
- Animated robot avatar
- Text-to-speech using ElevenLabs
- Support for both English and Spanish
The bot runs as part of a pipeline that processes audio/video frames and manages
the conversation flow.
"""
import os
import sys
from typing import List
from dotenv import load_dotenv
from loguru import logger
from openai.types.chat import ChatCompletionMessageParam
from PIL import Image
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import (
BotStartedSpeakingFrame,
BotStoppedSpeakingFrame,
Frame,
OutputImageRawFrame,
SpriteFrame,
)
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.processors.frameworks.rtvi import RTVIConfig, RTVIObserver, RTVIProcessor
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
from pipecat.services.openai.llm import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
load_dotenv(override=True)
try:
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
except ValueError:
# Handle the case where logger is already initialized
pass
# REPLACE WITH YOUR MODAL URL ENDPOINT
modal_url = "https://<Modal workspace>--example-vllm-openai-compatible-serve.modal.run"
api_key = os.getenv("VLLM_API_KEY", "super-secret-key")
sprites = []
script_dir = os.path.dirname(__file__)
# Load sequential animation frames
for i in range(1, 26):
# Build the full path to the image file
full_path = os.path.join(script_dir, f"assets/robot0{i}.png")
# Get the filename without the extension to use as the dictionary key
# Open the image and convert it to bytes
with Image.open(full_path) as img:
sprites.append(OutputImageRawFrame(image=img.tobytes(), size=img.size, format=img.format))
# Create a smooth animation by adding reversed frames
flipped = sprites[::-1]
sprites.extend(flipped)
# Define static and animated states
quiet_frame = sprites[0] # Static frame for when bot is listening
talking_frame = SpriteFrame(images=sprites) # Animation sequence for when bot is talking
class TalkingAnimation(FrameProcessor):
"""Manages the bot's visual animation states.
Switches between static (listening) and animated (talking) states based on
the bot's current speaking status.
"""
def __init__(self):
super().__init__()
self._is_talking = False
async def process_frame(self, frame: Frame, direction: FrameDirection):
"""Process incoming frames and update animation state.
Args:
frame: The incoming frame to process
direction: The direction of frame flow in the pipeline
"""
await super().process_frame(frame, direction)
# Switch to talking animation when bot starts speaking
if isinstance(frame, BotStartedSpeakingFrame):
if not self._is_talking:
await self.push_frame(talking_frame)
self._is_talking = True
# Return to static frame when bot stops speaking
elif isinstance(frame, BotStoppedSpeakingFrame):
await self.push_frame(quiet_frame)
self._is_talking = False
await self.push_frame(frame, direction)
async def run_bot(room_url: str, token: str):
"""Main bot execution function.
Sets up and runs the bot pipeline including:
- Daily video transport
- Speech-to-text and text-to-speech services
- Language model integration
- Animation processing
- RTVI event handling
"""
# Set up Daily transport with video/audio parameters
transport = DailyTransport(
room_url,
token,
"Chatbot",
DailyParams(
audio_out_enabled=True,
camera_out_enabled=True,
camera_out_width=1024,
camera_out_height=576,
vad_enabled=True,
vad_analyzer=SileroVADAnalyzer(),
transcription_enabled=True,
#
# Spanish
#
# transcription_settings=DailyTranscriptionSettings(
# language="es",
# tier="nova",
# model="2-general"
# )
),
)
# Initialize text-to-speech service
tts = ElevenLabsTTSService(
api_key=os.getenv("ELEVENLABS_API_KEY"),
#
# English
#
voice_id="D38z5RcWu1voky8WS1ja",
#
# Spanish
#
# model="eleven_multilingual_v2",
# voice_id="gD1IexrzCvsXPHUuT0s3",
)
# Initialize LLM service
llm = OpenAILLMService(
# To use OpenAI
api_key=api_key,
# Or, to use a local vLLM (or similar) api server
model="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
base_url=f"{modal_url}/v1",
)
messages = [
{
"role": "system",
#
# English
#
"content": "You are a salesman for Modal, the cloud-native serverless Python computing platform.",
#
# Spanish
#
# "content": "Eres Chatbot, un amigable y útil robot. Tu objetivo es demostrar tus capacidades de una manera breve. Tus respuestas se convertiran a audio así que nunca no debes incluir caracteres especiales. Contesta a lo que el usuario pregunte de una manera creativa, útil y breve. Empieza por presentarte a ti mismo.",
},
]
# Set up conversation context and management
# The context_aggregator will automatically collect conversation context
context = OpenAILLMContext(messages)
context_aggregator = llm.create_context_aggregator(context)
ta = TalkingAnimation()
#
# RTVI events for Pipecat client UI
#
rtvi = RTVIProcessor(config=RTVIConfig(config=[]))
pipeline = Pipeline(
[
transport.input(),
rtvi,
context_aggregator.user(),
llm,
tts,
ta,
transport.output(),
context_aggregator.assistant(),
]
)
task = PipelineTask(
pipeline,
params=PipelineParams(
allow_interruptions=True,
enable_metrics=True,
enable_usage_metrics=True,
),
observers=[RTVIObserver(rtvi)],
)
await task.queue_frame(quiet_frame)
@rtvi.event_handler("on_client_ready")
async def on_client_ready(rtvi):
await rtvi.set_bot_ready()
# Kick off the conversation
await task.queue_frames([context_aggregator.user().get_context_frame()])
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
await transport.capture_participant_transcription(participant["id"])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
print(f"Participant left: {participant}")
await task.cancel()
runner = PipelineRunner()
await runner.run(task)

View File

@@ -0,0 +1,84 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import argparse
import asyncio
import importlib
import os
def get_bot_file(arg_bot: str | None) -> str:
bot_implementation = arg_bot or os.getenv("BOT_IMPLEMENTATION", "openai").lower().strip()
if not bot_implementation:
bot_implementation = "openai"
if bot_implementation not in ["openai", "gemini", "vllm"]:
raise ValueError(
f"Invalid BOT_IMPLEMENTATION: {bot_implementation}. Must be 'openai' or 'gemini'"
)
return f"bot_{bot_implementation}"
def get_runner(bot_file: str):
"""Dynamically import the run_bot function based on the bot name.
Args:
bot_name (str): The name of the bot implementation (e.g., 'openai', 'gemini').
Returns:
function: The run_bot function from the specified bot module.
Raises:
ImportError: If the specified bot module or run_bot function is not found.
"""
try:
# Dynamically construct the module name
module_name = f"{bot_file}"
# Import the module
module = importlib.import_module(module_name)
# Get the run_bot function from the module
return getattr(module, "run_bot")
except (ImportError, AttributeError) as e:
raise ImportError(f"Failed to import run_bot from {module_name}: {e}")
def main():
"""Parse the args to launch the appropriate bot using the given room/token."""
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
parser.add_argument(
"-u", "--url", type=str, required=False, help="URL of the Daily room to join"
)
parser.add_argument(
"-t",
"--token",
type=str,
required=False,
help="Daily room token",
)
parser.add_argument(
"-b",
"--bot",
type=str,
required=False,
help="Bot runner to use (e.g., openai, gemini)",
)
args, unknown = parser.parse_known_args()
url = args.url or os.getenv("DAILY_SAMPLE_ROOM_URL")
token = args.token or os.getenv("DAILY_SAMPLE_ROOM_TOKEN")
bot_file = get_bot_file(args.bot)
if not url:
raise Exception(
"No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL."
)
run_bot = get_runner(bot_file)
asyncio.run(run_bot(url, token))
if __name__ == "__main__":
main()

View File

@@ -100,7 +100,28 @@ phone numbers with valid values for your use case.
### Dialin Request
The server will receive a request when a call is received from Daily.
The server will receive a request when a call is received from Daily.
The payload that the webhook received is as follows:
```json
{
// for dial-in from webhook
"To": "+14152251493",
"From": "+14158483432",
"callId": "string-contains-uuid",
"callDomain": "string-contains-uuid",
"sipHeaders": {
"X-My-Custom-Header": "value",
"x-caller": "+1234567890",
"x-called": "+1987654321",
},
}
```
The `To`, `From`, `callId`, `callDomain` fields are converted to
`snake_case` and mapped to `dialin_settings`. In addition, `sipHeader`
contains any custom SIP headers received by Daily on the SIP
interconnect address (`sip_uri`). These are headers sent from
Twilio or other external SIP platforms, for example, to send the
caller's phone number.
### Dialout Request
@@ -158,6 +179,7 @@ curl -X POST http://localhost:3000/api/dial \
"From": "+1987654321",
"callId": "call-uuid-123",
"callDomain": "domain-uuid-456",
"sipHeader": {},
"dialout_settings": [
{
"phoneNumber": "+1234567890",

View File

@@ -39,6 +39,11 @@ class RoomRequest(BaseModel):
None, description="A flag to perform voicemail or answeing-machine detection"
)
call_transfer: Optional[Dict[str, Any]] = Field(None, description="to initiate a call transfer")
sipHeaders: Optional[Dict[str, Any]] = Field(
None,
alias="sip_headers",
description="Custom SIP headers received from the external SIP provider",
)
class Config:
populate_by_name = True
@@ -57,6 +62,14 @@ class RoomRequest(BaseModel):
"callDomain": "string-contains-uuid"
These need to be remapped to dialin_settings
In addition, we may receive in the body that can be
sent to the bot as a custom field, sip_headers
"sipHeaders": {
"X-My-Custom-Header": "value",
"x-caller": "+14158483432",
"x-called": "+14152251493",
},
"dialout_settings": [
{"phoneNumber": "+14158483432", "callerId": "+14152251493"},
{"sipUri": "sip:username@sip.hostname"}
@@ -157,6 +170,7 @@ async def dial(request: RoomRequest, raw_request: Request):
"dialout_settings": request.dialout_settings,
"voicemail_detection": request.voicemail_detection,
"call_transfer": request.call_transfer,
"sip_headers": request.sipHeaders, # passing the SIP headers to the bot
},
}

View File

@@ -65,6 +65,7 @@ export default async function handler(req, res) {
From,
callId,
callDomain,
sipHeaders,
dialout_settings,
voicemail_detection,
call_transfer
@@ -117,6 +118,7 @@ export default async function handler(req, res) {
dialout_settings,
voicemail_detection,
call_transfer,
sip_headers: sipHeaders,
},
};

View File

@@ -0,0 +1,111 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import os
import sys
import aiohttp
from daily_runner import configure
from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.cartesia.tts import CartesiaTTSService
from pipecat.services.deepgram.stt import DeepgramSTTService, Language, LiveOptions
from pipecat.services.openai.llm import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
load_dotenv(override=True)
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
async def main():
async with aiohttp.ClientSession() as session:
(room_url, token) = await configure(session)
transport = DailyTransport(
room_url,
token,
"Respond bot",
DailyParams(
audio_in_enabled=True,
audio_in_passthrough=False,
audio_out_enabled=True,
audio_out_sample_rate=16000,
transcription_enabled=False,
vad_analyzer=SileroVADAnalyzer(),
),
)
stt = DeepgramSTTService(
api_key=os.getenv("DEEPGRAM_API_KEY"),
live_options=LiveOptions(language=Language.EN),
)
tts = CartesiaTTSService(
api_key=os.getenv("CARTESIA_API_KEY"),
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
)
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
messages = [
{
"role": "system",
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
},
]
context = OpenAILLMContext(messages)
context_aggregator = llm.create_context_aggregator(context)
pipeline = Pipeline(
[
transport.input(), # Transport user input
stt,
context_aggregator.user(), # User responses
llm, # LLM
tts, # TTS
transport.output(), # Transport bot output
context_aggregator.assistant(), # Assistant spoken responses
]
)
task = PipelineTask(
pipeline,
params=PipelineParams(
allow_interruptions=True,
enable_metrics=True,
enable_usage_metrics=True,
report_only_initial_ttfb=True,
),
)
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
await transport.capture_participant_audio(participant["id"])
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.cancel()
runner = PipelineRunner()
await runner.run(task)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -0,0 +1,119 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import argparse
import os
from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.cartesia.tts import CartesiaTTSService
from pipecat.services.deepgram.stt import DeepgramSTTService
from pipecat.services.openai.llm import OpenAILLMService
from pipecat.transports.base_transport import TransportParams
from pipecat.transports.network.small_webrtc import SmallWebRTCTransport
from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection
load_dotenv(override=True)
async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace):
logger.info(f"Starting bot")
transport = SmallWebRTCTransport(
webrtc_connection=webrtc_connection,
params=TransportParams(
audio_in_enabled=True,
audio_out_enabled=True,
vad_analyzer=SileroVADAnalyzer(),
),
)
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
tts = CartesiaTTSService(
api_key=os.getenv("CARTESIA_API_KEY"),
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
)
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
messages = [
{
"role": "system",
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
},
]
context = OpenAILLMContext(messages)
context_aggregator = llm.create_context_aggregator(context)
pipeline = Pipeline(
[
transport.input(), # Transport user input
stt,
context_aggregator.user(), # User responses
llm, # LLM
tts, # TTS
transport.output(), # Transport bot output
context_aggregator.assistant(), # Assistant spoken responses
]
)
task = PipelineTask(
pipeline,
params=PipelineParams(
allow_interruptions=True,
enable_metrics=True,
enable_usage_metrics=True,
report_only_initial_ttfb=True,
),
)
turn_observer = task.turn_tracking_observer
if turn_observer:
@turn_observer.event_handler("on_turn_started")
async def on_turn_started(observer, turn_number):
logger.info(f"🔄 Turn {turn_number} started")
@turn_observer.event_handler("on_turn_ended")
async def on_turn_ended(observer, turn_number, duration, was_interrupted):
if was_interrupted:
logger.info(f"🔄 Turn {turn_number} interrupted after {duration:.2f}s")
else:
logger.info(f"🏁 Turn {turn_number} completed in {duration:.2f}s")
@transport.event_handler("on_client_connected")
async def on_client_connected(transport, client):
logger.info(f"Client connected")
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
@transport.event_handler("on_client_closed")
async def on_client_closed(transport, client):
logger.info(f"Client closed connection")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
await runner.run(task)
if __name__ == "__main__":
from run import main
main()

View File

@@ -11,18 +11,17 @@ from pathlib import Path
from dotenv import load_dotenv
from loguru import logger
from openai import audio
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import Frame
from pipecat.observers.base_observer import BaseObserver, FramePushed
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.services.cartesia.tts import CartesiaTTSService
from pipecat.services.deepgram.stt import DeepgramSTTService
from pipecat.services.google.llm import GoogleLLMService, LLMSearchResponseFrame
from pipecat.services.llm_service import LLMService
from pipecat.transports.base_transport import TransportParams
from pipecat.transports.network.small_webrtc import SmallWebRTCTransport
from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection
@@ -33,7 +32,7 @@ load_dotenv(override=True)
# Function handlers for the LLM
search_tool = {"google_search_retrieval": {}}
search_tool = {"google_search": {}}
tools = [search_tool]
system_instruction = """
@@ -50,14 +49,22 @@ Start each interaction by asking the user about which place they would like to k
"""
class LLMSearchLoggerProcessor(FrameProcessor):
async def process_frame(self, frame: Frame, direction: FrameDirection):
await super().process_frame(frame, direction)
class LLMSearchLoggerObserver(BaseObserver):
async def on_push_frame(self, data: FramePushed):
src = data.source
dst = data.destination
frame = data.frame
timestamp = data.timestamp
if not isinstance(src, LLMService) and not isinstance(dst, LLMService):
return
time_sec = timestamp / 1_000_000_000
arrow = ""
if isinstance(frame, LLMSearchResponseFrame):
print(f"LLMSearchLoggerProcessor: {frame}")
await self.push_frame(frame)
logger.debug(f"🧠 {arrow} {dst} LLM SEARCH RESPONSE FRAME: {frame} at {time_sec:.2f}s")
async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace):
@@ -84,7 +91,6 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac
api_key=os.getenv("GOOGLE_API_KEY"),
system_instruction=system_instruction,
tools=tools,
model="gemini-1.5-flash-002",
)
context = OpenAILLMContext(
@@ -97,22 +103,23 @@ async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespac
)
context_aggregator = llm.create_context_aggregator(context)
llm_search_logger = LLMSearchLoggerProcessor()
pipeline = Pipeline(
[
transport.input(),
stt,
context_aggregator.user(),
llm,
llm_search_logger,
tts,
transport.output(),
context_aggregator.assistant(),
]
)
task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
task = PipelineTask(
pipeline,
params=PipelineParams(allow_interruptions=True),
observers=[LLMSearchLoggerObserver()],
)
@transport.event_handler("on_client_connected")
async def on_client_connected(transport, client):

View File

@@ -4,6 +4,7 @@
# SPDX-License-Identifier: BSD 2-Clause License
#
import argparse
import asyncio
import io
import os
@@ -80,7 +81,7 @@ class UrlToImageProcessor(FrameProcessor):
logger.error(error_msg)
async def run_bot(webrtc_connection: SmallWebRTCConnection):
async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace):
logger.info(f"Starting bot")
transport = SmallWebRTCTransport(

View File

@@ -4,6 +4,7 @@
# SPDX-License-Identifier: BSD 2-Clause License
#
import argparse
import os
import sys
@@ -28,7 +29,7 @@ from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection
load_dotenv(override=True)
async def run_bot(webrtc_connection: SmallWebRTCConnection):
async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace):
logger.info(f"Starting bot")
transport = SmallWebRTCTransport(

View File

@@ -4,6 +4,7 @@
# SPDX-License-Identifier: BSD 2-Clause License
#
import argparse
import asyncio
import io
import os
@@ -81,7 +82,7 @@ class UrlToImageProcessor(FrameProcessor):
logger.error(error_msg)
async def run_bot(webrtc_connection: SmallWebRTCConnection):
async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace):
logger.info(f"Starting bot")
transport = SmallWebRTCTransport(

View File

@@ -16,7 +16,7 @@
"@types/node": "^22.13.1",
"@vitejs/plugin-react-swc": "^3.7.2",
"typescript": "^5.7.3",
"vite": "^6.0.2"
"vite": "^6.3.5"
}
},
"node_modules/@babel/runtime": {
@@ -1370,9 +1370,9 @@
}
},
"node_modules/vite": {
"version": "6.3.3",
"resolved": "https://registry.npmjs.org/vite/-/vite-6.3.3.tgz",
"integrity": "sha512-5nXH+QsELbFKhsEfWLkHrvgRpTdGJzqOZ+utSdmPTvwHmvU6ITTm3xx+mRusihkcI8GeC7lCDyn3kDtiki9scw==",
"version": "6.3.5",
"resolved": "https://registry.npmjs.org/vite/-/vite-6.3.5.tgz",
"integrity": "sha512-cZn6NDFE7wdTpINgs++ZJ4N49W2vRp8LCKrn3Ob1kYNtOo21vfDoaV5GzBfLU4MovSAB8uNRm4jgzVQZ+mBzPQ==",
"dev": true,
"dependencies": {
"esbuild": "^0.25.0",

View File

@@ -15,7 +15,7 @@
"@types/node": "^22.13.1",
"@vitejs/plugin-react-swc": "^3.7.2",
"typescript": "^5.7.3",
"vite": "^6.0.2"
"vite": "^6.3.5"
},
"dependencies": {
"@pipecat-ai/client-js": "^0.3.5",

View File

@@ -13,7 +13,7 @@
"@pipecat-ai/daily-transport": "^0.3.8"
},
"devDependencies": {
"vite": "^6.0.9"
"vite": "^6.3.5"
}
},
"node_modules/@babel/runtime": {
@@ -1114,9 +1114,9 @@
}
},
"node_modules/vite": {
"version": "6.3.3",
"resolved": "https://registry.npmjs.org/vite/-/vite-6.3.3.tgz",
"integrity": "sha512-5nXH+QsELbFKhsEfWLkHrvgRpTdGJzqOZ+utSdmPTvwHmvU6ITTm3xx+mRusihkcI8GeC7lCDyn3kDtiki9scw==",
"version": "6.3.5",
"resolved": "https://registry.npmjs.org/vite/-/vite-6.3.5.tgz",
"integrity": "sha512-cZn6NDFE7wdTpINgs++ZJ4N49W2vRp8LCKrn3Ob1kYNtOo21vfDoaV5GzBfLU4MovSAB8uNRm4jgzVQZ+mBzPQ==",
"dev": true,
"dependencies": {
"esbuild": "^0.25.0",

View File

@@ -12,7 +12,7 @@
"license": "ISC",
"description": "",
"devDependencies": {
"vite": "^6.0.9"
"vite": "^6.3.5"
},
"dependencies": {
"@pipecat-ai/client-js": "^0.3.5",

View File

@@ -102,9 +102,9 @@ async def main():
llm = GoogleLLMService(
api_key=os.getenv("GOOGLE_API_KEY"),
model="gemini-1.5-flash-002",
system_instruction=system_instruction,
tools=tools,
model="gemini-1.5-flash",
)
context = OpenAILLMContext(
@@ -153,7 +153,6 @@ async def main():
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
logger.debug("First participant joined: {}", participant["id"])
await transport.capture_participant_transcription(participant["id"])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):

View File

@@ -0,0 +1,169 @@
# OpenTelemetry Tracing for Pipecat
This demo showcases OpenTelemetry tracing integration for Pipecat services, allowing you to visualize service calls, performance metrics, and dependencies in a Jaeger dashboard.
## Features
- **Hierarchical Tracing**: Track entire conversations, turns, and service calls
- **Service Tracing**: Detailed spans for TTS, STT, and LLM services with rich context
- **TTFB Metrics**: Capture Time To First Byte metrics for latency analysis
- **Usage Statistics**: Track character counts for TTS and token usage for LLMs
- **Flexible Exporters**: Use Jaeger, Zipkin, or any OpenTelemetry-compatible backend
## Trace Structure
Traces are organized hierarchically:
```
Conversation (conversation-uuid)
├── turn-1
│ ├── stt_deepgramsttservice
│ ├── llm_openaillmservice
│ └── tts_cartesiattsservice
└── turn-2
├── stt_deepgramsttservice
├── llm_openaillmservice
└── tts_cartesiattsservice
turn-N
└── ...
```
This organization helps you track conversation-to-conversation and turn-to-turn.
## Setup Instructions
### 1. Start the Jaeger Container
Run Jaeger in Docker to collect and visualize traces:
```bash
docker run -d --name jaeger \
-e COLLECTOR_ZIPKIN_HOST_PORT=:9411 \
-p 16686:16686 \
-p 4317:4317 \
-p 4318:4318 \
jaegertracing/all-in-one:latest
```
### 2. Environment Configuration
Create a `.env` file with your API keys and enable tracing:
```
ENABLE_TRACING=true
OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317 # Point to your preferred backend
# OTEL_CONSOLE_EXPORT=true # Set to any value for debug output to console
# Service API keys
DEEPGRAM_API_KEY=your_key_here
CARTESIA_API_KEY=your_key_here
OPENAI_API_KEY=your_key_here
```
### 3. Configure Your Pipeline Task
Enable tracing in your Pipecat application:
```python
# Initialize OpenTelemetry with your chosen exporter
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
exporter = OTLPSpanExporter(
endpoint="http://localhost:4317", # Jaeger OTLP endpoint
insecure=True,
)
setup_tracing(
service_name="pipecat-demo",
exporter=exporter,
console_export=os.getenv("OTEL_CONSOLE_EXPORT", "false").lower() == "true",
)
# Enable tracing in your PipelineTask
task = PipelineTask(
pipeline,
params=PipelineParams(
allow_interruptions=True,
enable_metrics=True, # Required for some service metrics
),
enable_tracing=True, # Enables both turn and conversation tracing
conversation_id="customer-123", # Optional - will auto-generate if not provided
)
```
### 4. Exporter Options
While this demo uses Jaeger, you can configure any OpenTelemetry-compatible exporter:
#### Jaeger (Default for the demo)
```python
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
exporter = OTLPSpanExporter(
endpoint="http://localhost:4317", # Jaeger OTLP endpoint
insecure=True,
)
```
#### Cloud Providers
Many cloud providers offer OpenTelemetry-compatible observability services:
- AWS X-Ray
- Google Cloud Trace
- Azure Monitor
- Datadog APM
See the OpenTelemetry documentation for specific exporter configurations:
https://opentelemetry.io/ecosystem/vendors/
### 5. Install Dependencies
```bash
pip install -r requirements.txt
```
### 6. Run the Demo
```bash
python bot.py
```
### 7. View Traces in Jaeger
Open your browser to [http://localhost:16686](http://localhost:16686) and select the "pipecat-demo" service to view traces.
## Understanding the Traces
- **Conversation Spans**: The top-level span representing an entire conversation
- **Turn Spans**: Child spans of conversations that represent each turn in the dialog
- **Service Spans**: Detailed service operations nested under turns
- **Service Attributes**: Each service includes rich context about its operation:
- **TTS**: Voice ID, character count, service type
- **STT**: Transcription text, language, model
- **LLM**: Messages, tokens used, model, service configuration
- **Metrics**: Performance data like `metrics.ttfb_ms` and processing durations
## How It Works
The tracing system consists of:
1. **TurnTrackingObserver**: Detects conversation turns
2. **TurnTraceObserver**: Creates spans for turns and conversations
3. **Service Decorators**: `@traced_tts`, `@traced_stt`, `@traced_llm` for service-specific tracing
4. **Context Providers**: Share context between different parts of the pipeline
## Troubleshooting
- **No Traces in Jaeger**: Ensure the Docker container is running and the OTLP endpoint is correct
- **Debugging Traces**: Set `OTEL_CONSOLE_EXPORT=true` to print traces to the console for debugging
- **Missing Metrics**: Check that `enable_metrics=True` in PipelineParams
- **Connection Errors**: Verify network connectivity to the Jaeger container
- **Exporter Issues**: Try the Console exporter (`OTEL_CONSOLE_EXPORT=true`) to verify tracing works
- **Other Backends**: If using a different backend, ensure you've configured the correct exporter and endpoint
## References
- [OpenTelemetry Python Documentation](https://opentelemetry-python.readthedocs.io/)
- [Jaeger Documentation](https://www.jaegertracing.io/docs/latest/)

View File

@@ -0,0 +1,159 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import argparse
import os
from dotenv import load_dotenv
from loguru import logger
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
from pipecat.adapters.schemas.function_schema import FunctionSchema
from pipecat.adapters.schemas.tools_schema import ToolsSchema
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import TTSSpeakFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.cartesia.tts import CartesiaTTSService
from pipecat.services.deepgram.stt import DeepgramSTTService
from pipecat.services.llm_service import FunctionCallParams
from pipecat.services.openai.llm import OpenAILLMService
from pipecat.transports.base_transport import TransportParams
from pipecat.transports.network.small_webrtc import SmallWebRTCTransport
from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection
from pipecat.utils.tracing.setup import setup_tracing
load_dotenv(override=True)
IS_TRACING_ENABLED = bool(os.getenv("ENABLE_TRACING"))
# Initialize tracing if enabled
if IS_TRACING_ENABLED:
# Create the exporter
otlp_exporter = OTLPSpanExporter(
endpoint=os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4317"),
insecure=True,
)
# Set up tracing with the exporter
setup_tracing(
service_name="pipecat-demo",
exporter=otlp_exporter,
console_export=bool(os.getenv("OTEL_CONSOLE_EXPORT")),
)
logger.info("OpenTelemetry tracing initialized")
async def fetch_weather_from_api(params: FunctionCallParams):
await params.llm.push_frame(TTSSpeakFrame("Let me check on that."))
await params.result_callback({"conditions": "nice", "temperature": "75"})
async def run_bot(webrtc_connection: SmallWebRTCConnection, _: argparse.Namespace):
logger.info(f"Starting bot")
transport = SmallWebRTCTransport(
webrtc_connection=webrtc_connection,
params=TransportParams(
audio_in_enabled=True,
audio_out_enabled=True,
vad_analyzer=SileroVADAnalyzer(),
),
)
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
tts = CartesiaTTSService(
api_key=os.getenv("CARTESIA_API_KEY"),
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
)
llm = OpenAILLMService(
api_key=os.getenv("OPENAI_API_KEY"), params=OpenAILLMService.InputParams(temperature=0.5)
)
# You can also register a function_name of None to get all functions
# sent to the same callback with an additional function_name parameter.
llm.register_function("get_current_weather", fetch_weather_from_api)
weather_function = FunctionSchema(
name="get_current_weather",
description="Get the current weather",
properties={
"location": {
"type": "string",
"description": "The city and state, e.g. San Francisco, CA",
},
"format": {
"type": "string",
"enum": ["celsius", "fahrenheit"],
"description": "The temperature unit to use. Infer this from the user's location.",
},
},
required=["location", "format"],
)
tools = ToolsSchema(standard_tools=[weather_function])
messages = [
{
"role": "system",
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
},
]
context = OpenAILLMContext(messages, tools)
context_aggregator = llm.create_context_aggregator(context)
pipeline = Pipeline(
[
transport.input(),
stt,
context_aggregator.user(),
llm,
tts,
transport.output(),
context_aggregator.assistant(),
]
)
task = PipelineTask(
pipeline,
params=PipelineParams(
allow_interruptions=True,
enable_metrics=True,
enable_usage_metrics=True,
),
enable_tracing=IS_TRACING_ENABLED,
# Optionally, add a conversation ID to track the conversation
# conversation_id="8df26cc1-6db0-4a7a-9930-1e037c8f1fa2",
)
@transport.event_handler("on_client_connected")
async def on_client_connected(transport, client):
logger.info(f"Client connected")
# Kick off the conversation.
await task.queue_frames([context_aggregator.user().get_context_frame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
@transport.event_handler("on_client_closed")
async def on_client_closed(transport, client):
logger.info(f"Client closed connection")
await task.cancel()
runner = PipelineRunner(handle_sigint=False)
await runner.run(task)
if __name__ == "__main__":
from run import main
main()

View File

@@ -0,0 +1,10 @@
DEEPGRAM_API_KEY=your_deepgram_key
CARTESIA_API_KEY=your_cartesia_key
OPENAI_API_KEY=your_openai_key
# Set to any value to enable tracing
ENABLE_TRACING=true
# OTLP endpoint (defaults to localhost:4317 if not set)
OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317
# Set to any value to enable console output for debugging
# OTEL_CONSOLE_EXPORT=true

View File

@@ -0,0 +1,6 @@
fastapi
uvicorn
python-dotenv
pipecat-ai[webrtc,silero,cartesia,deepgram,openai,tracing]
pipecat-ai-small-webrtc-prebuilt
opentelemetry-exporter-otlp-proto-grpc

View File

@@ -0,0 +1,205 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import argparse
import asyncio
import importlib.util
import os
import sys
from contextlib import asynccontextmanager
from inspect import iscoroutinefunction, signature
from typing import Any, Callable, Dict, Optional, Tuple
import uvicorn
from dotenv import load_dotenv
from fastapi import BackgroundTasks, FastAPI
from fastapi.responses import RedirectResponse
from loguru import logger
from pipecat_ai_small_webrtc_prebuilt.frontend import SmallWebRTCPrebuiltUI
from pipecat.transports.network.webrtc_connection import IceServer, SmallWebRTCConnection
# Load environment variables
load_dotenv(override=True)
app = FastAPI()
# Store connections by pc_id
pcs_map: Dict[str, SmallWebRTCConnection] = {}
ice_servers = [
IceServer(
urls="stun:stun.l.google.com:19302",
)
]
# Mount the frontend at /
app.mount("/client", SmallWebRTCPrebuiltUI)
# Store program arguments
args: argparse.Namespace = argparse.Namespace()
# Store the bot module and function info
bot_module: Any = None
run_bot_func: Optional[Callable] = None
is_webrtc_bot: bool = True
def import_bot_file(file_path: str) -> Tuple[Any, Callable, bool]:
"""Dynamically import the bot file and determine how to run it.
Returns:
tuple: (module, run_function, is_webrtc_bot)
- module: The imported module
- run_function: Either run_bot or main function
- is_webrtc_bot: True if run_bot function exists and accepts a WebRTC connection
"""
if not os.path.exists(file_path):
raise FileNotFoundError(f"Bot file not found: {file_path}")
# Extract module name without extension
module_name = os.path.splitext(os.path.basename(file_path))[0]
# Load the module
spec = importlib.util.spec_from_file_location(module_name, file_path)
if not spec or not spec.loader:
raise ImportError(f"Could not load spec for {file_path}")
module = importlib.util.module_from_spec(spec)
sys.modules[module_name] = module
spec.loader.exec_module(module)
# Check for run_bot function first
if hasattr(module, "run_bot"):
run_func = module.run_bot
# Check if the function accepts a WebRTC connection
sig = signature(run_func)
is_webrtc = len(sig.parameters) > 0
return module, run_func, is_webrtc
# Fall back to main function
if hasattr(module, "main") and iscoroutinefunction(module.main):
return module, module.main, False
raise AttributeError(f"No run_bot or async main function found in {file_path}")
@app.get("/", include_in_schema=False)
async def root_redirect():
return RedirectResponse(url="/client/")
@app.post("/api/offer")
async def offer(request: dict, background_tasks: BackgroundTasks):
global run_bot_func, is_webrtc_bot
if not run_bot_func:
raise RuntimeError("No bot file has been loaded")
if not is_webrtc_bot:
return {
"error": "This bot doesn't support WebRTC connections, it's running in standalone mode"
}
pc_id = request.get("pc_id")
if pc_id and pc_id in pcs_map:
pipecat_connection = pcs_map[pc_id]
logger.info(f"Reusing existing connection for pc_id: {pc_id}")
await pipecat_connection.renegotiate(
sdp=request["sdp"], type=request["type"], restart_pc=request.get("restart_pc", False)
)
else:
pipecat_connection = SmallWebRTCConnection(ice_servers)
await pipecat_connection.initialize(sdp=request["sdp"], type=request["type"])
@pipecat_connection.event_handler("closed")
async def handle_disconnected(webrtc_connection: SmallWebRTCConnection):
logger.info(f"Discarding peer connection for pc_id: {webrtc_connection.pc_id}")
pcs_map.pop(webrtc_connection.pc_id, None)
# We've already checked that run_bot_func exists
assert run_bot_func is not None
background_tasks.add_task(run_bot_func, pipecat_connection, args)
answer = pipecat_connection.get_answer()
# Updating the peer connection inside the map
pcs_map[answer["pc_id"]] = pipecat_connection
return answer
@asynccontextmanager
async def lifespan(app: FastAPI):
yield # Run app
coros = [pc.close() for pc in pcs_map.values()]
await asyncio.gather(*coros)
pcs_map.clear()
async def run_standalone_bot() -> None:
"""Run a standalone bot that doesn't require WebRTC"""
global run_bot_func
if run_bot_func is not None:
await run_bot_func()
else:
raise RuntimeError("No bot function available to run")
def main(parser: Optional[argparse.ArgumentParser] = None):
global args
if not parser:
parser = argparse.ArgumentParser(description="Pipecat Bot Runner")
parser.add_argument("bot_file", nargs="?", help="Path to the bot file", default=None)
parser.add_argument(
"--host", default="localhost", help="Host for HTTP server (default: localhost)"
)
parser.add_argument(
"--port", type=int, default=7860, help="Port for HTTP server (default: 7860)"
)
parser.add_argument("--verbose", "-v", action="count", default=0)
args = parser.parse_args()
logger.remove(0)
if args.verbose:
logger.add(sys.stderr, level="TRACE")
else:
logger.add(sys.stderr, level="DEBUG")
# Infer the bot file from the caller if not provided explicitly
bot_file = args.bot_file
if bot_file is None:
# Get the __file__ of the script that called main()
import inspect
caller_frame = inspect.stack()[1]
caller_globals = caller_frame.frame.f_globals
bot_file = caller_globals.get("__file__")
if not bot_file:
print("❌ Could not determine the bot file. Pass it explicitly to main().")
sys.exit(1)
# Import the bot file
try:
global run_bot_func, bot_module, is_webrtc_bot
bot_module, run_bot_func, is_webrtc_bot = import_bot_file(bot_file)
logger.info(f"Successfully loaded bot from {bot_file}")
if is_webrtc_bot:
logger.info("Detected WebRTC-compatible bot, starting web server...")
uvicorn.run(app, host=args.host, port=args.port)
else:
logger.info("Detected standalone bot, running directly...")
asyncio.run(run_standalone_bot())
except Exception as e:
logger.error(f"Error loading bot file: {e}")
sys.exit(1)
if __name__ == "__main__":
main()

View File

@@ -16,7 +16,7 @@
"@types/node": "^22.13.1",
"@vitejs/plugin-react-swc": "^3.7.2",
"typescript": "^5.7.3",
"vite": "^6.0.2"
"vite": "^6.3.5"
}
},
"node_modules/@babel/runtime": {
@@ -1371,9 +1371,9 @@
}
},
"node_modules/vite": {
"version": "6.3.3",
"resolved": "https://registry.npmjs.org/vite/-/vite-6.3.3.tgz",
"integrity": "sha512-5nXH+QsELbFKhsEfWLkHrvgRpTdGJzqOZ+utSdmPTvwHmvU6ITTm3xx+mRusihkcI8GeC7lCDyn3kDtiki9scw==",
"version": "6.3.5",
"resolved": "https://registry.npmjs.org/vite/-/vite-6.3.5.tgz",
"integrity": "sha512-cZn6NDFE7wdTpINgs++ZJ4N49W2vRp8LCKrn3Ob1kYNtOo21vfDoaV5GzBfLU4MovSAB8uNRm4jgzVQZ+mBzPQ==",
"dev": true,
"dependencies": {
"esbuild": "^0.25.0",

View File

@@ -15,7 +15,7 @@
"@types/node": "^22.13.1",
"@vitejs/plugin-react-swc": "^3.7.2",
"typescript": "^5.7.3",
"vite": "^6.0.2"
"vite": "^6.3.5"
},
"dependencies": {
"@pipecat-ai/client-js": "^0.3.2",

View File

@@ -13,7 +13,7 @@
"@pipecat-ai/daily-transport": "^0.3.8"
},
"devDependencies": {
"vite": "^6.0.9"
"vite": "^6.3.5"
}
},
"node_modules/@babel/runtime": {
@@ -1114,9 +1114,9 @@
}
},
"node_modules/vite": {
"version": "6.3.3",
"resolved": "https://registry.npmjs.org/vite/-/vite-6.3.3.tgz",
"integrity": "sha512-5nXH+QsELbFKhsEfWLkHrvgRpTdGJzqOZ+utSdmPTvwHmvU6ITTm3xx+mRusihkcI8GeC7lCDyn3kDtiki9scw==",
"version": "6.3.5",
"resolved": "https://registry.npmjs.org/vite/-/vite-6.3.5.tgz",
"integrity": "sha512-cZn6NDFE7wdTpINgs++ZJ4N49W2vRp8LCKrn3Ob1kYNtOo21vfDoaV5GzBfLU4MovSAB8uNRm4jgzVQZ+mBzPQ==",
"dev": true,
"dependencies": {
"esbuild": "^0.25.0",

View File

@@ -12,7 +12,7 @@
"license": "ISC",
"description": "",
"devDependencies": {
"vite": "^6.0.9"
"vite": "^6.3.5"
},
"dependencies": {
"@pipecat-ai/client-js": "^0.3.5",

View File

@@ -0,0 +1,5 @@
ios
android
node_modules
.expo
.env

View File

@@ -0,0 +1 @@
22.14

View File

@@ -0,0 +1,21 @@
// Disabling the logs from react-native-webrtc
import debug from 'debug';
debug.disable('rn-webrtc:*');
// Ignoring the warnings from react-native-background-timer while they don't fix this issue:
// https://github.com/ocetnik/react-native-background-timer/issues/366
import { LogBox } from 'react-native';
LogBox.ignoreLogs([
"`new NativeEventEmitter()` was called with a non-null argument without the required `addListener` method.",
"`new NativeEventEmitter()` was called with a non-null argument without the required `removeListeners` method."
]);
// Enable debug logs
/*window.localStorage = window.localStorage || {};
window.localStorage.debug = '*';
window.localStorage.getItem = (itemName) => {
console.log('Requesting the localStorage item ', itemName);
return window.localStorage[itemName];
};*/
export { default } from './src/App';

View File

@@ -0,0 +1,89 @@
# React Native implementation
Basic implementation using the [Pipecat RN SDK](https://docs.pipecat.ai/client/react-native/introduction).
## Usage
### Expo requirements
This project cannot be used with an [Expo Go](https://docs.expo.dev/workflow/expo-go/) app because [it requires custom native code](https://docs.expo.io/workflow/customizing/).
When a project requires custom native code or a config plugin, we need to transition from using [Expo Go](https://docs.expo.dev/workflow/expo-go/)
to a [development build](https://docs.expo.dev/development/introduction/).
More details about the custom native code used by this demo can be found in [rn-daily-js-expo-config-plugin](https://github.com/daily-co/rn-daily-js-expo-config-plugin).
### Building remotely
If you do not have experience with Xcode and Android Studio builds or do not have them installed locally on your computer, you will need to follow [this guide from Expo to use EAS Build](https://docs.expo.dev/development/create-development-builds/#create-and-install-eas-build).
### Building locally
You will need to have installed locally on your computer:
- [Xcode](https://developer.apple.com/xcode/) to build for iOS;
- [Android Studio](https://developer.android.com/studio) to build for Android;
#### Install the demo dependencies
```bash
# Use the version of node specified in .nvmrc
nvm i
# Install dependencies
yarn install
# Before a native app can be compiled, the native source code must be generated.
npx expo prebuild
```
#### Running on Android
After plugging in an Android device [configured for debugging](https://developer.android.com/studio/debug/dev-options), run the following command:
```
npm run android
```
#### Running on iOS
First, you'll need to do a one-time setup. This is required to build to a physical device.
If you're familiar with Xcode, open `ios/RNSimpleChatbot.xcworkspace` and, in the target settings, provide a development team registered with Apple.
If you're newer to Xcode, here are some more detailed instructions to get you started.
First, open the project in Xcode. Make sure to specifically select `RNSimpleChatbot.xcworkspace` from `/ios`. The `/ios` directory will have been generated by running `npx expo prebuild` as instructed above. This is also a good time to plug in your iOS device to be sure the following steps are successful.
From the main menu, select `Settings` and then `Accounts`. Click the `+` sign to add an account (e.g. an Apple ID).
![xcode-accounts.png](./docsAssets/xcode-accounts.png)
Once an account is added, perform the following steps:
1. Close `Settings`.
1. Select the folder icon in the top left corner.
1. Select `RNSimpleChatbot` from the side panel
1. Navigate to `Signing & Capabilities` in the top nav bar.
1. Open the "Team" dropdown
1. Select the account added in the previous step.
The "Signing Certificate" section should update accordingly with your account information.
![xcode-signing.png](./docsAssets/xcode-signing.png)
**Troubleshooting common errors:**
- If you see the error `Change your bundle identifier to a unique string to try again`, update the "Bundle Identifier" input in `Signing & Capabilities` to make it unique. This should resolve the error.
- If you see an error that says `Xcode was unable to launch because it has an invalid code signature, inadequate entitlements or its profile has not been explicitly trusted by the user`, you may need to update the settings on your iPhone to enable the required permissions as follows:
1. Open `Settings` on your iPhone
1. Select `General`, then `Device Management`
1. Click `Trust` for DailyPlayground
- You may also be prompted to enter you login keychain password. Be sure to click `Always trust` to avoid the prompt showing multiple times.
After, run the following command:
```
npm run ios
```

View File

@@ -0,0 +1,75 @@
{
"expo": {
"name": "RN Simple Chatbot",
"slug": "simple-chatbot-demo",
"newArchEnabled": false,
"version": "1.0.0",
"orientation": "portrait",
"icon": "./assets/images/pipecat.png",
"userInterfaceStyle": "light",
"splash": {
"image": "./assets/images/splash.png",
"resizeMode": "contain",
"backgroundColor": "#ffffff"
},
"updates": {
"fallbackToCacheTimeout": 0
},
"assetBundlePatterns": [
"**/*"
],
"ios": {
"supportsTablet": true,
"bitcode": false,
"bundleIdentifier": "co.daily.SimpleChatbot",
"infoPlist": {
"UIBackgroundModes": [
"voip"
]
}
},
"android": {
"adaptiveIcon": {
"foregroundImage": "./assets/images/pipecat.png",
"backgroundColor": "#FFFFFF"
},
"package": "co.daily.SimpleChatbot",
"permissions": [
"android.permission.ACCESS_NETWORK_STATE",
"android.permission.BLUETOOTH",
"android.permission.CAMERA",
"android.permission.INTERNET",
"android.permission.MODIFY_AUDIO_SETTINGS",
"android.permission.RECORD_AUDIO",
"android.permission.SYSTEM_ALERT_WINDOW",
"android.permission.WAKE_LOCK",
"android.permission.FOREGROUND_SERVICE",
"android.permission.FOREGROUND_SERVICE_CAMERA",
"android.permission.FOREGROUND_SERVICE_MICROPHONE",
"android.permission.FOREGROUND_SERVICE_MEDIA_PROJECTION",
"android.permission.POST_NOTIFICATIONS"
]
},
"web": {
"favicon": "./assets/images/pipecat.png"
},
"plugins": [
"@config-plugins/react-native-webrtc",
"@daily-co/config-plugin-rn-daily-js",
[
"expo-build-properties",
{
"android": {
"minSdkVersion": 24,
"compileSdkVersion": 35,
"targetSdkVersion": 35,
"buildToolsVersion": "35.0.0"
},
"ios": {
"deploymentTarget": "15.1"
}
}
]
]
}
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 29 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

View File

@@ -0,0 +1,6 @@
module.exports = function(api) {
api.cache(true);
return {
presets: ['babel-preset-expo'],
};
};

Binary file not shown.

After

Width:  |  Height:  |  Size: 177 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 213 KiB

View File

@@ -0,0 +1 @@
EXPO_SIMPLE_CHATBOT_SERVER=http://$YOUR_IP:7860

View File

@@ -0,0 +1,2 @@
const { getDefaultConfig } = require('expo/metro-config');
module.exports = getDefaultConfig(__dirname);

View File

@@ -0,0 +1,44 @@
{
"name": "simple-chatbot-demo",
"version": "1.0.0",
"scripts": {
"start": "expo start --dev-client",
"android": "expo run:android --device",
"ios": "expo run:ios --device",
"web": "expo start --web",
"update": "(cd ../rtvi-client-react-native-daily; yarn build); cp -R ../rtvi-client-react-native-daily/lib/* ./node_modules/react-native-realtime-ai-daily/lib/;"
},
"dependencies": {
"@config-plugins/react-native-webrtc": "^10.0.0",
"@daily-co/config-plugin-rn-daily-js": "0.0.7",
"@daily-co/react-native-daily-js": "^0.76.0",
"@daily-co/react-native-webrtc": "^118.0.3-daily.2",
"@react-native-async-storage/async-storage": "1.24.0",
"@react-navigation/native": "^7.0.14",
"@react-navigation/stack": "^7.1.1",
"expo": "^53.0.7",
"expo-build-properties": "~0.14.6",
"expo-dev-client": "~5.1.8",
"expo-splash-screen": "~0.30.8",
"expo-status-bar": "~2.2.3",
"react": "19.0.0",
"react-native": "0.79.2",
"react-native-background-timer": "^2.4.1",
"react-native-gesture-handler": "^2.25.0",
"react-native-get-random-values": "^1.11.0",
"@pipecat-ai/react-native-daily-transport": "^0.3.5",
"react-native-safe-area-context": "^5.4.0",
"react-native-screens": "^4.10.0",
"react-native-toast-message": "^2.3.0"
},
"devDependencies": {
"@babel/core": "^7.27.1",
"@types/react-native": "^0.73.0",
"typescript": "~5.8.3"
},
"private": true,
"resolutions": {
"@daily-co/react-native-webrtc/debug": "^4.0.0",
"@daily-co/react-native-webrtc/@types/react-native": "^0.73.0"
}
}

View File

@@ -0,0 +1,34 @@
import React from "react"
import { NavigationContainer } from '@react-navigation/native';
import { createStackNavigator } from '@react-navigation/stack';
import PreJoinView from './views/PreJoinView';
import MeetingView from './views/MeetingView';
import { VoiceClientProvider } from './context/VoiceClientContext';
import Toast from 'react-native-toast-message';
import { useVoiceClientNavigation } from './hooks/useVoiceClientNavigation';
const Stack = createStackNavigator();
const NavigationManager: React.FC = () => {
useVoiceClientNavigation(); // This hook now controls the navigation based on the connection state.
return null; // This component doesn't render anything but manages navigation.
};
const App: React.FC = () => {
return (
<VoiceClientProvider>
<NavigationContainer>
<Stack.Navigator initialRouteName="Prejoin">
<Stack.Screen name="Prejoin" component={PreJoinView} options={{ headerShown: false }}/>
<Stack.Screen name="Meeting" component={MeetingView} options={{ headerShown: false }}/>
</Stack.Navigator>
<NavigationManager />
<Toast />
</NavigationContainer>
</VoiceClientProvider>
);
};
export default App;

View File

@@ -0,0 +1,94 @@
import React, { useState, useMemo } from 'react';
import { View, StyleSheet, LayoutChangeEvent, ViewStyle } from 'react-native';
import { MaterialIcons } from '@expo/vector-icons';
import Colors from '../theme/Colors';
import { useVoiceClient } from '../context/VoiceClientContext';
interface MicrophoneViewProps {
style?: ViewStyle; // Optional additional styles for the button container
}
const MicrophoneView: React.FC<MicrophoneViewProps> = ({ style }) => {
const { isMicEnabled, localAudioLevel: audioLevel } = useVoiceClient();
const [dimensions, setDimensions] = useState({ width: 0, height: 0 });
const onLayout = (event: LayoutChangeEvent) => {
const { width, height } = event.nativeEvent.layout;
setDimensions({ width, height });
};
const { width } = dimensions;
const circleSize = useMemo(() => width * 0.9, [width]);
const innerCircleSize = useMemo(() => width * 0.82, [width]);
const audioCircleSize = useMemo(() => audioLevel * width * 0.95, [audioLevel, width]);
return (
<View style={[styles.container, style]} onLayout={onLayout}>
<View
style={[
styles.outerCircle,
{ width: circleSize, height: circleSize, borderRadius: circleSize / 2 },
]}
>
<View
style={[
styles.innerCircle,
{
backgroundColor: !isMicEnabled ? Colors.disabledMic : Colors.backgroundCircle,
width: innerCircleSize,
height: innerCircleSize,
borderRadius: innerCircleSize / 2,
},
]}
/>
{isMicEnabled && (
<View
style={[
styles.audioCircle,
{
width: audioCircleSize,
height: audioCircleSize,
borderRadius: audioCircleSize / 2,
},
]}
/>
)}
<MaterialIcons
name={!isMicEnabled ? "mic-off" : "mic"}
size={width * 0.2}
color="white"
style={styles.micIcon}
/>
</View>
</View>
);
};
const styles = StyleSheet.create({
container: {
justifyContent: 'center',
alignItems: 'center',
} as ViewStyle,
outerCircle: {
borderWidth: 1,
borderColor: Colors.buttonsBorder,
justifyContent: 'center',
alignItems: 'center',
} as ViewStyle,
innerCircle: {
position: 'absolute',
} as ViewStyle,
audioCircle: {
position: 'absolute',
backgroundColor: Colors.micVolume,
opacity: 0.5,
} as ViewStyle,
micIcon: {
position: 'absolute',
},
});
export default MicrophoneView;

View File

@@ -0,0 +1,128 @@
import React, { useEffect, useState } from 'react';
import { LayoutChangeEvent, StyleSheet, Text, View, ViewStyle } from 'react-native';
import { MaterialIcons } from '@expo/vector-icons';
import Colors from '../theme/Colors';
import { useVoiceClient } from '../context/VoiceClientContext';
const dotCount = 5;
const WaveformView: React.FC = () => {
const [audioLevels, setAudioLevels] = useState(Array(dotCount).fill(0));
const [dimensions, setDimensions] = useState({ width: 0, height: 0 });
const { currentState: voiceClientStatus, botReady: isBotReady, remoteAudioLevel: audioLevel } = useVoiceClient();
const onLayout = (event: LayoutChangeEvent) => {
const { width, height } = event.nativeEvent.layout;
setDimensions({ width, height });
};
useEffect(() => {
setAudioLevels((prevLevels) => [...prevLevels.slice(1), audioLevel]);
}, [audioLevel]);
const { width, height } = dimensions;
const circleSize = width * 0.9;
const innerCircleSize = width * 0.82;
const barWidth = (width * 0.5) / dotCount;
return (
<View style={styles.container} onLayout={onLayout}>
<View style={[styles.outerCircle, { width: circleSize, height: circleSize, borderRadius: circleSize / 2 }]}>
<View
style={[
styles.innerCircle,
{
backgroundColor: isBotReady ? Colors.backgroundCircle : Colors.backgroundCircleNotConnected,
width: innerCircleSize,
height: innerCircleSize,
borderRadius: innerCircleSize / 2,
},
]}
>
{isBotReady ? (
audioLevel > 0 ? (
<View style={[styles.waveformContainer, { width: width * 0.5, height: width * 0.5 }]}>
{audioLevels.map((level, index) => (
<View
key={index}
style={[
styles.waveformBar,
{
width: barWidth - 10, // Subtract some margin
height: level * height,
},
]}
/>
))}
</View>
) : (
<View style={[styles.dotContainer, { width: width * 0.5, height: height * 0.5 }]}>
{Array(dotCount)
.fill(0)
.map((_, index) => (
<View key={index} style={[styles.dot, { width: height * 0.1, height: height * 0.1 }]} />
))}
</View>
)
) : (
<View style={styles.notReadyContainer}>
<MaterialIcons name="hourglass-empty" size={32} color="white" />
<Text style={styles.voiceClientStatusText}>{voiceClientStatus}</Text>
</View>
)}
</View>
</View>
</View>
);
};
const styles = StyleSheet.create({
container: {
justifyContent: 'center',
alignItems: 'center',
width: "100%",
} as ViewStyle,
outerCircle: {
borderWidth: 1,
borderColor: 'gray',
justifyContent: 'center',
alignItems: 'center',
} as ViewStyle,
innerCircle: {
justifyContent: 'center',
alignItems: 'center',
position: 'relative',
} as ViewStyle,
waveformContainer: {
flexDirection: 'row',
justifyContent: 'space-between',
alignItems: 'center',
} as ViewStyle,
waveformBar: {
backgroundColor: 'white',
maxHeight: '100%',
borderRadius: 12,
} as ViewStyle,
dotContainer: {
flexDirection: 'row',
justifyContent: 'space-between',
alignItems: 'center',
} as ViewStyle,
dot: {
backgroundColor: 'white',
borderRadius: 50,
} as ViewStyle,
notReadyContainer: {
justifyContent: 'center',
alignItems: 'center',
} as ViewStyle,
voiceClientStatusText: {
color: 'white',
marginTop: 10,
fontSize: 16,
fontWeight: 'bold',
} as ViewStyle,
});
export default WaveformView;

View File

@@ -0,0 +1,229 @@
import React, { createContext, useState, useContext, ReactNode, useCallback, useMemo, useRef, useEffect } from 'react'
import Toast from 'react-native-toast-message'
import { RNDailyTransport } from '@pipecat-ai/react-native-daily-transport'
import { RTVIClient, TransportState, RTVIMessage, Participant } from '@pipecat-ai/client-js'
import { MediaStreamTrack } from '@daily-co/react-native-webrtc'
import { SettingsManager } from '../settings/SettingsManager';
interface VoiceClientContextProps {
voiceClient: RTVIClient | null
inCall: boolean
currentState: string
botReady: boolean
localAudioLevel: number
remoteAudioLevel: number
isMicEnabled: boolean
isCamEnabled: boolean
videoTrack?: MediaStreamTrack
timerCountDown: number
// methods
start: (url: string) => Promise<void>
leave: () => void
toggleMicInput: () => void
toggleCamInput: () => void
}
export const VoiceClientContext = createContext<VoiceClientContextProps | undefined>(undefined)
interface VoiceClientProviderProps {
children: ReactNode
}
export const VoiceClientProvider: React.FC<VoiceClientProviderProps> = ({ children }) => {
const [voiceClient, setVoiceClient] = useState<RTVIClient | null>(null)
const [inCall, setInCall] = useState<boolean>(false)
const [currentState, setCurrentState] = useState<TransportState>("disconnected")
const [botReady, setBotReady] = useState<boolean>(false)
const [isMicEnabled, setIsMicEnabled] = useState<boolean>(false)
const [isCamEnabled, setIsCamEnabled] = useState<boolean>(false)
const [videoTrack, setVideoTrack] = useState<MediaStreamTrack>()
const [localAudioLevel, setLocalAudioLevel] = useState<number>(0)
const [remoteAudioLevel, setRemoteAudioLevel] = useState<number>(0)
const [timerCountDown, setTimerCountDown] = useState<number>(0)
const botSpeakingRef = useRef(false)
let meetingTimer: NodeJS.Timeout | null
const createVoiceClient = useCallback((url: string): RTVIClient => {
return new RTVIClient({
transport: new RNDailyTransport(),
params: {
baseUrl: url,
endpoints: {
connect: "/connect"
}
},
enableMic: true,
enableCam: false
})
}, [])
const handleError = useCallback((error: any) => {
console.log("Error occurred:", error)
const errorMessage = error.message || error.data?.error || "An unexpected error occurred"
Toast.show({
type: 'error',
text1: errorMessage,
})
}, [])
const setupListeners = useCallback((voiceClient: RTVIClient): void => {
const inCallStates = new Set(["authenticating", "connecting", "connected", "ready"])
voiceClient
.on("transportStateChanged", (state: TransportState) => {
setCurrentState(voiceClient.state)
setInCall(inCallStates.has(state))
})
.on("error", (error: RTVIMessage) => {
handleError(error)
})
.on("botReady", () => {
setBotReady(true)
let expirationTime = voiceClient.transportExpiry
if (expirationTime) {
startTimer(expirationTime)
}
})
.on("disconnected", () => {
setBotReady(false)
stopTimer()
setIsMicEnabled(false)
setIsCamEnabled(false)
})
.on("localAudioLevel", (level: number) => {
setLocalAudioLevel(level)
})
.on("remoteAudioLevel", (level: number) => {
if (botSpeakingRef.current) {
setRemoteAudioLevel(level)
}
})
.on("userStartedSpeaking", () => {
// nothing to do here
})
.on("userStoppedSpeaking", () => {
// nothing to do here
})
.on("botStartedSpeaking", () => {
botSpeakingRef.current = true
})
.on("botStoppedSpeaking", () => {
botSpeakingRef.current = false
setRemoteAudioLevel(0)
})
.on("connected", () => {
setIsMicEnabled(voiceClient.isMicEnabled)
setIsCamEnabled(voiceClient.isCamEnabled)
})
.on("trackStarted", (track: MediaStreamTrack, p?: Participant) => {
if (p?.local && track.kind === 'video'){
setVideoTrack(track)
}
})
}, [handleError])
const start = useCallback(async (url: string): Promise<void> => {
const client = createVoiceClient(url)
setVoiceClient(client)
setupListeners(client)
try {
await client.connect()
// updating the preferences
const newSettings = await SettingsManager.getSettings();
newSettings.backendURL = url
await SettingsManager.updateSettings(newSettings)
} catch (error) {
handleError(error)
}
}, [createVoiceClient, setupListeners, handleError])
const leave = useCallback(async (): Promise<void> => {
if (voiceClient) {
await voiceClient.disconnect()
setVoiceClient(null)
}
}, [voiceClient])
const toggleMicInput = useCallback(async (): Promise<void> => {
if (voiceClient) {
try {
let enableMic = !isMicEnabled
voiceClient.enableMic(enableMic)
setIsMicEnabled(enableMic)
} catch (e) {
handleError(e)
}
}
}, [voiceClient, isMicEnabled])
const toggleCamInput = useCallback(async (): Promise<void> => {
if (voiceClient) {
try {
let enableCam = !isCamEnabled
voiceClient.enableCam(enableCam)
setIsCamEnabled(enableCam)
} catch (e) {
handleError(e)
}
}
}, [voiceClient, isCamEnabled])
const startTimer = (expirationTime: number): void => {
const currentTime = Math.floor(Date.now() / 1000)
const leftTime = expirationTime - currentTime
setTimerCountDown(leftTime)
meetingTimer = setInterval(() => {
setTimerCountDown((prevCountDown) => {
return prevCountDown - 1
})
}, 1000)
}
const stopTimer = (): void => {
if (meetingTimer) {
clearInterval(meetingTimer)
meetingTimer = null
}
setTimerCountDown(0)
}
useEffect(() => {
return () => {
if (voiceClient) {
voiceClient.removeAllListeners() // Cleanup on unmount
}
}
}, [voiceClient])
const contextValue = useMemo(() => ({
voiceClient,
inCall,
currentState,
botReady,
isMicEnabled,
isCamEnabled,
localAudioLevel,
remoteAudioLevel,
videoTrack,
timerCountDown,
start,
leave,
toggleMicInput,
toggleCamInput
}), [voiceClient, inCall, currentState, botReady, isMicEnabled, isCamEnabled, localAudioLevel, remoteAudioLevel, videoTrack, timerCountDown, start, leave, toggleMicInput, toggleCamInput])
return (
<VoiceClientContext.Provider value={contextValue}>
{children}
</VoiceClientContext.Provider>
)
}
export const useVoiceClient = (): VoiceClientContextProps => {
const context = useContext(VoiceClientContext)
if (!context) {
throw new Error('useVoiceClient must be used within a VoiceClientProvider')
}
return context
}

View File

@@ -0,0 +1,22 @@
import { useEffect } from 'react';
import { useNavigation, NavigationProp } from '@react-navigation/native';
import { useVoiceClient } from '../context/VoiceClientContext';
export type RootStackParamList = {
Meeting: undefined;
Prejoin: undefined;
};
export const useVoiceClientNavigation = () => {
const navigation = useNavigation<NavigationProp<RootStackParamList>>();
const { inCall } = useVoiceClient();
useEffect(() => {
if (inCall) {
navigation.navigate('Meeting');
} else {
navigation.navigate('Prejoin');
}
}, [inCall, navigation]);
};

View File

@@ -0,0 +1,42 @@
import AsyncStorage from '@react-native-async-storage/async-storage';
export interface SettingsManager {
enableCam: boolean;
enableMic: boolean;
backendURL: string;
}
// Define the settings object
const defaultSettings: SettingsManager = {
enableCam: false,
enableMic: true,
backendURL: process.env.EXPO_SIMPLE_CHATBOT_SERVER || "",
};
export class SettingsManager {
private static preferencesKey = 'settingsPreference';
static async getSettings(): Promise<SettingsManager> {
try {
const data = await AsyncStorage.getItem(this.preferencesKey);
if (data !== null) {
return JSON.parse(data) as SettingsManager;
} else {
return defaultSettings;
}
} catch (error) {
console.error("Failed to load settings:", error);
return defaultSettings;
}
}
static async updateSettings(settings: SettingsManager): Promise<void> {
try {
const data = JSON.stringify(settings);
await AsyncStorage.setItem(this.preferencesKey, data);
} catch (error) {
console.error("Failed to save settings:", error);
}
}
}

View File

@@ -0,0 +1,7 @@
export const Images = {
dailyBot: require('../../assets/images/pipecat.png'),
};
export const Icons = {
vision: require('../../assets/icons/vision.png'),
};

View File

@@ -0,0 +1,27 @@
type ColorsType = {
white: string;
black: string;
backgroundCircle: string;
backgroundCircleNotConnected: string;
backgroundApp: string;
buttonsBorder: string;
micVolume: string;
timer: string;
disabledMic: string;
disabledVision: string;
};
const Colors: ColorsType = {
white: '#ffffff',
black: '#000000',
backgroundCircle: '#374151',
backgroundCircleNotConnected: '#D1D5DB',
backgroundApp: '#F9FAFB',
buttonsBorder: '#E5E7EB',
micVolume: '#86EFAC',
timer: '#E5E7EB',
disabledMic: '#ee6b6e',
disabledVision: '#BBF7D0',
};
export default Colors;

View File

@@ -0,0 +1,62 @@
import React from 'react';
import { TouchableOpacity, Text, StyleSheet, ViewStyle, TextStyle, GestureResponderEvent } from 'react-native';
import { MaterialIcons } from '@expo/vector-icons';
interface CustomButtonProps {
title: string;
onPress: (event: GestureResponderEvent) => void;
backgroundColor?: string; // Optional prop for background color
textColor?: string; // Optional prop for text color
style?: ViewStyle; // Optional additional styles for the button container
textStyle?: TextStyle; // Optional additional styles for the text
iconName?: string; // Optional prop for the icon name from MaterialIcons
iconPosition?: 'left' | 'right'; // Optional prop to control icon position
iconSize?: number; // Optional prop for icon size
iconColor?: string; // Optional prop for icon color
}
const CustomButton: React.FC<CustomButtonProps> = ({
title,
onPress,
backgroundColor = 'black',
textColor = 'white',
style,
textStyle,
iconName,
iconPosition = 'left',
iconSize = 24,
iconColor = 'white',
}) => {
return (
<TouchableOpacity
onPress={onPress}
style={[styles.button, { backgroundColor }, style]}>
{iconName && iconPosition === 'left' && (
<MaterialIcons name={iconName as keyof typeof MaterialIcons.glyphMap} size={iconSize} color={iconColor} style={styles.icon} />
)}
<Text style={[styles.text, { color: textColor }, textStyle]}>{title}</Text>
{iconName && iconPosition === 'right' && (
<MaterialIcons name={iconName as keyof typeof MaterialIcons.glyphMap} size={iconSize} color={iconColor} style={styles.icon} />
)}
</TouchableOpacity>
);
};
const styles = StyleSheet.create({
button: {
padding: 12,
borderRadius: 8,
alignItems: 'center',
justifyContent: 'center',
flexDirection: 'row', // Ensures icon and text are aligned in a row
},
text: {
fontSize: 16,
fontWeight: 'bold',
},
icon: {
marginHorizontal: 5, // Adds space between the icon and text
},
});
export default CustomButton;

View File

@@ -0,0 +1,139 @@
import {
View,
StyleSheet,
Text,
Image,
TouchableOpacity,
} from 'react-native';
import React from "react"
import { useVoiceClient } from '../context/VoiceClientContext';
import { Images } from '../theme/Assets';
import { MaterialIcons } from '@expo/vector-icons';
import WaveformView from '../components/WaveformView';
import MicrophoneView from '../components/MicrophoneView';
import { SafeAreaView } from 'react-native-safe-area-context';
import Colors from '../theme/Colors';
import CustomButton from '../theme/CustomButton';
const MeetingView: React.FC = () => {
const { leave, toggleMicInput, toggleCamInput, timerCountDown } = useVoiceClient();
const timerString = (count: number): string => {
const hours = Math.floor(count / 3600);
const minutes = Math.floor((count % 3600) / 60);
const seconds = count % 60;
return `${String(hours).padStart(2, '0')}:${String(minutes).padStart(2, '0')}:${String(seconds).padStart(2, '0')}`;
};
return (
<SafeAreaView style={styles.safeArea}>
<View style={styles.container}>
<View style={styles.header}>
<Image source={Images.dailyBot} style={styles.botImage} />
<View style={styles.timerContainer}>
<MaterialIcons name="timelapse" size={24} color="black" />
<Text style={styles.timerText}>{timerString(timerCountDown)}</Text>
</View>
</View>
<View style={styles.mainPanel}>
<WaveformView/>
<View style={styles.bottomControls}>
<TouchableOpacity onPress={toggleMicInput}>
<MicrophoneView
style={styles.microphone}
/>
</TouchableOpacity>
</View>
</View>
{/* Bottom Panel */}
<View style={styles.bottomPanel}>
<CustomButton
title="End"
iconName={"exit-to-app"}
onPress={leave}
backgroundColor={Colors.black}
/>
</View>
</View>
</SafeAreaView>
);
};
const styles = StyleSheet.create({
safeArea: {
flex: 1,
width: "100%",
backgroundColor: Colors.backgroundApp,
},
container: {
flex: 1,
padding: 20,
},
header: {
flexDirection: 'row',
alignItems: 'center',
justifyContent: 'space-between',
paddingBottom: 10,
},
botImage: {
width: 48,
height: 48,
},
timerContainer: {
flexDirection: 'row',
alignItems: 'center',
backgroundColor: Colors.timer,
padding: 10,
borderRadius: 12,
},
timerText: {
color: 'black',
fontWeight: '500',
fontSize: 18,
marginLeft: 5,
},
mainPanel: {
flex: 1,
justifyContent: 'center',
alignItems: 'center',
},
bottomControls: {
flexDirection: 'row',
justifyContent: 'center',
alignItems: 'center',
width: '100%',
paddingBottom: 20,
},
microphone: {
width: 160,
height: 160,
},
camera: {
width: 120,
height: 120,
},
bottomPanel: {
paddingVertical: 10,
},
endButton: {
flexDirection: 'row',
alignItems: 'center',
justifyContent: 'center',
backgroundColor: 'black',
borderRadius: 12,
padding: 10,
},
endText: {
marginLeft: 5,
color: 'white',
},
});
export default MeetingView;

View File

@@ -0,0 +1,82 @@
import {
View,
StyleSheet,
Text,
TextInput,
Image
} from "react-native"
import React, { useEffect, useState } from 'react';
import { useVoiceClient } from '../context/VoiceClientContext';
import Colors from '../theme/Colors';
import { Images } from '../theme/Assets';
import CustomButton from '../theme/CustomButton';
import { SettingsManager } from '../settings/SettingsManager';
const styles = StyleSheet.create({
container: {
flex: 1,
padding: 20,
backgroundColor: Colors.backgroundApp,
justifyContent: 'center',
alignItems: 'center',
},
image: {
width: 64,
height: 64,
marginBottom: 20,
},
header: {
fontSize: 18,
fontWeight: 'bold',
marginBottom: 20,
},
textInput: {
width: '100%',
padding: 10,
borderColor: Colors.buttonsBorder,
backgroundColor: Colors.white,
borderWidth: 1,
borderRadius: 5,
marginBottom: 10,
},
lastTextInput: {
marginBottom: 20,
},
});
const PreJoinView: React.FC = () => {
const { start } = useVoiceClient();
const [backendURL, setBackendURL] = useState<string>('')
useEffect(() => {
const loadSettings = async () => {
const loadedSettings = await SettingsManager.getSettings();
setBackendURL(loadedSettings.backendURL)
};
loadSettings();
}, []);
return (
<View style={styles.container}>
<Image source={Images.dailyBot} style={styles.image} />
<Text style={styles.header}>Connect to Pipecat.</Text>
<TextInput
placeholder="Server URL"
value={backendURL}
onChangeText={setBackendURL}
style={[styles.textInput, styles.lastTextInput]}
/>
<CustomButton
title="Connect"
onPress={() => start(backendURL)}
backgroundColor={Colors.backgroundCircle}
/>
</View>
)
};
export default PreJoinView;

Some files were not shown because too many files have changed in this diff Show More