Compare commits

..

18 Commits

Author SHA1 Message Date
Mark Backman
b28c2b9a58 Use new pipecat.runner.local for examples 2025-07-27 07:48:52 -04:00
Mark Backman
250dec2371 Updates to cloud examples: cloud-simple so it can be deployed and use Krisp 2025-07-26 22:19:05 -04:00
Mark Backman
49435cd115 Add typing support for session_args, fix debug logging in cloud.py 2025-07-26 21:56:06 -04:00
Mark Backman
65daac9853 Add RTVI to local-simple-bot.py 2025-07-26 10:35:07 -04:00
Mark Backman
26743f40a0 Add startup message for local WebRTC, remove WebRTC from cloud /connect 2025-07-26 10:15:32 -04:00
Mark Backman
bd91a31a40 Comment about starting a SmallWebRTCTransport client session 2025-07-26 00:11:36 -04:00
Mark Backman
242b277413 Fix the /connect endpoint's return value for Daily, WebRTC 2025-07-25 23:36:23 -04:00
Mark Backman
c71e97dd08 PCC deployment scripts 2025-07-25 22:33:49 -04:00
Mark Backman
c6e2bfe601 Ignore import warning in _get_bot_module 2025-07-25 22:25:31 -04:00
Mark Backman
f61bbd3d3d Console message for where to connect to the server 2025-07-25 22:22:01 -04:00
Mark Backman
c74fede862 Remove livekit from cloud.py, use localhost for webrtc 2025-07-25 22:14:16 -04:00
Mark Backman
ff7e0ff27d Rename examples 2025-07-25 21:29:46 -04:00
Mark Backman
7253077bb7 Final cleanup 2025-07-25 21:25:01 -04:00
Mark Backman
df211fb5a3 Renaming files 2025-07-25 20:23:55 -04:00
Mark Backman
ae6698429f Add quickstart examples 2025-07-25 18:04:40 -04:00
Mark Backman
b7e415a659 Checkpoint: local working, server: daily,webrtc working 2025-07-25 17:39:04 -04:00
Mark Backman
2cdc42adf6 Lazy load imports to avoid unnecessary dependencies when running 2025-07-25 11:47:17 -04:00
Mark Backman
988d986e22 Add new module, add telnyx, plivo, livekit runners 2025-07-25 11:30:12 -04:00
1041 changed files with 105995 additions and 16292 deletions

60
.github/workflows/android.yaml vendored Normal file
View File

@@ -0,0 +1,60 @@
name: android
on:
push:
branches:
- main
paths:
- "examples/simple-chatbot/client/android/**"
- "examples/p2p-webrtc/video-transform/client/android/**"
pull_request:
branches:
- "**"
paths:
- "examples/simple-chatbot/client/android/**"
- "examples/p2p-webrtc/video-transform/client/android/**"
workflow_dispatch:
inputs:
sdk_git_ref:
type: string
description: "Which git ref of the app to build"
concurrency:
group: build-android-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
sdk:
name: "Demo apps"
runs-on: ubuntu-latest
steps:
- name: Checkout repo
uses: actions/checkout@v4
with:
ref: ${{ github.event.inputs.sdk_git_ref || github.ref }}
- name: "Install Java"
uses: actions/setup-java@v4
with:
distribution: 'temurin'
java-version: '17'
- name: "Example app: Simple Chatbot"
working-directory: examples/simple-chatbot/client/android
run: ./gradlew :simple-chatbot-client:assembleDebug
- name: Upload Simple Chatbot APK
uses: actions/upload-artifact@v4
with:
name: Simple Chatbot Android Client
path: examples/simple-chatbot/client/android/simple-chatbot-client/build/outputs/apk/debug/simple-chatbot-client-debug.apk
- name: "Example app: Small WebRTC Client"
working-directory: examples/p2p-webrtc/video-transform/client/android
run: ./gradlew :small-webrtc-client:assembleDebug
- name: Upload Small WebRTC APK
uses: actions/upload-artifact@v4
with:
name: Small WebRTC Android Client
path: examples/p2p-webrtc/video-transform/client/android/small-webrtc-client/build/outputs/apk/debug/small-webrtc-client-debug.apk

View File

@@ -21,20 +21,24 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Install uv
uses: astral-sh/setup-uv@v3
with:
version: "latest"
- name: Set up Python
run: uv python install 3.10
- name: Install development dependencies
run: uv sync --group dev
id: setup_python
uses: actions/setup-python@v4
with:
python-version: '3.10'
- name: Setup virtual environment
run: |
python -m venv .venv
- name: Install basic Python dependencies
run: |
source .venv/bin/activate
python -m pip install --upgrade pip
pip install -r dev-requirements.txt
- name: Build project
run: uv build
- name: Install project in editable mode
run: uv pip install --editable .
run: |
source .venv/bin/activate
python -m build
- name: Install project and other Python dependencies
run: |
source .venv/bin/activate
pip install --editable .

View File

@@ -18,28 +18,35 @@ jobs:
steps:
- name: Checkout repo
uses: actions/checkout@v4
- name: Install uv
uses: astral-sh/setup-uv@v3
with:
version: "latest"
- name: Set up Python
run: uv python install 3.10
id: setup_python
uses: actions/setup-python@v4
with:
python-version: "3.10"
- name: Cache virtual environment
uses: actions/cache@v3
with:
# We are hashing dev-requirements.txt and test-requirements.txt which
# contain all dependencies needed to run the tests.
key: venv-${{ runner.os }}-${{ steps.setup_python.outputs.python-version}}-${{ hashFiles('dev-requirements.txt') }}-${{ hashFiles('test-requirements.txt') }}
path: .venv
- name: Install system packages
id: install_system_packages
run: |
sudo apt-get install -y portaudio19-dev
- name: Install dependencies
- name: Setup virtual environment
run: |
uv sync --group dev --extra anthropic --extra aws --extra google --extra langchain
python -m venv .venv
- name: Install basic Python dependencies
run: |
source .venv/bin/activate
python -m pip install --upgrade pip
pip install -r dev-requirements.txt -r test-requirements.txt
- name: Run tests with coverage
run: |
uv run coverage run
uv run coverage xml
source .venv/bin/activate
coverage run
coverage xml
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v5
with:

View File

@@ -22,22 +22,25 @@ jobs:
steps:
- name: Checkout repo
uses: actions/checkout@v4
- name: Install uv
uses: astral-sh/setup-uv@v3
with:
version: "latest"
- name: Set up Python
run: uv python install 3.10
- name: Install development dependencies
run: uv sync --group dev
uses: actions/setup-python@v4
with:
python-version: "3.10"
- name: Setup virtual environment
run: |
python -m venv .venv
- name: Install development Python dependencies
run: |
source .venv/bin/activate
python -m pip install --upgrade pip
pip install -r dev-requirements.txt
- name: Ruff formatter
id: ruff-format
run: uv run ruff format --diff
run: |
source .venv/bin/activate
ruff format --diff
- name: Ruff linter (all rules)
id: ruff-check
run: uv run ruff check
run: |
source .venv/bin/activate
ruff check

View File

@@ -17,17 +17,23 @@ jobs:
uses: actions/checkout@v4
with:
ref: ${{ github.event.inputs.gitref }}
- name: Install uv
uses: astral-sh/setup-uv@v3
with:
version: "latest"
- name: Set up Python
run: uv python install 3.10
- name: Install development dependencies
run: uv sync --group dev
id: setup_python
uses: actions/setup-python@v4
with:
python-version: '3.10'
- name: Setup virtual environment
run: |
python -m venv .venv
- name: Install basic Python dependencies
run: |
source .venv/bin/activate
python -m pip install --upgrade pip
pip install -r dev-requirements.txt
- name: Build project
run: uv build
run: |
source .venv/bin/activate
python -m build
- name: Upload wheels
uses: actions/upload-artifact@v4
with:

View File

@@ -12,16 +12,23 @@ jobs:
with:
fetch-tags: true
fetch-depth: 100
- name: Install uv
uses: astral-sh/setup-uv@v3
with:
version: "latest"
- name: Set up Python
run: uv python install 3.10
- name: Install development dependencies
run: uv sync --group dev
id: setup_python
uses: actions/setup-python@v4
with:
python-version: '3.10'
- name: Setup virtual environment
run: |
python -m venv .venv
- name: Install basic Python dependencies
run: |
source .venv/bin/activate
python -m pip install --upgrade pip
pip install -r dev-requirements.txt
- name: Build project
run: uv build
run: |
source .venv/bin/activate
python -m build
- name: Upload wheels
uses: actions/upload-artifact@v4
with:
@@ -31,7 +38,7 @@ jobs:
publish-to-test-pypi:
name: "Publish to Test PyPI"
runs-on: ubuntu-latest
needs: [build]
needs: [ build ]
environment:
name: testpypi
url: https://pypi.org/p/pipecat-ai

View File

@@ -1,123 +0,0 @@
name: Python Compatibility Test
on:
push:
branches: [main, develop]
paths: ['pyproject.toml']
pull_request:
branches: [main, develop]
paths: ['pyproject.toml']
jobs:
test-dev-environment:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: ['3.10.18', '3.11.13', '3.12.11', '3.13.5']
name: Dev Environment - Python ${{ matrix.python-version }}
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Install system dependencies
run: |
sudo apt-get update
sudo apt-get install -y \
portaudio19-dev \
libcairo2-dev \
libgirepository1.0-dev \
pkg-config
- name: Install uv
uses: astral-sh/setup-uv@v4
with:
version: 'latest'
- name: Set up Python ${{ matrix.python-version }}
run: |
uv python install ${{ matrix.python-version }}
uv python pin ${{ matrix.python-version }}
- name: Test uv sync with all extras (Python < 3.13)
if: "!startsWith(matrix.python-version, '3.13.')"
run: |
uv sync --group dev --all-extras --no-extra krisp
- name: Test uv sync without PyTorch extras (Python 3.13+)
if: startsWith(matrix.python-version, '3.13.')
run: |
uv sync --group dev --all-extras \
--no-extra krisp \
--no-extra ultravox \
--no-extra local-smart-turn \
--no-extra moondream \
--no-extra mlx-whisper
- name: Verify dev installation
run: |
uv run python --version
uv run python -c "import pipecat; print('✅ Dev environment - Pipecat imports successfully')"
test-user-experience:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: ['3.10.18', '3.11.13', '3.12.11', '3.13.5']
name: User Experience - Python ${{ matrix.python-version }}
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Install system dependencies
run: |
sudo apt-get update
sudo apt-get install -y \
portaudio19-dev \
libcairo2-dev \
libgirepository1.0-dev \
pkg-config
- name: Install uv
uses: astral-sh/setup-uv@v4
with:
version: 'latest'
- name: Set up Python ${{ matrix.python-version }}
run: |
uv python install ${{ matrix.python-version }}
- name: Build local package
run: |
uv build
- name: Create test project
run: |
mkdir test-project
cd test-project
uv init --python ${{ matrix.python-version }}
- name: Test comprehensive extras with uv add (Python 3.10-3.12)
if: "!startsWith(matrix.python-version, '3.13.')"
run: |
cd test-project
# Use uv add with built wheel to leverage dependency management
uv add "../dist/pipecat_ai-"*".whl[anthropic,assemblyai,asyncai,aws,aws-nova-sonic,azure,cartesia,cerebras,deepseek,daily,deepgram,elevenlabs,fal,fireworks,fish,gladia,google,grok,groq,gstreamer,heygen,inworld,koala,langchain,livekit,lmnt,local,mcp,mem0,mlx-whisper,moondream,nim,neuphonic,noisereduce,openai,openpipe,openrouter,perplexity,playht,qwen,rime,riva,runner,sambanova,sentry,local-smart-turn,remote-smart-turn,silero,simli,soniox,soundfile,speechmatics,tavus,together,tracing,ultravox,webrtc,websocket,whisper]"
- name: Test Python 3.13 compatible extras with uv add
if: startsWith(matrix.python-version, '3.13.')
run: |
cd test-project
# Use uv add with built wheel and Python 3.13 compatible extras
uv add "../dist/pipecat_ai-"*".whl[anthropic,assemblyai,asyncai,aws,aws-nova-sonic,azure,cartesia,cerebras,deepseek,daily,deepgram,elevenlabs,fal,fireworks,fish,gladia,google,grok,groq,gstreamer,heygen,inworld,koala,langchain,livekit,lmnt,local,mcp,mem0,nim,neuphonic,noisereduce,openai,openpipe,openrouter,perplexity,playht,qwen,rime,riva,runner,sambanova,sentry,remote-smart-turn,silero,simli,soniox,soundfile,speechmatics,tavus,together,tracing,webrtc,websocket,whisper]"
- name: Verify user installation
run: |
cd test-project
uv run python --version
uv run python -c "import pipecat; print('✅ User experience - Pipecat imports successfully')"
# Test that basic functionality works
uv run python -c "from pipecat.pipeline.pipeline import Pipeline; print('✅ Pipeline import works')"

View File

@@ -1,56 +0,0 @@
name: Sync Quickstart to pipecat-quickstart repo
on:
push:
branches: [main]
paths:
- 'examples/quickstart/**'
workflow_dispatch: # Manual trigger
jobs:
sync-quickstart:
runs-on: ubuntu-latest
steps:
- name: Checkout main repo
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Checkout quickstart repo
uses: actions/checkout@v4
with:
repository: pipecat-ai/pipecat-quickstart
token: ${{ secrets.QUICKSTART_SYNC_TOKEN }}
path: quickstart-repo
- name: Sync files (excluding READMEs)
run: |
# Copy code files only, skip READMEs
cp examples/quickstart/bot.py quickstart-repo/
cp examples/quickstart/requirements.txt quickstart-repo/
cp examples/quickstart/env.example quickstart-repo/
# Copy any other files that aren't README.md
find examples/quickstart -type f \
-not -name "README.md" \
-not -name "*.md" \
-exec cp {} quickstart-repo/ \;
- name: Commit and push changes
run: |
cd quickstart-repo
git config user.name "GitHub Action"
git config user.email "action@github.com"
git add .
# Only commit if there are changes
if ! git diff --staged --quiet; then
git commit -m "Sync from pipecat main repo
Updated files from examples/quickstart/
Commit: ${{ github.sha }}
"
git push
else
echo "No changes to sync"
fi

View File

@@ -22,23 +22,31 @@ jobs:
steps:
- name: Checkout repo
uses: actions/checkout@v4
- name: Install uv
uses: astral-sh/setup-uv@v3
with:
version: "latest"
- name: Set up Python
run: uv python install 3.10
id: setup_python
uses: actions/setup-python@v4
with:
python-version: "3.10"
- name: Cache virtual environment
uses: actions/cache@v3
with:
# We are hashing dev-requirements.txt and test-requirements.txt which
# contain all dependencies needed to run the tests.
key: venv-${{ runner.os }}-${{ steps.setup_python.outputs.python-version}}-${{ hashFiles('dev-requirements.txt') }}-${{ hashFiles('test-requirements.txt') }}
path: .venv
- name: Install system packages
id: install_system_packages
run: |
sudo apt-get install -y portaudio19-dev
- name: Install dependencies
- name: Setup virtual environment
run: |
uv sync --group dev --extra anthropic --extra aws --extra google --extra langchain
python -m venv .venv
- name: Install basic Python dependencies
run: |
source .venv/bin/activate
python -m pip install --upgrade pip
pip install -r dev-requirements.txt -r test-requirements.txt
- name: Test with pytest
run: |
uv run pytest
source .venv/bin/activate
pytest

View File

@@ -1,42 +0,0 @@
name: Update lockfile
on:
push:
paths:
- 'pyproject.toml'
branches:
- main
workflow_dispatch: # Allows manual triggering from GitHub UI
jobs:
update-lockfile:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
# This gives the workflow permission to push back to the repo
token: ${{ secrets.GITHUB_TOKEN }}
- name: Install uv
uses: astral-sh/setup-uv@v1
- name: Update lockfile
run: uv lock
- name: Check for changes
id: verify-changed-files
run: |
if [ -n "$(git status --porcelain)" ]; then
echo "changed=true" >> $GITHUB_OUTPUT
else
echo "changed=false" >> $GITHUB_OUTPUT
fi
- name: Commit lockfile
if: steps.verify-changed-files.outputs.changed == 'true'
run: |
git config --local user.email "action@github.com"
git config --local user.name "GitHub Action"
git add uv.lock
git commit -m "chore: update uv.lock after dependency changes"
git push

7
.gitignore vendored
View File

@@ -31,6 +31,8 @@ MANIFEST
fly.toml
# Examples
examples/telnyx-chatbot/templates/streams.xml
examples/twilio-chatbot/templates/streams.xml
examples/**/node_modules/
examples/**/.expo/
examples/**/dist/
@@ -48,7 +50,4 @@ examples/**/web-build/
# Documentation
docs/api/_build/
docs/api/api
# uv
.python-version
docs/api/api

View File

@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.12.1
rev: v0.9.7
hooks:
- id: ruff
language_version: python3

View File

@@ -9,14 +9,22 @@ build:
- python3-dev
- libasound2-dev
jobs:
post_install:
- pip install uv
- UV_PROJECT_ENVIRONMENT=$READTHEDOCS_VIRTUALENV_PATH uv sync --group docs --all-extras --no-extra krisp --no-extra gstreamer --no-extra ultravox --no-extra local_smart_turn --no-extra moondream --no-extra riva --no-extra mlx-whisper
pre_build:
- python -m pip install --upgrade pip
- pip install wheel setuptools
post_build:
- echo "Build completed"
sphinx:
configuration: docs/api/conf.py
fail_on_warning: false
python:
install:
- requirements: docs/api/requirements.txt
- method: pip
path: .
search:
ranking:
api/*: 5

View File

@@ -5,220 +5,13 @@ All notable changes to **Pipecat** will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## Unreleased
### Changed
- Updated `pyproject.toml` to once again pin `numba` to `>=0.61.2` in order to
resolve package versioning issues.
### Other
- Updated `15-switch-voices.py` and `15a-switch-languages.py` examples to show
how to enclose complex logic (e.g. `ParallelPipeline`) into a single processor
so the main pipeline becomes simpler.
## [0.0.79] - 2025-08-07
### Changed
- Changed `pipecat-ai`'s `openai` dependency to `>=1.74.0,<=1.99.1` due to a
breaking change in `openai` 1.99.2 ([commit](https://github.com/openai/openai-python/commit/657f551dbe583ffb259d987dafae12c6211fba06))
### Deprecated
- `TTSService.say()` is deprecated, push a `TTSSpeakFrame` instead. Calling
functions directly is a discouraged pattern in Pipecat because, for example,
it might cause issues with frame ordering.
- `LLMMessagesFrame` is deprecated, in favor of either:
- `LLMMessagesUpdateFrame` with `run_llm=True`
- `OpenAILLMContextFrame` with desired messages in a new context
- `LLMUserResponseAggregator` and `LLMAssistantResponseAggregator` are
deprecated, as they depended on the now-deprecated `LLMMessagesFrame`. Use
`LLMUserContextAggregator` and `LLMAssistantResponseAggregator` (or
LLM-specific subclasses thereof) instead.
## [0.0.78] - 2025-08-07
## [Unreleased]
### Added
- Added `enable_emulated_vad_interruptions` to `LLMUserAggregatorParams`.
When user speech is emulated (e.g. when a transcription is received but
VAD doesn't detect speech), this parameter controls whether the emulated
speech can interrupt the bot. Default is False (emulated speech is ignored
while the bot is speaking).
- Added new `handle_sigint` and `handle_sigterm` to `RunnerArguments`. This
allows applications to know what settings they should use for the environment
they are running on. Also, added `pipeline_idle_timeout_secs` to be able to
control the `PipelineTask` idle timeout.
- Added `processor` field to `ErrorFrame` to indicate `FrameProcessor` that
generated the error.
- Added new language support for `AWSTranscribeSTTService`. All languages
supporting streaming data input are now supported:
https://docs.aws.amazon.com/transcribe/latest/dg/supported-languages.html
- Added support for Simli Trinity Avatars. A new `is_trinity_avatar` parameter
has been introduced to specify whether the provided `faceId` corresponds to a
Trinity avatar, which is required for optimal Trinity avatar performance.
- The development runner how handles custom `body` data for `DailyTransport`.
The `body` data is passed to the Pipecat client. You can POST to the `/start`
endpoint with a request body of:
```
{
"createDailyRoom": true,
"dailyRoomProperties": { "start_video_off": true },
"body": { "custom_data": "value" }
}
```
The `body` information is parsed and used in the application. The
`dailyRoomProperties` are currently not handled.
- Added detailed latency logging to `UserBotLatencyLogObserver`, capturing
average response time between user stop and bot start, as well as minimum and
maximum response latency.
- Added Chinese, Japanese, Korean word timestamp support to
`CartesiaTTSService`.
- Added `region` parameter to `GladiaSTTService`. Accepted values: eu-west
(default), us-west.
### Changed
- System frames are now queued. Before, system frames could be generated from
any task and would not guarantee any order which was causing undesired
behavior. Also, it was possible to get into some rare recursion issues because
of the way system frames were executed (they were executed in-place, meaning
calling `push_frame()` would finish after the system frame traversed all the
pipeline). This makes system frames more deterministic.
- Changed the default model for both `ElevenLabsTTSService` and
`ElevenLabsHttpTTSService` to `eleven_turbo_v2_5`. The rationale for this
change is that the Turbo v2.5 model exhibits the most stable voice quality
along with very low latency TTFB; latencies are on par with the Flash v2.5
model. Also, the Turbo v2.5 model outputs word/timestamp alignment data with
correct spacing.
- The development runners `/connect` and `/start` endpoint now both return
`dailyRoom` and `dailyToken` in place of the previous `room_url` and `token`.
- Updated the `pipecat.runner.daily` utility to only a take `DAILY_API_URL` and
`DAILY_SAMPLE_ROOM_URL` environment variables instead of argparsing `-u` and
`-k`, respectively.
- Updated `daily-python` to 0.19.6.
- Changed `TavusVideoService` to send audio or video frames only after the
transport is ready, preventing warning messages at startup.
- The development runner now strips any provided protocol (e.g. https://) from
the proxy address and issues a warning. It also strips trailing `/`.
### Deprecated
- In the `pipecat.runner.daily`, the `configure_with_args()` function is
deprecated. Use the `configure()` function instead.
- The development runner's `/connect` endpoint is deprecated and will be
removed in a future version. Use the `/start` endpoint in its place. In the
meantime, both endpoints work and deliver equivalent functionality.
### Fixed
- Fixed a `DailyTransport` issue that would result in an unhandled
`concurrent.futures.CancelledError` when a future is cancelled.
- Fixed a `RivaSTTService` issue that would result in an unhandled
`concurrent.futures.CancelledError` when a future is cancelled when reading
from the audio chunks from the incoming audio stream.
- Fixed an issue in the `BaseOutputTransport`, mainly reproducible with
`FastAPIWebsocketOutputTransport` when the audio mixer was enabled, where the
loop could consume 100% CPU by continuously returning without delay, preventing
other asyncio tasks (such as cancellation or shutdown signals) from being
processed.
- Fixed an issue where `BotStartedSpeakingFrame` and `BotStoppedSpeakingFrame`
were not emitted when using `TavusVideoService` or `HeyGenVideoService`.
- Fixed an issue in `LiveKitTransport` where empty `AudioRawFrame`s were pushed
down the pipeline. This resulted in warnings by the STT processor.
- Fixed `PiperTTSService` to send text as a JSON object in the request body,
resolving compatibility with Piper's HTTP API.
- Fixed an issue with the `TavusVideoService` where an error was thrown due to
missing transcription callbacks.
- Fixed an issue in `SpeechmaticsSTTService` where the `user_id` was set to
`None` when diarization is not enabled.
### Performance
- Fixed an issue in `TaskObserver` (a proxy to all observers) that was degrading
global performance.
### Other
- Added `07aa-interruptible-soniox.py`, `07ab-interruptible-inworld-http.py`,
`07ac-interruptible-asyncai.py` and `07ac-interruptible-asyncai-http.py`
release evals.
## [0.0.77] - 2025-07-31
### Added
- Added `InputTextRawFrame` frame type to handle user text input with Gemini
Multimodal Live.
- Added `HeyGenVideoService`. This is an integration for HeyGen Interactive
Avatar. A video service that handles audio streaming and requests HeyGen to
generate avatar video responses. (see https://www.heygen.com/)
- Added the ability to switch voices to `RimeTTSService`.
- Added unified development runner for building voice AI bots across multiple
transports
- `pipecat.runner.run` FastAPI-based development server with automatic bot
discovery
- `pipecat.runner.types` Runner session argument types
(`DailyRunnerArguments`, `SmallWebRTCRunnerArguments`,
`WebSocketRunnerArguments`)
- `pipecat.runner.utils.create_transport()` Factory function for creating
transports from session arguments
- `pipecat.runner.daily` and `pipecat.runner.livekit` Configuration
utilities for Daily and LiveKit setups
- Support for all transport types: Daily, WebRTC, Twilio, Telnyx, Plivo
- Automatic telephony provider detection and serializer configuration
- ESP32 WebRTC compatibility with SDP munging
- Environment detection (`ENV=local`) for conditional features
- Added Async.ai TTS integration (https://async.ai/)
- `AsyncAITTSService` WebSocket-based streaming TTS with interruption
support
- `AsyncAIHttpTTSService` HTTP-based streaming TTS service
- Example scripts:
- `examples/foundational/07ac-interruptible-asyncai.py` (WebSocket demo)
- `examples/foundational/07ac-interruptible-asyncai-http.py` (HTTP demo)
- Added `transcription_bucket` params support to the `DailyRESTHelper`.
- Added a new TTS service, `InworldTTSService`. This service provides
low-latency, high-quality speech generation using Inworld's streaming API.
- Added a new field `handle_sigterm` to `PipelineRunner`. It defaults to
`False`. This field handles SIGTERM signals. The `handle_sigint` field still
defaults to `True`, but now it handles only SIGINT signals.
- Added a new field `handle_sigterm` to `PipelineRunner`. It defaults to `False`.
This field handles SIGTERM signals. The `handle_sigint` field still defaults
to `True`, but now it handles only SIGINT signals.
- Added foundational example `14u-function-calling-ollama.py` for Ollama
function calling.
@@ -229,21 +22,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Added `set_log_level` to `DailyTransport`, allowing setting the logging level
for Daily's internal logging system.
- Added `on_transcription_stopped` and `on_transcription_error` to Daily
callbacks.
### Changed
- Changed the default `url` for `NeuphonicTTSService` to
`wss://api.neuphonic.com` as it provides better global performance. You can
set the URL to other URLs, such as the previous default:
`wss://eu-west-1.api.neuphonic.com`.
- Update `daily-python` to 0.19.5.
- `STTMuteFilter` now pushes the `STTMuteFrame` upstream and downstream, to
allow for more flexible `STTMuteFilter` placement.
- Play delayed messages from `ElevenLabsTTSService` if they still belong to the
current context.
@@ -289,39 +69,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- For `LmntTTSService`, changed the default `model` to `blizzard`, LMNT's
recommended model.
- Updated `SpeechmaticsSTTService`:
- Added support for additional diarization options.
- Added foundational example `07a-interruptible-speechmatics-vad.py`, which
uses VAD detection provided by `SpeechmaticsSTTService`.
### Fixed
- Fixed a `LLMUserResponseAggregator` issue where interruptions were not being
handled properly.
- Fixed `PiperTTSService` to work with newer Piper GPL.
- Fixed a race condition in `FastAPIWebsocketClient` that occurred when
attempting to send a message while the client was disconnecting.
- Fixed an issue in `GoogleLLMService` where interruptions did not work when an
interruption strategy was used.
- Fixed an issue in the `TranscriptProcessor` where newline characters could
cause the transcript output to be corrupted (e.g. missing all spaces).
- Fixed an issue in `AudioBufferProcessor` when using `SmallWebRTCTransport`
where, if the microphone was muted, track timing was not respected.
- Fixed an error that occurs when pushing an `LLMMessagesFrame`. Only some LLM
services, like Grok, are impacted by this issue. The fix is to remove the
optional `name` property that was being added to the message.
- Fixed an issue in `AudioBufferProcessor` that caused garbled audio when
`enable_turn_audio` was enabled and audio resampling was required.
- Fixed a dependency issue for uv users where an `llvmlite` version required
python 3.9.
- Fixed a dependency issue for uv users where an `llvmlite` version required python 3.9.
- Fixed an issue in `MiniMaxHttpTTSService` where the `pitch` param was the
incorrect type.
@@ -335,28 +88,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Fixed an issue in `ElevenLabsTTSService` where the word/timestamp pairs were
calculating word boundaries incorrectly.
- Fixed an issue where, in some edge cases, the
`EmulateUserStartedSpeakingFrame` could be created even if we didn't have a
transcription.
- Fixed an issue where, in some edge cases, the `EmulateUserStartedSpeakingFrame`
could be created even if we didn't have a transcription.
- Fixed an issue in `GoogleLLMContext` where it would inject the
`system_message` as a "user" message into cases where it was not meant to;
it was only meant to do that when there were no "regular" (non-function-call)
messages in the context, to ensure that inference would run properly.
- Fixed an issue in `LiveKitTransport` where the `on_audio_track_subscribed` was
never emitted.
### Other
- Added new quickstart demos:
- examples/quickstart: voice AI bot quickstart
- examples/client-server-web: client/server starter example
- examples/phone-bot-twilio: twilio starter example
- Removed most of the examples from the pipecat repo. Examples can now be
found in: https://github.com/pipecat-ai/pipecat-examples.
- Fixed an issue in `LiveKitTransport` where the `on_audio_track_subscribed` was never emitted.
## [0.0.76] - 2025-07-11
@@ -399,12 +139,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
user started early, while the bot was still working through
`trigger_assistant_response()`.
## [0.0.75] - 2025-07-08 [YANKED]
**This release has been yanked due to resampling issues affecting audio output
quality and critical bugs impacting `ParallelPipelines` functionality.**
**Please upgrade to version 0.0.76 or later.**
## [0.0.75] - 2025-07-08
### Added
@@ -465,12 +200,7 @@ quality and critical bugs impacting `ParallelPipelines` functionality.**
- Remove unncessary push task in each `FrameProcessor`.
## [0.0.74] - 2025-07-03 [YANKED]
**This release has been yanked due to resampling issues affecting audio output
quality and critical bugs impacting `ParallelPipelines` functionality.**
**Please upgrade to version 0.0.76 or later.**
## [0.0.74] - 2025-07-03
### Added

40
Dockerfile Normal file
View File

@@ -0,0 +1,40 @@
# setup
FROM python:3.11.5
WORKDIR /app
COPY requirements.txt /app
COPY *.py /app
COPY pyproject.toml /app
COPY src/ /app/src/
COPY examples/ /app/examples/
WORKDIR /app
RUN ls --recursive /app/
RUN pip3 install --upgrade -r requirements.txt
RUN python -m build .
RUN pip3 install .
RUN pip3 install gunicorn
# If running on Ubuntu, Azure TTS requires some extra config
# https://learn.microsoft.com/en-us/azure/ai-services/speech-service/quickstarts/setup-platform?pivots=programming-language-python&tabs=linux%2Cubuntu%2Cdotnetcli%2Cdotnet%2Cjre%2Cmaven%2Cnodejs%2Cmac%2Cpypi
RUN wget -O - https://www.openssl.org/source/openssl-1.1.1w.tar.gz | tar zxf -
WORKDIR openssl-1.1.1w
RUN ./config --prefix=/usr/local
RUN make -j $(nproc)
RUN make install_sw install_ssldirs
RUN ldconfig -v
ENV SSL_CERT_DIR=/etc/ssl/certs
#ENV LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH
RUN apt clean
RUN apt-get update
RUN apt-get -y install build-essential libssl-dev ca-certificates libasound2 wget
ENV PYTHONUNBUFFERED=1
WORKDIR /app
EXPOSE 8000
# run
CMD ["gunicorn", "--workers=2", "--log-level", "debug", "--chdir", "examples/server", "--capture-output", "daily-bot-manager:app", "--bind=0.0.0.0:8000"]

149
README.md
View File

@@ -8,7 +8,7 @@
**Pipecat** is an open-source Python framework for building real-time voice and multimodal conversational agents. Orchestrate audio and video, AI services, different transports, and conversation pipelines effortlessly—so you can focus on what makes your agent unique.
> Want to dive right in? Try the [quickstart](https://docs.pipecat.ai/getting-started/quickstart).
> Want to dive right in? [Install Pipecat](https://docs.pipecat.ai/getting-started/installation) then try the [quickstart](https://docs.pipecat.ai/getting-started/quickstart).
## 🚀 What You Can Build
@@ -31,11 +31,11 @@
## 🎬 See it in action
<p float="left">
<a href="https://github.com/pipecat-ai/pipecat-examples/tree/main/simple-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat-examples/main/simple-chatbot/image.png" width="400" /></a>&nbsp;
<a href="https://github.com/pipecat-ai/pipecat-examples/tree/main/storytelling-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat-examples/main/storytelling-chatbot/image.png" width="400" /></a>
<a href="https://github.com/pipecat-ai/pipecat/tree/main/examples/simple-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/examples/simple-chatbot/image.png" width="400" /></a>&nbsp;
<a href="https://github.com/pipecat-ai/pipecat/tree/main/examples/storytelling-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/examples/storytelling-chatbot/image.png" width="400" /></a>
<br/>
<a href="https://github.com/pipecat-ai/pipecat-examples/tree/main/translation-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat-examples/main/translation-chatbot/image.png" width="400" /></a>&nbsp;
<a href="https://github.com/pipecat-ai/pipecat-examples/tree/main/moondream-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat-examples/main/moondream-chatbot/image.png" width="400" /></a>
<a href="https://github.com/pipecat-ai/pipecat/tree/main/examples/translation-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/examples/translation-chatbot/image.png" width="400" /></a>&nbsp;
<a href="https://github.com/pipecat-ai/pipecat/tree/main/examples/moondream-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/examples/moondream-chatbot/image.png" width="400" /></a>
</p>
## 📱 Client SDKs
@@ -51,123 +51,98 @@ You can connect to Pipecat from any platform using our official SDKs:
## 🧩 Available services
| Category | Services |
| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [NVIDIA Riva](https://docs.pipecat.ai/server/services/stt/riva), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [SambaNova (Whisper)](https://docs.pipecat.ai/server/services/stt/sambanova), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Ultravox](https://docs.pipecat.ai/server/services/stt/ultravox), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) |
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova) [Together AI](https://docs.pipecat.ai/server/services/llm/together) |
| Text-to-Speech | [Async](https://docs.pipecat.ai/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [Groq](https://docs.pipecat.ai/server/services/tts/groq), [Inworld](https://docs.pipecat.ai/server/services/tts/inworld), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [NVIDIA Riva](https://docs.pipecat.ai/server/services/tts/riva), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
| Speech-to-Speech | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai) |
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local |
| Serializers | [Plivo](https://docs.pipecat.ai/server/utilities/serializers/plivo), [Twilio](https://docs.pipecat.ai/server/utilities/serializers/twilio), [Telnyx](https://docs.pipecat.ai/server/utilities/serializers/telnyx) |
| Video | [HeyGen](https://docs.pipecat.ai/server/services/video/heygen), [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) |
| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) |
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/fal), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) |
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [Noisereduce](https://docs.pipecat.ai/server/utilities/audio/noisereduce-filter) |
| Analytics & Metrics | [OpenTelemetry](https://docs.pipecat.ai/server/utilities/opentelemetry), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) |
| Category | Services |
| ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [Parakeet (NVIDIA)](https://docs.pipecat.ai/server/services/stt/parakeet), [SambaNova (Whisper)](https://docs.pipecat.ai/server/services/stt/sambanova), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Ultravox](https://docs.pipecat.ai/server/services/stt/ultravox), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) |
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova) [Together AI](https://docs.pipecat.ai/server/services/llm/together) |
| Text-to-Speech | [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [FastPitch (NVIDIA)](https://docs.pipecat.ai/server/services/tts/fastpitch), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
| Speech-to-Speech | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai) |
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local |
| Serializers | [Plivo](https://docs.pipecat.ai/server/utilities/serializers/plivo), [Twilio](https://docs.pipecat.ai/server/utilities/serializers/twilio), [Telnyx](https://docs.pipecat.ai/server/utilities/serializers/telnyx) |
| Video | [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) |
| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) |
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/fal), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) |
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [Noisereduce](https://docs.pipecat.ai/server/utilities/audio/noisereduce-filter) |
| Analytics & Metrics | [OpenTelemetry](https://docs.pipecat.ai/server/utilities/opentelemetry), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) |
📚 [View full services documentation →](https://docs.pipecat.ai/server/services/supported-services)
## ⚡ Getting started
You can get started with Pipecat running on your local machine, then move your agent processes to the cloud when you're ready.
You can get started with Pipecat running on your local machine, then move your agent processes to the cloud when youre ready.
1. Install uv
```shell
# Install the module
pip install pipecat-ai
```bash
curl -LsSf https://astral.sh/uv/install.sh | sh
```
# Set up your environment
cp dot-env.template .env
```
> **Need help?** Refer to the [uv install documentation](https://docs.astral.sh/uv/getting-started/installation/).
To keep things lightweight, only the core framework is included by default. If you need support for third-party AI services, you can add the necessary dependencies with:
2. Install the module
```bash
# For new projects
uv init my-pipecat-app
cd my-pipecat-app
uv add pipecat-ai
# Or for existing projects
uv add pipecat-ai
```
3. Set up your environment
```bash
cp env.example .env
```
4. To keep things lightweight, only the core framework is included by default. If you need support for third-party AI services, you can add the necessary dependencies with:
```bash
uv add "pipecat-ai[option,...]"
```
> **Using pip?** You can still use `pip install pipecat-ai` and `pip install "pipecat-ai[option,...]"` to get set up.
```shell
pip install "pipecat-ai[option,...]"
```
## 🧪 Code examples
- [Foundational](https://github.com/pipecat-ai/pipecat/tree/main/examples/foundational) — small snippets that build on each other, introducing one or two concepts at a time
- [Example apps](https://github.com/pipecat-ai/pipecat-examples) — complete applications that you can use as starting points for development
- [Example apps](https://github.com/pipecat-ai/pipecat/tree/main/examples/) — complete applications that you can use as starting points for development
## 🛠️ Contributing to the framework
## 🛠️ Hacking on the framework itself
### Prerequisites
1. Set up a virtual environment before following these instructions. From the root of the repo:
**Python Version:** 3.10+
### Setup Steps
1. Clone the repository and navigate to it:
```bash
git clone https://github.com/pipecat-ai/pipecat.git
cd pipecat
```shell
python3 -m venv venv
source venv/bin/activate
```
2. Install development and testing dependencies:
2. Install the development dependencies:
```bash
uv sync --group dev --all-extras --no-extra gstreamer --no-extra krisp --no-extra local
```shell
pip install -r dev-requirements.txt
```
3. Install the git pre-commit hooks:
3. Install the git pre-commit hooks (these help ensure your code follows project rules):
```bash
uv run pre-commit install
```shell
pre-commit install
```
### Python 3.13+ Compatibility
4. Install the `pipecat-ai` package locally in editable mode:
Some features require PyTorch, which doesn't yet support Python 3.13+. Install using:
```shell
pip install -e .
```
```bash
uv sync --group dev --all-extras \
--no-extra gstreamer \
--no-extra krisp \
--no-extra local \
--no-extra local-smart-turn \
--no-extra mlx-whisper \
--no-extra moondream \
--no-extra ultravox
```
> The `-e` or `--editable` option allows you to modify the code without reinstalling.
> **Tip:** For full compatibility, use Python 3.12: `uv python pin 3.12`
5. Include optional dependencies as needed. For example:
> **Note**: Some extras (local, gstreamer) require system dependencies. See documentation if you encounter build errors.
```shell
pip install -e ".[daily,deepgram,cartesia,openai,silero]"
```
6. (Optional) If you want to use this package from another directory:
```shell
pip install "path_to_this_repo[option,...]"
```
### Running tests
To run all tests, from the root directory:
Install the test dependencies:
```bash
uv run pytest
```shell
pip install -r test-requirements.txt
```
Run a specific test suite:
From the root directory, run:
```bash
uv run pytest tests/test_name.py
```shell
pytest
```
### Setting up your editor

13
dev-requirements.txt Normal file
View File

@@ -0,0 +1,13 @@
build~=1.2.2
coverage~=7.9.1
grpcio-tools~=1.67.1
pip-tools~=7.4.1
pre-commit~=4.2.0
pyright~=1.1.402
pytest~=8.4.1
pytest-asyncio~=1.0.0
pytest-aiohttp==1.1.0
ruff~=0.12.1
setuptools~=78.1.1
setuptools_scm~=8.3.1
python-dotenv~=1.1.1

View File

@@ -1,27 +1,10 @@
#!/bin/bash
# Build docs using uv
echo "Installing dependencies with uv..."
uv sync --group docs --all-extras --no-extra krisp --no-extra gstreamer --no-extra ultravox --no-extra local_smart_turn --no-extra moondream --no-extra riva --no-extra mlx-whisper
# Check if sphinx-build is available
if ! uv run sphinx-build --version &> /dev/null; then
echo "Error: sphinx-build is not available" >&2
exit 1
fi
# Clean previous build
rm -rf _build
echo "Building documentation..."
# Build docs matching ReadTheDocs configuration
uv run sphinx-build -b html -d _build/doctrees . _build/html -W --keep-going
sphinx-build -b html -d _build/doctrees . _build/html -W --keep-going
if [ $? -eq 0 ]; then
echo "Documentation built successfully!"
# Open docs (MacOS)
open _build/html/index.html
else
echo "Documentation build failed!" >&2
exit 1
fi
# Open docs (MacOS)
open _build/html/index.html

View File

@@ -1,5 +1,4 @@
import logging
import os
import sys
from datetime import datetime
from pathlib import Path
@@ -29,7 +28,6 @@ extensions = [
suppress_warnings = [
"autodoc.mocked_object",
"toc.not_included",
]
# Napoleon settings
@@ -47,40 +45,85 @@ autodoc_default_options = {
# Mock imports for optional dependencies
autodoc_mock_imports = [
# Krisp - has build issues on some platforms
"pipecat_ai_krisp",
"riva",
"livekit",
"pyht", # Base PlayHT package
"pyht.async_client", # PlayHT specific imports
"pyht.client",
"pyht.protos",
"pyht.protos.api_pb2",
"pipecat_ai_playht", # PlayHT wrapper
"aiortc",
"aiortc.mediastreams",
"cv2",
"av",
"pyneuphonic",
"mem0",
"mlx_whisper",
"anthropic",
"assemblyai",
"boto3",
"azure",
"cartesia",
"deepgram",
"elevenlabs",
"fal",
"gladia",
"google",
"krisp",
# System-specific GUI libraries
"langchain",
"lmnt",
"noisereduce",
"openpipe",
"simli",
"soundfile",
"soniox",
"pipecat_ai_krisp",
"pyaudio",
"_tkinter",
"tkinter",
# Platform-specific audio libraries (if needed)
"gi",
"gi.require_version",
"gi.repository",
# OpenCV - sometimes has import issues during docs build
"cv2",
# Heavy ML packages excluded from ReadTheDocs
# ultravox dependencies
"daily",
"daily_python",
# Moondream dependencies
"torch",
"transformers",
"intel_extension_for_pytorch",
# Ultravox dependencies
"huggingface_hub",
"vllm",
"vllm.engine.arg_utils",
# local-smart-turn dependencies
"coremltools",
"coremltools.models",
"coremltools.models.MLModel",
"torch",
"torch.nn",
"torch.nn.functional",
"torchaudio",
# moondream dependencies
"transformers",
"transformers.AutoTokenizer",
"transformers.AutoFeatureExtractor",
"AutoFeatureExtractor",
"timm",
"einops",
"intel_extension_for_pytorch",
"huggingface_hub",
# riva dependencies
# Langchain dependencies
"langchain_core",
"langchain_core.messages",
"langchain_core.runnables",
"langchain_core.messages.AIMessageChunk",
"langchain_core.runnables.Runnable",
# LiveKit dependencies
"livekit",
"livekit.rtc",
"livekit_api",
"livekit_protocol",
"tenacity",
"tenacity.retry",
"tenacity.stop_after_attempt",
"tenacity.wait_exponential",
"rtc",
"rtc.Room",
"rtc.RoomOptions",
"rtc.AudioSource",
"rtc.LocalAudioTrack",
"rtc.TrackPublishOptions",
"rtc.TrackSource",
"rtc.AudioStream",
"rtc.AudioFrameEvent",
"rtc.AudioFrame",
"rtc.Track",
"rtc.TrackKind",
"rtc.RemoteParticipant",
"rtc.RemoteTrackPublication",
"rtc.DataPacket",
# Riva dependencies
"riva",
"riva.client",
"riva.client.Auth",
@@ -90,14 +133,57 @@ autodoc_mock_imports = [
"riva.client.AudioEncoding",
"riva.client.proto.riva_tts_pb2",
"riva.client.SpeechSynthesisService",
# MLX dependencies (Apple Silicon specific)
"mlx",
"mlx_whisper", # Note: might need underscore format too
# Local CoreML Smart Turn dependencies
"coremltools",
"coremltools.models",
"coremltools.models.MLModel",
"torch",
"torch.nn",
"torch.nn.functional",
"transformers",
"transformers.AutoFeatureExtractor",
# Also add specific classes that are imported
"AutoFeatureExtractor",
# Sentry dependencies
"sentry_sdk",
# AWS Nova Sonic dependencies
"aws_sdk_bedrock_runtime",
"aws_sdk_bedrock_runtime.client",
"aws_sdk_bedrock_runtime.config",
"aws_sdk_bedrock_runtime.models",
"smithy_aws_core",
"smithy_aws_core.credentials_resolvers",
"smithy_aws_core.credentials_resolvers.static",
"smithy_aws_core.identity",
"smithy_core",
"smithy_core.aio",
"smithy_core.aio.eventstream",
# MCP dependencies (you may already have these)
"mcp",
"mcp.client",
"mcp.client.session_group",
"mcp.client.sse",
"mcp.client.stdio",
"mcp.ClientSession",
"mcp.StdioServerParameters",
# gstreamer
"gi",
"gi.require_version",
"gi.repository",
# Protobuf mocks
"pipecat.frames.protobufs.frames_pb2",
"pipecat.serializers.protobuf",
"google.protobuf",
"google.protobuf.descriptor",
"google.protobuf.descriptor_pool",
"google.protobuf.runtime_version",
"google.protobuf.symbol_database",
"google.protobuf.internal.builder",
]
# HTML output settings
html_theme = "sphinx_rtd_theme"
html_static_path = ["_static"] if os.path.exists("_static") else []
html_static_path = ["_static"]
autodoc_typehints = "signature" # Show type hints in the signature only, not in the docstring
html_show_sphinx = False
@@ -116,7 +202,6 @@ def import_core_modules():
"pipecat.clocks",
"pipecat.metrics",
"pipecat.observers",
"pipecat.runner",
"pipecat.serializers",
"pipecat.sync",
"pipecat.transcriptions",
@@ -165,7 +250,6 @@ def setup(app):
excludes = [
str(project_root / "src/pipecat/pipeline/to_be_updated"),
str(project_root / "src/pipecat/examples"),
str(project_root / "src/pipecat/tests"),
"**/test_*.py",
"**/tests/*.py",

View File

@@ -14,7 +14,7 @@ Quick Links
* `Join our Community <https://discord.gg/pipecat>`_
.. toctree::
:maxdepth: 2
:maxdepth: 3
:caption: API Reference
:hidden:
@@ -26,7 +26,6 @@ Quick Links
Observers <api/pipecat.observers>
Pipeline <api/pipecat.pipeline>
Processors <api/pipecat.processors>
Runner <api/pipecat.runner>
Serializers <api/pipecat.serializers>
Services <api/pipecat.services>
Sync <api/pipecat.sync>

56
docs/api/requirements.txt Normal file
View File

@@ -0,0 +1,56 @@
# Sphinx dependencies
sphinx>=8.1.3
sphinx-rtd-theme
sphinx-markdown-builder
sphinx-autodoc-typehints
toml
# Install all extras individually to ensure they're properly resolved
pipecat-ai[anthropic]
pipecat-ai[assemblyai]
pipecat-ai[aws]
pipecat-ai[azure]
pipecat-ai[cartesia]
pipecat-ai[cerebras]
pipecat-ai[deepseek]
pipecat-ai[daily]
pipecat-ai[deepgram]
pipecat-ai[elevenlabs]
pipecat-ai[fal]
pipecat-ai[fireworks]
pipecat-ai[fish]
pipecat-ai[gladia]
pipecat-ai[google]
pipecat-ai[grok]
pipecat-ai[groq]
# pipecat-ai[krisp] # Mocked
pipecat-ai[koala]
# pipecat-ai[langchain] # Mocked
# pipecat-ai[livekit] # Mocked
pipecat-ai[lmnt]
pipecat-ai[local]
# pipecat-ai[local-smart-turn] # Mocked
# pipecat-ai[mem0] # Mocked
# pipecat-ai[mlx-whisper] # Mocked
# pipecat-ai[moondream] # Mocked
pipecat-ai[nim]
# pipecat-ai[neuphonic] # Mocked
pipecat-ai[noisereduce]
pipecat-ai[openai]
# pipecat-ai[openpipe]
# pipecat-ai[playht] # Mocked due to grpcio conflict with riva
pipecat-ai[qwen]
pipecat-ai[remote-smart-turn]
# pipecat-ai[riva] # Mocked
pipecat-ai[sambanova]
pipecat-ai[silero]
pipecat-ai[simli]
pipecat-ai[soundfile]
pipecat-ai[soniox]
pipecat-ai[speechmatics]
pipecat-ai[tavus]
pipecat-ai[together]
# pipecat-ai[ultravox] # Mocked
# pipecat-ai[webrtc] # Mocked
pipecat-ai[websocket]
pipecat-ai[whisper]

View File

@@ -1,10 +1,6 @@
# Anthropic
ANTHROPIC_API_KEY=...
# Async
ASYNCAI_API_KEY=...
ASYNCAI_VOICE_ID=...
# AWS
AWS_SECRET_ACCESS_KEY=...
AWS_ACCESS_KEY_ID=...
@@ -29,9 +25,6 @@ CARTESIA_API_KEY=...
DAILY_API_KEY=...
DAILY_SAMPLE_ROOM_URL=https://...
# Deepgram
DEEPGRAM_API_KEY=...
# ElevenLabs
ELEVENLABS_API_KEY=...
ELEVENLABS_VOICE_ID=...
@@ -47,13 +40,6 @@ FIREWORKS_API_KEY=...
# Gladia
GLADIA_API_KEY=...
GLADIA_REGION=...
# Google
GOOGLE_API_KEY=...
GOOGLE_CLOUD_PROJECT_ID=...
GOOGLE_TEST_CREDENTIALS=...
GOOGLE_VERTEX_TEST_CREDENTIALS=...
# LMNT
LMNT_API_KEY=...
@@ -90,9 +76,6 @@ GROQ_API_KEY=...
# Grok
GROK_API_KEY=...
# Inworld
INWORLD_API_KEY=...
# Together.ai
TOGETHER_API_KEY=...
@@ -132,11 +115,9 @@ SONIOX_API_KEY=
# Speechmatics
SPEECHMATICS_API_KEY=...
# SambaNova
SAMBANOVA_API_KEY=...
# Sentry
SENTRY_DSN=...
# Heygen
HEYGEN_API_KEY=...

View File

@@ -1,31 +1,88 @@
# Pipecat Examples
This directory contains examples to help you learn how to build with Pipecat.
## Getting Started
# Pipecat &mdash; Examples
New to Pipecat? Start here:
## Foundational snippets
Small snippets that build on each other, introducing one or two concepts at a time.
- **[Quickstart](quickstart/)** - Get your first voice AI bot running in 5 minutes _(coming soon)_
- **[Client/Server Web](client-server-web/)** - Learn to build web applications with Pipecat's client SDKs _(coming soon)_
- **[Phone Bot with Twilio](phone-bot-twilio/)** - Connect your bot to a phone number _(coming soon)_
➡️ [Take a look](https://github.com/pipecat-ai/pipecat/tree/main/examples/foundational)
## Foundational Examples
## Chatbot examples
Collection of self-contained real-time voice and video AI demo applications built with Pipecat.
Single-file examples that introduce core Pipecat concepts one at a time. These examples:
### Quickstart
- Build on each other progressively
- Focus on specific features or integrations
- Are used for testing with every Pipecat release
Each project has its own set of dependencies and configuration variables. They intentionally avoids shared code across projects &mdash; you can grab whichever demo folder you want to work with as a starting point.
See the **[Foundational Examples README](foundational/)** for the complete list.
We recommend you start with a virtual environment:
## More Advanced Examples
```shell
cd pipecat-ai/examples/simple-chatbot
Ready to explore complex use cases? Visit **[pipecat-examples](https://github.com/pipecat-ai/pipecat-examples)** for:
python -m venv venv
- Production-ready applications
- Multi-platform client implementations
- Telephony integrations
- Multimodal and creative applications
- Deployment and monitoring examples
source venv/bin/activate
pip install -r requirements.txt
```
Next, follow the steps in the README for each demo.
Make sure you `pip install -r requirements.txt` for each demo project, so you can be sure to have the necessary service dependencies that extend the functionality of Pipecat. You can read more about the framework architecture [here](https://github.com/pipecat-ai/pipecat/tree/main/docs).
## Projects:
| Project | Description | Services |
|----------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------|
| [Simple Chatbot](simple-chatbot) | Basic voice-driven conversational bot. A good starting point for learning the flow of the framework. | Deepgram, ElevenLabs, OpenAI, Daily, Daily Prebuilt UI |
| [Storytelling Chatbot](storytelling-chatbot) | Stitches together multiple third-party services to create a collaborative storytime experience. | Deepgram, ElevenLabs, OpenAI, Fal, Daily, Custom UI |
| [Translation Chatbot](translation-chatbot) | Listens for user speech, then translates that speech to Spanish and speaks the translation back. Demonstrates multi-participant use-cases. | Deepgram, Azure, OpenAI, Daily, Daily Prebuilt UI |
| [Moondream Chatbot](moondream-chatbot) | Demonstrates how to add vision capabilities to GPT4. **Note: works best with a GPU** | Deepgram, ElevenLabs, OpenAI, Moondream, Daily, Daily Prebuilt UI |
| [Patient intake](patient-intake) | A chatbot that can call functions in response to user input. | Deepgram, ElevenLabs, OpenAI, Daily, Daily Prebuilt UI |
| [Phone Chatbot](phone-chatbot) | A chatbot that connects to PSTN/SIP phone calls, powered by Daily or Twilio. | Deepgram, ElevenLabs, OpenAI, Daily, Twilio |
| [Twilio Chatbot](twilio-chatbot) | A chatbot that connects to an incoming phone call from Twilio. | Deepgram, ElevenLabs, OpenAI, Daily, Twilio |
| [studypal](studypal) | A chatbot to have a conversation about any article on the web | |
| [WebSocket Chatbot Server](websocket-server) | A real-time websocket server that handles audio streaming and bot interactions with speech-to-text and text-to-speech capabilities. | Cartesia, Deepgram, OpenAI, Websockets |
> [!IMPORTANT]
> These example projects use Daily as a WebRTC transport and can be joined using their hosted Prebuilt UI.
> It provides a quick way to join a real-time session with your bot and test your ideas without building any frontend code. If you'd like to see an example of a custom UI, try Storybot.
## FAQ
### Deployment
For each of these demos we've included a `Dockerfile`. Out of the box, this should provide everything needed to get the respective demo running on a VM:
```shell
docker build username/app:tag .
docker run -p 7860:7860 --env-file ./.env username/app:tag
docker push ...
```
### SSL
If you're working with a custom UI (such as with the Storytelling Chatbot), it's important to ensure your deployment platform supports HTTPS, as accessing user devices such as mics and webcams requires SSL.
If you try to run a custom UI without SSL, you may see an error in the console telling you that `navigator` is undefined, or no devices are available.
### Are these examples production ready?
Yes, kind of.
These demos attempt to keep things simple and are unopinionated regarding environment or scalability.
We're using FastAPI to spawn a subprocess for the bots / agents &mdash; useful for small tests, but not so great for production grade apps with many concurrent users. You can see how this works in each project's `start` endpoint in `server.py`.
Creating virtualized worker pools and on-demand instances is out of scope for these examples, but we hope to add some examples to this repo soon!
For projects that have CUDA as a requirement, such as Moondream Chatbot, be sure to deploy to a GPU-powered platform (such as [fly.io](https://fly.io) or [Runpod](https://runpod.io).)
## Getting help
➡️ [Join our Discord](https://discord.gg/pipecat)
➡️ [Reach us on Twitter](https://x.com/pipecat_ai)

View File

@@ -0,0 +1,60 @@
# AWS Strands Examples
This folder contains two Python examples demonstrating how to use Pipecat with the AWS Strands agent.
## Overview
These examples show how to delegate complex, multi-step tasks to a Strands agent, which can reason step-by-step and call tools to accomplish user requests.
These examples are intentionally simplified for demonstration, using mock API calls. They work best if you ask it:
> What's the weather where the Golden Gate Bridge is?
## Example Scripts
### `black-box.py`
A minimal example that demonstrates how to use the Strands agent with Pipecat. The agent can handle multi-step queries by calling tools, but does not explain its reasoning out loud.
### `explain-thinking.py`
An enhanced example where the Strands agent explains each step of its reasoning in clear, simple language as it works through a multi-step task.
## Quick Start
1. **Clone the repository and navigate to this example:**
```bash
git clone https://github.com/pipecat-ai/pipecat.git
cd pipecat/examples/aws-strands
```
2. **Set up a virtual environment:**
```bash
python -m venv venv
source venv/bin/activate # On Windows: venv\Scripts\activate
```
3. **Install dependencies:**
```bash
pip install -r requirements.txt
```
4. **Configure environment variables:**
Copy the provided `env.example` file to `.env` and fill in the necessary credentials:
```bash
cp env.example .env
# Then edit .env with your preferred editor
```
5. **Run an example:**
```bash
python black-box.py
# or
python explain-thinking.py
```

View File

@@ -0,0 +1,206 @@
#
# Copyright (c) 2025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import argparse
import asyncio
import os
from dotenv import load_dotenv
from loguru import logger
from strands import Agent, tool
from strands.models import BedrockModel
from pipecat.adapters.schemas.tools_schema import ToolsSchema
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import TTSSpeakFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.cartesia.tts import CartesiaTTSService
from pipecat.services.deepgram.stt import DeepgramSTTService
from pipecat.services.llm_service import FunctionCallParams
from pipecat.services.openai.llm import OpenAILLMService
from pipecat.transports.base_transport import BaseTransport, TransportParams
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
from pipecat.transports.services.daily import DailyParams
load_dotenv(override=True)
"""This example demonstrates how to use the Strands agent with Pipecat.
You can delegate complex, multi-step tasks to the Strands agent, which can cycle through LLM-based reasoning and tool calls to accomplish the task.
Try asking: "What's the weather where the Golden Gate Bridge is?"
"""
# Strands agent tools
@tool
def get_location_name_from_landmark(landmark: str) -> str:
"""
Get the location name from a landmark.
Args:
landmark (str): The name of the landmark, e.g. "Golden Gate Bridge".
"""
# Simulate fetching location
return "San Francisco, CA"
@tool
def get_lat_long_from_location_name(location: str) -> dict:
"""
Get the latitude and longitude for a location name.
Args:
location (str): The city and state, e.g. "San Francisco, CA".
"""
# Simulate fetching lat/long from a geocoding service
return {"lat": 37.7749, "long": -122.4194}
@tool
def get_current_weather_from_lat_long(lat: float, long: float) -> dict:
"""
Get the current weather for a specific latitude and longitude.
Args:
lat (float): The latitude of the location.
long (float): The longitude of the location.
"""
# Simulate fetching weather data from a weather service
return {"conditions": "nice", "temperature": "75"}
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
# instantiated. The function will be called when the desired transport gets
# selected.
transport_params = {
"daily": lambda: DailyParams(
audio_in_enabled=True,
audio_out_enabled=True,
vad_analyzer=SileroVADAnalyzer(),
),
"twilio": lambda: FastAPIWebsocketParams(
audio_in_enabled=True,
audio_out_enabled=True,
vad_analyzer=SileroVADAnalyzer(),
),
"webrtc": lambda: TransportParams(
audio_in_enabled=True,
audio_out_enabled=True,
vad_analyzer=SileroVADAnalyzer(),
),
}
async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool):
logger.info(f"Starting bot")
strands_agent = Agent(
model=BedrockModel(
model_id="us.anthropic.claude-3-7-sonnet-20250219-v1:0", max_tokens=64000
),
tools=[
get_location_name_from_landmark,
get_lat_long_from_location_name,
get_current_weather_from_lat_long,
],
system_prompt="""
You are a helpful personal assistant who can look up information about places and weather.
Your key capabilities:
1. Look up where landmarks are located.
2. Find latitude and longitude for a location.
3. Look up the current weather for a specific latitude and longitude.
Explain each step of your reasoning in clear, simple, and concise language. Your responses will be converted to audio, so avoid special characters and numbered lists.
""",
)
async def handle_location_or_weather_related_queries(params: FunctionCallParams, query: str):
"""
Handle location or weather related queries.
Args:
query (str): The user's query, e.g. "What's the weather where the Golden Gate Bridge is?".
"""
# Run in a background thread
# (Otherwise the agent blocks the event loop; one effect of that is that we don't hear
# "let me check on that" until the agent finishes)
loop = asyncio.get_running_loop()
result = await loop.run_in_executor(None, strands_agent, query)
await params.result_callback(result.message)
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
tts = CartesiaTTSService(
api_key=os.getenv("CARTESIA_API_KEY"),
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
)
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
llm.register_direct_function(handle_location_or_weather_related_queries)
@llm.event_handler("on_function_calls_started")
async def on_function_calls_started(service, function_calls):
await tts.queue_frame(TTSSpeakFrame("Let me check on that."))
tools = ToolsSchema(standard_tools=[handle_location_or_weather_related_queries])
messages = [
{
"role": "system",
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way. Start by suggesting that the user ask about the weather where the Golden Gate Bridge is.",
},
]
context = OpenAILLMContext(messages, tools)
context_aggregator = llm.create_context_aggregator(context)
pipeline = Pipeline(
[
transport.input(),
stt,
context_aggregator.user(),
llm,
tts,
transport.output(),
context_aggregator.assistant(),
]
)
task = PipelineTask(
pipeline,
params=PipelineParams(
enable_metrics=True,
enable_usage_metrics=True,
),
)
@transport.event_handler("on_client_connected")
async def on_client_connected(transport, client):
logger.info(f"Client connected")
# Kick off the conversation.
await task.queue_frames([context_aggregator.user().get_context_frame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=handle_sigint)
await runner.run(task)
if __name__ == "__main__":
from pipecat.runner.local import main
main(run_example, transport_params=transport_params)

View File

@@ -0,0 +1,8 @@
OPENAI_API_KEY=
CARTESIA_API_KEY=
DEEPGRAM_API_KEY=
DAILY_API_KEY=
DAILY_SAMPLE_ROOM_URL=
AWS_SECRET_ACCESS_KEY=
AWS_ACCESS_KEY_ID=
AWS_REGION=

View File

@@ -0,0 +1,249 @@
#
# Copyright (c) 2025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import argparse
import asyncio
import os
import threading
import time
from dotenv import load_dotenv
from loguru import logger
from strands import Agent, tool
from strands.models import BedrockModel
from pipecat.adapters.schemas.tools_schema import ToolsSchema
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import TTSSpeakFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.cartesia.tts import CartesiaTTSService
from pipecat.services.deepgram.stt import DeepgramSTTService
from pipecat.services.llm_service import FunctionCallParams
from pipecat.services.openai.llm import OpenAILLMService
from pipecat.transports.base_transport import BaseTransport, TransportParams
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
from pipecat.transports.services.daily import DailyParams
load_dotenv(override=True)
"""This example demonstrates how to use the Strands agent with Pipecat in a way where the agent explains its reasoning step-by-step.
You can delegate complex, multi-step tasks to the Strands agent, which can cycle through LLM-based reasoning and tool calls to accomplish the task.
Try asking: "What's the weather where the Golden Gate Bridge is?"
"""
# Strands agent tools
@tool
def get_location_name_from_landmark(landmark: str) -> str:
"""
Get the location name from a landmark.
Args:
landmark (str): The name of the landmark, e.g. "Golden Gate Bridge".
"""
# Simulate fetching location (slowly)
time.sleep(3)
return "San Francisco, CA"
@tool
def get_lat_long_from_location_name(location: str) -> dict:
"""
Get the latitude and longitude for a location name.
Args:
location (str): The city and state, e.g. "San Francisco, CA".
"""
# Simulate fetching lat/long from a geocoding service (slowly)
time.sleep(3)
return {"lat": 37.7749, "long": -122.4194}
@tool
def get_current_weather_from_lat_long(lat: float, long: float) -> dict:
"""
Get the current weather for a specific latitude and longitude.
Args:
lat (float): The latitude of the location.
long (float): The longitude of the location.
"""
# Simulate fetching weather data from a weather service (slowly)
time.sleep(3)
return {"conditions": "nice", "temperature": "75"}
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
# instantiated. The function will be called when the desired transport gets
# selected.
transport_params = {
"daily": lambda: DailyParams(
audio_in_enabled=True,
audio_out_enabled=True,
vad_analyzer=SileroVADAnalyzer(),
),
"twilio": lambda: FastAPIWebsocketParams(
audio_in_enabled=True,
audio_out_enabled=True,
vad_analyzer=SileroVADAnalyzer(),
),
"webrtc": lambda: TransportParams(
audio_in_enabled=True,
audio_out_enabled=True,
vad_analyzer=SileroVADAnalyzer(),
),
}
async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool):
logger.info(f"Starting bot")
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
tts = CartesiaTTSService(
api_key=os.getenv("CARTESIA_API_KEY"),
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
)
next_strands_message_is_last = False
strands_messages_queue = asyncio.Queue()
def strands_callback_handler(**kwargs):
"""
Handle events from the Strands agent.
"""
nonlocal next_strands_message_is_last
if "event" in kwargs:
event_obj = kwargs["event"]
if event_obj and "messageStop" in event_obj:
message_stop = event_obj["messageStop"]
if message_stop and "stopReason" in message_stop:
stop_reason = message_stop["stopReason"]
if stop_reason == "end_turn":
next_strands_message_is_last = True
elif "message" in kwargs:
message_obj = kwargs["message"]
if message_obj and "content" in message_obj and "role" in message_obj:
role = message_obj["role"]
content = message_obj["content"]
if role == "assistant" and isinstance(content, list):
for content_obj in content:
if isinstance(content_obj, dict) and "text" in content_obj:
message = content_obj["text"]
if not next_strands_message_is_last:
strands_messages_queue.put_nowait(message)
async def process_strands_messages():
while True:
message = await strands_messages_queue.get()
await tts.queue_frame(TTSSpeakFrame(message))
strands_messages_queue.task_done()
asyncio.create_task(process_strands_messages())
strands_agent = Agent(
model=BedrockModel(
model_id="us.anthropic.claude-3-7-sonnet-20250219-v1:0", max_tokens=64000
),
tools=[
get_location_name_from_landmark,
get_lat_long_from_location_name,
get_current_weather_from_lat_long,
],
system_prompt="""
You are a helpful personal assistant who can look up information about places and weather.
Your key capabilities:
1. Look up where landmarks are located.
2. Find latitude and longitude for a location.
3. Look up the current weather for a specific latitude and longitude.
Explain each step of your reasoning in clear, simple, and concise language. Your responses will be converted to audio, so avoid special characters and numbered lists.
""",
callback_handler=strands_callback_handler,
)
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
async def handle_location_or_weather_related_queries(params: FunctionCallParams, query: str):
"""
Handle location or weather related queries.
Args:
query (str): The user's query, e.g. "What's the weather where the Golden Gate Bridge is?".
"""
# Run in a background thread
# (Otherwise the agent blocks the event loop; one effect of that is that we don't hear
# the agent's "thinking" messages until the agent finishes)
loop = asyncio.get_running_loop()
result = await loop.run_in_executor(None, strands_agent, query)
await params.result_callback(result.message)
llm.register_direct_function(handle_location_or_weather_related_queries)
@llm.event_handler("on_function_calls_started")
async def on_function_calls_started(service, function_calls):
await tts.queue_frame(TTSSpeakFrame("Let me check on that."))
tools = ToolsSchema(standard_tools=[handle_location_or_weather_related_queries])
messages = [
{
"role": "system",
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way. Start by suggesting that the user ask about the weather where the Golden Gate Bridge is.",
},
]
context = OpenAILLMContext(messages, tools)
context_aggregator = llm.create_context_aggregator(context)
pipeline = Pipeline(
[
transport.input(),
stt,
context_aggregator.user(),
llm,
tts,
transport.output(),
context_aggregator.assistant(),
]
)
task = PipelineTask(
pipeline,
params=PipelineParams(
enable_metrics=True,
enable_usage_metrics=True,
),
)
@transport.event_handler("on_client_connected")
async def on_client_connected(transport, client):
logger.info(f"Client connected")
# Kick off the conversation.
await task.queue_frames([context_aggregator.user().get_context_frame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=handle_sigint)
await runner.run(task)
if __name__ == "__main__":
from pipecat.runner.local import main
main(run_example, transport_params=transport_params)

View File

@@ -0,0 +1,6 @@
fastapi
uvicorn
python-dotenv
pipecat-ai[webrtc,daily,deepgram,cartesia]
pipecat-ai-small-webrtc-prebuilt
strands-agents

View File

@@ -0,0 +1,45 @@
# Bot ready signaling
A simple Pipecat example demonstrating how to handle signaling between the client and the bot,
ensuring that the bot starts sending audio only when the client is available,
thereby avoiding the risk of cutting off the beginning of the audio.
## Quick Start
### First, start the bot server:
1. Navigate to the server directory:
```bash
cd server
```
2. Create and activate a virtual environment:
```bash
python3 -m venv venv
source venv/bin/activate # On Windows: venv\Scripts\activate
```
3. Install requirements:
```bash
pip install -r requirements.txt
```
4. Copy env.example to .env and configure:
- Add your API keys
5. Start the server:
```bash
python server.py
```
### Next, connect using the client app:
For client-side setup, refer to the [JavaScript Guide](client/javascript/README.md).
## Important Note
Ensure the bot server is running before using any client implementations.
## Requirements
- Python 3.10+
- Node.js 16+ (for JavaScript)
- Daily API key
- Cartesia API key
- Modern web browser with WebRTC support

View File

@@ -0,0 +1,27 @@
# JavaScript Implementation
Basic implementation using the [Pipecat JavaScript SDK](https://docs.pipecat.ai/client/js/introduction).
## Setup
1. Run the bot server. See the [server README](../../README).
2. Navigate to the `client/javascript` directory:
```bash
cd client/javascript
```
3. Install dependencies:
```bash
npm install
```
4. Run the client app:
```
npm run dev
```
5. Visit http://localhost:5173 in your browser.

View File

@@ -0,0 +1,34 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>AI Chatbot</title>
</head>
<body>
<div class="container">
<div class="status-bar">
<div class="status">
Status: <span id="connection-status">Disconnected</span>
</div>
<div class="controls">
<button id="connect-btn">Connect</button>
<button id="disconnect-btn" disabled>Disconnect</button>
</div>
</div>
<audio id="bot-audio" autoplay></audio>
<div class="debug-panel">
<h3>Debug Info</h3>
<div id="debug-log"></div>
</div>
</div>
<script type="module" src="/src/app.js"></script>
<link rel="stylesheet" href="/src/style.css">
</body>
</html>

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,20 @@
{
"name": "client",
"version": "1.0.0",
"main": "index.js",
"scripts": {
"dev": "vite",
"build": "vite build",
"preview": "vite preview"
},
"keywords": [],
"author": "",
"license": "ISC",
"description": "",
"devDependencies": {
"vite": "^6.3.5"
},
"dependencies": {
"@daily-co/daily-js": "0.74.0"
}
}

View File

@@ -0,0 +1,216 @@
/**
* Copyright (c) 20242025, Daily
*
* SPDX-License-Identifier: BSD 2-Clause License
*/
import Daily from "@daily-co/daily-js";
/**
* ChatbotClient handles the connection and media management for a real-time
* voice interaction with an AI bot.
*/
class ChatbotClient {
constructor() {
// Initialize client state
this.dailyCallObject = null;
this.setupDOMElements();
this.setupEventListeners();
}
/**
* Set up references to DOM elements and create necessary media elements
*/
setupDOMElements() {
// Get references to UI control elements
this.connectBtn = document.getElementById('connect-btn');
this.disconnectBtn = document.getElementById('disconnect-btn');
this.statusSpan = document.getElementById('connection-status');
this.debugLog = document.getElementById('debug-log');
// Create an audio element for bot's voice output
this.botAudio = document.createElement('audio');
this.botAudio.autoplay = true;
this.botAudio.playsInline = true;
document.body.appendChild(this.botAudio);
}
/**
* Set up event listeners for connect/disconnect buttons
*/
setupEventListeners() {
this.connectBtn.addEventListener('click', () => this.connect());
this.disconnectBtn.addEventListener('click', () => this.disconnect());
}
/**
* Add a timestamped message to the debug log
*/
log(message) {
const entry = document.createElement('div');
entry.textContent = `${new Date().toISOString()} - ${message}`;
// Add styling based on message type
if (message.startsWith('User: ')) {
entry.style.color = '#2196F3'; // blue for user
} else if (message.startsWith('Bot: ')) {
entry.style.color = '#4CAF50'; // green for bot
}
this.debugLog.appendChild(entry);
this.debugLog.scrollTop = this.debugLog.scrollHeight;
console.log(message);
}
/**
* Update the connection status display
*/
updateStatus(status) {
this.statusSpan.textContent = status;
this.log(`Status: ${status}`);
}
handleEventToConsole (evt) {
this.log(`Received event: ${evt.action}`);
};
/**
* Set up listeners for track events (start/stop)
* This handles new tracks being added during the session
*/
setupTrackListeners() {
if (!this.dailyCallObject) return;
this.dailyCallObject.on("joined-meeting", () => {
this.updateStatus('Connected');
this.connectBtn.disabled = true;
this.disconnectBtn.disabled = false;
this.log('Client connected');
});
this.dailyCallObject.on("track-started", (evt) => {
if (evt.track.kind === "audio" && evt.participant.local === false) {
this.log("Audio track started.")
this.setupAudioTrack(evt.track);
}
});
this.dailyCallObject.on("track-stopped", this.handleEventToConsole.bind(this));
this.dailyCallObject.on("participant-joined", this.handleEventToConsole.bind(this));
this.dailyCallObject.on("participant-updated", this.handleEventToConsole.bind(this));
this.dailyCallObject.on("participant-left", () => {
// When the bot leaves, we are also disconnecting from the call
this.disconnect()
});
this.dailyCallObject.on("left-meeting", () => {
this.updateStatus('Disconnected');
this.connectBtn.disabled = false;
this.disconnectBtn.disabled = true;
this.log('Client disconnected');
});
this.dailyCallObject.on("error", this.handleEventToConsole.bind(this));
}
/**
* Set up an audio track for playback
* Handles both initial setup and track updates
*/
setupAudioTrack(track) {
this.log(`Setting up audio track, track state: ${track.readyState}, muted: ${track.muted}`);
// Check if we're already playing this track
if (this.botAudio.srcObject) {
const oldTrack = this.botAudio.srcObject.getAudioTracks()[0];
if (oldTrack?.id === track.id) return;
}
// Create a new MediaStream with the track and set it as the audio source
this.botAudio.srcObject = new MediaStream([track]);
this.botAudio.onplaying = async (event) => {
this.log("onplaying")
this.log("Will send the audio message to play the audio at the next tick")
this.dailyCallObject.sendAppMessage("playable")
}
}
async fetchRoomInfo() {
let connectUrl = '/connect'
let res = await fetch(connectUrl, {
method: "POST",
mode: "cors",
headers: new Headers({
"Content-Type": "application/json"
}),
})
if (res.ok) {
return res.json();
}
}
/**
* Initialize and connect to the bot
* This sets up the RTVI client, initializes devices, and establishes the connection
*/
async connect() {
try {
// Initialize the client
this.dailyCallObject = Daily.createCallObject({
subscribeToTracksAutomatically: true,
});
// Set up listeners for media track events
this.setupTrackListeners();
this.log('Creating the bot...');
let roomInfo = await this.fetchRoomInfo()
// Connect to the bot
this.log('Connecting to bot...');
// Only for making debugger easier
window.callObject = this.dailyCallObject;
await this.dailyCallObject.join({
url: roomInfo.room_url,
});
this.log('Connection complete');
} catch (error) {
// Handle any errors during connection
this.log(`Error connecting: ${error.message}`);
this.log(`Error stack: ${error.stack}`);
this.updateStatus('Error');
// Clean up if there's an error
if (this.dailyCallObject) {
try {
await this.dailyCallObject.leave();
} catch (disconnectError) {
this.log(`Error during disconnect: ${disconnectError.message}`);
}
}
}
}
/**
* Disconnect from the bot and clean up media resources
*/
async disconnect() {
if (this.dailyCallObject) {
try {
// Disconnect the RTVI client
await this.dailyCallObject.leave();
await this.dailyCallObject.destroy();
this.dailyCallObject = null;
// Clean up audio
if (this.botAudio.srcObject) {
this.botAudio.srcObject.getTracks().forEach((track) => track.stop());
this.botAudio.srcObject = null;
}
} catch (error) {
this.log(`Error disconnecting: ${error.message}`);
}
}
}
}
// Initialize the client when the page loads
window.addEventListener('DOMContentLoaded', () => {
new ChatbotClient();
});

View File

@@ -0,0 +1,98 @@
body {
margin: 0;
padding: 20px;
font-family: Arial, sans-serif;
background-color: #f0f0f0;
}
.container {
max-width: 1200px;
margin: 0 auto;
}
.status-bar {
display: flex;
justify-content: space-between;
align-items: center;
padding: 10px;
background-color: #fff;
border-radius: 8px;
margin-bottom: 20px;
}
.controls button {
padding: 8px 16px;
margin-left: 10px;
border: none;
border-radius: 4px;
cursor: pointer;
}
#connect-btn {
background-color: #4caf50;
color: white;
}
#disconnect-btn {
background-color: #f44336;
color: white;
}
button:disabled {
opacity: 0.5;
cursor: not-allowed;
}
.main-content {
background-color: #fff;
border-radius: 8px;
padding: 20px;
margin-bottom: 20px;
}
.bot-container {
display: flex;
flex-direction: column;
align-items: center;
}
#bot-video-container {
width: 640px;
height: 360px;
background-color: #e0e0e0;
border-radius: 8px;
margin: 20px auto;
overflow: hidden;
display: flex;
align-items: center;
justify-content: center;
}
#bot-video-container video {
width: 100%;
height: 100%;
object-fit: cover;
}
.debug-panel {
background-color: #fff;
border-radius: 8px;
padding: 20px;
}
.debug-panel h3 {
margin: 0 0 10px 0;
font-size: 16px;
font-weight: bold;
}
#debug-log {
height: 200px;
overflow-y: auto;
background-color: #f8f8f8;
padding: 10px;
border-radius: 4px;
font-family: monospace;
font-size: 12px;
line-height: 1.4;
}

View File

@@ -0,0 +1,13 @@
import { defineConfig } from 'vite';
export default defineConfig({
server: {
proxy: {
// Proxy /api requests to the backend server
'/connect': {
target: 'http://0.0.0.0:7860', // Replace with your backend URL
changeOrigin: true,
},
},
},
});

View File

@@ -0,0 +1 @@
22.14

View File

@@ -0,0 +1,60 @@
# React Native Implementation
Basic implementation using the [Pipecat React Native SDK](https://docs.pipecat.ai/client/react-native/introduction).
## Usage
### Expo requirements
This project cannot be used with an [Expo Go](https://docs.expo.dev/workflow/expo-go/) app because [it requires custom native code](https://docs.expo.io/workflow/customizing/).
When a project requires custom native code or a config plugin, we need to transition from using [Expo Go](https://docs.expo.dev/workflow/expo-go/)
to a [development build](https://docs.expo.dev/development/introduction/).
More details about the custom native code used by this demo can be found in [rn-daily-js-expo-config-plugin](https://github.com/daily-co/rn-daily-js-expo-config-plugin).
### Building remotely
If you do not have experience with Xcode and Android Studio builds or do not have them installed locally on your computer, you will need to follow [this guide from Expo to use EAS Build](https://docs.expo.dev/development/create-development-builds/#create-and-install-eas-build).
### Building locally
You will need to have installed locally on your computer:
- [Xcode](https://developer.apple.com/xcode/) to build for iOS;
- [Android Studio](https://developer.android.com/studio) to build for Android;
#### Install the demo dependencies
```bash
# Use the version of node specified in .nvmrc
nvm i
# Install dependencies
npm i
# Before a native app can be compiled, the native source code must be generated.
npx expo prebuild
# Configure the environment variable to connect to the local server
cp env.example .env
# edit .env and add your local ip address, for example: http://192.168.1.16:7860
```
#### Running on Android
After plugging in an Android device [configured for debugging](https://developer.android.com/studio/debug/dev-options), run the following command:
```
npm run android
```
#### Running on iOS
Run the following command:
```
npm run ios
```
#### Connect to the server
Use the http://localhost:5173 in your app.

View File

@@ -0,0 +1,75 @@
{
"expo": {
"name": "bot-ready-rn",
"slug": "bot-ready-rn",
"version": "1.0.0",
"orientation": "portrait",
"icon": "./assets/icon.png",
"userInterfaceStyle": "light",
"splash": {
"image": "./assets/splash.png",
"resizeMode": "contain",
"backgroundColor": "#ffffff"
},
"updates": {
"fallbackToCacheTimeout": 0
},
"assetBundlePatterns": [
"**/*"
],
"ios": {
"supportsTablet": true,
"bitcode": false,
"bundleIdentifier": "co.daily.expo.BotReady",
"infoPlist": {
"UIBackgroundModes": [
"voip"
]
},
"appleTeamId": "EEBGKV9N3N"
},
"android": {
"adaptiveIcon": {
"foregroundImage": "./assets/adaptive-icon.png",
"backgroundColor": "#FFFFFF"
},
"package": "co.daily.expo.BotReady",
"permissions": [
"android.permission.ACCESS_NETWORK_STATE",
"android.permission.BLUETOOTH",
"android.permission.CAMERA",
"android.permission.INTERNET",
"android.permission.MODIFY_AUDIO_SETTINGS",
"android.permission.RECORD_AUDIO",
"android.permission.SYSTEM_ALERT_WINDOW",
"android.permission.WAKE_LOCK",
"android.permission.FOREGROUND_SERVICE",
"android.permission.FOREGROUND_SERVICE_CAMERA",
"android.permission.FOREGROUND_SERVICE_MICROPHONE",
"android.permission.FOREGROUND_SERVICE_MEDIA_PROJECTION",
"android.permission.POST_NOTIFICATIONS"
]
},
"web": {
"favicon": "./assets/favicon.png"
},
"plugins": [
"@config-plugins/react-native-webrtc",
"@daily-co/config-plugin-rn-daily-js",
[
"expo-build-properties",
{
"android": {
"minSdkVersion": 24,
"compileSdkVersion": 35,
"targetSdkVersion": 34,
"buildToolsVersion": "35.0.0"
},
"ios": {
"deploymentTarget": "15.1"
}
}
]
]
}
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

View File

@@ -0,0 +1,7 @@
module.exports = function(api) {
api.cache(true);
return {
presets: ['babel-preset-expo'],
plugins: [["module:react-native-dotenv"]],
};
};

View File

@@ -0,0 +1 @@
API_BASE_URL=http://YOUR_LOCAL_IP:7860

View File

@@ -0,0 +1,7 @@
import { registerRootComponent } from "expo";
import App from "./src/App";
// registerRootComponent calls AppRegistry.registerComponent('main', () => App);
// It also ensures that the environment is set up appropriately
registerRootComponent(App);

View File

@@ -0,0 +1,4 @@
// Learn more https://docs.expo.io/guides/customizing-metro
const { getDefaultConfig } = require('expo/metro-config');
module.exports = getDefaultConfig(__dirname);

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,31 @@
{
"name": "bot-ready-rn",
"version": "1.0.0",
"scripts": {
"start": "expo start --dev-client",
"android": "expo run:android --device",
"ios": "expo run:ios --device",
"web": "expo start --web"
},
"dependencies": {
"@config-plugins/react-native-webrtc": "^10.0.0",
"@daily-co/config-plugin-rn-daily-js": "0.0.7",
"@daily-co/react-native-daily-js": "^0.70.0",
"@daily-co/react-native-webrtc": "^118.0.3-daily.2",
"@react-native-async-storage/async-storage": "1.23.1",
"expo": "^52.0.0",
"expo-build-properties": "~0.13.1",
"expo-dev-client": "~5.0.5",
"expo-splash-screen": "~0.29.16",
"expo-status-bar": "~2.0.0",
"react": "18.3.1",
"react-native": "0.76.3",
"react-native-background-timer": "^2.4.1",
"react-native-dotenv": "^3.4.11",
"react-native-get-random-values": "^1.11.0"
},
"devDependencies": {
"@babel/core": "^7.12.9"
},
"private": true
}

View File

@@ -0,0 +1,121 @@
import React, { useState, useEffect } from 'react';
import {SafeAreaView, View, Text, Button, StyleSheet, ScrollView} from 'react-native';
import Daily from "@daily-co/react-native-daily-js";
import { API_BASE_URL } from "@env";
const CallScreen = () => {
const [connectionStatus, setConnectionStatus] = useState('Disconnected');
const [isConnected, setIsConnected] = useState(false);
const [callObject, setCallObject] = useState(null);
const [logs, setLogs] = useState([]);
useEffect(() => {
if (callObject) {
setupTrackListeners(callObject);
}
}, [callObject]);
const log = (message) => {
setLogs((prevLogs) => [...prevLogs, `${new Date().toISOString()} - ${message}`]);
console.log(message);
};
const setupTrackListeners = (callObject) => {
callObject.on("joined-meeting", () => {
setConnectionStatus('Connected');
setIsConnected(true);
log('Client connected');
});
callObject.on("left-meeting", () => {
setConnectionStatus('Disconnected');
setIsConnected(false);
log('Client disconnected');
});
callObject.on("participant-left", () => {
// When the bot leaves, we are also disconnecting from the call
disconnect().catch((err) => {
log(`Failed to disconnect ${err}`);
})
});
// Trigger so the bot can start sending audio
callObject.on("track-started", (evt) => {
if (evt.track.kind === "audio" && evt.participant.local === false) {
handleEventToConsole(evt)
log("Sending the message that will trigger the bot to play the audio.")
callObject.sendAppMessage("playable")
}
});
callObject.on("error", (evt) => log(`Error: ${evt.error}`));
// Other events just for awareness
callObject.on("track-stopped", handleEventToConsole);
callObject.on("participant-joined", handleEventToConsole);
callObject.on("participant-updated", handleEventToConsole);
};
const handleEventToConsole = (evt) => {
log(`Received event: ${evt.action}`);
};
const connect = async () => {
try {
const callObject = Daily.createCallObject({ subscribeToTracksAutomatically: true });
setCallObject(callObject);
const connectionUrl = `${API_BASE_URL}/connect`
const res = await fetch(connectionUrl, { method: "POST", headers: { "Content-Type": "application/json" } });
const roomInfo = await res.json();
await callObject.join({ url: roomInfo.room_url });
} catch (error) {
log(`Error connecting: ${error.message}`);
}
};
const disconnect = async () => {
if (callObject) {
try {
await callObject.leave();
await callObject.destroy();
setCallObject(null);
} catch (error) {
log(`Error disconnecting: ${error.message}`);
}
}
};
return (
<SafeAreaView style={styles.safeArea}>
<View style={styles.container}>
<View style={styles.statusBar}>
<Text>Status: <Text style={styles.status}>{connectionStatus}</Text></Text>
<View style={styles.controls}>
<Button
title={isConnected ? "Disconnect" : "Connect"}
onPress={isConnected ? disconnect : connect}
/>
</View>
</View>
<View style={styles.debugPanel}>
<Text style={styles.debugTitle}>Debug Info</Text>
<ScrollView style={styles.debugLog}>
{logs.map((logEntry, index) => (
<Text key={index} style={styles.logText}>{logEntry}</Text>
))}
</ScrollView>
</View>
</View>
</SafeAreaView>
);
};
const styles = StyleSheet.create({
safeArea: { flex: 1, backgroundColor: '#f0f0f0', padding: 20 },
container: { flex: 1, margin: 20 },
statusBar: { flexDirection: 'row', justifyContent: 'space-between', alignItems: 'center', padding: 10, backgroundColor: '#fff', borderRadius: 8, marginBottom: 20 },
status: { fontWeight: 'bold' },
controls: { flexDirection: 'row', gap: 10 },
debugPanel: { height: '80%', backgroundColor: '#fff', borderRadius: 8, padding: 20},
debugTitle: { fontSize: 16, fontWeight: 'bold' },
debugLog: { height: '100%', overflow: 'scroll', backgroundColor: '#f8f8f8', padding: 10, borderRadius: 4, fontFamily: 'monospace', fontSize: 12, lineHeight: 1.4 },
});
export default CallScreen;

View File

@@ -0,0 +1,50 @@
# Bot ready signaling Server
A FastAPI server that manages bot instances and provide endpoint for Pipecat client connections.
## Endpoints
- `POST /connect` - Pipecat client connection endpoint
## Environment Variables
Copy `env.example` to `.env` and configure:
```ini
# Required API Keys
DAILY_API_KEY= # Your Daily API key
CARTESIA_API_KEY= # Your Cartesia API key
# Optional Configuration
DAILY_API_URL= # Optional: Daily API URL (defaults to https://api.daily.co/v1)
DAILY_SAMPLE_ROOM_URL= # Optional: Fixed room URL for development
HOST= # Optional: Host address (defaults to 0.0.0.0)
FAST_API_PORT= # Optional: Port number (defaults to 7860)
```
## Running the Server
Set up and activate your virtual environment:
```bash
python3 -m venv venv
source venv/bin/activate # On Windows: venv\Scripts\activate
```
Install dependencies:
```bash
pip install -r requirements.txt
```
If you want to use the local version of `pipecat` in this repo rather than the last published version, also run:
```bash
pip install --editable "../../../[daily,cartesia,openai]"
```
Run the server:
```bash
python server.py
```

View File

@@ -0,0 +1,3 @@
DAILY_SAMPLE_ROOM_URL=https://yourdomain.daily.co/yourroom # (for joining the bot to the same room repeatedly for local dev)
DAILY_API_KEY=
CARTESIA_API_KEY=

View File

@@ -0,0 +1,4 @@
python-dotenv
fastapi[all]
uvicorn
pipecat-ai[daily,cartesia,openai]

View File

@@ -0,0 +1,64 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import argparse
import os
from typing import Optional
import aiohttp
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
async def configure(aiohttp_session: aiohttp.ClientSession):
(url, token, _) = await configure_with_args(aiohttp_session)
return (url, token)
async def configure_with_args(
aiohttp_session: aiohttp.ClientSession, parser: Optional[argparse.ArgumentParser] = None
):
if not parser:
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
parser.add_argument(
"-u", "--url", type=str, required=False, help="URL of the Daily room to join"
)
parser.add_argument(
"-k",
"--apikey",
type=str,
required=False,
help="Daily API Key (needed to create an owner token for the room)",
)
args, unknown = parser.parse_known_args()
url = args.url or os.getenv("DAILY_SAMPLE_ROOM_URL")
key = args.apikey or os.getenv("DAILY_API_KEY")
if not url:
raise Exception(
"No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL."
)
if not key:
raise Exception(
"No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers."
)
daily_rest_helper = DailyRESTHelper(
daily_api_key=key,
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
aiohttp_session=aiohttp_session,
)
# Create a meeting token for the given room with an expiration 1 hour in
# the future.
expiry_time: float = 60 * 60
token = await daily_rest_helper.get_token(url, expiry_time)
return (url, token, args)

View File

@@ -0,0 +1,147 @@
#
# Copyright (c) 2025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import argparse
import os
import subprocess
from contextlib import asynccontextmanager
from typing import Any, Dict
import aiohttp
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper, DailyRoomParams
# Load environment variables from .env file
load_dotenv(override=True)
# Dictionary to track bot processes: {pid: (process, room_url)}
bot_procs = {}
# Store Daily API helpers
daily_helpers = {}
def cleanup():
"""Cleanup function to terminate all bot processes.
Called during server shutdown.
"""
for entry in bot_procs.values():
proc = entry[0]
proc.terminate()
proc.wait()
@asynccontextmanager
async def lifespan(app: FastAPI):
"""FastAPI lifespan manager that handles startup and shutdown tasks.
- Creates aiohttp session
- Initializes Daily API helper
- Cleans up resources on shutdown
"""
aiohttp_session = aiohttp.ClientSession()
daily_helpers["rest"] = DailyRESTHelper(
daily_api_key=os.getenv("DAILY_API_KEY", ""),
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
aiohttp_session=aiohttp_session,
)
yield
await aiohttp_session.close()
cleanup()
# Initialize FastAPI app with lifespan manager
app = FastAPI(lifespan=lifespan)
# Configure CORS to allow requests from any origin
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
async def create_room_and_token() -> tuple[str, str]:
"""Helper function to create a Daily room and generate an access token.
Returns:
tuple[str, str]: A tuple containing (room_url, token)
Raises:
HTTPException: If room creation or token generation fails
"""
room = await daily_helpers["rest"].create_room(DailyRoomParams())
if not room.url:
raise HTTPException(status_code=500, detail="Failed to create room")
token = await daily_helpers["rest"].get_token(room.url)
if not token:
raise HTTPException(status_code=500, detail=f"Failed to get token for room: {room.url}")
return room.url, token
@app.post("/connect")
async def bot_connect(request: Request) -> Dict[Any, Any]:
"""Connect endpoint that creates a room and returns connection credentials.
This endpoint is called by client to establish a connection.
Returns:
Dict[Any, Any]: Authentication bundle containing room_url and token
Raises:
HTTPException: If room creation, token generation, or bot startup fails
"""
print("Creating room for RTVI connection")
room_url, token = await create_room_and_token()
print(f"Room URL: {room_url}")
# Start the bot process
try:
bot_file = "signalling_bot"
proc = subprocess.Popen(
[f"python3 -m {bot_file} -u {room_url} -t {token}"],
shell=True,
bufsize=1,
cwd=os.path.dirname(os.path.abspath(__file__)),
)
bot_procs[proc.pid] = (proc, room_url)
except Exception as e:
raise HTTPException(status_code=500, detail=f"Failed to start subprocess: {e}")
# Return the authentication bundle in format expected by DailyTransport
return {"room_url": room_url, "token": token}
if __name__ == "__main__":
import uvicorn
# Parse command line arguments for server configuration
default_host = os.getenv("HOST", "0.0.0.0")
default_port = int(os.getenv("FAST_API_PORT", "7860"))
parser = argparse.ArgumentParser(description="Daily Travel Companion FastAPI server")
parser.add_argument("--host", type=str, default=default_host, help="Host address")
parser.add_argument("--port", type=int, default=default_port, help="Port number")
parser.add_argument("--reload", action="store_true", help="Reload code on change")
config = parser.parse_args()
# Start the FastAPI server
uvicorn.run(
"server:app",
host=config.host,
port=config.port,
reload=config.reload,
)

View File

@@ -0,0 +1,95 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import os
import sys
from dataclasses import dataclass
import aiohttp
from dotenv import load_dotenv
from loguru import logger
from runner import configure
from pipecat.frames.frames import AudioRawFrame, EndFrame, OutputAudioRawFrame, TTSSpeakFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineTask
from pipecat.services.cartesia.tts import CartesiaTTSService
from pipecat.transports.services.daily import DailyParams, DailyTransport
load_dotenv(override=True)
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
@dataclass
class SilenceFrame(OutputAudioRawFrame):
def __init__(
self,
*,
sample_rate: int,
duration: float,
):
# Initialize the parent class with the silent frame's data
super().__init__(
audio=self.create_silent_audio_frame(sample_rate, 1, duration).audio,
sample_rate=sample_rate,
num_channels=1,
)
@staticmethod
def create_silent_audio_frame(
sample_rate: int, num_channels: int, duration: float
) -> AudioRawFrame:
"""Create an AudioRawFrame containing silence."""
frame_size = num_channels * 2 # 2 bytes per sample for 16-bit audio
total_frames = int(sample_rate * duration)
total_bytes = total_frames * frame_size
silent_audio = bytes(total_bytes) # Create a byte array filled with zeros
return AudioRawFrame(audio=silent_audio, sample_rate=sample_rate, num_channels=num_channels)
async def main():
async with aiohttp.ClientSession() as session:
(room_url, _) = await configure(session)
transport = DailyTransport(
room_url, None, "Say One Thing", DailyParams(audio_out_enabled=True)
)
tts = CartesiaTTSService(
api_key=os.getenv("CARTESIA_API_KEY"),
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
)
runner = PipelineRunner()
task = PipelineTask(Pipeline([tts, transport.output()]))
# Register an event handler so we can play the audio when we receive a specific message
@transport.event_handler("on_app_message")
async def on_app_message(transport, message, sender):
logger.debug(f"Received app message: {message} - {sender}")
if "playable" not in message:
return
await task.queue_frames(
[
SilenceFrame(
sample_rate=task.params.audio_out_sample_rate,
duration=0.5,
),
TTSSpeakFrame(f"Hello there, how are you doing today ?"),
EndFrame(),
]
)
await runner.run(task)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -0,0 +1,161 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
runpod.toml

View File

@@ -0,0 +1,15 @@
FROM python:3.10-bullseye
RUN mkdir /app
RUN mkdir /app/assets
RUN mkdir /app/utils
COPY *.py /app/
COPY requirements.txt /app/
WORKDIR /app
RUN pip3 install -r requirements.txt
EXPOSE 7860
CMD ["python3", "server.py"]

View File

@@ -0,0 +1,37 @@
# Simple Chatbot
<img src="image.png" width="420px">
This app connects you to a chatbot powered by GPT-4, complete with animations generated by Stable Video Diffusion.
See a video of it in action: https://x.com/kwindla/status/1778628911817183509
And a quick video walkthrough of the code: https://www.loom.com/share/13df1967161f4d24ade054e7f8753416
The first time, things might take extra time to get started since VAD (Voice Activity Detection) model needs to be downloaded.
## Get started
```python
python3 -m venv venv
source venv/bin/activate
pip install -r requirements.txt
cp env.example .env # and add your credentials
```
## Run the server
```bash
python server.py
```
Then, visit `http://localhost:7860/` in your browser to start a chatbot session.
## Build and test the Docker image
```
docker build -t chatbot .
docker run --env-file .env -p 7860:7860 chatbot
```

View File

@@ -0,0 +1,170 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import datetime
import io
import os
import sys
import wave
import aiofiles
import aiohttp
from dotenv import load_dotenv
from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
from pipecat.services.openai.llm import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
load_dotenv(override=True)
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
# Create the recordings directory if it doesn't exist
os.makedirs("recordings", exist_ok=True)
async def save_audio(audio: bytes, sample_rate: int, num_channels: int, name: str):
if len(audio) > 0:
filename = os.path.join(
"recordings",
f"{name}_conversation_recording{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.wav",
)
with io.BytesIO() as buffer:
with wave.open(buffer, "wb") as wf:
wf.setsampwidth(2)
wf.setnchannels(num_channels)
wf.setframerate(sample_rate)
wf.writeframes(audio)
async with aiofiles.open(filename, "wb") as file:
await file.write(buffer.getvalue())
print(f"Merged audio saved to {filename}")
else:
print("No audio data to save")
async def main():
async with aiohttp.ClientSession() as session:
(room_url, token) = await configure(session)
transport = DailyTransport(
room_url,
token,
"Chatbot",
DailyParams(
audio_out_enabled=True,
audio_in_enabled=True,
video_out_enabled=False,
vad_analyzer=SileroVADAnalyzer(),
transcription_enabled=True,
#
# Spanish
#
# transcription_settings=DailyTranscriptionSettings(
# language="es",
# tier="nova",
# model="2-general"
# )
),
)
tts = ElevenLabsTTSService(
api_key=os.getenv("ELEVENLABS_API_KEY"),
#
# English
#
voice_id="cgSgspJ2msm6clMCkdW9",
#
# Spanish
#
# model="eleven_multilingual_v2",
# voice_id="gD1IexrzCvsXPHUuT0s3",
)
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
messages = [
{
"role": "system",
#
# English
#
"content": "You are Chatbot, a friendly, helpful robot. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by introducing yourself. Keep all your response to 12 words or fewer.",
#
# Spanish
#
# "content": "Eres Chatbot, un amigable y útil robot. Tu objetivo es demostrar tus capacidades de una manera breve. Tus respuestas se convertiran a audio así que nunca no debes incluir caracteres especiales. Contesta a lo que el usuario pregunte de una manera creativa, útil y breve. Empieza por presentarte a ti mismo.",
},
]
context = OpenAILLMContext(messages)
context_aggregator = llm.create_context_aggregator(context)
# NOTE: Watch out! This will save all the conversation in memory. You
# can pass `buffer_size` to get periodic callbacks.
audiobuffer = AudioBufferProcessor(enable_turn_audio=True)
pipeline = Pipeline(
[
transport.input(), # microphone
context_aggregator.user(),
llm,
tts,
transport.output(),
audiobuffer, # used to buffer the audio in the pipeline
context_aggregator.assistant(),
]
)
task = PipelineTask(
pipeline,
params=PipelineParams(
audio_in_sample_rate=16000,
audio_out_sample_rate=16000,
enable_metrics=True,
enable_usage_metrics=True,
),
)
@audiobuffer.event_handler("on_audio_data")
async def on_audio_data(buffer, audio, sample_rate, num_channels):
await save_audio(audio, sample_rate, num_channels, "full")
@audiobuffer.event_handler("on_user_turn_audio_data")
async def on_user_turn_audio_data(buffer, audio, sample_rate, num_channels):
await save_audio(audio, sample_rate, num_channels, "user")
@audiobuffer.event_handler("on_bot_turn_audio_data")
async def on_bot_turn_audio_data(buffer, audio, sample_rate, num_channels):
await save_audio(audio, sample_rate, num_channels, "bot")
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
await audiobuffer.start_recording()
await transport.capture_participant_transcription(participant["id"])
await task.queue_frames([context_aggregator.user().get_context_frame()])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
print(f"Participant left: {participant}")
await task.cancel()
runner = PipelineRunner()
await runner.run(task)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -0,0 +1,4 @@
DAILY_SAMPLE_ROOM_URL=https://yourdomain.daily.co/yourroom # (for joining the bot to the same room repeatedly for local dev)
DAILY_API_KEY=7df...
OPENAI_API_KEY=sk-PL...
ELEVENLABS_API_KEY=aeb...

View File

@@ -0,0 +1,5 @@
aiofiles
python-dotenv
fastapi[all]
uvicorn
pipecat-ai[daily,openai,silero,elevenlabs]

View File

@@ -0,0 +1,55 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import argparse
import os
import aiohttp
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
async def configure(aiohttp_session: aiohttp.ClientSession):
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
parser.add_argument(
"-u", "--url", type=str, required=False, help="URL of the Daily room to join"
)
parser.add_argument(
"-k",
"--apikey",
type=str,
required=False,
help="Daily API Key (needed to create an owner token for the room)",
)
args, unknown = parser.parse_known_args()
url = args.url or os.getenv("DAILY_SAMPLE_ROOM_URL")
key = args.apikey or os.getenv("DAILY_API_KEY")
if not url:
raise Exception(
"No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL."
)
if not key:
raise Exception(
"No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers."
)
daily_rest_helper = DailyRESTHelper(
daily_api_key=key,
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
aiohttp_session=aiohttp_session,
)
# Create a meeting token for the given room with an expiration 1 hour in
# the future.
expiry_time: float = 60 * 60
token = await daily_rest_helper.get_token(url, expiry_time)
return (url, token)

View File

@@ -0,0 +1,139 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import argparse
import os
import subprocess
from contextlib import asynccontextmanager
import aiohttp
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, RedirectResponse
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper, DailyRoomParams
MAX_BOTS_PER_ROOM = 1
# Bot sub-process dict for status reporting and concurrency control
bot_procs = {}
daily_helpers = {}
load_dotenv(override=True)
def cleanup():
# Clean up function, just to be extra safe
for entry in bot_procs.values():
proc = entry[0]
proc.terminate()
proc.wait()
@asynccontextmanager
async def lifespan(app: FastAPI):
aiohttp_session = aiohttp.ClientSession()
daily_helpers["rest"] = DailyRESTHelper(
daily_api_key=os.getenv("DAILY_API_KEY", ""),
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
aiohttp_session=aiohttp_session,
)
yield
await aiohttp_session.close()
cleanup()
app = FastAPI(lifespan=lifespan)
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
@app.get("/")
async def start_agent(request: Request):
print(f"!!! Creating room")
room = await daily_helpers["rest"].create_room(DailyRoomParams())
print(f"!!! Room URL: {room.url}")
# Ensure the room property is present
if not room.url:
raise HTTPException(
status_code=500,
detail="Missing 'room' property in request data. Cannot start agent without a target room!",
)
# Check if there is already an existing process running in this room
num_bots_in_room = sum(
1 for proc in bot_procs.values() if proc[1] == room.url and proc[0].poll() is None
)
if num_bots_in_room >= MAX_BOTS_PER_ROOM:
raise HTTPException(status_code=500, detail=f"Max bot limited reach for room: {room.url}")
# Get the token for the room
token = await daily_helpers["rest"].get_token(room.url)
if not token:
raise HTTPException(status_code=500, detail=f"Failed to get token for room: {room.url}")
# Spawn a new agent, and join the user session
# Note: this is mostly for demonstration purposes (refer to 'deployment' in README)
try:
proc = subprocess.Popen(
[f"python3 -m bot -u {room.url} -t {token}"],
shell=True,
bufsize=1,
cwd=os.path.dirname(os.path.abspath(__file__)),
)
bot_procs[proc.pid] = (proc, room.url)
except Exception as e:
raise HTTPException(status_code=500, detail=f"Failed to start subprocess: {e}")
return RedirectResponse(room.url)
@app.get("/status/{pid}")
def get_status(pid: int):
# Look up the subprocess
proc = bot_procs.get(pid)
# If the subprocess doesn't exist, return an error
if not proc:
raise HTTPException(status_code=404, detail=f"Bot with process id: {pid} not found")
# Check the status of the subprocess
if proc[0].poll() is None:
status = "running"
else:
status = "finished"
return JSONResponse({"bot_id": pid, "status": status})
if __name__ == "__main__":
import uvicorn
default_host = os.getenv("HOST", "0.0.0.0")
default_port = int(os.getenv("FAST_API_PORT", "7860"))
parser = argparse.ArgumentParser(description="Daily Storyteller FastAPI server")
parser.add_argument("--host", type=str, default=default_host, help="Host address")
parser.add_argument("--port", type=int, default=default_port, help="Port number")
parser.add_argument("--reload", action="store_true", help="Reload code on change")
config = parser.parse_args()
uvicorn.run(
"server:app",
host=config.host,
port=config.port,
reload=config.reload,
)

View File

@@ -0,0 +1,39 @@
# Daily Custom Tracks
This example shows how to send and receive Daily custom tracks. We will run a simple `daily-python` application to send an audio file with a custom track (named "pipecat") to a room. Then, the Pipecat bot will mirror that custom track into another custom track (named "pipecat-mirror") in the same room.
## Get started
```python
python3 -m venv venv
source venv/bin/activate
pip install -r requirements.txt
```
## Run the bot
Start the bot by giving it a Daily room URL.
```bash
python bot.py -u ROOM_URL
```
The bot will wait for the first participant to join. Then, it will mirror a custom track named "pipecat" into a new custom track named "pipecat-mirror".
## Run the sender
Now, run the custom track sender. This is a simple `daily-python` application that opens and audio file and sends it as a custom track to the same Daily room.
```bash
python custom_track_sender.py -u ROOM_URL -i office-ambience-mono-16000.mp3
```
## Open client
Finally, open the client so you can hear both custom tracks.
```bash
open index.html
```
Once the client is opened, copy the URL of the Daily room and join it. You should be able to select which custom track you want to hear.

View File

@@ -0,0 +1,89 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import sys
import aiohttp
from loguru import logger
from runner import configure
from pipecat.frames.frames import Frame, InputAudioRawFrame, OutputAudioRawFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.transports.services.daily import DailyParams, DailyTransport
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
class CustomTrackMirrorProcessor(FrameProcessor):
def __init__(self, transport_destination: str, **kwargs):
super().__init__(**kwargs)
self._transport_destination = transport_destination
async def process_frame(self, frame: Frame, direction: FrameDirection):
await super().process_frame(frame, direction)
if isinstance(frame, InputAudioRawFrame) and frame.transport_source:
output_frame = OutputAudioRawFrame(
audio=frame.audio,
sample_rate=frame.sample_rate,
num_channels=frame.num_channels,
)
output_frame.transport_destination = self._transport_destination
await self.push_frame(output_frame)
else:
await self.push_frame(frame, direction)
async def main():
async with aiohttp.ClientSession() as session:
(room_url, _) = await configure(session)
transport = DailyTransport(
room_url,
None,
"Custom tracks mirror",
DailyParams(
audio_in_enabled=True,
audio_out_enabled=True,
microphone_out_enabled=False, # Disable since we just use custom tracks
audio_out_destinations=["pipecat-mirror"],
),
)
pipeline = Pipeline(
[
transport.input(), # Transport user input
CustomTrackMirrorProcessor("pipecat-mirror"),
transport.output(), # Transport bot output
]
)
task = PipelineTask(
pipeline,
params=PipelineParams(
audio_in_sample_rate=16000,
audio_out_sample_rate=16000,
enable_metrics=True,
enable_usage_metrics=True,
),
)
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
await transport.capture_participant_audio(participant["id"], audio_source="pipecat")
runner = PipelineRunner()
await runner.run(task)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -0,0 +1,74 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import argparse
import time
from daily import CallClient, CustomAudioSource, Daily
from pydub import AudioSegment
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
parser.add_argument("-u", "--url", type=str, required=True, help="URL of the Daily room to join")
parser.add_argument(
"-i", "--input", type=str, required=True, help="Input audio file (needs 16000 sample rate)"
)
args, _ = parser.parse_known_args()
audio = AudioSegment.from_mp3(args.input)
raw_bytes = audio.raw_data
sample_rate = audio.frame_rate
channels = audio.channels
print(f"Length: {len(raw_bytes)} bytes")
print(f"Sample rate: {sample_rate}, Channels: {channels}")
# Initialize the Daily context & create call client
Daily.init()
client = CallClient()
# Join the room and indicate we have a custom track named "pipecat".
client.join(
args.url,
client_settings={
"publishing": {
"camera": False,
"microphone": False,
"customAudio": {"pipecat": True},
},
},
)
# Just sleep for a couple of seconds. To do this well we should really use
# completions.
time.sleep(2)
# Create the custom audio source. This is where we will write our audio.
audio_source = CustomAudioSource(sample_rate, channels)
# Create an audio track and assign it our audio source.
client.add_custom_audio_track("pipecat", audio_source)
# Just sleep for a second. To do this well we should really use completions.
time.sleep(1)
try:
# Just write one second of audio until we have read all the file.
chunk_size = sample_rate * channels * 2
while len(raw_bytes) > 0:
chunk = raw_bytes[:chunk_size]
raw_bytes = raw_bytes[chunk_size:]
audio_source.write_frames(chunk)
except KeyboardInterrupt:
client.leave()
# Just sleep for a second. To do this well we should really use completions.
time.sleep(1)
client.release()

View File

@@ -0,0 +1,173 @@
<html>
<head>
<title>daily custom tracks</title>
</head>
<script crossorigin src="https://unpkg.com/@daily-co/daily-js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/fomantic-ui/2.8.6/semantic.min.js"></script>
<link
rel="stylesheet"
type="text/css"
href="https://cdnjs.cloudflare.com/ajax/libs/fomantic-ui/2.8.6/semantic.min.css"
/>
<script>
function enableButton(buttonId, enable) {
const button = document.getElementById(buttonId);
button.disabled = !enable;
}
function enableJoinButton(enable) {
enableButton("join-button", enable);
}
function enableLeaveButton(enable) {
enableButton("leave-button", enable);
}
function destroyPlayers(query) {
const items = document.querySelectorAll(query);
if (items) {
for (const item of items) {
item.remove();
}
}
}
function destroyParticipantPlayers(participantId) {
destroyPlayers(`audio[data-participant-id="${participantId}"]`);
destroyPlayers(`button[data-participant-id="${participantId}"]`);
}
async function startPlayer(player, track) {
player.muted = false;
player.autoplay = true;
if (track != null) {
player.srcObject = new MediaStream([track]);
}
}
async function buildAudioPlayer(track, participantId) {
const audioContainer = document.getElementById("audio-container");
const player = document.createElement("audio");
player.dataset.participantId = participantId;
// Create a new button for controlling audio
const audioControlButton = document.createElement("button");
audioControlButton.className = "ui primary green button"
audioControlButton.innerText = track._mediaTag == "cam-audio" ? "english" : track._mediaTag;
audioControlButton.dataset.participantId = participantId;
audioControlButton.onclick = () => {
if (player.paused) {
player.play();
audioControlButton.className = "ui primary red button"
} else {
player.pause();
audioControlButton.className = "ui primary green button"
}
};
audioContainer.appendChild(player);
audioContainer.appendChild(audioControlButton);
await startPlayer(player, track);
player.pause()
return player;
}
function subscribeToTracks(participantId) {
console.log(`subscribing to track`);
if (participantId === "local") {
return;
}
callObject.updateParticipant(participantId, {
setSubscribedTracks: {
audio: true,
video: false,
custom: true,
},
});
}
function startDaily() {
enableJoinButton(true);
enableLeaveButton(false);
window.callObject = window.DailyIframe.createCallObject({});
callObject.on("participant-joined", (e) => {
if (!e.participant.local) {
console.log("participant-joined", e.participant);
subscribeToTracks(e.participant.session_id);
}
});
callObject.on("participant-left", (e) => {
console.log("participant-left", e.participant.session_id);
destroyParticipantPlayers(e.participant.session_id);
});
callObject.on("track-started", async (e) => {
console.log("track-started", e.track);
if (e.track.kind === "audio") {
await buildAudioPlayer(e.track, e.participant.session_id);
}
});
}
async function joinRoom() {
enableJoinButton(false);
enableLeaveButton(true);
const meetingUrl = document.getElementById("meeting-url").value;
callObject.join({
url: meetingUrl,
startVideoOff: true,
startAudioOff: true,
subscribeToTracksAutomatically: false,
receiveSettings: {
base: { video: { layer: 0 } },
},
});
}
async function leaveRoom() {
enableJoinButton(true);
enableLeaveButton(false);
callObject.leave();
const audioContainer = document.getElementById("audio-container");
audioContainer.replaceChildren();
}
</script>
<body onload="startDaily()">
<div class="ui centered page grid" style="margin-top: 30px">
<div class="ten wide column">
<div class="ui form" style="margin-top: 30px">
<div class="field">
<label>Meeting URL</label>
<input id="meeting-url" value="" />
</div>
</div>
</div>
</div>
<div class="ui centered aligned header" style="margin-top: 30px">
<button id="join-button" class="ui primary button" onclick="joinRoom()">
Join
</button>
<button id="leave-button" class="ui button" onclick="leaveRoom()">
Leave
</button>
</div>
<div id="tile" class="ui container" style="margin-top: 30px">
<div id="tile" class="ui center aligned grid">
<div id="audio-container"></div><br/>
</div>
</div>
</body>
</html>

View File

@@ -0,0 +1,2 @@
pydub
pipecat-ai[daily]

View File

@@ -0,0 +1,55 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import argparse
import os
import aiohttp
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
async def configure(aiohttp_session: aiohttp.ClientSession):
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
parser.add_argument(
"-u", "--url", type=str, required=False, help="URL of the Daily room to join"
)
parser.add_argument(
"-k",
"--apikey",
type=str,
required=False,
help="Daily API Key (needed to create an owner token for the room)",
)
args, unknown = parser.parse_known_args()
url = args.url or os.getenv("DAILY_SAMPLE_ROOM_URL")
key = args.apikey or os.getenv("DAILY_API_KEY")
if not url:
raise Exception(
"No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL."
)
if not key:
raise Exception(
"No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers."
)
daily_rest_helper = DailyRESTHelper(
daily_api_key=key,
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
aiohttp_session=aiohttp_session,
)
# Create a meeting token for the given room with an expiration 1 hour in
# the future.
expiry_time: float = 60 * 60
token = await daily_rest_helper.get_token(url, expiry_time)
return (url, token)

View File

@@ -0,0 +1,15 @@
FROM python:3.10-bullseye
RUN mkdir /app
RUN mkdir /app/assets
RUN mkdir /app/utils
COPY *.py /app/
COPY requirements.txt /app/
WORKDIR /app
RUN pip3 install -r requirements.txt
EXPOSE 7860
CMD ["python3", "server.py"]

View File

@@ -0,0 +1,39 @@
# Daily Multi Translation
This example shows how to use Daily to stream multiple simultaneous translations using a single transport. Daily provides custom tracks and in this example we will simultaneously translate incoming audio in English to Spanish, French and German, each of them being sent to a custom track.
## Get started
```python
python3 -m venv venv
source venv/bin/activate
pip install -r requirements.txt
cp env.example .env # and add your credentials
```
## Run the server
```bash
python server.py
```
Then, visit `http://localhost:7860/` in your browser. This will open a Daily Prebuilt room where you will speak in English (make sure you are not muted).
## Open client
Next, you need to open the client that will listen to the translations.
```bash
open index.html
```
Once the client is opened, copy the URL of the Daily room created above and join it. You should be able to select which translation you want to hear.
## Build and test the Docker image
```
docker build -t daily-multi-translation .
docker run --env-file .env -p 7860:7860 daily-multi-translation
```

View File

@@ -0,0 +1,163 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import os
import sys
import aiohttp
from dotenv import load_dotenv
from loguru import logger
from runner import configure
from pipecat.audio.mixers.soundfile_mixer import SoundfileMixer
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.observers.loggers.transcription_log_observer import TranscriptionLogObserver
from pipecat.pipeline.parallel_pipeline import ParallelPipeline
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.cartesia.tts import CartesiaTTSService
from pipecat.services.deepgram.stt import DeepgramSTTService
from pipecat.services.openai.llm import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
load_dotenv(override=True)
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
BACKGROUND_SOUND_FILE = "office-ambience-mono-16000.mp3"
async def main():
async with aiohttp.ClientSession() as session:
(room_url, token) = await configure(session)
transport = DailyTransport(
room_url,
token,
"Multi translation bot",
DailyParams(
audio_in_enabled=True,
audio_out_enabled=True,
audio_out_mixer={
"spanish": SoundfileMixer(
sound_files={"office": BACKGROUND_SOUND_FILE}, default_sound="office"
),
"french": SoundfileMixer(
sound_files={"office": BACKGROUND_SOUND_FILE}, default_sound="office"
),
"german": SoundfileMixer(
sound_files={"office": BACKGROUND_SOUND_FILE}, default_sound="office"
),
},
audio_out_destinations=["spanish", "french", "german"],
microphone_out_enabled=False, # Disable since we just use custom tracks
vad_analyzer=SileroVADAnalyzer(),
),
)
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
tts_spanish = CartesiaTTSService(
api_key=os.getenv("CARTESIA_API_KEY"),
voice_id="cefcb124-080b-4655-b31f-932f3ee743de",
transport_destination="spanish",
)
tts_french = CartesiaTTSService(
api_key=os.getenv("CARTESIA_API_KEY"),
voice_id="8832a0b5-47b2-4751-bb22-6a8e2149303d",
transport_destination="french",
)
tts_german = CartesiaTTSService(
api_key=os.getenv("CARTESIA_API_KEY"),
voice_id="38aabb6a-f52b-4fb0-a3d1-988518f4dc06",
transport_destination="german",
)
messages_spanish = [
{
"role": "system",
"content": "You will be provided with a sentence in English, and your task is to only translate it into Spanish.",
},
]
messages_french = [
{
"role": "system",
"content": "You will be provided with a sentence in English, and your task is to only translate it into French.",
},
]
messages_german = [
{
"role": "system",
"content": "You will be provided with a sentence in English, and your task is to only translate it into German.",
},
]
llm_spanish = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
llm_french = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
llm_german = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
context_spanish = OpenAILLMContext(messages_spanish)
context_aggregator_spanish = llm_spanish.create_context_aggregator(context_spanish)
context_french = OpenAILLMContext(messages_french)
context_aggregator_french = llm_french.create_context_aggregator(context_french)
context_german = OpenAILLMContext(messages_german)
context_aggregator_german = llm_german.create_context_aggregator(context_german)
pipeline = Pipeline(
[
transport.input(), # Transport user input
stt,
ParallelPipeline(
# Spanish pipeline.
[
context_aggregator_spanish.user(),
llm_spanish,
tts_spanish,
context_aggregator_spanish.assistant(),
],
# French pipeline.
[
context_aggregator_french.user(),
llm_french,
tts_french,
context_aggregator_french.assistant(),
],
# German pipeline.
[
context_aggregator_german.user(),
llm_german,
tts_german,
context_aggregator_german.assistant(),
],
),
transport.output(), # Transport bot output
]
)
task = PipelineTask(
pipeline,
params=PipelineParams(
audio_in_sample_rate=16000,
audio_out_sample_rate=16000,
enable_metrics=True,
enable_usage_metrics=True,
),
observers=[TranscriptionLogObserver()],
)
runner = PipelineRunner()
await runner.run(task)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -0,0 +1,5 @@
DAILY_SAMPLE_ROOM_URL=https://yourdomain.daily.co/yourroom # (for joining the bot to the same room repeatedly for local dev)
DAILY_API_KEY=7df...
OPENAI_API_KEY=sk-PL...
DEEPGRAM_API_KEY=efb...
CARTESIA_API_KEY=aeb...

View File

@@ -0,0 +1,202 @@
<html>
<head>
<title>daily multi translation</title>
</head>
<script crossorigin src="https://unpkg.com/@daily-co/daily-js"></script>
<script
src="https://code.jquery.com/jquery-3.1.1.min.js"
integrity="sha256-hVVnYaiADRTO2PzUGmuLJr8BLUSjGIZsDYGmIJLv2b8="
crossorigin="anonymous"
></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/fomantic-ui/2.8.6/semantic.min.js"></script>
<link
rel="stylesheet"
type="text/css"
href="https://cdnjs.cloudflare.com/ajax/libs/fomantic-ui/2.8.6/semantic.min.css"
/>
<script>
function enableButton(buttonId, enable) {
const button = document.getElementById(buttonId);
button.disabled = !enable;
}
function enableJoinButton(enable) {
enableButton("join-button", enable);
}
function enableLeaveButton(enable) {
enableButton("leave-button", enable);
}
function destroyPlayers(query) {
const items = document.querySelectorAll(query);
if (items) {
for (const item of items) {
item.remove();
}
}
}
function destroyParticipantPlayers(participantId) {
destroyPlayers(`video[data-participant-id="${participantId}"]`);
destroyPlayers(`audio[data-participant-id="${participantId}"]`);
destroyPlayers(`button[data-participant-id="${participantId}"]`);
}
async function startPlayer(player, track) {
player.muted = false;
player.autoplay = true;
if (track != null) {
player.srcObject = new MediaStream([track]);
}
}
async function buildVideoPlayer(track, participantId) {
const videoContainer = document.getElementById("video-container");
const player = document.createElement("video");
player.dataset.participantId = participantId;
videoContainer.appendChild(player);
await startPlayer(player, track);
await player.play();
return player;
}
async function buildAudioPlayer(track, participantId) {
const audioContainer = document.getElementById("audio-container");
const player = document.createElement("audio");
player.dataset.participantId = participantId;
// Create a new button for controlling audio
const audioControlButton = document.createElement("button");
audioControlButton.className = "ui primary green button"
audioControlButton.innerText = track._mediaTag == "cam-audio" ? "english" : track._mediaTag;
audioControlButton.dataset.participantId = participantId;
audioControlButton.onclick = () => {
if (player.paused) {
player.play();
audioControlButton.className = "ui primary red button"
} else {
player.pause();
audioControlButton.className = "ui primary green button"
}
};
audioContainer.appendChild(player);
audioContainer.appendChild(audioControlButton);
await startPlayer(player, track);
player.pause()
return player;
}
function subscribeToTracks(participantId) {
console.log(`subscribing to track`);
if (participantId === "local") {
return;
}
callObject.updateParticipant(participantId, {
setSubscribedTracks: {
audio: true,
video: true,
custom: true,
},
});
}
function startDaily() {
enableJoinButton(true);
enableLeaveButton(false);
window.callObject = window.DailyIframe.createCallObject({});
callObject.on("participant-joined", (e) => {
if (!e.participant.local) {
console.log("participant-joined", e.participant);
subscribeToTracks(e.participant.session_id);
}
});
callObject.on("participant-left", (e) => {
console.log("participant-left", e.participant.session_id);
destroyParticipantPlayers(e.participant.session_id);
});
callObject.on("track-started", async (e) => {
console.log("track-started", e.track);
if (e.track.kind === "video") {
await buildVideoPlayer(e.track, e.participant.session_id);
} else if (e.track.kind === "audio") {
await buildAudioPlayer(e.track, e.participant.session_id);
}
});
}
async function joinRoom() {
enableJoinButton(false);
enableLeaveButton(true);
const meetingUrl = document.getElementById("meeting-url").value;
callObject.join({
url: meetingUrl,
startVideoOff: true,
startAudioOff: true,
subscribeToTracksAutomatically: false,
receiveSettings: {
base: { video: { layer: 0 } },
},
});
}
async function leaveRoom() {
enableJoinButton(true);
enableLeaveButton(false);
callObject.leave();
const videoContainer = document.getElementById("video-container");
videoContainer.replaceChildren();
const audioContainer = document.getElementById("audio-container");
audioContainer.replaceChildren();
}
</script>
<body onload="startDaily()">
<div class="ui centered page grid" style="margin-top: 30px">
<div class="ten wide column">
<div class="ui form" style="margin-top: 30px">
<div class="field">
<label>Meeting URL</label>
<input id="meeting-url" value="" />
</div>
</div>
</div>
</div>
<div class="ui centered aligned header" style="margin-top: 30px">
<button id="join-button" class="ui primary button" onclick="joinRoom()">
Join
</button>
<button id="leave-button" class="ui button" onclick="leaveRoom()">
Leave
</button>
</div>
<div id="tile" class="ui container" style="margin-top: 30px">
<div id="tile" class="ui center aligned grid">
<div id="audio-container"></div><br/>
</div>
</div>
<div id="tile" class="ui container" style="margin-top: 30px">
<div id="tile" class="ui center aligned grid">
<div id="video-container" class="ui segment"></div>
</div>
</div>
</body>
</html>

View File

@@ -0,0 +1,5 @@
aiofiles
python-dotenv
fastapi[all]
uvicorn
pipecat-ai[daily,deepgram,openai,silero,cartesia,soundfile]

View File

@@ -0,0 +1,55 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import argparse
import os
import aiohttp
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
async def configure(aiohttp_session: aiohttp.ClientSession):
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
parser.add_argument(
"-u", "--url", type=str, required=False, help="URL of the Daily room to join"
)
parser.add_argument(
"-k",
"--apikey",
type=str,
required=False,
help="Daily API Key (needed to create an owner token for the room)",
)
args, unknown = parser.parse_known_args()
url = args.url or os.getenv("DAILY_SAMPLE_ROOM_URL")
key = args.apikey or os.getenv("DAILY_API_KEY")
if not url:
raise Exception(
"No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL."
)
if not key:
raise Exception(
"No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers."
)
daily_rest_helper = DailyRESTHelper(
daily_api_key=key,
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
aiohttp_session=aiohttp_session,
)
# Create a meeting token for the given room with an expiration 1 hour in
# the future.
expiry_time: float = 60 * 60
token = await daily_rest_helper.get_token(url, expiry_time)
return (url, token)

View File

@@ -0,0 +1,139 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import argparse
import os
import subprocess
from contextlib import asynccontextmanager
import aiohttp
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, RedirectResponse
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper, DailyRoomParams
MAX_BOTS_PER_ROOM = 1
# Bot sub-process dict for status reporting and concurrency control
bot_procs = {}
daily_helpers = {}
load_dotenv(override=True)
def cleanup():
# Clean up function, just to be extra safe
for entry in bot_procs.values():
proc = entry[0]
proc.terminate()
proc.wait()
@asynccontextmanager
async def lifespan(app: FastAPI):
aiohttp_session = aiohttp.ClientSession()
daily_helpers["rest"] = DailyRESTHelper(
daily_api_key=os.getenv("DAILY_API_KEY", ""),
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
aiohttp_session=aiohttp_session,
)
yield
await aiohttp_session.close()
cleanup()
app = FastAPI(lifespan=lifespan)
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
@app.get("/")
async def start_agent(request: Request):
print(f"!!! Creating room")
room = await daily_helpers["rest"].create_room(DailyRoomParams())
print(f"!!! Room URL: {room.url}")
# Ensure the room property is present
if not room.url:
raise HTTPException(
status_code=500,
detail="Missing 'room' property in request data. Cannot start agent without a target room!",
)
# Check if there is already an existing process running in this room
num_bots_in_room = sum(
1 for proc in bot_procs.values() if proc[1] == room.url and proc[0].poll() is None
)
if num_bots_in_room >= MAX_BOTS_PER_ROOM:
raise HTTPException(status_code=500, detail=f"Max bot limited reach for room: {room.url}")
# Get the token for the room
token = await daily_helpers["rest"].get_token(room.url)
if not token:
raise HTTPException(status_code=500, detail=f"Failed to get token for room: {room.url}")
# Spawn a new agent, and join the user session
# Note: this is mostly for demonstration purposes (refer to 'deployment' in README)
try:
proc = subprocess.Popen(
[f"python3 -m bot -u {room.url} -t {token}"],
shell=True,
bufsize=1,
cwd=os.path.dirname(os.path.abspath(__file__)),
)
bot_procs[proc.pid] = (proc, room.url)
except Exception as e:
raise HTTPException(status_code=500, detail=f"Failed to start subprocess: {e}")
return RedirectResponse(room.url)
@app.get("/status/{pid}")
def get_status(pid: int):
# Look up the subprocess
proc = bot_procs.get(pid)
# If the subprocess doesn't exist, return an error
if not proc:
raise HTTPException(status_code=404, detail=f"Bot with process id: {pid} not found")
# Check the status of the subprocess
if proc[0].poll() is None:
status = "running"
else:
status = "finished"
return JSONResponse({"bot_id": pid, "status": status})
if __name__ == "__main__":
import uvicorn
default_host = os.getenv("HOST", "0.0.0.0")
default_port = int(os.getenv("FAST_API_PORT", "7860"))
parser = argparse.ArgumentParser(description="Daily Storyteller FastAPI server")
parser.add_argument("--host", type=str, default=default_host, help="Host address")
parser.add_argument("--port", type=int, default=default_port, help="Port number")
parser.add_argument("--reload", action="store_true", help="Reload code on change")
config = parser.parse_args()
uvicorn.run(
"server:app",
host=config.host,
port=config.port,
reload=config.reload,
)

View File

@@ -0,0 +1,13 @@
FROM python:3.11-bullseye
# Open port 7860 for http service
ENV FAST_API_PORT=7860
EXPOSE 7860
# Install Python dependencies
COPY *.py .
COPY ./requirements.txt requirements.txt
RUN pip3 install --no-cache-dir --upgrade -r requirements.txt
# Start the FastAPI server
CMD python3 bot_runner.py --port ${FAST_API_PORT}

View File

@@ -0,0 +1,39 @@
# Fly.io deployment example
This project modifies the `bot_runner.py` server to launch a new machine for each user session. This is a recommended approach for production vs. running shell processess as your deployment will quickly run out of system resources under load.
For this example, we are using Daily as a WebRTC transport and provisioning a new room and token for each session. You can use another transport, such as WebSockets, by modifying the `bot.py` and `bot_runner.py` files accordingly.
## Setting up your fly.io deployment
### Create your fly.toml file
You can copy the `example-fly.toml` as a reference. Be sure to change the app name to something unique.
### Create your .env file
Copy the base `env.example` to `.env` and enter the necessary API keys.
`FLY_APP_NAME` should match that in the `fly.toml` file.
### Launch a new fly.io project
`fly launch` or `fly launch --org your-org-name`
### Set the necessary app secrets from your .env
Note: you can do this manually via the fly.io dashboard under the "secrets" sub-section of your deployment (e.g. "https://fly.io/apps/fly-app-name/secrets") or run the following terminal command:
`cat .env | tr '\n' ' ' | xargs flyctl secrets set`
### Deploy your machine
`fly deploy`
## Connecting to your bot
Send a post request to your running fly.io instance:
`curl --location --request POST 'https://YOUR_FLY_APP_NAME/'`
This request will wait until the machine enters into a `starting` state, before returning the a room URL and token to join.

View File

@@ -0,0 +1,113 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import argparse
import asyncio
import os
import sys
from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
from pipecat.services.openai.llm import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
load_dotenv(override=True)
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
daily_api_key = os.getenv("DAILY_API_KEY", "")
daily_api_url = os.getenv("DAILY_API_URL", "https://api.daily.co/v1")
async def main(room_url: str, token: str):
transport = DailyTransport(
room_url,
token,
"Chatbot",
DailyParams(
api_url=daily_api_url,
api_key=daily_api_key,
audio_in_enabled=True,
audio_out_enabled=True,
video_out_enabled=False,
vad_analyzer=SileroVADAnalyzer(),
transcription_enabled=True,
),
)
tts = ElevenLabsTTSService(
api_key=os.getenv("ELEVENLABS_API_KEY", ""),
voice_id=os.getenv("ELEVENLABS_VOICE_ID", ""),
)
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
messages = [
{
"role": "system",
"content": "You are Chatbot, a friendly, helpful robot. Your output will be converted to audio so don't include special characters other than '!' or '?' in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by saying hello.",
},
]
context = OpenAILLMContext(messages)
context_aggregator = llm.create_context_aggregator(context)
pipeline = Pipeline(
[
transport.input(),
context_aggregator.user(),
llm,
tts,
transport.output(),
context_aggregator.assistant(),
]
)
task = PipelineTask(
pipeline,
params=PipelineParams(
enable_metrics=True,
enable_usage_metrics=True,
),
)
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
await transport.capture_participant_transcription(participant["id"])
await task.queue_frames([context_aggregator.user().get_context_frame()])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.cancel()
@transport.event_handler("on_call_state_updated")
async def on_call_state_updated(transport, state):
if state == "left":
# Here we don't want to cancel, we just want to finish sending
# whatever is queued, so we use an EndFrame().
await task.queue_frame(EndFrame())
runner = PipelineRunner()
await runner.run(task)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Pipecat Bot")
parser.add_argument("-u", type=str, help="Room URL")
parser.add_argument("-t", type=str, help="Token")
config = parser.parse_args()
asyncio.run(main(config.u, config.t))

View File

@@ -0,0 +1,209 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import argparse
import os
import subprocess
from contextlib import asynccontextmanager
import aiohttp
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from pipecat.transports.services.helpers.daily_rest import (
DailyRESTHelper,
DailyRoomObject,
DailyRoomParams,
DailyRoomProperties,
)
load_dotenv(override=True)
# ------------ Configuration ------------ #
MAX_SESSION_TIME = 5 * 60 # 5 minutes
REQUIRED_ENV_VARS = [
"DAILY_API_KEY",
"OPENAI_API_KEY",
"ELEVENLABS_API_KEY",
"ELEVENLABS_VOICE_ID",
"FLY_API_KEY",
"FLY_APP_NAME",
]
FLY_API_HOST = os.getenv("FLY_API_HOST", "https://api.machines.dev/v1")
FLY_APP_NAME = os.getenv("FLY_APP_NAME", "pipecat-fly-example")
FLY_API_KEY = os.getenv("FLY_API_KEY", "")
FLY_HEADERS = {"Authorization": f"Bearer {FLY_API_KEY}", "Content-Type": "application/json"}
daily_helpers = {}
# ----------------- API ----------------- #
@asynccontextmanager
async def lifespan(app: FastAPI):
aiohttp_session = aiohttp.ClientSession()
daily_helpers["rest"] = DailyRESTHelper(
daily_api_key=os.getenv("DAILY_API_KEY", ""),
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
aiohttp_session=aiohttp_session,
)
yield
await aiohttp_session.close()
app = FastAPI(lifespan=lifespan)
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# ----------------- Main ----------------- #
async def spawn_fly_machine(room_url: str, token: str):
async with aiohttp.ClientSession() as session:
# Use the same image as the bot runner
async with session.get(
f"{FLY_API_HOST}/apps/{FLY_APP_NAME}/machines", headers=FLY_HEADERS
) as r:
if r.status != 200:
text = await r.text()
raise Exception(f"Unable to get machine info from Fly: {text}")
data = await r.json()
image = data[0]["config"]["image"]
# Machine configuration
cmd = f"python3 bot.py -u {room_url} -t {token}"
cmd = cmd.split()
worker_props = {
"config": {
"image": image,
"auto_destroy": True,
"init": {"cmd": cmd},
"restart": {"policy": "no"},
"guest": {"cpu_kind": "shared", "cpus": 1, "memory_mb": 1024},
},
}
# Spawn a new machine instance
async with session.post(
f"{FLY_API_HOST}/apps/{FLY_APP_NAME}/machines", headers=FLY_HEADERS, json=worker_props
) as r:
if r.status != 200:
text = await r.text()
raise Exception(f"Problem starting a bot worker: {text}")
data = await r.json()
# Wait for the machine to enter the started state
vm_id = data["id"]
async with session.get(
f"{FLY_API_HOST}/apps/{FLY_APP_NAME}/machines/{vm_id}/wait?state=started",
headers=FLY_HEADERS,
) as r:
if r.status != 200:
text = await r.text()
raise Exception(f"Bot was unable to enter started state: {text}")
print(f"Machine joined room: {room_url}")
@app.post("/")
async def start_bot(request: Request) -> JSONResponse:
try:
data = await request.json()
# Is this a webhook creation request?
if "test" in data:
return JSONResponse({"test": True})
except Exception as e:
pass
# Use specified room URL, or create a new one if not specified
room_url = os.getenv("DAILY_SAMPLE_ROOM_URL", "")
if not room_url:
params = DailyRoomParams(properties=DailyRoomProperties())
try:
room: DailyRoomObject = await daily_helpers["rest"].create_room(params=params)
except Exception as e:
raise HTTPException(status_code=500, detail=f"Unable to provision room {e}")
else:
# Check passed room URL exists, we should assume that it already has a sip set up
try:
room: DailyRoomObject = await daily_helpers["rest"].get_room_from_url(room_url)
except Exception:
raise HTTPException(status_code=500, detail=f"Room not found: {room_url}")
# Give the agent a token to join the session
token = await daily_helpers["rest"].get_token(room.url, MAX_SESSION_TIME)
if not room or not token:
raise HTTPException(status_code=500, detail=f"Failed to get token for room: {room_url}")
# Launch a new fly.io machine, or run as a shell process (not recommended)
run_as_process = os.getenv("RUN_AS_PROCESS", False)
if run_as_process:
try:
subprocess.Popen(
[f"python3 -m bot -u {room.url} -t {token}"],
shell=True,
bufsize=1,
cwd=os.path.dirname(os.path.abspath(__file__)),
)
except Exception as e:
raise HTTPException(status_code=500, detail=f"Failed to start subprocess: {e}")
else:
try:
await spawn_fly_machine(room.url, token)
except Exception as e:
raise HTTPException(status_code=500, detail=f"Failed to spawn VM: {e}")
# Grab a token for the user to join with
user_token = await daily_helpers["rest"].get_token(room.url, MAX_SESSION_TIME)
return JSONResponse(
{
"room_url": room.url,
"token": user_token,
}
)
if __name__ == "__main__":
# Check environment variables
for env_var in REQUIRED_ENV_VARS:
if env_var not in os.environ:
raise Exception(f"Missing environment variable: {env_var}.")
parser = argparse.ArgumentParser(description="Pipecat Bot Runner")
parser.add_argument(
"--host", type=str, default=os.getenv("HOST", "0.0.0.0"), help="Host address"
)
parser.add_argument("--port", type=int, default=os.getenv("PORT", 7860), help="Port number")
parser.add_argument(
"--reload", action="store_true", default=False, help="Reload code on change"
)
config = parser.parse_args()
try:
import uvicorn
uvicorn.run("bot_runner:app", host=config.host, port=config.port, reload=config.reload)
except KeyboardInterrupt:
print("Pipecat runner shutting down...")

View File

@@ -0,0 +1,8 @@
DAILY_API_KEY=
DAILY_SAMPLE_ROOM_URL= # Enter a Daily room URL to use a set room URL each time (useful for local testing)
OPENAI_API_KEY=
ELEVENLABS_API_KEY=
ELEVENLABS_VOICE_ID=
FLY_API_KEY=
FLY_APP_NAME=
RUN_AS_PROCESS= # Spawn fly.io machine for each session or run as local process

View File

@@ -0,0 +1,25 @@
# fly.toml app configuration file generated for pipecat-fly-example on 2024-07-01T15:04:53+01:00
#
# See https://fly.io/docs/reference/configuration/ for information about how to use this file.
#
app = 'pipecat-fly-example'
primary_region = 'sjc'
[build]
[env]
FLY_APP_NAME = 'pipecat-fly-example'
[http_service]
internal_port = 7860
force_https = true
auto_stop_machines = true
auto_start_machines = true
min_machines_running = 0
processes = ['app']
[[vm]]
memory = 512
cpu_kind = 'shared'
cpus = 1

View File

@@ -0,0 +1,5 @@
pipecat-ai[daily,openai,silero]
fastapi
uvicorn
python-dotenv
loguru

View File

@@ -0,0 +1,94 @@
# Modal clone
modal-examples
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
dist/
*.egg-info/
*.egg
.installed.cfg
.eggs/
downloads/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
MANIFEST
# Virtual Environments
venv/
env/
.env
.venv/
ENV/
env.bak/
venv.bak/
# IDE
.idea/
.vscode/
.spyderproject
.spyproject
.ropeproject
# Testing and Coverage
.coverage
.coverage.*
htmlcov/
.pytest_cache/
.tox/
.nox/
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
cover/
# Logs and Databases
*.log
*.db
db.sqlite3
db.sqlite3-journal
pip-log.txt
# System Files
.DS_Store
Thumbs.db
desktop.ini
*.swp
*.swo
*.bak
*.tmp
*~
# Build and Documentation
docs/_build/
.pybuilder/
target/
instance/
.webassets-cache
.pdm.toml
.pdm-python
.pdm-build/
__pypackages__/
# Other
*.mo
*.pot
*.sage.py
.mypy_cache/
.dmypy.json
dmypy.json
.pyre/
.pytype/
cython_debug/
.ipynb_checkpoints

View File

@@ -0,0 +1,91 @@
# Deploying Pipecat to Modal.com
Deployment example for [modal.com](https://www.modal.com). This example demonstrates how to deploy a FastAPI webapp to Modal with an RTVI compatible `/connect` endpoint that launches a Pipecat pipeline in a separate Modal container and returns a room/token for the client to join. This example also supports providing a parameter to the `/connect` endpoint for specifying which Pipecat pipeline to launch; openai, gemini, or vllm. The vllm pipeline points to a self-hosted OpenAI compatible LLM, using a llama model (neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w4a16), deployed to Modal.
![](diagram.jpg)
# Running this Example
## Install the Modal CLI
Setup a Modal account and install it on your machine if you have not already, following their easy 3 steps in their [Getting Started Guide](https://modal.com/docs/guide#getting-started)
## Deploy a self-serve LLM
1. Deploy Modal's OpenAI-compatible LLM service:
```bash
git clone https://github.com/modal-labs/modal-examples
cd modal-examples
modal deploy 06_gpu_and_ml/llm-serving/vllm_inference.py
```
Refer to Modal's guide and example for [Deploying an OpenAI-compatible LLM service with vLLM](https://modal.com/docs/examples/vllm_inference) for more details.
2. Take note of the endpoint URL from the previous step, which will look like:
```
https://{your-workspace}--example-vllm-openai-compatible-serve.modal.run
```
You'll need this for the `bot_vllm.py` file in the next section.
**Note:** The default Modal LLM example uses Llama-3.1 and will shut down after 15 minutes of inactivity. Cold starts take 5-10 minutes. To prepare the service, we recommend visiting the `/docs` endpoint (`https://<Modal workspace>--example-vllm-openai-compatible-serve.modal.run/docs`) for your deployed LLM and wait for it to fully load before connecting your client.
## Deploy FastAPI App and Pipecat pipeline to Modal
1. Setup environment variables
```bash
cd server
cp env.example .env
# Modify .env to provide your service API Keys
```
Alternatively, you can configure your Modal app to use [secrets](https://modal.com/docs/guide/secrets)
2. Update the `modal_url` in `server/src/bot_vllm.py` to point to the url produced from the self-serve llm deploy, mentioned above.
3. From within the `server` directory, test the app locally:
```bash
modal serve app.py
```
4. Deploy to production
```bash
modal deploy app.py
```
5. Note the endpoint URL produced from this deployment. It will look like:
```bash
https://{your-workspace}--pipecat-modal-fastapi-app.modal.run
```
You'll need this URL for the client's `app.js` configuration mentioned in its README.
## Launch your bots on Modal
### Option 1: Direct Link
Simply click on the url displayed after running the server or deploy step to launch an agent and be redirected to a Daily room to talk with the launched bot. This will use the OpenAI pipeline.
### Option 2: Connect via an RTVI Client
Follow the instructions provided in the [client folder's README](client/javascript/README.md) for building and running a custom client that connects to your Modal endpoint. The provided client provides a dropdown for choosing which bot pipeline to run.
# Navigating your llm, server, and Pipecat logs
In your [Modal dashboard](https://modal.com/apps), you should have two Apps listed under Live Apps:
1. `example-vllm-openai-compatible`: This App contains the containers and logs used to run your self-hosted LLM. There will be just one App Function listed: `serve`. Click on this function to view logs for your LLM.
2. `pipecat-modal`: This App contains the containers and logs used to run your `connect` endpoints and Pipecat pipelines. It will list two App Functions:
1. `fastapi_app`: This function is running the endpoints that your client will interact with and initiate starting a new pipeline (`/`, `/connect`, `/status`). Click on this function to see logs for each endpoint hit.
2. `bot_runner`: This function handles launching and running a bot pipeline. Click on this function to get a list of all pipeline runs and access each run's logs.
# Modal + Pipecat Tips
- In most other Pipecat examples, we use `Popen` to launch the pipeline process from the `/connect` endpoint. In this example, we use a Modal function instead. This allows us to run the pipelines using a separately defined Modal image as well as run each pipeline in an isolated container.
- For the FastAPI and most common Pipecat Pipeline containers, a default `debian_slim` CPU-only should be all that's required to run. GPU containers are needed for self-hosted services.
- To minimize cold starts of the pipeline and reduce latency for users, set `min_containers=1` on the Modal Function that launches the pipeline to ensure at least one warm instance of your function is always available.
- For next steps on running a self-hosted llm and reducing latency, check out all of [Modal's LLM examples](https://modal.com/docs/examples/vllm_inference).

View File

@@ -0,0 +1 @@
node_modules

View File

@@ -0,0 +1,29 @@
# JavaScript Implementation
Basic implementation using the [Pipecat JavaScript SDK](https://docs.pipecat.ai/client/js/introduction).
## Setup
1. Deploy the Modal server. See the main [README](../../README).
2. Navigate to the `client/javascript` directory:
```bash
cd client/javascript
```
3. Modify the baseUrl in src/app.js to point to your deployed Modal endpoint
4. Install dependencies:
```bash
npm install
```
5. Run the client app:
```
npm run dev
```
6. Visit http://localhost:5173 in your browser.

View File

@@ -0,0 +1,49 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>AI Chatbot</title>
</head>
<body>
<div class="container">
<div class="status-bar">
<div class="status">
Status: <span id="connection-status">Disconnected</span>
</div>
<div class="controls">
<select id="bot-selector">
<option value="openai">OpenAI</option>
<option value="gemini">Gemini</option>
<option value="vllm">Llama</option>
</select>
<button id="connect-btn">Connect</button>
<button id="disconnect-btn" disabled>Disconnect</button>
</div>
</div>
<div class="main-content">
<div class="bot-container">
<div id="bot-video-container"></div>
<audio id="bot-audio" autoplay></audio>
</div>
</div>
<div class="device-bar">
<div class="device-controls">
<select id="device-selector"></select>
<button id="mic-toggle-btn">Mute Mic</button>
</div>
</div>
<div class="debug-panel">
<h3>Debug Info</h3>
<div id="debug-log"></div>
</div>
</div>
<script type="module" src="/src/app.js"></script>
<link rel="stylesheet" href="/src/style.css" />
</body>
</html>

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,21 @@
{
"name": "client",
"version": "1.0.0",
"main": "index.js",
"scripts": {
"dev": "vite",
"build": "vite build",
"preview": "vite preview"
},
"keywords": [],
"author": "",
"license": "ISC",
"description": "",
"devDependencies": {
"vite": "^6.3.5"
},
"dependencies": {
"@pipecat-ai/client-js": "^1.0.0",
"@pipecat-ai/daily-transport": "^1.0.0"
}
}

View File

@@ -0,0 +1,376 @@
/**
* Copyright (c) 20242025, Daily
*
* SPDX-License-Identifier: BSD 2-Clause License
*/
/**
* Pipecat Client Implementation
*
* This client connects to an RTVI-compatible bot server using WebRTC (via Daily).
* It handles audio/video streaming and manages the connection lifecycle.
*
* Requirements:
* - A running RTVI bot server (defaults to http://localhost:7860)
* - The server must implement the /connect endpoint that returns Daily.co room credentials
* - Browser with WebRTC support
*/
import { PipecatClient, RTVIEvent } from '@pipecat-ai/client-js';
import { DailyTransport } from '@pipecat-ai/daily-transport';
/**
* ChatbotClient handles the connection and media management for a real-time
* voice and video interaction with an AI bot.
*/
class ChatbotClient {
constructor() {
// Initialize client state
this.pcClient = null;
this.setupDOMElements();
this.initializeClientAndTransport();
this.setupEventListeners();
}
/**
* Set up references to DOM elements and create necessary media elements
*/
setupDOMElements() {
// Get references to UI control elements
this.connectBtn = document.getElementById('connect-btn');
this.disconnectBtn = document.getElementById('disconnect-btn');
this.statusSpan = document.getElementById('connection-status');
this.debugLog = document.getElementById('debug-log');
this.botVideoContainer = document.getElementById('bot-video-container');
this.deviceSelector = document.getElementById('device-selector');
// Create an audio element for bot's voice output
this.botAudio = document.createElement('audio');
this.botAudio.autoplay = true;
this.botAudio.playsInline = true;
document.body.appendChild(this.botAudio);
}
/**
* Set up event listeners for connect/disconnect buttons
*/
setupEventListeners() {
this.connectBtn.addEventListener('click', () => this.connect());
this.disconnectBtn.addEventListener('click', () => this.disconnect());
// Populate device selector
this.pcClient.getAllMics().then((mics) => {
console.log('Available mics:', mics);
mics.forEach((device) => {
const option = document.createElement('option');
option.value = device.deviceId;
option.textContent = device.label || `Microphone ${device.deviceId}`;
this.deviceSelector.appendChild(option);
});
});
this.deviceSelector.addEventListener('change', (event) => {
const selectedDeviceId = event.target.value;
console.log('Selected device ID:', selectedDeviceId);
this.pcClient.updateMic(selectedDeviceId);
});
// Handle mic mute/unmute toggle
const micToggleBtn = document.getElementById('mic-toggle-btn');
micToggleBtn.addEventListener('click', () => {
let micEnabled = this.pcClient.isMicEnabled;
micToggleBtn.textContent = micEnabled ? 'Unmute Mic' : 'Mute Mic';
this.pcClient.enableMic(!micEnabled);
// Add logic to mute/unmute the mic
if (micEnabled) {
console.log('Mic muted');
// Add code to mute the mic
} else {
console.log('Mic unmuted');
// Add code to unmute the mic
}
});
}
/**
* Set up the Pipecat client and Daily transport
*/
async initializeClientAndTransport() {
// Initialize the Pipecat client with a DailyTransport and our configuration
this.pcClient = new PipecatClient({
transport: new DailyTransport(),
enableMic: true, // Enable microphone for user input
enableCam: false,
callbacks: {
// Handle connection state changes
onConnected: () => {
this.updateStatus('Connected');
this.connectBtn.disabled = true;
this.disconnectBtn.disabled = false;
this.log('Client connected');
},
onDisconnected: () => {
this.updateStatus('Disconnected');
this.connectBtn.disabled = false;
this.disconnectBtn.disabled = true;
this.log('Client disconnected');
},
// Handle transport state changes
onTransportStateChanged: (state) => {
this.updateStatus(`Transport: ${state}`);
this.log(`Transport state changed: ${state}`);
if (state === 'connecting') {
window.startTime = Date.now();
}
if (state === 'ready') {
this.setupMediaTracks();
console.warn('TIME TO BOT READY:', Date.now() - window.startTime);
}
},
// Handle bot connection events
onBotConnected: (participant) => {
this.log(`Bot connected: ${JSON.stringify(participant)}`);
},
onBotDisconnected: (participant) => {
this.log(`Bot disconnected: ${JSON.stringify(participant)}`);
},
onBotReady: (data) => {
this.log(`Bot ready: ${JSON.stringify(data)}`);
this.setupMediaTracks();
},
// Transcript events
onUserTranscript: (data) => {
// Only log final transcripts
if (data.final) {
this.log(`User: ${data.text}`);
}
},
onBotTranscript: (data) => {
this.log(`Bot: ${data.text}`);
},
// Error handling
onMessageError: (error) => {
console.log('Message error:', error);
},
onMicUpdated: (data) => {
console.log('Mic updated:', data);
this.deviceSelector.value = data.deviceId;
},
onError: (error) => {
console.log('Error:', JSON.stringify(error));
},
},
});
// Set up listeners for media track events
this.setupTrackListeners();
await this.pcClient.initDevices();
window.client = this.pcClient;
}
/**
* Add a timestamped message to the debug log
*/
log(message) {
const entry = document.createElement('div');
entry.textContent = `${new Date().toISOString()} - ${message}`;
// Add styling based on message type
if (message.startsWith('User: ')) {
entry.style.color = '#2196F3'; // blue for user
} else if (message.startsWith('Bot: ')) {
entry.style.color = '#4CAF50'; // green for bot
}
this.debugLog.appendChild(entry);
this.debugLog.scrollTop = this.debugLog.scrollHeight;
console.log(message);
}
/**
* Update the connection status display
*/
updateStatus(status) {
this.statusSpan.textContent = status;
this.log(`Status: ${status}`);
}
/**
* Check for available media tracks and set them up if present
* This is called when the bot is ready or when the transport state changes to ready
*/
setupMediaTracks() {
if (!this.pcClient) return;
// Get current tracks from the client
const tracks = this.pcClient.tracks();
// Set up any available bot tracks
if (tracks.bot?.audio) {
this.setupAudioTrack(tracks.bot.audio);
}
if (tracks.bot?.video) {
this.setupVideoTrack(tracks.bot.video);
}
}
/**
* Set up listeners for track events (start/stop)
* This handles new tracks being added during the session
*/
setupTrackListeners() {
if (!this.pcClient) return;
// Listen for new tracks starting
this.pcClient.on(RTVIEvent.TrackStarted, (track, participant) => {
// Only handle non-local (bot) tracks
if (!participant?.local) {
if (track.kind === 'audio') {
this.setupAudioTrack(track);
} else if (track.kind === 'video') {
this.setupVideoTrack(track);
}
this.log(
`Track started event: ${track.kind} from ${
participant?.name || 'unknown'
}`
);
} else {
this.log('Local mic unmuted');
}
});
// Listen for tracks stopping
this.pcClient.on(RTVIEvent.TrackStopped, (track, participant) => {
if (participant.local) {
this.log('Local mic muted');
return;
}
this.log(
`Track stopped event: ${track.kind} from ${
participant?.name || 'unknown'
}`
);
});
}
/**
* Set up an audio track for playback
* Handles both initial setup and track updates
*/
setupAudioTrack(track) {
this.log('Setting up audio track');
// Check if we're already playing this track
if (this.botAudio.srcObject) {
const oldTrack = this.botAudio.srcObject.getAudioTracks()[0];
if (oldTrack?.id === track.id) return;
}
// Create a new MediaStream with the track and set it as the audio source
this.botAudio.srcObject = new MediaStream([track]);
}
/**
* Set up a video track for display
* Handles both initial setup and track updates
*/
setupVideoTrack(track) {
this.log('Setting up video track');
const videoEl = document.createElement('video');
videoEl.autoplay = true;
videoEl.playsInline = true;
videoEl.muted = true;
videoEl.style.width = '100%';
videoEl.style.height = '100%';
videoEl.style.objectFit = 'cover';
// Check if we're already displaying this track
if (this.botVideoContainer.querySelector('video')?.srcObject) {
const oldTrack = this.botVideoContainer
.querySelector('video')
.srcObject.getVideoTracks()[0];
if (oldTrack?.id === track.id) return;
}
// Create a new MediaStream with the track and set it as the video source
videoEl.srcObject = new MediaStream([track]);
this.botVideoContainer.innerHTML = '';
this.botVideoContainer.appendChild(videoEl);
}
/**
* Initialize and connect to the bot
* This sets up the Pipecat client, initializes devices, and establishes the connection
*/
async connect() {
try {
const botSelector = document.getElementById('bot-selector');
const selectedBot = botSelector.value;
// Initialize audio/video devices
this.log('Initializing devices...');
await this.pcClient.initDevices();
// Connect to the bot
this.log(`Connecting to bot: ${selectedBot}`);
await this.pcClient.connect({
// REPLACE WITH YOUR MODAL URL ENDPOINT
endpoint:
'https://<your-workspace>--pipecat-modal-fastapi-app.modal.run/connect',
requestData: {
bot_name: selectedBot,
},
});
this.log('Connection complete');
} catch (error) {
// Handle any errors during connection
console.error('Connection error:', error);
this.log(`Error connecting: ${JSON.stringify(error.message)}`);
this.log(`Error stack: ${error.stack}`);
this.updateStatus('Error');
// Clean up if there's an error
if (this.pcClient) {
try {
await this.pcClient.disconnect();
} catch (disconnectError) {
this.log(`Error during disconnect: ${disconnectError.message}`);
}
}
}
}
/**
* Disconnect from the bot and clean up media resources
*/
async disconnect() {
if (this.pcClient) {
try {
// Disconnect the Pipecat client
await this.pcClient.disconnect();
// Clean up audio
if (this.botAudio.srcObject) {
this.botAudio.srcObject.getTracks().forEach((track) => track.stop());
this.botAudio.srcObject = null;
}
// Clean up video
if (this.botVideoContainer.querySelector('video')?.srcObject) {
const video = this.botVideoContainer.querySelector('video');
video.srcObject.getTracks().forEach((track) => track.stop());
video.srcObject = null;
}
this.botVideoContainer.innerHTML = '';
} catch (error) {
this.log(`Error disconnecting: ${error.message}`);
}
}
}
}
// Initialize the client when the page loads
window.addEventListener('DOMContentLoaded', () => {
new ChatbotClient();
});

View File

@@ -0,0 +1,135 @@
body {
margin: 0;
padding: 20px;
font-family: Arial, sans-serif;
background-color: #f0f0f0;
}
.container {
max-width: 1200px;
margin: 0 auto;
}
.status-bar,
.device-bar {
display: flex;
justify-content: space-between;
align-items: center;
padding: 10px;
background-color: #fff;
border-radius: 8px;
margin-bottom: 20px;
}
.controls,
.device-controls {
display: flex;
align-items: center;
gap: 10px; /* Adds spacing between elements */
}
.device-controls {
margin-left: auto;
}
.controls button,
.device-controls button {
padding: 8px 16px;
margin-left: 10px;
border: none;
border-radius: 4px;
cursor: pointer;
}
#bot-selector,
#device-selector {
padding: 8px 16px;
padding-right: 40px;
border: none;
border-radius: 4px;
background-color: #6c757d; /* Gray background */
color: white; /* White text */
cursor: pointer;
appearance: none; /* Removes default browser styling for dropdowns */
background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24' fill='white'%3E%3Cpath d='M7 10l5 5 5-5z'/%3E%3C/svg%3E"); /* Custom arrow */
background-repeat: no-repeat;
background-position: right 8px center; /* Position the arrow */
}
#bot-selector:focus,
#device-selector:focus {
outline: none;
box-shadow: 0 0 4px rgba(0, 0, 0, 0.3); /* Add a subtle focus effect */
}
#connect-btn {
background-color: #4caf50;
color: white;
}
#disconnect-btn {
background-color: #f44336;
color: white;
}
#mic-toggle-btn {
}
button:disabled {
opacity: 0.5;
cursor: not-allowed;
}
.main-content {
background-color: #fff;
border-radius: 8px;
padding: 20px;
margin-bottom: 20px;
}
.bot-container {
display: flex;
flex-direction: column;
align-items: center;
}
#bot-video-container {
width: 640px;
height: 360px;
background-color: #e0e0e0;
border-radius: 8px;
margin: 20px auto;
overflow: hidden;
display: flex;
align-items: center;
justify-content: center;
}
#bot-video-container video {
width: 100%;
height: 100%;
object-fit: cover;
}
.debug-panel {
background-color: #fff;
border-radius: 8px;
padding: 20px;
}
.debug-panel h3 {
margin: 0 0 10px 0;
font-size: 16px;
font-weight: bold;
}
#debug-log {
height: 200px;
overflow-y: auto;
background-color: #f8f8f8;
padding: 10px;
border-radius: 4px;
font-family: monospace;
font-size: 12px;
line-height: 1.4;
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 114 KiB

View File

@@ -0,0 +1,307 @@
"""modal_example.
This module shows a simple example of how to deploy a bot using Modal and FastAPI.
It includes:
- FastAPI endpoints for starting agents and checking bot statuses.
- Dynamic loading of bot implementations.
- Use of a Daily transport for bot communication.
"""
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import importlib
import os
from contextlib import asynccontextmanager
from typing import Any, Dict, Literal
import aiohttp
import modal
from fastapi import APIRouter, FastAPI, HTTPException
from fastapi.responses import JSONResponse, RedirectResponse
from pydantic import BaseModel
# container specifications for the FastAPI web server
web_image = (
modal.Image.debian_slim(python_version="3.13")
.pip_install_from_requirements("requirements.txt")
.pip_install("pipecat-ai[daily]")
.add_local_dir("src", remote_path="/root/src")
)
# container specifications for the Pipecat pipeline
bot_image = (
modal.Image.debian_slim(python_version="3.13")
.apt_install("ffmpeg")
.pip_install_from_requirements("requirements.txt")
.pip_install("pipecat-ai[daily,elevenlabs,openai,silero,google]")
.add_local_dir("src", remote_path="/root/src")
)
app = modal.App("pipecat-modal", secrets=[modal.Secret.from_dotenv()])
router = APIRouter()
bot_jobs = {}
daily_helpers = {}
# Names of all supported bot implementations
# These correspond to the bot files in the src directory
BotName = Literal["openai", "gemini", "vllm"]
def cleanup():
"""Cleanup function to terminate all bot processes.
Called during server shutdown.
"""
for entry in bot_jobs.values():
func = modal.FunctionCall.from_id(entry[0])
if func:
func.cancel()
def get_bot_file(bot_name: BotName) -> str:
"""Retrieve the bot file name corresponding to the provided bot_name.
Args:
bot_name (BotName): The name of the bot (e.g., 'openai', 'gemini', 'vllm').
Returns:
str: The file name corresponding to the bot implementation.
Raises:
ValueError: If the bot name is invalid or not supported.
"""
# bot_implementation = os.getenv("BOT_IMPLEMENTATION", "openai").lower().strip()
bot_implementation = bot_name.lower().strip()
if not bot_implementation:
bot_implementation = "openai"
if bot_implementation not in ["openai", "gemini", "vllm"]:
raise ValueError(
f"Invalid BOT_IMPLEMENTATION: {bot_implementation}. Must be 'openai' or 'gemini' or 'vllm'"
)
return f"bot_{bot_implementation}"
def get_runner(path: str, bot_file: str) -> callable:
"""Dynamically import the run_bot function based on the bot name.
Args:
path (str): The path to the bot files (e.g., 'src').
bot_file (str): The file name of the bot implementation (e.g., 'openai', 'gemini', 'vllm').
Returns:
function: The run_bot function from the specified bot module.
Raises:
ImportError: If the specified bot module or run_bot function is not found.
"""
try:
# Dynamically construct the module name
module_name = f"{path}.{bot_file}"
# Import the module
module = importlib.import_module(module_name)
# Get the run_bot function from the module
return getattr(module, "run_bot")
except (ImportError, AttributeError) as e:
raise ImportError(f"Failed to import run_bot from {module_name}: {e}")
async def create_room_and_token() -> tuple[str, str]:
"""Create a Daily room and generate an authentication token.
This function checks for existing room URL and token in the environment variables.
If not found, it creates a new room using the Daily API and generates a token for it.
Returns:
tuple[str, str]: A tuple containing the room URL and the authentication token.
Raises:
HTTPException: If room creation or token generation fails.
"""
from pipecat.transports.services.helpers.daily_rest import DailyRoomParams
room_url = os.getenv("DAILY_SAMPLE_ROOM_URL", None)
token = os.getenv("DAILY_SAMPLE_ROOM_TOKEN", None)
if not room_url:
room = await daily_helpers["rest"].create_room(DailyRoomParams())
if not room.url:
raise HTTPException(status_code=500, detail="Failed to create room")
room_url = room.url
token = await daily_helpers["rest"].get_token(room_url)
if not token:
raise HTTPException(status_code=500, detail=f"Failed to get token for room: {room_url}")
return room_url, token
@app.function(image=bot_image, min_containers=1)
async def bot_runner(room_url, token, bot_name: BotName = "openai"):
"""Launch the provided bot process, providing the given room URL and token for the bot to join.
Args:
room_url (str): The URL of the Daily room where the bot and client will communicate.
token (str): The authentication token for the room.
bot_name (BotName): The name of the bot implementation to use. Defaults to "openai".
Raises:
HTTPException: If the bot pipeline fails to start.
"""
try:
path = "src"
bot_file = get_bot_file(bot_name)
run_bot = get_runner(path, bot_file)
print(f"Starting bot process: {bot_file} -u {room_url} -t {token}")
await run_bot(room_url, token)
except Exception as e:
raise HTTPException(status_code=500, detail=f"Failed to start bot pipeline: {e}")
@asynccontextmanager
async def lifespan(app: FastAPI):
"""FastAPI lifespan manager that handles startup and shutdown tasks.
- Creates aiohttp session
- Initializes Daily API helper
- Cleans up resources on shutdown
"""
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
aiohttp_session = aiohttp.ClientSession()
daily_helpers["rest"] = DailyRESTHelper(
daily_api_key=os.getenv("DAILY_API_KEY", ""),
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
aiohttp_session=aiohttp_session,
)
yield
await aiohttp_session.close()
cleanup()
class ConnectData(BaseModel):
"""Data provided by client to specify the bot pipeline.
Attributes:
bot_name (BotName): The name of the bot to connect to. Defaults to "openai".
"""
bot_name: BotName = "openai"
async def start(data: ConnectData):
"""Internal method to start a bot agent and return the room URL and token.
Args:
data (ConnectData): The data containing the bot name to use.
Returns:
tuple[str, str]: A tuple containing the room URL and token.
"""
room_url, token = await create_room_and_token()
launch_bot_func = modal.Function.from_name("pipecat-modal", "bot_runner")
function_id = launch_bot_func.spawn(room_url, token, data.bot_name)
bot_jobs[function_id] = (function_id, room_url)
return room_url, token
@router.get("/")
async def start_agent():
"""A user endpoint for launching a bot agent and redirecting to the created room URL.
This function retrieves the bot implementation from the environment,
starts the bot agent, and redirects the user to the room URL to
interact with the bot through a Daily Prebuilt Interface.
Returns:
RedirectResponse: A response that redirects to the room URL.
"""
bot_name = os.getenv("BOT_IMPLEMENTATION", "openai").lower().strip()
print(f"Starting bot: {bot_name}")
room_url, token = await start(ConnectData(bot_name=bot_name))
return RedirectResponse(room_url)
@router.post("/connect")
async def rtvi_connect(data: ConnectData) -> Dict[Any, Any]:
"""A user endpoint for launching a bot agent and retrieving the room/token credentials.
This function retrieves the bot implementation from the request, if provided,
starts the bot agent, and returns the room URL and token for the bot. This allows the
client to then connect to the bot using their own RTVI interface.
Args:
data (ConnectData): Optional. The data containing the bot name to use.
Returns:
Dict[Any, Any]: A dictionary containing the room URL and token.
"""
print(f"Starting bot: {data.bot_name}")
if data is None or not data.bot_name:
data.bot_name = os.getenv("BOT_IMPLEMENTATION", "openai").lower().strip()
room_url, token = await start(data)
return {"room_url": room_url, "token": token}
@router.get("/status/{fid}")
def get_status(fid: str):
"""Retrieve the status of a bot process by its function ID.
Args:
fid (str): The function ID of the bot process.
Returns:
JSONResponse: A JSON response containing the bot's status and result code.
Raises:
HTTPException: If the bot process with the given ID is not found.
"""
func = modal.FunctionCall.from_id(fid)
if not func:
raise HTTPException(status_code=404, detail=f"Bot with process id: {fid} not found")
try:
result = func.get(timeout=0)
return JSONResponse({"bot_id": fid, "status": "finished", "code": result})
except modal.exception.OutputExpiredError:
return JSONResponse({"bot_id": fid, "status": "finished", "code": 404})
except TimeoutError:
return JSONResponse({"bot_id": fid, "status": "running", "code": 202})
@app.function(image=web_image, min_containers=1)
@modal.concurrent(max_inputs=1)
@modal.asgi_app()
def fastapi_app():
"""Create and configure the FastAPI application.
This function initializes the FastAPI app with middleware, routes, and lifespan management.
It is decorated to be used as a Modal ASGI app.
"""
from fastapi.middleware.cors import CORSMiddleware
# Initialize FastAPI app
web_app = FastAPI(lifespan=lifespan)
web_app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# Include the endpoints from this file
web_app.include_router(router)
return web_app

Some files were not shown because too many files have changed in this diff Show More