Compare commits

..

1 Commits

Author SHA1 Message Date
Aleix Conchillo Flaqué
41695806e8 process SystemFrames in a queue as high priority frames 2025-05-07 17:05:35 -07:00
1234 changed files with 98943 additions and 93416 deletions

48
.github/workflows/android.yaml vendored Normal file
View File

@@ -0,0 +1,48 @@
name: android
on:
push:
branches:
- main
paths:
- "examples/simple-chatbot/client/android/**"
pull_request:
branches:
- "**"
paths:
- "examples/simple-chatbot/client/android/**"
workflow_dispatch:
inputs:
sdk_git_ref:
type: string
description: "Which git ref of the app to build"
concurrency:
group: build-android-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
sdk:
name: "Simple chatbot demo"
runs-on: ubuntu-latest
steps:
- name: Checkout repo
uses: actions/checkout@v4
with:
ref: ${{ github.event.inputs.sdk_git_ref || github.ref }}
- name: "Install Java"
uses: actions/setup-java@v4
with:
distribution: 'temurin'
java-version: '17'
- name: Build demo app
working-directory: examples/simple-chatbot/client/android
run: ./gradlew :simple-chatbot-client:assembleDebug
- name: Upload demo APK
uses: actions/upload-artifact@v4
with:
name: Simple Chatbot Android Client
path: examples/simple-chatbot/client/android/simple-chatbot-client/build/outputs/apk/debug/simple-chatbot-client-debug.apk

View File

@@ -21,20 +21,24 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Install uv
uses: astral-sh/setup-uv@v3
with:
version: "latest"
- name: Set up Python
run: uv python install 3.10
- name: Install development dependencies
run: uv sync --group dev
id: setup_python
uses: actions/setup-python@v4
with:
python-version: '3.10'
- name: Setup virtual environment
run: |
python -m venv .venv
- name: Install basic Python dependencies
run: |
source .venv/bin/activate
python -m pip install --upgrade pip
pip install -r dev-requirements.txt
- name: Build project
run: uv build
- name: Install project in editable mode
run: uv pip install --editable .
run: |
source .venv/bin/activate
python -m build
- name: Install project and other Python dependencies
run: |
source .venv/bin/activate
pip install --editable .

View File

@@ -18,28 +18,35 @@ jobs:
steps:
- name: Checkout repo
uses: actions/checkout@v4
- name: Install uv
uses: astral-sh/setup-uv@v3
with:
version: "latest"
- name: Set up Python
run: uv python install 3.12
id: setup_python
uses: actions/setup-python@v4
with:
python-version: "3.10"
- name: Cache virtual environment
uses: actions/cache@v3
with:
# We are hashing dev-requirements.txt and test-requirements.txt which
# contain all dependencies needed to run the tests.
key: venv-${{ runner.os }}-${{ steps.setup_python.outputs.python-version}}-${{ hashFiles('dev-requirements.txt') }}-${{ hashFiles('test-requirements.txt') }}
path: .venv
- name: Install system packages
id: install_system_packages
run: |
sudo apt-get install -y portaudio19-dev
- name: Install dependencies
- name: Setup virtual environment
run: |
uv sync --group dev --extra anthropic --extra aws --extra google --extra langchain
python -m venv .venv
- name: Install basic Python dependencies
run: |
source .venv/bin/activate
python -m pip install --upgrade pip
pip install -r dev-requirements.txt -r test-requirements.txt
- name: Run tests with coverage
run: |
uv run coverage run
uv run coverage xml
source .venv/bin/activate
coverage run
coverage xml
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v5
with:

View File

@@ -17,27 +17,30 @@ concurrency:
jobs:
ruff-format:
name: "Code quality checks"
name: "Formatting checker"
runs-on: ubuntu-latest
steps:
- name: Checkout repo
uses: actions/checkout@v4
- name: Install uv
uses: astral-sh/setup-uv@v3
with:
version: "latest"
- name: Set up Python
run: uv python install 3.10
- name: Install development dependencies
run: uv sync --group dev
uses: actions/setup-python@v4
with:
python-version: "3.10"
- name: Setup virtual environment
run: |
python -m venv .venv
- name: Install development Python dependencies
run: |
source .venv/bin/activate
python -m pip install --upgrade pip
pip install -r dev-requirements.txt
- name: Ruff formatter
id: ruff-format
run: uv run ruff format --diff
- name: Ruff linter (all rules)
run: |
source .venv/bin/activate
ruff format --diff
- name: Ruff import linter
id: ruff-check
run: uv run ruff check
run: |
source .venv/bin/activate
ruff check --select I

View File

@@ -5,29 +5,35 @@ on:
inputs:
gitref:
type: string
description: 'what git tag to build (e.g. v0.0.74)'
description: "what git ref to build"
required: true
jobs:
build:
name: 'Build and upload wheels'
name: "Build and upload wheels"
runs-on: ubuntu-latest
steps:
- name: Checkout repo
uses: actions/checkout@v4
with:
ref: ${{ github.event.inputs.gitref }}
- name: Install uv
uses: astral-sh/setup-uv@v3
with:
version: 'latest'
- name: Set up Python
run: uv python install 3.12
- name: Install development dependencies
run: uv sync --group dev
id: setup_python
uses: actions/setup-python@v4
with:
python-version: '3.10'
- name: Setup virtual environment
run: |
python -m venv .venv
- name: Install basic Python dependencies
run: |
source .venv/bin/activate
python -m pip install --upgrade pip
pip install -r dev-requirements.txt
- name: Build project
run: uv build
run: |
source .venv/bin/activate
python -m build
- name: Upload wheels
uses: actions/upload-artifact@v4
with:
@@ -35,9 +41,9 @@ jobs:
path: ./dist
publish-to-pypi:
name: 'Publish to PyPI'
name: "Publish to PyPI"
runs-on: ubuntu-latest
needs: [build]
needs: [ build ]
environment:
name: pypi
url: https://pypi.org/p/pipecat-ai
@@ -56,12 +62,12 @@ jobs:
print-hash: true
publish-to-test-pypi:
name: 'Publish to Test PyPI'
name: "Publish to Test PyPI"
runs-on: ubuntu-latest
needs: [build]
needs: [ build ]
environment:
name: testpypi
url: https://test.pypi.org/p/pipecat-ai
url: https://pypi.org/p/pipecat-ai
permissions:
id-token: write
steps:
@@ -70,7 +76,7 @@ jobs:
with:
name: wheels
path: ./dist
- name: Publish to Test PyPI
- name: Publish to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
with:
verbose: true

View File

@@ -4,7 +4,7 @@ on: workflow_dispatch
jobs:
build:
name: 'Build and upload wheels'
name: "Build and upload wheels"
runs-on: ubuntu-latest
steps:
- name: Checkout repo
@@ -12,16 +12,23 @@ jobs:
with:
fetch-tags: true
fetch-depth: 100
- name: Install uv
uses: astral-sh/setup-uv@v3
with:
version: 'latest'
- name: Set up Python
run: uv python install 3.12
- name: Install development dependencies
run: uv sync --group dev
id: setup_python
uses: actions/setup-python@v4
with:
python-version: '3.10'
- name: Setup virtual environment
run: |
python -m venv .venv
- name: Install basic Python dependencies
run: |
source .venv/bin/activate
python -m pip install --upgrade pip
pip install -r dev-requirements.txt
- name: Build project
run: uv build
run: |
source .venv/bin/activate
python -m build
- name: Upload wheels
uses: actions/upload-artifact@v4
with:
@@ -29,12 +36,12 @@ jobs:
path: ./dist
publish-to-test-pypi:
name: 'Publish to Test PyPI'
name: "Publish to Test PyPI"
runs-on: ubuntu-latest
needs: [build]
needs: [ build ]
environment:
name: testpypi
url: https://test.pypi.org/p/pipecat-ai
url: https://pypi.org/p/pipecat-ai
permissions:
id-token: write
steps:
@@ -43,7 +50,7 @@ jobs:
with:
name: wheels
path: ./dist
- name: Publish to Test PyPI
- name: Publish to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
with:
verbose: true

View File

@@ -1,61 +0,0 @@
name: Python Compatibility Test
on:
push:
branches: [main, develop]
paths: ['pyproject.toml']
pull_request:
branches: [main, develop]
paths: ['pyproject.toml']
jobs:
test-compatibility:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: ['3.10.18', '3.11.13', '3.12.11', '3.13.5']
name: Python ${{ matrix.python-version }}
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Install system dependencies
run: |
sudo apt-get update
sudo apt-get install -y \
portaudio19-dev \
libcairo2-dev \
libgirepository1.0-dev \
pkg-config
- name: Install uv
uses: astral-sh/setup-uv@v4
with:
version: 'latest'
- name: Set up Python ${{ matrix.python-version }}
run: |
uv python install ${{ matrix.python-version }}
uv python pin ${{ matrix.python-version }}
- name: Test uv sync with all extras (Python < 3.13)
if: "!startsWith(matrix.python-version, '3.13.')"
run: |
uv sync --group dev --all-extras --no-extra krisp
- name: Test uv sync without PyTorch extras (Python 3.13+)
if: startsWith(matrix.python-version, '3.13.')
run: |
uv sync --group dev --all-extras \
--no-extra krisp \
--no-extra ultravox \
--no-extra local-smart-turn \
--no-extra moondream \
--no-extra mlx-whisper
- name: Verify installation
run: |
uv run python --version
uv run python -c "import pipecat; print('✅ Pipecat imports successfully')"

View File

@@ -1,51 +0,0 @@
name: Sync Quickstart to pipecat-quickstart repo
on:
push:
branches: [main]
paths:
- 'examples/quickstart/**'
workflow_dispatch: # Manual trigger
jobs:
sync-quickstart:
runs-on: ubuntu-latest
steps:
- name: Checkout main repo
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Checkout quickstart repo
uses: actions/checkout@v4
with:
repository: pipecat-ai/pipecat-quickstart
token: ${{ secrets.QUICKSTART_SYNC_TOKEN }}
path: quickstart-repo
- name: Sync files (excluding uv.lock and README.md)
run: |
# Copy all files except uv.lock and README.md
find examples/quickstart -type f \
-not -name "README.md" \
-not -name "uv.lock" \
-exec cp {} quickstart-repo/ \;
- name: Commit and push changes
run: |
cd quickstart-repo
git config user.name "GitHub Action"
git config user.email "action@github.com"
git add .
# Only commit if there are changes
if ! git diff --staged --quiet; then
git commit -m "Sync from pipecat main repo
Updated files from examples/quickstart/
Commit: ${{ github.sha }}
"
git push
else
echo "No changes to sync"
fi

View File

@@ -22,23 +22,31 @@ jobs:
steps:
- name: Checkout repo
uses: actions/checkout@v4
- name: Install uv
uses: astral-sh/setup-uv@v3
with:
version: "latest"
- name: Set up Python
run: uv python install 3.12
id: setup_python
uses: actions/setup-python@v4
with:
python-version: "3.10"
- name: Cache virtual environment
uses: actions/cache@v3
with:
# We are hashing dev-requirements.txt and test-requirements.txt which
# contain all dependencies needed to run the tests.
key: venv-${{ runner.os }}-${{ steps.setup_python.outputs.python-version}}-${{ hashFiles('dev-requirements.txt') }}-${{ hashFiles('test-requirements.txt') }}
path: .venv
- name: Install system packages
id: install_system_packages
run: |
sudo apt-get install -y portaudio19-dev
- name: Install dependencies
- name: Setup virtual environment
run: |
uv sync --group dev --extra anthropic --extra aws --extra google --extra langchain
python -m venv .venv
- name: Install basic Python dependencies
run: |
source .venv/bin/activate
python -m pip install --upgrade pip
pip install -r dev-requirements.txt -r test-requirements.txt
- name: Test with pytest
run: |
uv run pytest
source .venv/bin/activate
pytest

7
.gitignore vendored
View File

@@ -31,6 +31,8 @@ MANIFEST
fly.toml
# Examples
examples/telnyx-chatbot/templates/streams.xml
examples/twilio-chatbot/templates/streams.xml
examples/**/node_modules/
examples/**/.expo/
examples/**/dist/
@@ -48,7 +50,4 @@ examples/**/web-build/
# Documentation
docs/api/_build/
docs/api/api
# uv
.python-version
docs/api/api

View File

@@ -1,8 +1,8 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.12.1
rev: v0.9.7
hooks:
- id: ruff
language_version: python3
args: [--fix]
args: [ --select, I, ]
- id: ruff-format

View File

@@ -9,14 +9,22 @@ build:
- python3-dev
- libasound2-dev
jobs:
post_install:
- pip install uv
- UV_PROJECT_ENVIRONMENT=$READTHEDOCS_VIRTUALENV_PATH uv sync --group docs --all-extras --no-extra krisp --no-extra gstreamer --no-extra ultravox --no-extra local_smart_turn --no-extra moondream --no-extra riva --no-extra mlx-whisper
pre_build:
- python -m pip install --upgrade pip
- pip install wheel setuptools
post_build:
- echo "Build completed"
sphinx:
configuration: docs/api/conf.py
fail_on_warning: false
python:
install:
- requirements: docs/api/requirements.txt
- method: pip
path: .
search:
ranking:
api/*: 5

File diff suppressed because it is too large Load Diff

View File

@@ -1,336 +0,0 @@
# Community Integrations Guide
Pipecat welcomes community-maintained integrations! As our ecosystem grows, we've established a process for any developer to create and maintain their own service integrations while ensuring discoverability for the Pipecat community.
## Overview
**What we support:** Community-maintained integrations that live in separate repositories and are maintained by their authors.
**What we don't do:** The Pipecat team does not code review, test, or maintain community integrations. We provide guidance and list approved integrations for discoverability.
**Why this approach:** This allows the community to move quickly while keeping the Pipecat core team focused on maintaining the framework itself.
## Submitting your Integration
To be listed as an official community integration, follow these steps:
### Step 1: Build Your Integration
Create your integration following the patterns and examples shown in the "Integration Patterns and Examples" section below.
### Step 2: Set Up Your Repository
Your repository must contain these components:
- **Source code** - Complete implementation following Pipecat patterns
- **Foundational example** - Single file example showing basic usage (see [Pipecat examples](https://github.com/pipecat-ai/pipecat/tree/main/examples/foundational))
- **README.md** - Must include:
- Introduction and explanation of your integration
- Installation instructions
- Usage instructions with Pipecat Pipeline
- How to run your example
- Pipecat version compatibility (e.g., "Tested with Pipecat v0.0.86")
- Company attribution: If you work for the company providing the service, please mention this in your README. This helps build confidence that the integration will be actively maintained.
- **LICENSE** - Permissive license (BSD-2 like Pipecat, or equivalent open source terms)
- **Code documentation** - Source code with docstrings (we recommend following [Pipecat's docstring conventions](https://github.com/pipecat-ai/pipecat/blob/main/CONTRIBUTING.md#docstring-conventions))
- **Changelog** - Maintain a changelog for version updates
### Step 3: Join Discord
Join our Discord: https://discord.gg/pipecat
### Step 4: Submit for Listing
Submit a pull request to add your integration to our [Community Integrations documentation page](https://docs.pipecat.ai/server/services/community-integrations).
**To submit:**
1. Fork the [Pipecat docs repository](https://github.com/pipecat-ai/docs)
2. Edit the file `server/services/community-integrations.mdx`
3. Add your integration to the appropriate service category table with:
- Service name
- Link to your repository
- Maintainer GitHub username(s)
4. Include a link to your demo video (approx 30-60 seconds) in your PR description showing:
- Core functionality of your integration
- Handling of an interruption (if applicable to service type)
5. Submit your pull request
Once your PR is submitted, post in the `#community-integrations` Discord channel to let us know.
## Integration Patterns and Examples
### STT (Speech-to-Text) Services
#### Websocket-based Services
**Base class:** `STTService`
**Examples:**
- [DeepgramSTTService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/deepgram/stt.py)
- [SpeechmaticsSTTService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/speechmatics/stt.py)
#### File-based Services
**Base class:** `SegmentedSTTService`
**Examples:**
- [RivaSTTService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/riva/stt.py)
- [FalSTTService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/fal/stt.py)
#### Key requirements:
- STT services should push `InterimTranscriptionFrames` and `TranscriptionFrames`
- If confidence values are available, filter for values >50% confidence
### LLM (Large Language Model) Services
#### OpenAI-Compatible Services
**Base class:** `OpenAILLMService`
**Examples:**
- [AzureLLMService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/azure/llm.py)
- [GrokLLMService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/grok/llm.py) - Shows overriding the base class where needed
#### Non-OpenAI Compatible Services
**Requires:** Full implementation
**Examples:**
- [AnthropicLLMService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/anthropic/llm.py)
- [GoogleLLMService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/google/llm.py)
#### Key requirements:
- **Frame sequence:** Output must follow this frame sequence pattern:
- `LLMFullResponseStartFrame` - Signals the start of an LLM response
- `LLMTextFrame` - Contains LLM content, typically streamed as tokens
- `LLMFullResponseEndFrame` - Signals the end of an LLM response
- **Context aggregation:** Implement context aggregation to collect user and assistant content:
- Aggregators come in pairs with a `user()` instance and `assistant()` instance
- Context must adhere to the `LLMContext` universal format
- Aggregators should handle adding messages, function calls, and images to the context
### TTS (Text-to-Speech) Services
#### AudioContextWordTTSService
**Use for:** Websocket-based services supporting word/timestamp alignment
**Example:**
- [CartesiaTTSService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/cartesia/tts.py)
#### InterruptibleTTSService
**Use for:** Websocket-based services without word/timestamp alignment, requiring disconnection on interruption
**Example:**
- [SarvamTTSService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/sarvam/tts.py)
#### WordTTSService
**Use for:** HTTP-based services supporting word/timestamp alignment
**Example:**
- [ElevenLabsHttpTTSService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/elevenlabs/tts.py)
#### TTSService
**Use for:** HTTP-based services without word/timestamp alignment
**Example:**
- [GoogleHttpTTSService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/google/tts.py)
#### Key requirements:
- For websocket services, use asyncio WebSocket implementation (required for v13+ support)
- Handle idle service timeouts with keepalives
- TTSServices push both audio (`TTSRawAudioFrame`) and text (`TTSTextFrame`) frames
### Telephony Serializers
Pipecat supports telephony provider integration using websocket connections to exchange MediaStreams. These services use a FrameSerializer to serialize and deserialize inputs from the FastAPIWebsocketTransport.
**Examples:**
- [Twilio](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/serializers/twilio.py)
- [Telnyx](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/serializers/telnyx.py)
#### Key requirements:
- Include hang-up functionality using the provider's native API, ideally using `aiohttp`
- Support DTMF (dual-tone multi-frequency) events if the provider supports them:
- Deserialize DTMF events from the provider's protocol to `InputDTMFFrame`
- Use `KeypadEntry` enum for valid keypad entries (0-9, \*, #, A-D)
- Handle invalid DTMF digits gracefully by returning `None`
### Image Generation Services
**Base class:** `ImageGenService`
**Examples:**
- [FalImageGenService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/fal/image.py)
- [GoogleImageGenService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/google/image.py)
#### Key requirements:
- Must implement `run_image_gen` method returning an `AsyncGenerator`
### Vision Services
Vision services process images and provide analysis such as descriptions, object detection, or visual question answering.
**Base class:** `VisionService`
**Example:**
- [MoondreamVisionService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/moondream/vision.py)
#### Key requirements:
- Must implement `run_vision` method that takes an `LLMContext` and returns an `AsyncGenerator[Frame, None]`
- The method processes the latest image in the context and yields frames with analysis results
- Typically yields `TextFrame` objects containing descriptions or answers
## Implementation Guidelines
### Naming Conventions
- **STT:** `VendorSTTService`
- **LLM:** `VendorLLMService`
- **TTS:**
- Websocket: `VendorTTSService`
- HTTP: `VendorHttpTTSService`
- **Image:** `VendorImageGenService`
- **Vision:** `VendorVisionService`
- **Telephony:** `VendorFrameSerializer`
### Metrics Support
Enable metrics in your service:
```python
def can_generate_metrics(self) -> bool:
"""Check if this service can generate processing metrics.
Returns:
True, as this service supports metrics.
"""
return True
```
### Dynamic Settings Updates
STT, LLM, and TTS services support `ServiceUpdateSettingsFrame` for dynamic configuration changes. The base STTService has an `_update_settings()` method that handles settings, and the private `_settings` `Dict` is used to store settings and provide access to the subclass.
```python
async def set_language(self, language: Language):
"""Set the recognition language and reconnect.
Args:
language: The language to use for speech recognition.
"""
logger.info(f"Switching STT language to: [{language}]")
self._settings["language"] = language
await self._disconnect()
await self._connect()
```
Note that, in this example, Deepgram requires the websocket connection be disconnected and reconnected to reinitialize the service with the new value. Consider if your service requires reconnection.
### Sample Rate Handling
Sample rates are set via PipelineParams and passed to each frame processor at initialization. The pattern is to _not_ set the sample rate value in the constructor of a given service. Instead, use the `start()` method to initialize sample rates from the frame:
```python
async def start(self, frame: StartFrame):
"""Start the service."""
await super().start(frame)
self._settings["output_format"]["sample_rate"] = self.sample_rate
await self._connect()
```
Note that `self.sample_rate` is a `@property` set in the TTSService base class, which provides access to the private sample rate value obtained from the StartFrame.
### Tracing Decorators
Use Pipecat's tracing decorators:
- **STT:** `@traced_stt` - decorate a function that handles `transcript`, `is_final`, `language` as args
- **LLM:** `@traced_llm` - decorate the `_process_context()` method
- **TTS:** `@traced_tts` - decorate the `run_tts()` method
## Best Practices
### Packaging and Distribution
- Use [uv](https://docs.astral.sh/uv/) for packaging (encouraged)
- Consider releasing to PyPI for easier installation
- Follow semantic versioning principles
- Maintain a changelog
### HTTP Communication
For REST-based communication, use aiohttp. Pipecat includes this as a required dependency, so using it prevents adding an additional dependency to your integration.
### Error Handling
- Wrap API calls in appropriate try/catch blocks
- Handle rate limits and network failures gracefully
- Provide meaningful error messages
- When errors occur, raise exceptions AND push `ErrorFrame`s to notify the pipeline:
```python
from pipecat.frames.frames import ErrorFrame
try:
# Your API call
result = await self._make_api_call()
except Exception as e:
# Push error frame to pipeline
await self.push_error(ErrorFrame(error=f"{self} error: {e}"))
# Raise or handle as appropriate
raise
```
### Testing
- Your foundational example serves as a valuable integration-level test
- Unit tests are nice to have. As the Pipecat teams provides better guidance, we will encourage unit testing more
## Disclaimer
Community integrations are community-maintained and not officially supported by the Pipecat team. Users should evaluate these integrations independently. The Pipecat team reserves the right to remove listings that become unmaintained or problematic.
## Staying Up to Date
Pipecat evolves rapidly to support the latest AI technologies and patterns. While we strive to minimize breaking changes, they do occur as the framework matures.
**We strongly recommend:**
- Join our Discord at https://discord.gg/pipecat and monitor the `#announcements` channel for release notifications
- Follow our changelog: https://github.com/pipecat-ai/pipecat/blob/main/CHANGELOG.md
- Test your integration against new Pipecat releases promptly
- Update your README with the last tested Pipecat version
This helps ensure your integration remains compatible and your users have clear expectations about version support.
## Questions?
Join our Discord community at https://discord.gg/pipecat and post in the `#community-integrations` channel for guidance and support.
For additional questions, you can also reach out to us at pipecat-ai@daily.co.

View File

@@ -1,9 +1,5 @@
## Contributing to Pipecat
**Want to add a new service integration?**
We encourage community-maintained integrations! Please see our [Community Integration Guide](COMMUNITY_INTEGRATIONS.md) for the process and requirements.
**Want to contribute to Pipecat core?**
We welcome contributions of all kinds! Your help is appreciated. Follow these steps to get involved:
1. **Fork this repository**: Start by forking the Pipecat Documentation repository to your GitHub account.
@@ -35,23 +31,6 @@ git push origin your-branch-name
Our maintainers will review your PR, and once everything is good, your contributions will be merged!
## Dependency Management
This project uses [uv](https://docs.astral.sh/uv/) for dependency management. The `uv.lock` file is committed to ensure reproducible builds.
### Adding or Updating Dependencies
1. Edit `pyproject.toml` to add/update dependencies
2. Run `uv lock` to update the lockfile with new dependency resolution
3. Run `uv sync` to install the updated dependencies locally
4. Always commit both files together:
```bash
git add pyproject.toml uv.lock
git commit -m "feat: add new dependency for feature X"
```
**Important:** Never manually edit `uv.lock`. It's auto-generated by `uv lock`.
## Code Style and Documentation
### Python Code Style
@@ -62,150 +41,36 @@ We use Ruff for code linting and formatting. Please ensure your code passes all
We follow Google-style docstrings with these specific conventions:
**Regular Classes:**
- Class docstrings should fully document all parameters used in `__init__`
- We don't require separate docstrings for `__init__` methods when parameters are documented in the class docstring
- Property methods should have docstrings explaining their purpose and return value
- Class docstring describes the class purpose and key functionality
- `__init__` method has its own docstring with complete `Args:` section documenting all parameters
- All public methods must have docstrings with `Args:` and `Returns:` sections as appropriate
**Dataclasses:**
- Class docstring describes the purpose and documents all fields in a `Parameters:` section
- No `__init__` docstring (auto-generated)
**Properties:**
- Must have docstrings with `Returns:` section
**Abstract Methods:**
- Must have docstrings explaining what subclasses should implement
**`__init__.py` Files:**
- **Skip docstrings** for pure import/re-export modules
- **Add brief docstrings** for top-level packages or those with initialization logic
**Enums:**
- Class docstring describes the enumeration purpose
- Use `Parameters:` section to document each enum value and its meaning
- No `__init__` docstring (Enums don't have custom constructors)
**Code Examples in Docstrings:**
- Use `Examples:` as a section header for multiple examples
- Use descriptive text followed by double colons (`::`) for each example
- **Always include a blank line after the `::"`**
- Indent all code consistently within each block
- Separate multiple examples with blank lines for readability
**Lists and Bullets in Docstrings:**
- Use dashes (`-`) for bullet points, not asterisks (`*`)
- **Add a blank line before bullet lists** when they follow a colon
- Use section headers like "Supported features:" or "Behavior:" before lists
- For complex nested information, consider using paragraph format instead
**Deprecations:**
- Use `warnings.warn()` in code for runtime deprecation warnings
- Add `.. deprecated::` directive in docstrings for documentation visibility
- Include version information and describe current status
- Describe parameters in present tense, use directive to indicate deprecation status
#### Examples:
Example of correctly documented class:
```python
# Regular class
class MyService(BaseService):
"""Description of what the service does.
class MyClass:
"""Class description.
Provides detailed explanation of the service's functionality,
key features, and usage patterns.
Additional details about the class.
Supported features:
- Feature one with detailed explanation
- Feature two with additional context
- Feature three for advanced use cases
Args:
param1: Description of first parameter.
param2: Description of second parameter.
"""
def __init__(self, param1: str, old_param: str = None, **kwargs):
"""Initialize the service.
Args:
param1: Description of param1.
old_param: Controls legacy behavior.
.. deprecated:: 1.2.0
This parameter no longer has any effect and will be removed in version 2.0.
**kwargs: Additional arguments passed to parent.
"""
if old_param is not None:
import warnings
warnings.warn(
"Parameter 'old_param' is deprecated and will be removed in version 2.0.",
DeprecationWarning,
)
super().__init__(**kwargs)
def __init__(self, param1, param2):
# No docstring required here as parameters are documented above
self.param1 = param1
self.param2 = param2
@property
def sample_rate(self) -> int:
"""Get the current sample rate.
def some_property(self) -> str:
"""Get the formatted property value.
Returns:
The sample rate in Hz.
A string representation of the property.
"""
return self._sample_rate
async def process_data(self, data: str) -> bool:
"""Process the provided data.
Args:
data: The data to process.
Returns:
True if processing succeeded.
"""
pass
# Dataclass with code examples
@dataclass
class MessageFrame:
"""Frame containing messages in OpenAI format.
Supports both simple and content list message formats.
Example::
[
{"role": "user", "content": "Hello"},
{"role": "assistant", "content": "Hi there!"}
]
Parameters:
messages: List of messages in OpenAI format.
"""
messages: List[dict]
# Enum class
class Status(Enum):
"""Status codes for processing operations.
Parameters:
PENDING: Operation is queued but not started.
RUNNING: Operation is currently in progress.
COMPLETED: Operation finished successfully.
FAILED: Operation encountered an error.
"""
PENDING = "pending"
RUNNING = "running"
COMPLETED = "completed"
FAILED = "failed"
return f"Property: {self.param1}"
```
# Contributor Covenant Code of Conduct

40
Dockerfile Normal file
View File

@@ -0,0 +1,40 @@
# setup
FROM python:3.11.5
WORKDIR /app
COPY requirements.txt /app
COPY *.py /app
COPY pyproject.toml /app
COPY src/ /app/src/
COPY examples/ /app/examples/
WORKDIR /app
RUN ls --recursive /app/
RUN pip3 install --upgrade -r requirements.txt
RUN python -m build .
RUN pip3 install .
RUN pip3 install gunicorn
# If running on Ubuntu, Azure TTS requires some extra config
# https://learn.microsoft.com/en-us/azure/ai-services/speech-service/quickstarts/setup-platform?pivots=programming-language-python&tabs=linux%2Cubuntu%2Cdotnetcli%2Cdotnet%2Cjre%2Cmaven%2Cnodejs%2Cmac%2Cpypi
RUN wget -O - https://www.openssl.org/source/openssl-1.1.1w.tar.gz | tar zxf -
WORKDIR openssl-1.1.1w
RUN ./config --prefix=/usr/local
RUN make -j $(nproc)
RUN make install_sw install_ssldirs
RUN ldconfig -v
ENV SSL_CERT_DIR=/etc/ssl/certs
#ENV LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH
RUN apt clean
RUN apt-get update
RUN apt-get -y install build-essential libssl-dev ca-certificates libasound2 wget
ENV PYTHONUNBUFFERED=1
WORKDIR /app
EXPOSE 8000
# run
CMD ["gunicorn", "--workers=2", "--log-level", "debug", "--chdir", "examples/server", "--capture-output", "daily-bot-manager:app", "--bind=0.0.0.0:8000"]

View File

@@ -1,4 +0,0 @@
prune docs
prune examples
prune scripts
prune tests

241
README.md
View File

@@ -2,15 +2,12 @@
<img alt="pipecat" width="300px" height="auto" src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/pipecat.png">
</div></h1>
[![PyPI](https://img.shields.io/pypi/v/pipecat-ai)](https://pypi.org/project/pipecat-ai) ![Tests](https://github.com/pipecat-ai/pipecat/actions/workflows/tests.yaml/badge.svg) [![codecov](https://codecov.io/gh/pipecat-ai/pipecat/graph/badge.svg?token=LNVUIVO4Y9)](https://codecov.io/gh/pipecat-ai/pipecat) [![Docs](https://img.shields.io/badge/Documentation-blue)](https://docs.pipecat.ai) [![Discord](https://img.shields.io/discord/1239284677165056021)](https://discord.gg/pipecat) [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/pipecat-ai/pipecat)
[![](https://getmanta.ai/api/badges?text=Manta%20Graph&link=manta)](https://getmanta.ai/pipecat)
[![PyPI](https://img.shields.io/pypi/v/pipecat-ai)](https://pypi.org/project/pipecat-ai) ![Tests](https://github.com/pipecat-ai/pipecat/actions/workflows/tests.yaml/badge.svg) [![codecov](https://codecov.io/gh/pipecat-ai/pipecat/graph/badge.svg?token=LNVUIVO4Y9)](https://codecov.io/gh/pipecat-ai/pipecat) [![Docs](https://img.shields.io/badge/Documentation-blue)](https://docs.pipecat.ai) [![Discord](https://img.shields.io/discord/1239284677165056021)](https://discord.gg/pipecat)
# 🎙️ Pipecat: Real-Time Voice & Multimodal AI Agents
**Pipecat** is an open-source Python framework for building real-time voice and multimodal conversational agents. Orchestrate audio and video, AI services, different transports, and conversation pipelines effortlessly—so you can focus on what makes your agent unique.
> Want to dive right in? Try the [quickstart](https://docs.pipecat.ai/getting-started/quickstart).
## 🚀 What You Can Build
- **Voice Assistants** natural, streaming conversations with AI
@@ -20,6 +17,8 @@
- **Business Agents** customer intake, support bots, guided flows
- **Complex Dialog Systems** design logic with structured conversations
🧭 Looking to build structured conversations? Check out [Pipecat Flows](https://github.com/pipecat-ai/pipecat-flows) for managing complex conversational states and transitions.
## 🧠 Why Pipecat?
- **Voice-first**: Integrates speech recognition, text-to-speech, and conversation handling
@@ -27,158 +26,170 @@
- **Composable Pipelines**: Build complex behavior from modular components
- **Real-Time**: Ultra-low latency interaction with different transports (e.g. WebSockets or WebRTC)
## 🌐 Pipecat Ecosystem
### 📱 Client SDKs
Building client applications? You can connect to Pipecat from any platform using our official SDKs:
<a href="https://docs.pipecat.ai/client/js/introduction">JavaScript</a> | <a href="https://docs.pipecat.ai/client/react/introduction">React</a> | <a href="https://docs.pipecat.ai/client/react-native/introduction">React Native</a> |
<a href="https://docs.pipecat.ai/client/ios/introduction">Swift</a> | <a href="https://docs.pipecat.ai/client/android/introduction">Kotlin</a> | <a href="https://docs.pipecat.ai/client/c++/introduction">C++</a> | <a href="https://github.com/pipecat-ai/pipecat-esp32">ESP32</a>
### 🧭 Structured conversations
Looking to build structured conversations? Check out [Pipecat Flows](https://github.com/pipecat-ai/pipecat-flows) for managing complex conversational states and transitions.
### 🪄 Beautiful UIs
Want to build beautiful and engaging experiences? Checkout the [Voice UI Kit](https://github.com/pipecat-ai/voice-ui-kit), a collection of components, hooks and templates for building voice AI applications quickly.
### 🛠️ Create and deploy projects
Create a new project in under a minute with the [Pipecat CLI](https://github.com/pipecat-ai/pipecat-cli). Then use the CLI to monitor and deploy your agent to production.
### 🔍 Debugging
Looking for help debugging your pipeline and processors? Check out [Whisker](https://github.com/pipecat-ai/whisker), a real-time Pipecat debugger.
### 🖥️ Terminal
Love terminal applications? Check out [Tail](https://github.com/pipecat-ai/tail), a terminal dashboard for Pipecat.
### 📺️ Pipecat TV Channel
Catch new features, interviews, and how-tos on our [Pipecat TV](https://www.youtube.com/playlist?list=PLzU2zoMTQIHjqC3v4q2XVSR3hGSzwKFwH) channel.
## 🎬 See it in action
<p float="left">
<a href="https://github.com/pipecat-ai/pipecat-examples/tree/main/simple-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat-examples/main/simple-chatbot/image.png" width="400" /></a>&nbsp;
<a href="https://github.com/pipecat-ai/pipecat-examples/tree/main/storytelling-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat-examples/main/storytelling-chatbot/image.png" width="400" /></a>
<a href="https://github.com/pipecat-ai/pipecat/tree/main/examples/simple-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/examples/simple-chatbot/image.png" width="400" /></a>&nbsp;
<a href="https://github.com/pipecat-ai/pipecat/tree/main/examples/storytelling-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/examples/storytelling-chatbot/image.png" width="400" /></a>
<br/>
<a href="https://github.com/pipecat-ai/pipecat-examples/tree/main/translation-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat-examples/main/translation-chatbot/image.png" width="400" /></a>&nbsp;
<a href="https://github.com/pipecat-ai/pipecat/blob/main/examples/foundational/12-describe-video.py"><img src="https://github.com/pipecat-ai/pipecat/blob/main/examples/foundational/assets/moondream.png" width="400" /></a>
<a href="https://github.com/pipecat-ai/pipecat/tree/main/examples/translation-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/examples/translation-chatbot/image.png" width="400" /></a>&nbsp;
<a href="https://github.com/pipecat-ai/pipecat/tree/main/examples/moondream-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/examples/moondream-chatbot/image.png" width="400" /></a>
</p>
## 📱 Client SDKs
You can connect to Pipecat from any platform using our official SDKs:
| Platform | SDK Repo | Description |
| -------- | ------------------------------------------------------------------------------ | -------------------------------- |
| Web | [pipecat-client-web](https://github.com/pipecat-ai/pipecat-client-web) | JavaScript and React client SDKs |
| iOS | [pipecat-client-ios](https://github.com/pipecat-ai/pipecat-client-ios) | Swift SDK for iOS |
| Android | [pipecat-client-android](https://github.com/pipecat-ai/pipecat-client-android) | Kotlin SDK for Android |
| C++ | [pipecat-client-cxx](https://github.com/pipecat-ai/pipecat-client-cxx) | C++ client SDK |
## 🧩 Available services
| Category | Services |
| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/stt/elevenlabs), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [NVIDIA Riva](https://docs.pipecat.ai/server/services/stt/riva), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [SambaNova (Whisper)](https://docs.pipecat.ai/server/services/stt/sambanova), [Sarvam](https://docs.pipecat.ai/server/services/stt/sarvam), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Ultravox](https://docs.pipecat.ai/server/services/stt/ultravox), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) |
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [Mistral](https://docs.pipecat.ai/server/services/llm/mistral), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova) [Together AI](https://docs.pipecat.ai/server/services/llm/together) |
| Text-to-Speech | [Async](https://docs.pipecat.ai/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [Groq](https://docs.pipecat.ai/server/services/tts/groq), [Hume](https://docs.pipecat.ai/server/services/tts/hume), [Inworld](https://docs.pipecat.ai/server/services/tts/inworld), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [NVIDIA Riva](https://docs.pipecat.ai/server/services/tts/riva), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [Speechmatics](https://docs.pipecat.ai/server/services/tts/speechmatics), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
| Speech-to-Speech | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai) |
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local |
| Serializers | [Plivo](https://docs.pipecat.ai/server/utilities/serializers/plivo), [Twilio](https://docs.pipecat.ai/server/utilities/serializers/twilio), [Telnyx](https://docs.pipecat.ai/server/utilities/serializers/telnyx) |
| Video | [HeyGen](https://docs.pipecat.ai/server/services/video/heygen), [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) |
| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) |
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/fal), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) |
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [ai-coustics](https://docs.pipecat.ai/server/utilities/audio/aic-filter) |
| Analytics & Metrics | [OpenTelemetry](https://docs.pipecat.ai/server/utilities/opentelemetry), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) |
| Category | Services |
|---------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [Parakeet (NVIDIA)](https://docs.pipecat.ai/server/services/stt/parakeet), [Ultravox](https://docs.pipecat.ai/server/services/stt/ultravox), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) |
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [Together AI](https://docs.pipecat.ai/server/services/llm/together) |
| Text-to-Speech | [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [FastPitch (NVIDIA)](https://docs.pipecat.ai/server/services/tts/fastpitch), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
| Speech-to-Speech | [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai) |
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local |
| Video | [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) |
| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) |
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/fal), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) |
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [Noisereduce](https://docs.pipecat.ai/server/utilities/audio/noisereduce-filter) |
| Analytics & Metrics | [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) |
📚 [View full services documentation →](https://docs.pipecat.ai/server/services/supported-services)
## ⚡ Getting started
You can get started with Pipecat running on your local machine, then move your agent processes to the cloud when you're ready.
You can get started with Pipecat running on your local machine, then move your agent processes to the cloud when youre ready.
1. Install uv
```shell
# Install the module
pip install pipecat-ai
```bash
curl -LsSf https://astral.sh/uv/install.sh | sh
```
# Set up your environment
cp dot-env.template .env
```
> **Need help?** Refer to the [uv install documentation](https://docs.astral.sh/uv/getting-started/installation/).
To keep things lightweight, only the core framework is included by default. If you need support for third-party AI services, you can add the necessary dependencies with:
2. Install the module
```bash
# For new projects
uv init my-pipecat-app
cd my-pipecat-app
uv add pipecat-ai
# Or for existing projects
uv add pipecat-ai
```
3. Set up your environment
```bash
cp env.example .env
```
4. To keep things lightweight, only the core framework is included by default. If you need support for third-party AI services, you can add the necessary dependencies with:
```bash
uv add "pipecat-ai[option,...]"
```
> **Using pip?** You can still use `pip install pipecat-ai` and `pip install "pipecat-ai[option,...]"` to get set up.
```shell
pip install "pipecat-ai[option,...]"
```
## 🧪 Code examples
- [Foundational](https://github.com/pipecat-ai/pipecat/tree/main/examples/foundational) — small snippets that build on each other, introducing one or two concepts at a time
- [Example apps](https://github.com/pipecat-ai/pipecat-examples) — complete applications that you can use as starting points for development
- [Example apps](https://github.com/pipecat-ai/pipecat/tree/main/examples/) — complete applications that you can use as starting points for development
## 🛠️ Contributing to the framework
## 🛠️ Hacking on the framework itself
### Prerequisites
1. Set up a virtual environment before following these instructions. From the root of the repo:
**Minimum Python Version:** 3.10
**Recommended Python Version:** 3.12
### Setup Steps
1. Clone the repository and navigate to it:
```bash
git clone https://github.com/pipecat-ai/pipecat.git
cd pipecat
```shell
python3 -m venv venv
source venv/bin/activate
```
2. Install development and testing dependencies:
2. Install the development dependencies:
```bash
uv sync --group dev --all-extras \
--no-extra gstreamer \
--no-extra krisp \
--no-extra local \
--no-extra ultravox # (ultravox not fully supported on macOS)
```shell
pip install -r dev-requirements.txt
```
3. Install the git pre-commit hooks:
3. Install the git pre-commit hooks (these help ensure your code follows project rules):
```bash
uv run pre-commit install
```shell
pre-commit install
```
> **Note**: Some extras (local, gstreamer) require system dependencies. See documentation if you encounter build errors.
4. Install the `pipecat-ai` package locally in editable mode:
```shell
pip install -e .
```
> The `-e` or `--editable` option allows you to modify the code without reinstalling.
5. Include optional dependencies as needed. For example:
```shell
pip install -e ".[daily,deepgram,cartesia,openai,silero]"
```
6. (Optional) If you want to use this package from another directory:
```shell
pip install "path_to_this_repo[option,...]"
```
### Running tests
To run all tests, from the root directory:
Install the test dependencies:
```bash
uv run pytest
```shell
pip install -r test-requirements.txt
```
Run a specific test suite:
From the root directory, run:
```bash
uv run pytest tests/test_name.py
```shell
pytest
```
### Setting up your editor
This project uses strict [PEP 8](https://peps.python.org/pep-0008/) formatting via [Ruff](https://github.com/astral-sh/ruff).
#### Emacs
You can use [use-package](https://github.com/jwiegley/use-package) to install [emacs-lazy-ruff](https://github.com/christophermadsen/emacs-lazy-ruff) package and configure `ruff` arguments:
```elisp
(use-package lazy-ruff
:ensure t
:hook ((python-mode . lazy-ruff-mode))
:config
(setq lazy-ruff-format-command "ruff format")
(setq lazy-ruff-check-command "ruff check --select I"))
```
`ruff` was installed in the `venv` environment described before, so you should be able to use [pyvenv-auto](https://github.com/ryotaro612/pyvenv-auto) to automatically load that environment inside Emacs.
```elisp
(use-package pyvenv-auto
:ensure t
:defer t
:hook ((python-mode . pyvenv-auto-run)))
```
#### Visual Studio Code
Install the
[Ruff](https://marketplace.visualstudio.com/items?itemName=charliermarsh.ruff) extension. Then edit the user settings (_Ctrl-Shift-P_ `Open User Settings (JSON)`) and set it as the default Python formatter, and enable formatting on save:
```json
"[python]": {
"editor.defaultFormatter": "charliermarsh.ruff",
"editor.formatOnSave": true
}
```
#### PyCharm
`ruff` was installed in the `venv` environment described before, now to enable autoformatting on save, go to `File` -> `Settings` -> `Tools` -> `File Watchers` and add a new watcher with the following settings:
1. **Name**: `Ruff formatter`
2. **File type**: `Python`
3. **Working directory**: `$ContentRoot$`
4. **Arguments**: `format $FilePath$`
5. **Program**: `$PyInterpreterDirectory$/ruff`
## 🤝 Contributing
We welcome contributions from the community! Whether you're fixing bugs, improving documentation, or adding new features, here's how you can help:

View File

@@ -1,5 +0,0 @@
# Security Policy
## Reporting a Vulnerability
Please email `disclosures@daily.co`.

13
dev-requirements.txt Normal file
View File

@@ -0,0 +1,13 @@
build~=1.2.2
coverage~=7.6.12
grpcio-tools~=1.67.1
pip-tools~=7.4.1
pre-commit~=4.0.1
pyright~=1.1.397
pytest~=8.3.4
pytest-asyncio~=0.25.3
pytest-aiohttp==1.1.0
ruff~=0.11.1
setuptools~=70.0.0
setuptools_scm~=8.1.0
python-dotenv~=1.0.1

10
docs/README.md Normal file
View File

@@ -0,0 +1,10 @@
# Pipecat Docs
## [Architecture Overview](architecture.md)
Learn about the thinking behind the framework's design.
## [A Frame's Progress](frame-progress.md)
See how a Frame is processed through a Transport, a Pipeline, and a series of Frame Processors.

View File

@@ -1,27 +1,10 @@
#!/bin/bash
# Build docs using uv
echo "Installing dependencies with uv..."
uv sync --group docs --all-extras --no-extra krisp --no-extra gstreamer --no-extra ultravox --no-extra local_smart_turn --no-extra moondream --no-extra riva --no-extra mlx-whisper
# Check if sphinx-build is available
if ! uv run sphinx-build --version &> /dev/null; then
echo "Error: sphinx-build is not available" >&2
exit 1
fi
# Clean previous build
rm -rf _build
echo "Building documentation..."
# Build docs matching ReadTheDocs configuration
uv run sphinx-build -b html -d _build/doctrees . _build/html -W --keep-going
sphinx-build -b html -d _build/doctrees . _build/html -W --keep-going
if [ $? -eq 0 ]; then
echo "Documentation built successfully!"
# Open docs (MacOS)
open _build/html/index.html
else
echo "Documentation build failed!" >&2
exit 1
fi
# Open docs (MacOS)
open _build/html/index.html

View File

@@ -1,7 +1,5 @@
import logging
import os
import sys
from datetime import datetime
from pathlib import Path
# Configure logging
@@ -15,8 +13,7 @@ sys.path.insert(0, str(project_root / "src"))
# Project information
project = "pipecat-ai"
current_year = datetime.now().year
copyright = f"2024-{current_year}, Daily" if current_year > 2024 else "2024, Daily"
copyright = "2024, Daily"
author = "Daily"
# General configuration
@@ -27,61 +24,107 @@ extensions = [
"sphinx.ext.intersphinx",
]
suppress_warnings = [
"autodoc.mocked_object",
"toc.not_included",
]
# Napoleon settings
napoleon_google_docstring = True
napoleon_numpy_docstring = False
napoleon_include_init_with_doc = True
# AutoDoc settings
autodoc_default_options = {
"members": True,
"member-order": "bysource",
"undoc-members": False,
"exclude-members": "__weakref__,model_config",
"special-members": "__init__",
"undoc-members": True,
"exclude-members": "__weakref__",
"no-index": True,
"show-inheritance": True,
}
# Mock imports for optional dependencies
autodoc_mock_imports = [
# Krisp - has build issues on some platforms
"pipecat_ai_krisp",
"riva",
"livekit",
"pyht", # Base PlayHT package
"pyht.async_client", # PlayHT specific imports
"pyht.client",
"pyht.protos",
"pyht.protos.api_pb2",
"pipecat_ai_playht", # PlayHT wrapper
"aiortc",
"aiortc.mediastreams",
"cv2",
"av",
"pyneuphonic",
"mem0",
"mlx_whisper",
"anthropic",
"assemblyai",
"boto3",
"azure",
"cartesia",
"deepgram",
"elevenlabs",
"fal",
"gladia",
"google",
"krisp",
"krisp_audio",
# System-specific GUI libraries
"langchain",
"lmnt",
"noisereduce",
"openai",
"openpipe",
"simli",
"soundfile",
"pipecat_ai_krisp",
"pyaudio",
"_tkinter",
"tkinter",
# Platform-specific audio libraries (if needed)
"gi",
"gi.require_version",
"gi.repository",
# OpenCV - sometimes has import issues during docs build
"cv2",
# Heavy ML packages excluded from ReadTheDocs
# ultravox dependencies
"daily",
"daily_python",
"pydantic.BaseModel",
"pydantic.Field",
"pydantic._internal._model_construction",
"pydantic._internal._fields",
# Moondream dependencies
"torch",
"transformers",
"intel_extension_for_pytorch",
# Ultravox dependencies
"huggingface_hub",
"vllm",
"vllm.engine.arg_utils",
# local-smart-turn dependencies
"coremltools",
"coremltools.models",
"coremltools.models.MLModel",
"torch",
"torch.nn",
"torch.nn.functional",
"torchaudio",
# moondream dependencies
"transformers",
"transformers.AutoTokenizer",
"transformers.AutoFeatureExtractor",
"AutoFeatureExtractor",
"timm",
"einops",
"intel_extension_for_pytorch",
"huggingface_hub",
# riva dependencies
# Langchain dependencies
"langchain_core",
"langchain_core.messages",
"langchain_core.runnables",
"langchain_core.messages.AIMessageChunk",
"langchain_core.runnables.Runnable",
# LiveKit dependencies
"livekit",
"livekit.rtc",
"livekit_api",
"livekit_protocol",
"tenacity",
"tenacity.retry",
"tenacity.stop_after_attempt",
"tenacity.wait_exponential",
"rtc",
"rtc.Room",
"rtc.RoomOptions",
"rtc.AudioSource",
"rtc.LocalAudioTrack",
"rtc.TrackPublishOptions",
"rtc.TrackSource",
"rtc.AudioStream",
"rtc.AudioFrameEvent",
"rtc.AudioFrame",
"rtc.Track",
"rtc.TrackKind",
"rtc.RemoteParticipant",
"rtc.RemoteTrackPublication",
"rtc.DataPacket",
# Riva dependencies
"riva",
"riva.client",
"riva.client.Auth",
@@ -91,45 +134,96 @@ autodoc_mock_imports = [
"riva.client.AudioEncoding",
"riva.client.proto.riva_tts_pb2",
"riva.client.SpeechSynthesisService",
# MLX dependencies (Apple Silicon specific)
"mlx",
"mlx_whisper", # Note: might need underscore format too
# Local CoreML Smart Turn dependencies
"coremltools",
"coremltools.models",
"coremltools.models.MLModel",
"torch",
"torch.nn",
"torch.nn.functional",
"transformers",
"transformers.AutoFeatureExtractor",
# Also add specific classes that are imported
"AutoFeatureExtractor",
]
# HTML output settings
html_theme = "sphinx_rtd_theme"
html_static_path = ["_static"] if os.path.exists("_static") else []
autodoc_typehints = "signature" # Show type hints in the signature only, not in the docstring
html_static_path = ["_static"]
autodoc_typehints = "description"
html_show_sphinx = False
def import_core_modules():
"""Import core pipecat modules for autodoc to discover."""
core_modules = [
"pipecat",
"pipecat.frames",
"pipecat.pipeline",
"pipecat.processors",
"pipecat.services",
"pipecat.transports",
"pipecat.audio",
"pipecat.adapters",
"pipecat.clocks",
"pipecat.metrics",
"pipecat.observers",
"pipecat.runner",
"pipecat.serializers",
"pipecat.sync",
"pipecat.transcriptions",
"pipecat.utils",
]
def verify_modules():
"""Verify that required modules are available."""
required_modules = {
"services": [
"assemblyai",
"aws",
"cartesia",
"deepgram",
"google",
"lmnt",
"riva",
"simli",
],
"serializers": ["livekit"],
"vad": ["silero", "vad_analyzer"],
"transports": {
"services": ["daily", "livekit"],
"local": ["audio", "tk"],
"network": ["fastapi_websocket", "websocket_server"],
},
}
for module_name in core_modules:
try:
__import__(module_name)
logger.info(f"Successfully imported {module_name}")
except ImportError as e:
logger.warning(f"Failed to import {module_name}: {e}")
# Skip importing modules that are in autodoc_mock_imports
skipped_modules = set(autodoc_mock_imports)
missing = []
for category, modules in required_modules.items():
if isinstance(modules, dict):
# Handle nested structure
for subcategory, submodules in modules.items():
for module in submodules:
# Check if module is in autodoc_mock_imports
if (
f"pipecat.{category}.{subcategory}.{module}" in skipped_modules
or module in skipped_modules
):
logger.info(
f"Skipping import of mocked module: pipecat.{category}.{subcategory}.{module}"
)
continue
try:
__import__(f"pipecat.{category}.{subcategory}.{module}")
logger.info(
f"Successfully imported pipecat.{category}.{subcategory}.{module}"
)
except (ImportError, TypeError, NameError) as e:
missing.append(f"pipecat.{category}.{subcategory}.{module}")
logger.warning(
f"Optional module not available: pipecat.{category}.{subcategory}.{module} - {str(e)}"
)
else:
# Handle flat structure
for module in modules:
# Check if module is in autodoc_mock_imports
if f"pipecat.{category}.{module}" in skipped_modules or module in skipped_modules:
logger.info(f"Skipping import of mocked module: pipecat.{category}.{module}")
continue
try:
__import__(f"pipecat.{category}.{module}")
logger.info(f"Successfully imported pipecat.{category}.{module}")
except (ImportError, TypeError, NameError) as e:
missing.append(f"pipecat.{category}.{module}")
logger.warning(
f"Optional module not available: pipecat.{category}.{module} - {str(e)}"
)
if missing:
logger.warning(f"Some optional modules are not available: {missing}")
def clean_title(title: str) -> str:
@@ -141,7 +235,36 @@ def clean_title(title: str) -> str:
parts = title.split(".")
title = parts[-1]
return title
# Special cases for service names and common acronyms
special_cases = {
"ai": "AI",
"aws": "AWS",
"api": "API",
"vad": "VAD",
"assemblyai": "AssemblyAI",
"deepgram": "Deepgram",
"elevenlabs": "ElevenLabs",
"openai": "OpenAI",
"openpipe": "OpenPipe",
"playht": "PlayHT",
"xtts": "XTTS",
"lmnt": "LMNT",
}
# Check if the entire title is a special case
if title.lower() in special_cases:
return special_cases[title.lower()]
# Otherwise, capitalize each word
words = title.split("_")
cleaned_words = []
for word in words:
if word.lower() in special_cases:
cleaned_words.append(special_cases[word.lower()])
else:
cleaned_words.append(word.capitalize())
return " ".join(cleaned_words)
def setup(app):
@@ -166,8 +289,9 @@ def setup(app):
excludes = [
str(project_root / "src/pipecat/pipeline/to_be_updated"),
str(project_root / "src/pipecat/examples"),
str(project_root / "src/pipecat/tests"),
str(project_root / "src/pipecat/processors/gstreamer"),
str(project_root / "src/pipecat/services/to_be_updated"),
str(project_root / "src/pipecat/vad"), # deprecated
"**/test_*.py",
"**/tests/*.py",
]
@@ -208,4 +332,5 @@ def setup(app):
logger.error(f"Error generating API documentation: {e}", exc_info=True)
import_core_modules()
# Run module verification
verify_modules()

View File

@@ -1,36 +1,81 @@
Pipecat API Reference
=====================
Pipecat API Reference Docs
==========================
Welcome to the Pipecat API reference.
Welcome to Pipecat's API reference documentation!
Use the navigation on the left to browse modules, or search using the search box.
**New to Pipecat?** Check out the `main documentation <https://docs.pipecat.ai>`_ for tutorials, guides, and client SDK information.
Pipecat is an open source framework for building voice and multimodal assistants.
It provides a flexible pipeline architecture for connecting various AI services,
audio processing, and transport layers.
Quick Links
-----------
* `GitHub Repository <https://github.com/pipecat-ai/pipecat>`_
* `Join our Community <https://discord.gg/pipecat>`_
* `Website <https://pipecat.ai>`_
API Reference
-------------
Core Components
~~~~~~~~~~~~~~~
* :mod:`Frames <pipecat.frames>`
* :mod:`Processors <pipecat.processors>`
* :mod:`Pipeline <pipecat.pipeline>`
Audio Processing
~~~~~~~~~~~~~~~~
* :mod:`Audio <pipecat.audio>`
Services
~~~~~~~~
* :mod:`Services <pipecat.services>`
Transport & Serialization
~~~~~~~~~~~~~~~~~~~~~~~~~
* :mod:`Transports <pipecat.transports>`
* :mod:`Local <pipecat.transports.local>`
* :mod:`Network <pipecat.transports.network>`
* :mod:`Services <pipecat.transports.services>`
* :mod:`Serializers <pipecat.serializers>`
Utilities
~~~~~~~~~
* :mod:`Adapters <pipecat.adapters>`
* :mod:`Clocks <pipecat.clocks>`
* :mod:`Metrics <pipecat.metrics>`
* :mod:`Observers <pipecat.observers>`
* :mod:`Sync <pipecat.sync>`
* :mod:`Transcriptions <pipecat.transcriptions>`
* :mod:`Utils <pipecat.utils>`
.. toctree::
:maxdepth: 2
:maxdepth: 3
:caption: API Reference
:hidden:
Adapters <api/pipecat.adapters>
Audio <api/pipecat.audio>
Clocks <api/pipecat.clocks>
Extensions <api/pipecat.extensions>
Frames <api/pipecat.frames>
Metrics <api/pipecat.metrics>
Observers <api/pipecat.observers>
Pipeline <api/pipecat.pipeline>
Processors <api/pipecat.processors>
Runner <api/pipecat.runner>
Serializers <api/pipecat.serializers>
Services <api/pipecat.services>
Sync <api/pipecat.sync>
Transcriptions <api/pipecat.transcriptions>
Transports <api/pipecat.transports>
Utils <api/pipecat.utils>
Utils <api/pipecat.utils>
Indices and tables
==================
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

53
docs/api/requirements.txt Normal file
View File

@@ -0,0 +1,53 @@
# Sphinx dependencies
sphinx>=8.1.3
sphinx-rtd-theme
sphinx-markdown-builder
sphinx-autodoc-typehints
toml
# Install all extras individually to ensure they're properly resolved
pipecat-ai[anthropic]
pipecat-ai[assemblyai]
pipecat-ai[aws]
pipecat-ai[azure]
pipecat-ai[cartesia]
pipecat-ai[cerebras]
pipecat-ai[deepseek]
pipecat-ai[daily]
pipecat-ai[deepgram]
pipecat-ai[elevenlabs]
pipecat-ai[fal]
pipecat-ai[fireworks]
pipecat-ai[fish]
pipecat-ai[gladia]
pipecat-ai[google]
pipecat-ai[grok]
pipecat-ai[groq]
# pipecat-ai[krisp] # Mocked
pipecat-ai[koala]
# pipecat-ai[langchain] # Mocked
# pipecat-ai[livekit] # Mocked
pipecat-ai[lmnt]
pipecat-ai[local]
# pipecat-ai[local-smart-turn] # Mocked
# pipecat-ai[mem0] # Mocked
# pipecat-ai[mlx-whisper] # Mocked
# pipecat-ai[moondream] # Mocked
pipecat-ai[nim]
# pipecat-ai[neuphonic] # Mocked
pipecat-ai[noisereduce]
pipecat-ai[openai]
# pipecat-ai[openpipe]
# pipecat-ai[playht] # Mocked due to grpcio conflict with riva
pipecat-ai[qwen]
pipecat-ai[remote-smart-turn]
# pipecat-ai[riva] # Mocked
pipecat-ai[silero]
pipecat-ai[simli]
pipecat-ai[soundfile]
pipecat-ai[tavus]
pipecat-ai[together]
# pipecat-ai[ultravox] # Mocked
# pipecat-ai[webrtc] # Mocked
pipecat-ai[websocket]
pipecat-ai[whisper]

17
docs/architecture.md Normal file
View File

@@ -0,0 +1,17 @@
# Pipecat architecture guide
## Frames
Frames can represent discrete chunks of data, for instance a chunk of text, a chunk of audio, or an image. They can also be used to as control flow, for instance a frame that indicates that there is no more data available, or that a user started or stopped talking. They can also represent more complex data structures, such as a message array used for an LLM completion.
## FrameProcessors
Frame processors operate on frames. Every frame processor implements a `process_frame` method that consumes one frame and produces zero or more frames. Frame processors can do simple transforms, such as concatenating text fragments into sentences, or they can treat frames as input for an AI Service, and emit chat completions based on message arrays or transform text into audio or images.
## Pipelines
Pipelines are lists of frame processors linked together. Frame processors can push frames upstream or downstream to their peers. A very simple pipeline might chain an LLM frame processor to a text-to-speech frame processor, with a transport as an output.
## Transports
Transports provide input and output frame processors to receive or send frames respectively. For example, the `DailyTransport` does this with a WebRTC session joined to a Daily.co room.

46
docs/frame-progress.md Normal file
View File

@@ -0,0 +1,46 @@
# A Frame's Progress
1. A user says “Hello, LLM” and the cloud transcription service delivers a transcription to the Transport.
![A transcript frame arrives](images/frame-progress-01.png)
2. The Transport places a Transcription frame in the Pipelines source queue.
![Frame in source queue](images/frame-progress-02.png)
3. The Pipeline passes the Transcription frame to the first Frame Processor in its list, the LLM User Message Aggregator.
![To UMA](images/frame-progress-03.png)
4. The LLM User Message Aggregator updates the LLM Context with a `{“user”: “Hello LLM”}` message.
![Update context](images/frame-progress-04.png)
5. The LLM User Message Aggregator yields an LLM Message Frame, containing the updated LLM Context. The Pipeline passes this frame to the LLM Frame Processor.
![Update context](images/frame-progress-05.png)
6. The LLM Frame Processor creates a streaming chat completion based on the LLM context and yields the first chunk of a response, Text Frame with the value “Hi, “. The Pipeline passes this frame to the TTS Frame Processor. The TTS Frame Processor aggregates this response but doesnt yield anything, yet, because its waiting for a full sentence.
![LLM yields Text](images/frame-progress-06.png)
7. The LLM Frame Processor yields another Text Frame with the value “there.”. The Pipeline passes this frame to the TTS Frame Processor.
![LLM yields more Text](images/frame-progress-07.png)
8. The TTS Frame Processor now has a full sentence, so it starts streaming audio based on “Hi, there.” It yields the first chunk of streaming audio as an Audio frame, which the Pipeline passes to the LLM Assistant Message Aggregator.
![TTS yields Audio](images/frame-progress-08.png)
9. The LLM Assistant Message Aggregator doesnt do anything with Audio frames, so it immediately yields the frame, unchanged. This is the convention for all Frame Processors: frames that the processor doesnt process should be immediately yielded.
![pass-through](images/frame-progress-09.png)
10. The Pipeline places the first Audio frame in its sink queue, which is being watched by the Transport. Since the frame is now in a queue, the Pipeline can continue processing other frames. Note that the source and sink queues form a sort of “boundary of concurrent processing” between a Pipeline and the outside world. In a Pipeline, Frames are processed sequentially; once a Frame is on a queue it can be processed in parallel with the frames being processed by the Pipeline. TODO: link to a more in-depth section about this.
![sink queue](images/frame-progress-10.png)
11. The TTS Frame Processor yields another Audio frame as the Transport transmits the first Audio frame.
![parallel audio](images/frame-progress-11.png)
12. As before, the LLM Assistant Message Aggregator immediately yields the Audio frame and the Pipeline places the Audio frame in the sink queue.
![sink queue 2](images/frame-progress-12.png)
13. The TTS Frame Processor has no more frames to yield. The LLM Frame Processor emits an LLM Response End Frame, which the Pipeline passes to the TTS Frame Processor.
![response end](images/frame-progress-13.png)
14. The TTS Frame Processor immediately yields the LLM Response End Frame, so the Pipeline passes it along to the LLM Assistant Message Aggregator. The LLM Assistant Message Aggregator updates the LLM Context with the full response from the LLM. TODO TODO: I realized I forgot that the TSS Frame Processor also yields the Text frames that the LLM emitted so that the LLM Assistant Message Aggregator could accumulate them, arrggh.
![response end](images/frame-progress-14.png)
15. The system is quiet, and waiting for the next message from the Transport.
![response end](images/frame-progress-15.png)

110
docs/frame.md Normal file
View File

@@ -0,0 +1,110 @@
# Understanding Different Frame Types in the Pipecat System
In the Pipecat system, frames are used to represent different types of data and control signals that flow through the pipeline. Understanding these frame types is crucial for working with the system effectively. This tutorial will cover the main categories of frames and their specific uses.
## 1. Base Frame Classes
### Frame
The `Frame` class is the base class for all frames. It includes:
- `id`: A unique identifier
- `name`: A descriptive name
- `pts`: Presentation timestamp (optional)
### DataFrame
`DataFrame` is a subclass of `Frame` and serves as a base for most data-carrying frames.
## 2. Audio Frames
### AudioRawFrame
Represents a chunk of audio with properties:
- `audio`: Raw audio data
- `sample_rate`: Audio sample rate
- `num_channels`: Number of audio channels
Subclasses include:
- `InputAudioRawFrame`: For audio from input sources
- `OutputAudioRawFrame`: For audio to be played by output devices
- `TTSAudioRawFrame`: For audio generated by Text-to-Speech services
## 3. Image Frames
### ImageRawFrame
Represents an image with properties:
- `image`: Raw image data
- `size`: Image dimensions
- `format`: Image format (e.g., JPEG, PNG)
Subclasses include:
- `InputImageRawFrame`: For images from input sources
- `OutputImageRawFrame`: For images to be displayed
- `UserImageRawFrame`: For images associated with a specific user
- `VisionImageRawFrame`: For images with associated text for description
- `URLImageRawFrame`: For images with an associated URL
### SpriteFrame
Represents an animated sprite, containing a list of `ImageRawFrame` objects.
## 4. Text and Transcription Frames
### TextFrame
Represents a chunk of text, used for various purposes in the pipeline.
### TranscriptionFrame
A specialized `TextFrame` for speech transcriptions, including:
- `user_id`: ID of the speaking user
- `timestamp`: When the transcription was generated
- `language`: Detected language of the speech
### InterimTranscriptionFrame
Similar to `TranscriptionFrame`, but for interim (not final) transcriptions.
## 5. LLM (Language Model) Frames
### LLMMessagesFrame
Contains a list of messages for an LLM service to process.
### LLMMessagesAppendFrame and LLMMessagesUpdateFrame
Used to modify the current context of LLM messages.
### LLMSetToolsFrame
Specifies tools (functions) available for the LLM to use.
### LLMEnablePromptCachingFrame
Controls prompt caching in certain LLMs.
## 6. System and Control Frames
### SystemFrame
Base class for system-level frames.
Important system frames include:
- `StartFrame`: Initiates a pipeline
- `CancelFrame`: Stops a pipeline immediately
- `ErrorFrame`: Notifies of errors (with `FatalErrorFrame` for unrecoverable errors)
- `EndTaskFrame` and `CancelTaskFrame`: Control pipeline tasks
- `StartInterruptionFrame` and `StopInterruptionFrame`: Indicate user speech for interruptions
### ControlFrame
Base class for control-flow frames.
Notable control frames:
- `EndFrame`: Signals the end of a pipeline
- `LLMFullResponseStartFrame` and `LLMFullResponseEndFrame`: Bracket LLM responses
- `UserStartedSpeakingFrame` and `UserStoppedSpeakingFrame`: Indicate user speech activity
- `BotStartedSpeakingFrame` and `BotStoppedSpeakingFrame`: Indicate bot speech activity
- `TTSStartedFrame` and `TTSStoppedFrame`: Bracket Text-to-Speech responses
## 7. Special Purpose Frames
### MetricsFrame
Contains performance metrics data.
### FunctionCallInProgressFrame and FunctionCallResultFrame
Used for handling LLM function (tool) calls.
### ServiceUpdateSettingsFrame
Base class for updating service settings, with specific subclasses for LLM, TTS, and STT services.
## Conclusion
Understanding these frame types is essential for working with the Pipecat system. Each frame type serves a specific purpose in the pipeline, whether it's carrying data (like audio or images), controlling the flow of the pipeline, or managing system-level operations. By using the appropriate frame types, you can effectively process and transmit various kinds of information through your pipeline.

Binary file not shown.

After

Width:  |  Height:  |  Size: 98 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 91 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 92 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 92 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 98 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 94 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 94 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 95 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 94 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 96 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 110 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 102 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 111 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 117 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 98 KiB

103
dot-env.template Normal file
View File

@@ -0,0 +1,103 @@
# Anthropic
ANTHROPIC_API_KEY=...
# AWS
AWS_SECRET_ACCESS_KEY=...
AWS_ACCESS_KEY_ID=...
AWS_REGION=...
# Azure
AZURE_SPEECH_REGION=...
AZURE_SPEECH_API_KEY=...
AZURE_CHATGPT_API_KEY=...
AZURE_CHATGPT_ENDPOINT=https://...
AZURE_CHATGPT_MODEL=...
AZURE_DALLE_API_KEY=...
AZURE_DALLE_ENDPOINT=https://...
AZURE_DALLE_MODEL=...
# Cartesia
CARTESIA_API_KEY=...
# Daily
DAILY_API_KEY=...
DAILY_SAMPLE_ROOM_URL=https://...
# ElevenLabs
ELEVENLABS_API_KEY=...
ELEVENLABS_VOICE_ID=...
# Neuphonic
NEUPHONIC_API_KEY=...
# Fal
FAL_KEY=...
# Fireworks
FIREWORKS_API_KEY=...
# Gladia
GLADIA_API_KEY=...
# LMNT
LMNT_API_KEY=...
LMNT_VOICE_ID=...
# PlayHT
PLAY_HT_USER_ID=...
PLAY_HT_API_KEY=...
# OpenAI
OPENAI_API_KEY=...
# OpenPipe
OPENPIPE_API_KEY=...
# Tavus
TAVUS_API_KEY=...
TAVUS_REPLICA_ID=...
TAVUS_PERSONA_ID=...
# Simli
SIMLI_API_KEY=...
SIMLI_FACE_ID=...
# Krisp
KRISP_MODEL_PATH=...
# DeepSeek
DEEPSEEK_API_KEY=...
# Groq
GROQ_API_KEY=...
# Grok
GROK_API_KEY=...
# Together.ai
TOGETHER_API_KEY=...
# Cerebras
CEREBRAS_API_KEY=...
# Fish Audio
FISH_API_KEY=...
# Assembly AI
ASSEMBLYAI_API_KEY=...
# OpenRouter
OPENROUTER_API_KEY=...
# Piper
PIPER_BASE_URL=...
# Smart turn
LOCAL_SMART_TURN_MODEL_PATH=
FAL_SMART_TURN_API_KEY=...
# Twilio
TWILIO_ACCOUNT_SID=
TWILIO_AUTH_TOKEN=

View File

@@ -1,193 +0,0 @@
# AI-COUSTICS
AICOUSTICS_LICENSE_KEY=...
# Anthropic
ANTHROPIC_API_KEY=...
# Assembly AI
ASSEMBLYAI_API_KEY=...
# Async
ASYNCAI_API_KEY=...
ASYNCAI_VOICE_ID=...
# AWS
AWS_SECRET_ACCESS_KEY=...
AWS_ACCESS_KEY_ID=...
AWS_REGION=...
# Azure
AZURE_SPEECH_REGION=...
AZURE_SPEECH_API_KEY=...
AZURE_CHATGPT_API_KEY=...
AZURE_CHATGPT_ENDPOINT=https://...
AZURE_CHATGPT_MODEL=...
AZURE_REALTIME_API_KEY=...
AZURE_REALTIME_BASE_URL=...
AZURE_DALLE_API_KEY=...
AZURE_DALLE_ENDPOINT=https://...
AZURE_DALLE_MODEL=...
# Cartesia
CARTESIA_API_KEY=...
CARTESIA_VOICE_ID=...
# Cerebras
CEREBRAS_API_KEY=...
# Daily
DAILY_API_KEY=...
DAILY_SAMPLE_ROOM_URL=https://...
# Deepgram
DEEPGRAM_API_KEY=...
# DeepSeek
DEEPSEEK_API_KEY=...
# ElevenLabs
ELEVENLABS_API_KEY=...
ELEVENLABS_VOICE_ID=...
# Fal
FAL_KEY=...
# Fireworks
FIREWORKS_API_KEY=...
# Fish Audio
FISH_API_KEY=...
# Gladia
GLADIA_API_KEY=...
GLADIA_REGION=...
# Google
GOOGLE_API_KEY=...
GOOGLE_VERTEX_TEST_CREDENTIALS=...
GOOGLE_CLOUD_PROJECT_ID=...
GOOGLE_CLOUD_LOCATION=...
GOOGLE_TEST_CREDENTIALS=...
# Grok
GROK_API_KEY=...
# Groq
GROQ_API_KEY=...
# Heygen
HEYGEN_API_KEY=...
# Hume
HUME_API_KEY=...
HUME_VOICE_ID=...
# Inworld
INWORLD_API_KEY=...
# Krisp
KRISP_MODEL_PATH=...
# Krisp Viva
KRISP_VIVA_MODEL_PATH=...
# LiveKit
LIVEKIT_API_KEY=...
LIVEKIT_API_SECRET=...
# LMNT
LMNT_API_KEY=...
LMNT_VOICE_ID=...
# MiniMax
MINIMAX_API_KEY=...
MINIMAX_GROUP_ID=...
# Mistral
MISTRAL_API_KEY=...
# Neuphonic
NEUPHONIC_API_KEY=...
# NVIDIA
NVIDIA_API_KEY=...
# OpenAI
OPENAI_API_KEY=...
# OpenPipe
OPENPIPE_API_KEY=...
# OpenRouter
OPENROUTER_API_KEY=...
# Perplexity
PERPLEXITY_API_KEY=...
# Picovoice Koala
KOALA_ACCESS_KEY=...
# Piper
PIPER_BASE_URL=...
# PlayHT
PLAYHT_USER_ID=...
PLAYHT_API_KEY=...
# Plivo
PLIVO_AUTH_ID=...
PLIVO_AUTH_TOKEN=...
# Qwen
QWEN_API_KEY=...
# Rime
RIME_API_KEY=...
RIME_VOICE_ID=...
# SambaNova
SAMBANOVA_API_KEY=...
# Sarvam AI
SARVAM_API_KEY=...
# Sentry
SENTRY_DSN=...
# Simli
SIMLI_API_KEY=...
SIMLI_FACE_ID=...
# Smart turn
LOCAL_SMART_TURN_MODEL_PATH=...
FAL_SMART_TURN_API_KEY=...
# Soniox
SONIOX_API_KEY=...
# Speechmatics
SPEECHMATICS_API_KEY=...
# Tavus
TAVUS_API_KEY=...
TAVUS_REPLICA_ID=...
# Telnyx
TELNYX_API_KEY=...
TELNYX_ACCOUNT_SID=...
# Together.ai
TOGETHER_API_KEY=...
# Twilio
TWILIO_ACCOUNT_SID=...
TWILIO_AUTH_TOKEN=...
# WhatsApp
WHATSAPP_TOKEN=...
WHATSAPP_WEBHOOK_VERIFICATION_TOKEN=...
WHATSAPP_PHONE_NUMBER_ID=...
WHATSAPP_APP_SECRET=...

View File

@@ -1,31 +1,88 @@
# Pipecat Examples
This directory contains examples to help you learn how to build with Pipecat.
## Getting Started
# Pipecat &mdash; Examples
New to Pipecat? Start here:
## Foundational snippets
Small snippets that build on each other, introducing one or two concepts at a time.
- **[Quickstart](quickstart/)** - Get your first voice AI bot running in 5 minutes _(coming soon)_
- **[Client/Server Web](client-server-web/)** - Learn to build web applications with Pipecat's client SDKs _(coming soon)_
- **[Phone Bot with Twilio](phone-bot-twilio/)** - Connect your bot to a phone number _(coming soon)_
➡️ [Take a look](https://github.com/pipecat-ai/pipecat/tree/main/examples/foundational)
## Foundational Examples
## Chatbot examples
Collection of self-contained real-time voice and video AI demo applications built with Pipecat.
Single-file examples that introduce core Pipecat concepts one at a time. These examples:
### Quickstart
- Build on each other progressively
- Focus on specific features or integrations
- Are used for testing with every Pipecat release
Each project has its own set of dependencies and configuration variables. They intentionally avoids shared code across projects &mdash; you can grab whichever demo folder you want to work with as a starting point.
See the **[Foundational Examples README](foundational/)** for the complete list.
We recommend you start with a virtual environment:
## More Advanced Examples
```shell
cd pipecat-ai/examples/simple-chatbot
Ready to explore complex use cases? Visit **[pipecat-examples](https://github.com/pipecat-ai/pipecat-examples)** for:
python -m venv venv
- Production-ready applications
- Multi-platform client implementations
- Telephony integrations
- Multimodal and creative applications
- Deployment and monitoring examples
source venv/bin/activate
pip install -r requirements.txt
```
Next, follow the steps in the README for each demo.
Make sure you `pip install -r requirements.txt` for each demo project, so you can be sure to have the necessary service dependencies that extend the functionality of Pipecat. You can read more about the framework architecture [here](https://github.com/pipecat-ai/pipecat/tree/main/docs).
## Projects:
| Project | Description | Services |
|----------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------|
| [Simple Chatbot](simple-chatbot) | Basic voice-driven conversational bot. A good starting point for learning the flow of the framework. | Deepgram, ElevenLabs, OpenAI, Daily, Daily Prebuilt UI |
| [Storytelling Chatbot](storytelling-chatbot) | Stitches together multiple third-party services to create a collaborative storytime experience. | Deepgram, ElevenLabs, OpenAI, Fal, Daily, Custom UI |
| [Translation Chatbot](translation-chatbot) | Listens for user speech, then translates that speech to Spanish and speaks the translation back. Demonstrates multi-participant use-cases. | Deepgram, Azure, OpenAI, Daily, Daily Prebuilt UI |
| [Moondream Chatbot](moondream-chatbot) | Demonstrates how to add vision capabilities to GPT4. **Note: works best with a GPU** | Deepgram, ElevenLabs, OpenAI, Moondream, Daily, Daily Prebuilt UI |
| [Patient intake](patient-intake) | A chatbot that can call functions in response to user input. | Deepgram, ElevenLabs, OpenAI, Daily, Daily Prebuilt UI |
| [Phone Chatbot](phone-chatbot) | A chatbot that connects to PSTN/SIP phone calls, powered by Daily or Twilio. | Deepgram, ElevenLabs, OpenAI, Daily, Twilio |
| [Twilio Chatbot](twilio-chatbot) | A chatbot that connects to an incoming phone call from Twilio. | Deepgram, ElevenLabs, OpenAI, Daily, Twilio |
| [studypal](studypal) | A chatbot to have a conversation about any article on the web | |
| [WebSocket Chatbot Server](websocket-server) | A real-time websocket server that handles audio streaming and bot interactions with speech-to-text and text-to-speech capabilities. | Cartesia, Deepgram, OpenAI, Websockets |
> [!IMPORTANT]
> These example projects use Daily as a WebRTC transport and can be joined using their hosted Prebuilt UI.
> It provides a quick way to join a real-time session with your bot and test your ideas without building any frontend code. If you'd like to see an example of a custom UI, try Storybot.
## FAQ
### Deployment
For each of these demos we've included a `Dockerfile`. Out of the box, this should provide everything needed to get the respective demo running on a VM:
```shell
docker build username/app:tag .
docker run -p 7860:7860 --env-file ./.env username/app:tag
docker push ...
```
### SSL
If you're working with a custom UI (such as with the Storytelling Chatbot), it's important to ensure your deployment platform supports HTTPS, as accessing user devices such as mics and webcams requires SSL.
If you try to run a custom UI without SSL, you may see an error in the console telling you that `navigator` is undefined, or no devices are available.
### Are these examples production ready?
Yes, kind of.
These demos attempt to keep things simple and are unopinionated regarding environment or scalability.
We're using FastAPI to spawn a subprocess for the bots / agents &mdash; useful for small tests, but not so great for production grade apps with many concurrent users. You can see how this works in each project's `start` endpoint in `server.py`.
Creating virtualized worker pools and on-demand instances is out of scope for these examples, but we hope to add some examples to this repo soon!
For projects that have CUDA as a requirement, such as Moondream Chatbot, be sure to deploy to a GPU-powered platform (such as [fly.io](https://fly.io) or [Runpod](https://runpod.io).)
## Getting help
➡️ [Join our Discord](https://discord.gg/pipecat)
➡️ [Reach us on Twitter](https://x.com/pipecat_ai)

View File

@@ -0,0 +1,45 @@
# Bot ready signaling
A simple Pipecat example demonstrating how to handle signaling between the client and the bot,
ensuring that the bot starts sending audio only when the client is available,
thereby avoiding the risk of cutting off the beginning of the audio.
## Quick Start
### First, start the bot server:
1. Navigate to the server directory:
```bash
cd server
```
2. Create and activate a virtual environment:
```bash
python3 -m venv venv
source venv/bin/activate # On Windows: venv\Scripts\activate
```
3. Install requirements:
```bash
pip install -r requirements.txt
```
4. Copy env.example to .env and configure:
- Add your API keys
5. Start the server:
```bash
python server.py
```
### Next, connect using the client app:
For client-side setup, refer to the [JavaScript Guide](client/javascript/README.md).
## Important Note
Ensure the bot server is running before using any client implementations.
## Requirements
- Python 3.10+
- Node.js 16+ (for JavaScript)
- Daily API key
- Cartesia API key
- Modern web browser with WebRTC support

View File

@@ -0,0 +1,27 @@
# JavaScript Implementation
Basic implementation using the [Pipecat JavaScript SDK](https://docs.pipecat.ai/client/js/introduction).
## Setup
1. Run the bot server. See the [server README](../../README).
2. Navigate to the `client/javascript` directory:
```bash
cd client/javascript
```
3. Install dependencies:
```bash
npm install
```
4. Run the client app:
```
npm run dev
```
5. Visit http://localhost:5173 in your browser.

View File

@@ -0,0 +1,34 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>AI Chatbot</title>
</head>
<body>
<div class="container">
<div class="status-bar">
<div class="status">
Status: <span id="connection-status">Disconnected</span>
</div>
<div class="controls">
<button id="connect-btn">Connect</button>
<button id="disconnect-btn" disabled>Disconnect</button>
</div>
</div>
<audio id="bot-audio" autoplay></audio>
<div class="debug-panel">
<h3>Debug Info</h3>
<div id="debug-log"></div>
</div>
</div>
<script type="module" src="/src/app.js"></script>
<link rel="stylesheet" href="/src/style.css">
</body>
</html>

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,20 @@
{
"name": "client",
"version": "1.0.0",
"main": "index.js",
"scripts": {
"dev": "vite",
"build": "vite build",
"preview": "vite preview"
},
"keywords": [],
"author": "",
"license": "ISC",
"description": "",
"devDependencies": {
"vite": "^6.0.9"
},
"dependencies": {
"@daily-co/daily-js": "0.74.0"
}
}

View File

@@ -0,0 +1,216 @@
/**
* Copyright (c) 20242025, Daily
*
* SPDX-License-Identifier: BSD 2-Clause License
*/
import Daily from "@daily-co/daily-js";
/**
* ChatbotClient handles the connection and media management for a real-time
* voice interaction with an AI bot.
*/
class ChatbotClient {
constructor() {
// Initialize client state
this.dailyCallObject = null;
this.setupDOMElements();
this.setupEventListeners();
}
/**
* Set up references to DOM elements and create necessary media elements
*/
setupDOMElements() {
// Get references to UI control elements
this.connectBtn = document.getElementById('connect-btn');
this.disconnectBtn = document.getElementById('disconnect-btn');
this.statusSpan = document.getElementById('connection-status');
this.debugLog = document.getElementById('debug-log');
// Create an audio element for bot's voice output
this.botAudio = document.createElement('audio');
this.botAudio.autoplay = true;
this.botAudio.playsInline = true;
document.body.appendChild(this.botAudio);
}
/**
* Set up event listeners for connect/disconnect buttons
*/
setupEventListeners() {
this.connectBtn.addEventListener('click', () => this.connect());
this.disconnectBtn.addEventListener('click', () => this.disconnect());
}
/**
* Add a timestamped message to the debug log
*/
log(message) {
const entry = document.createElement('div');
entry.textContent = `${new Date().toISOString()} - ${message}`;
// Add styling based on message type
if (message.startsWith('User: ')) {
entry.style.color = '#2196F3'; // blue for user
} else if (message.startsWith('Bot: ')) {
entry.style.color = '#4CAF50'; // green for bot
}
this.debugLog.appendChild(entry);
this.debugLog.scrollTop = this.debugLog.scrollHeight;
console.log(message);
}
/**
* Update the connection status display
*/
updateStatus(status) {
this.statusSpan.textContent = status;
this.log(`Status: ${status}`);
}
handleEventToConsole (evt) {
this.log(`Received event: ${evt.action}`);
};
/**
* Set up listeners for track events (start/stop)
* This handles new tracks being added during the session
*/
setupTrackListeners() {
if (!this.dailyCallObject) return;
this.dailyCallObject.on("joined-meeting", () => {
this.updateStatus('Connected');
this.connectBtn.disabled = true;
this.disconnectBtn.disabled = false;
this.log('Client connected');
});
this.dailyCallObject.on("track-started", (evt) => {
if (evt.track.kind === "audio" && evt.participant.local === false) {
this.log("Audio track started.")
this.setupAudioTrack(evt.track);
}
});
this.dailyCallObject.on("track-stopped", this.handleEventToConsole.bind(this));
this.dailyCallObject.on("participant-joined", this.handleEventToConsole.bind(this));
this.dailyCallObject.on("participant-updated", this.handleEventToConsole.bind(this));
this.dailyCallObject.on("participant-left", () => {
// When the bot leaves, we are also disconnecting from the call
this.disconnect()
});
this.dailyCallObject.on("left-meeting", () => {
this.updateStatus('Disconnected');
this.connectBtn.disabled = false;
this.disconnectBtn.disabled = true;
this.log('Client disconnected');
});
this.dailyCallObject.on("error", this.handleEventToConsole.bind(this));
}
/**
* Set up an audio track for playback
* Handles both initial setup and track updates
*/
setupAudioTrack(track) {
this.log(`Setting up audio track, track state: ${track.readyState}, muted: ${track.muted}`);
// Check if we're already playing this track
if (this.botAudio.srcObject) {
const oldTrack = this.botAudio.srcObject.getAudioTracks()[0];
if (oldTrack?.id === track.id) return;
}
// Create a new MediaStream with the track and set it as the audio source
this.botAudio.srcObject = new MediaStream([track]);
this.botAudio.onplaying = async (event) => {
this.log("onplaying")
this.log("Will send the audio message to play the audio at the next tick")
this.dailyCallObject.sendAppMessage("playable")
}
}
async fetchRoomInfo() {
let connectUrl = '/connect'
let res = await fetch(connectUrl, {
method: "POST",
mode: "cors",
headers: new Headers({
"Content-Type": "application/json"
}),
})
if (res.ok) {
return res.json();
}
}
/**
* Initialize and connect to the bot
* This sets up the RTVI client, initializes devices, and establishes the connection
*/
async connect() {
try {
// Initialize the client
this.dailyCallObject = Daily.createCallObject({
subscribeToTracksAutomatically: true,
});
// Set up listeners for media track events
this.setupTrackListeners();
this.log('Creating the bot...');
let roomInfo = await this.fetchRoomInfo()
// Connect to the bot
this.log('Connecting to bot...');
// Only for making debugger easier
window.callObject = this.dailyCallObject;
await this.dailyCallObject.join({
url: roomInfo.room_url,
});
this.log('Connection complete');
} catch (error) {
// Handle any errors during connection
this.log(`Error connecting: ${error.message}`);
this.log(`Error stack: ${error.stack}`);
this.updateStatus('Error');
// Clean up if there's an error
if (this.dailyCallObject) {
try {
await this.dailyCallObject.leave();
} catch (disconnectError) {
this.log(`Error during disconnect: ${disconnectError.message}`);
}
}
}
}
/**
* Disconnect from the bot and clean up media resources
*/
async disconnect() {
if (this.dailyCallObject) {
try {
// Disconnect the RTVI client
await this.dailyCallObject.leave();
await this.dailyCallObject.destroy();
this.dailyCallObject = null;
// Clean up audio
if (this.botAudio.srcObject) {
this.botAudio.srcObject.getTracks().forEach((track) => track.stop());
this.botAudio.srcObject = null;
}
} catch (error) {
this.log(`Error disconnecting: ${error.message}`);
}
}
}
}
// Initialize the client when the page loads
window.addEventListener('DOMContentLoaded', () => {
new ChatbotClient();
});

View File

@@ -0,0 +1,98 @@
body {
margin: 0;
padding: 20px;
font-family: Arial, sans-serif;
background-color: #f0f0f0;
}
.container {
max-width: 1200px;
margin: 0 auto;
}
.status-bar {
display: flex;
justify-content: space-between;
align-items: center;
padding: 10px;
background-color: #fff;
border-radius: 8px;
margin-bottom: 20px;
}
.controls button {
padding: 8px 16px;
margin-left: 10px;
border: none;
border-radius: 4px;
cursor: pointer;
}
#connect-btn {
background-color: #4caf50;
color: white;
}
#disconnect-btn {
background-color: #f44336;
color: white;
}
button:disabled {
opacity: 0.5;
cursor: not-allowed;
}
.main-content {
background-color: #fff;
border-radius: 8px;
padding: 20px;
margin-bottom: 20px;
}
.bot-container {
display: flex;
flex-direction: column;
align-items: center;
}
#bot-video-container {
width: 640px;
height: 360px;
background-color: #e0e0e0;
border-radius: 8px;
margin: 20px auto;
overflow: hidden;
display: flex;
align-items: center;
justify-content: center;
}
#bot-video-container video {
width: 100%;
height: 100%;
object-fit: cover;
}
.debug-panel {
background-color: #fff;
border-radius: 8px;
padding: 20px;
}
.debug-panel h3 {
margin: 0 0 10px 0;
font-size: 16px;
font-weight: bold;
}
#debug-log {
height: 200px;
overflow-y: auto;
background-color: #f8f8f8;
padding: 10px;
border-radius: 4px;
font-family: monospace;
font-size: 12px;
line-height: 1.4;
}

View File

@@ -0,0 +1,13 @@
import { defineConfig } from 'vite';
export default defineConfig({
server: {
proxy: {
// Proxy /api requests to the backend server
'/connect': {
target: 'http://0.0.0.0:7860', // Replace with your backend URL
changeOrigin: true,
},
},
},
});

View File

@@ -0,0 +1 @@
22.14

View File

@@ -0,0 +1,60 @@
# React Native Implementation
Basic implementation using the [Pipecat React Native SDK](https://docs.pipecat.ai/client/react-native/introduction).
## Usage
### Expo requirements
This project cannot be used with an [Expo Go](https://docs.expo.dev/workflow/expo-go/) app because [it requires custom native code](https://docs.expo.io/workflow/customizing/).
When a project requires custom native code or a config plugin, we need to transition from using [Expo Go](https://docs.expo.dev/workflow/expo-go/)
to a [development build](https://docs.expo.dev/development/introduction/).
More details about the custom native code used by this demo can be found in [rn-daily-js-expo-config-plugin](https://github.com/daily-co/rn-daily-js-expo-config-plugin).
### Building remotely
If you do not have experience with Xcode and Android Studio builds or do not have them installed locally on your computer, you will need to follow [this guide from Expo to use EAS Build](https://docs.expo.dev/development/create-development-builds/#create-and-install-eas-build).
### Building locally
You will need to have installed locally on your computer:
- [Xcode](https://developer.apple.com/xcode/) to build for iOS;
- [Android Studio](https://developer.android.com/studio) to build for Android;
#### Install the demo dependencies
```bash
# Use the version of node specified in .nvmrc
nvm i
# Install dependencies
npm i
# Before a native app can be compiled, the native source code must be generated.
npx expo prebuild
# Configure the environment variable to connect to the local server
cp env.example .env
# edit .env and add your local ip address, for example: http://192.168.1.16:7860
```
#### Running on Android
After plugging in an Android device [configured for debugging](https://developer.android.com/studio/debug/dev-options), run the following command:
```
npm run android
```
#### Running on iOS
Run the following command:
```
npm run ios
```
#### Connect to the server
Use the http://localhost:5173 in your app.

View File

@@ -0,0 +1,75 @@
{
"expo": {
"name": "bot-ready-rn",
"slug": "bot-ready-rn",
"version": "1.0.0",
"orientation": "portrait",
"icon": "./assets/icon.png",
"userInterfaceStyle": "light",
"splash": {
"image": "./assets/splash.png",
"resizeMode": "contain",
"backgroundColor": "#ffffff"
},
"updates": {
"fallbackToCacheTimeout": 0
},
"assetBundlePatterns": [
"**/*"
],
"ios": {
"supportsTablet": true,
"bitcode": false,
"bundleIdentifier": "co.daily.expo.BotReady",
"infoPlist": {
"UIBackgroundModes": [
"voip"
]
},
"appleTeamId": "EEBGKV9N3N"
},
"android": {
"adaptiveIcon": {
"foregroundImage": "./assets/adaptive-icon.png",
"backgroundColor": "#FFFFFF"
},
"package": "co.daily.expo.BotReady",
"permissions": [
"android.permission.ACCESS_NETWORK_STATE",
"android.permission.BLUETOOTH",
"android.permission.CAMERA",
"android.permission.INTERNET",
"android.permission.MODIFY_AUDIO_SETTINGS",
"android.permission.RECORD_AUDIO",
"android.permission.SYSTEM_ALERT_WINDOW",
"android.permission.WAKE_LOCK",
"android.permission.FOREGROUND_SERVICE",
"android.permission.FOREGROUND_SERVICE_CAMERA",
"android.permission.FOREGROUND_SERVICE_MICROPHONE",
"android.permission.FOREGROUND_SERVICE_MEDIA_PROJECTION",
"android.permission.POST_NOTIFICATIONS"
]
},
"web": {
"favicon": "./assets/favicon.png"
},
"plugins": [
"@config-plugins/react-native-webrtc",
"@daily-co/config-plugin-rn-daily-js",
[
"expo-build-properties",
{
"android": {
"minSdkVersion": 24,
"compileSdkVersion": 35,
"targetSdkVersion": 34,
"buildToolsVersion": "35.0.0"
},
"ios": {
"deploymentTarget": "15.1"
}
}
]
]
}
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

View File

@@ -0,0 +1,7 @@
module.exports = function(api) {
api.cache(true);
return {
presets: ['babel-preset-expo'],
plugins: [["module:react-native-dotenv"]],
};
};

View File

@@ -0,0 +1 @@
API_BASE_URL=http://YOUR_LOCAL_IP:7860

View File

@@ -0,0 +1,7 @@
import { registerRootComponent } from "expo";
import App from "./src/App";
// registerRootComponent calls AppRegistry.registerComponent('main', () => App);
// It also ensures that the environment is set up appropriately
registerRootComponent(App);

View File

@@ -0,0 +1,4 @@
// Learn more https://docs.expo.io/guides/customizing-metro
const { getDefaultConfig } = require('expo/metro-config');
module.exports = getDefaultConfig(__dirname);

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,31 @@
{
"name": "bot-ready-rn",
"version": "1.0.0",
"scripts": {
"start": "expo start --dev-client",
"android": "expo run:android --device",
"ios": "expo run:ios --device",
"web": "expo start --web"
},
"dependencies": {
"@config-plugins/react-native-webrtc": "^10.0.0",
"@daily-co/config-plugin-rn-daily-js": "0.0.7",
"@daily-co/react-native-daily-js": "^0.70.0",
"@daily-co/react-native-webrtc": "^118.0.3-daily.2",
"@react-native-async-storage/async-storage": "1.23.1",
"expo": "^52.0.0",
"expo-build-properties": "~0.13.1",
"expo-dev-client": "~5.0.5",
"expo-splash-screen": "~0.29.16",
"expo-status-bar": "~2.0.0",
"react": "18.3.1",
"react-native": "0.76.3",
"react-native-background-timer": "^2.4.1",
"react-native-dotenv": "^3.4.11",
"react-native-get-random-values": "^1.11.0"
},
"devDependencies": {
"@babel/core": "^7.12.9"
},
"private": true
}

View File

@@ -0,0 +1,121 @@
import React, { useState, useEffect } from 'react';
import {SafeAreaView, View, Text, Button, StyleSheet, ScrollView} from 'react-native';
import Daily from "@daily-co/react-native-daily-js";
import { API_BASE_URL } from "@env";
const CallScreen = () => {
const [connectionStatus, setConnectionStatus] = useState('Disconnected');
const [isConnected, setIsConnected] = useState(false);
const [callObject, setCallObject] = useState(null);
const [logs, setLogs] = useState([]);
useEffect(() => {
if (callObject) {
setupTrackListeners(callObject);
}
}, [callObject]);
const log = (message) => {
setLogs((prevLogs) => [...prevLogs, `${new Date().toISOString()} - ${message}`]);
console.log(message);
};
const setupTrackListeners = (callObject) => {
callObject.on("joined-meeting", () => {
setConnectionStatus('Connected');
setIsConnected(true);
log('Client connected');
});
callObject.on("left-meeting", () => {
setConnectionStatus('Disconnected');
setIsConnected(false);
log('Client disconnected');
});
callObject.on("participant-left", () => {
// When the bot leaves, we are also disconnecting from the call
disconnect().catch((err) => {
log(`Failed to disconnect ${err}`);
})
});
// Trigger so the bot can start sending audio
callObject.on("track-started", (evt) => {
if (evt.track.kind === "audio" && evt.participant.local === false) {
handleEventToConsole(evt)
log("Sending the message that will trigger the bot to play the audio.")
callObject.sendAppMessage("playable")
}
});
callObject.on("error", (evt) => log(`Error: ${evt.error}`));
// Other events just for awareness
callObject.on("track-stopped", handleEventToConsole);
callObject.on("participant-joined", handleEventToConsole);
callObject.on("participant-updated", handleEventToConsole);
};
const handleEventToConsole = (evt) => {
log(`Received event: ${evt.action}`);
};
const connect = async () => {
try {
const callObject = Daily.createCallObject({ subscribeToTracksAutomatically: true });
setCallObject(callObject);
const connectionUrl = `${API_BASE_URL}/connect`
const res = await fetch(connectionUrl, { method: "POST", headers: { "Content-Type": "application/json" } });
const roomInfo = await res.json();
await callObject.join({ url: roomInfo.room_url });
} catch (error) {
log(`Error connecting: ${error.message}`);
}
};
const disconnect = async () => {
if (callObject) {
try {
await callObject.leave();
await callObject.destroy();
setCallObject(null);
} catch (error) {
log(`Error disconnecting: ${error.message}`);
}
}
};
return (
<SafeAreaView style={styles.safeArea}>
<View style={styles.container}>
<View style={styles.statusBar}>
<Text>Status: <Text style={styles.status}>{connectionStatus}</Text></Text>
<View style={styles.controls}>
<Button
title={isConnected ? "Disconnect" : "Connect"}
onPress={isConnected ? disconnect : connect}
/>
</View>
</View>
<View style={styles.debugPanel}>
<Text style={styles.debugTitle}>Debug Info</Text>
<ScrollView style={styles.debugLog}>
{logs.map((logEntry, index) => (
<Text key={index} style={styles.logText}>{logEntry}</Text>
))}
</ScrollView>
</View>
</View>
</SafeAreaView>
);
};
const styles = StyleSheet.create({
safeArea: { flex: 1, backgroundColor: '#f0f0f0', padding: 20 },
container: { flex: 1, margin: 20 },
statusBar: { flexDirection: 'row', justifyContent: 'space-between', alignItems: 'center', padding: 10, backgroundColor: '#fff', borderRadius: 8, marginBottom: 20 },
status: { fontWeight: 'bold' },
controls: { flexDirection: 'row', gap: 10 },
debugPanel: { height: '80%', backgroundColor: '#fff', borderRadius: 8, padding: 20},
debugTitle: { fontSize: 16, fontWeight: 'bold' },
debugLog: { height: '100%', overflow: 'scroll', backgroundColor: '#f8f8f8', padding: 10, borderRadius: 4, fontFamily: 'monospace', fontSize: 12, lineHeight: 1.4 },
});
export default CallScreen;

View File

@@ -0,0 +1,50 @@
# Bot ready signaling Server
A FastAPI server that manages bot instances and provide endpoint for Pipecat client connections.
## Endpoints
- `POST /connect` - Pipecat client connection endpoint
## Environment Variables
Copy `env.example` to `.env` and configure:
```ini
# Required API Keys
DAILY_API_KEY= # Your Daily API key
CARTESIA_API_KEY= # Your Cartesia API key
# Optional Configuration
DAILY_API_URL= # Optional: Daily API URL (defaults to https://api.daily.co/v1)
DAILY_SAMPLE_ROOM_URL= # Optional: Fixed room URL for development
HOST= # Optional: Host address (defaults to 0.0.0.0)
FAST_API_PORT= # Optional: Port number (defaults to 7860)
```
## Running the Server
Set up and activate your virtual environment:
```bash
python3 -m venv venv
source venv/bin/activate # On Windows: venv\Scripts\activate
```
Install dependencies:
```bash
pip install -r requirements.txt
```
If you want to use the local version of `pipecat` in this repo rather than the last published version, also run:
```bash
pip install --editable "../../../[daily,cartesia,openai]"
```
Run the server:
```bash
python server.py
```

View File

@@ -0,0 +1,3 @@
DAILY_SAMPLE_ROOM_URL=https://yourdomain.daily.co/yourroom # (for joining the bot to the same room repeatedly for local dev)
DAILY_API_KEY=
CARTESIA_API_KEY=

View File

@@ -0,0 +1,4 @@
python-dotenv
fastapi[all]
uvicorn
pipecat-ai[daily,cartesia,openai]

View File

@@ -0,0 +1,64 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import argparse
import os
from typing import Optional
import aiohttp
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
async def configure(aiohttp_session: aiohttp.ClientSession):
(url, token, _) = await configure_with_args(aiohttp_session)
return (url, token)
async def configure_with_args(
aiohttp_session: aiohttp.ClientSession, parser: Optional[argparse.ArgumentParser] = None
):
if not parser:
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
parser.add_argument(
"-u", "--url", type=str, required=False, help="URL of the Daily room to join"
)
parser.add_argument(
"-k",
"--apikey",
type=str,
required=False,
help="Daily API Key (needed to create an owner token for the room)",
)
args, unknown = parser.parse_known_args()
url = args.url or os.getenv("DAILY_SAMPLE_ROOM_URL")
key = args.apikey or os.getenv("DAILY_API_KEY")
if not url:
raise Exception(
"No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL."
)
if not key:
raise Exception(
"No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers."
)
daily_rest_helper = DailyRESTHelper(
daily_api_key=key,
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
aiohttp_session=aiohttp_session,
)
# Create a meeting token for the given room with an expiration 1 hour in
# the future.
expiry_time: float = 60 * 60
token = await daily_rest_helper.get_token(url, expiry_time)
return (url, token, args)

View File

@@ -0,0 +1,147 @@
#
# Copyright (c) 2025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import argparse
import os
import subprocess
from contextlib import asynccontextmanager
from typing import Any, Dict
import aiohttp
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper, DailyRoomParams
# Load environment variables from .env file
load_dotenv(override=True)
# Dictionary to track bot processes: {pid: (process, room_url)}
bot_procs = {}
# Store Daily API helpers
daily_helpers = {}
def cleanup():
"""Cleanup function to terminate all bot processes.
Called during server shutdown.
"""
for entry in bot_procs.values():
proc = entry[0]
proc.terminate()
proc.wait()
@asynccontextmanager
async def lifespan(app: FastAPI):
"""FastAPI lifespan manager that handles startup and shutdown tasks.
- Creates aiohttp session
- Initializes Daily API helper
- Cleans up resources on shutdown
"""
aiohttp_session = aiohttp.ClientSession()
daily_helpers["rest"] = DailyRESTHelper(
daily_api_key=os.getenv("DAILY_API_KEY", ""),
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
aiohttp_session=aiohttp_session,
)
yield
await aiohttp_session.close()
cleanup()
# Initialize FastAPI app with lifespan manager
app = FastAPI(lifespan=lifespan)
# Configure CORS to allow requests from any origin
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
async def create_room_and_token() -> tuple[str, str]:
"""Helper function to create a Daily room and generate an access token.
Returns:
tuple[str, str]: A tuple containing (room_url, token)
Raises:
HTTPException: If room creation or token generation fails
"""
room = await daily_helpers["rest"].create_room(DailyRoomParams())
if not room.url:
raise HTTPException(status_code=500, detail="Failed to create room")
token = await daily_helpers["rest"].get_token(room.url)
if not token:
raise HTTPException(status_code=500, detail=f"Failed to get token for room: {room.url}")
return room.url, token
@app.post("/connect")
async def bot_connect(request: Request) -> Dict[Any, Any]:
"""Connect endpoint that creates a room and returns connection credentials.
This endpoint is called by client to establish a connection.
Returns:
Dict[Any, Any]: Authentication bundle containing room_url and token
Raises:
HTTPException: If room creation, token generation, or bot startup fails
"""
print("Creating room for RTVI connection")
room_url, token = await create_room_and_token()
print(f"Room URL: {room_url}")
# Start the bot process
try:
bot_file = "signalling_bot"
proc = subprocess.Popen(
[f"python3 -m {bot_file} -u {room_url} -t {token}"],
shell=True,
bufsize=1,
cwd=os.path.dirname(os.path.abspath(__file__)),
)
bot_procs[proc.pid] = (proc, room_url)
except Exception as e:
raise HTTPException(status_code=500, detail=f"Failed to start subprocess: {e}")
# Return the authentication bundle in format expected by DailyTransport
return {"room_url": room_url, "token": token}
if __name__ == "__main__":
import uvicorn
# Parse command line arguments for server configuration
default_host = os.getenv("HOST", "0.0.0.0")
default_port = int(os.getenv("FAST_API_PORT", "7860"))
parser = argparse.ArgumentParser(description="Daily Travel Companion FastAPI server")
parser.add_argument("--host", type=str, default=default_host, help="Host address")
parser.add_argument("--port", type=int, default=default_port, help="Port number")
parser.add_argument("--reload", action="store_true", help="Reload code on change")
config = parser.parse_args()
# Start the FastAPI server
uvicorn.run(
"server:app",
host=config.host,
port=config.port,
reload=config.reload,
)

View File

@@ -0,0 +1,95 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import os
import sys
from dataclasses import dataclass
import aiohttp
from dotenv import load_dotenv
from loguru import logger
from runner import configure
from pipecat.frames.frames import AudioRawFrame, EndFrame, OutputAudioRawFrame, TTSSpeakFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineTask
from pipecat.services.cartesia.tts import CartesiaTTSService
from pipecat.transports.services.daily import DailyParams, DailyTransport
load_dotenv(override=True)
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
@dataclass
class SilenceFrame(OutputAudioRawFrame):
def __init__(
self,
*,
sample_rate: int,
duration: float,
):
# Initialize the parent class with the silent frame's data
super().__init__(
audio=self.create_silent_audio_frame(sample_rate, 1, duration).audio,
sample_rate=sample_rate,
num_channels=1,
)
@staticmethod
def create_silent_audio_frame(
sample_rate: int, num_channels: int, duration: float
) -> AudioRawFrame:
"""Create an AudioRawFrame containing silence."""
frame_size = num_channels * 2 # 2 bytes per sample for 16-bit audio
total_frames = int(sample_rate * duration)
total_bytes = total_frames * frame_size
silent_audio = bytes(total_bytes) # Create a byte array filled with zeros
return AudioRawFrame(audio=silent_audio, sample_rate=sample_rate, num_channels=num_channels)
async def main():
async with aiohttp.ClientSession() as session:
(room_url, _) = await configure(session)
transport = DailyTransport(
room_url, None, "Say One Thing", DailyParams(audio_out_enabled=True)
)
tts = CartesiaTTSService(
api_key=os.getenv("CARTESIA_API_KEY"),
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
)
runner = PipelineRunner()
task = PipelineTask(Pipeline([tts, transport.output()]))
# Register an event handler so we can play the audio when we receive a specific message
@transport.event_handler("on_app_message")
async def on_app_message(transport, message, sender):
logger.debug(f"Received app message: {message} - {sender}")
if "playable" not in message:
return
await task.queue_frames(
[
SilenceFrame(
sample_rate=task.params.audio_out_sample_rate,
duration=0.5,
),
TTSSpeakFrame(f"Hello there, how are you doing today ?"),
EndFrame(),
]
)
await runner.run(task)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -0,0 +1,161 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
runpod.toml

View File

@@ -0,0 +1,15 @@
FROM python:3.10-bullseye
RUN mkdir /app
RUN mkdir /app/assets
RUN mkdir /app/utils
COPY *.py /app/
COPY requirements.txt /app/
WORKDIR /app
RUN pip3 install -r requirements.txt
EXPOSE 7860
CMD ["python3", "server.py"]

View File

@@ -0,0 +1,37 @@
# Simple Chatbot
<img src="image.png" width="420px">
This app connects you to a chatbot powered by GPT-4, complete with animations generated by Stable Video Diffusion.
See a video of it in action: https://x.com/kwindla/status/1778628911817183509
And a quick video walkthrough of the code: https://www.loom.com/share/13df1967161f4d24ade054e7f8753416
The first time, things might take extra time to get started since VAD (Voice Activity Detection) model needs to be downloaded.
## Get started
```python
python3 -m venv venv
source venv/bin/activate
pip install -r requirements.txt
cp env.example .env # and add your credentials
```
## Run the server
```bash
python server.py
```
Then, visit `http://localhost:7860/` in your browser to start a chatbot session.
## Build and test the Docker image
```
docker build -t chatbot .
docker run --env-file .env -p 7860:7860 chatbot
```

View File

@@ -0,0 +1,162 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import datetime
import io
import os
import sys
import wave
import aiofiles
import aiohttp
from dotenv import load_dotenv
from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
from pipecat.services.openai.llm import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
load_dotenv(override=True)
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
# Create the recordings directory if it doesn't exist
os.makedirs("recordings", exist_ok=True)
async def save_audio(audio: bytes, sample_rate: int, num_channels: int, name: str):
if len(audio) > 0:
filename = os.path.join(
"recordings",
f"{name}_conversation_recording{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.wav",
)
with io.BytesIO() as buffer:
with wave.open(buffer, "wb") as wf:
wf.setsampwidth(2)
wf.setnchannels(num_channels)
wf.setframerate(sample_rate)
wf.writeframes(audio)
async with aiofiles.open(filename, "wb") as file:
await file.write(buffer.getvalue())
print(f"Merged audio saved to {filename}")
else:
print("No audio data to save")
async def main():
async with aiohttp.ClientSession() as session:
(room_url, token) = await configure(session)
transport = DailyTransport(
room_url,
token,
"Chatbot",
DailyParams(
audio_out_enabled=True,
audio_in_enabled=True,
video_out_enabled=False,
vad_analyzer=SileroVADAnalyzer(),
transcription_enabled=True,
#
# Spanish
#
# transcription_settings=DailyTranscriptionSettings(
# language="es",
# tier="nova",
# model="2-general"
# )
),
)
tts = ElevenLabsTTSService(
api_key=os.getenv("ELEVENLABS_API_KEY"),
#
# English
#
voice_id="cgSgspJ2msm6clMCkdW9",
#
# Spanish
#
# model="eleven_multilingual_v2",
# voice_id="gD1IexrzCvsXPHUuT0s3",
)
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
messages = [
{
"role": "system",
#
# English
#
"content": "You are Chatbot, a friendly, helpful robot. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by introducing yourself. Keep all your response to 12 words or fewer.",
#
# Spanish
#
# "content": "Eres Chatbot, un amigable y útil robot. Tu objetivo es demostrar tus capacidades de una manera breve. Tus respuestas se convertiran a audio así que nunca no debes incluir caracteres especiales. Contesta a lo que el usuario pregunte de una manera creativa, útil y breve. Empieza por presentarte a ti mismo.",
},
]
context = OpenAILLMContext(messages)
context_aggregator = llm.create_context_aggregator(context)
# NOTE: Watch out! This will save all the conversation in memory. You
# can pass `buffer_size` to get periodic callbacks.
audiobuffer = AudioBufferProcessor(enable_turn_audio=True)
pipeline = Pipeline(
[
transport.input(), # microphone
context_aggregator.user(),
llm,
tts,
transport.output(),
audiobuffer, # used to buffer the audio in the pipeline
context_aggregator.assistant(),
]
)
task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
@audiobuffer.event_handler("on_audio_data")
async def on_audio_data(buffer, audio, sample_rate, num_channels):
await save_audio(audio, sample_rate, num_channels, "full")
@audiobuffer.event_handler("on_user_turn_audio_data")
async def on_user_turn_audio_data(buffer, audio, sample_rate, num_channels):
await save_audio(audio, sample_rate, num_channels, "user")
@audiobuffer.event_handler("on_bot_turn_audio_data")
async def on_bot_turn_audio_data(buffer, audio, sample_rate, num_channels):
await save_audio(audio, sample_rate, num_channels, "bot")
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
await audiobuffer.start_recording()
await transport.capture_participant_transcription(participant["id"])
await task.queue_frames([context_aggregator.user().get_context_frame()])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
print(f"Participant left: {participant}")
await task.cancel()
runner = PipelineRunner()
await runner.run(task)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -0,0 +1,4 @@
DAILY_SAMPLE_ROOM_URL=https://yourdomain.daily.co/yourroom # (for joining the bot to the same room repeatedly for local dev)
DAILY_API_KEY=7df...
OPENAI_API_KEY=sk-PL...
ELEVENLABS_API_KEY=aeb...

View File

@@ -0,0 +1,5 @@
aiofiles
python-dotenv
fastapi[all]
uvicorn
pipecat-ai[daily,openai,silero,elevenlabs]

View File

@@ -0,0 +1,55 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import argparse
import os
import aiohttp
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
async def configure(aiohttp_session: aiohttp.ClientSession):
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
parser.add_argument(
"-u", "--url", type=str, required=False, help="URL of the Daily room to join"
)
parser.add_argument(
"-k",
"--apikey",
type=str,
required=False,
help="Daily API Key (needed to create an owner token for the room)",
)
args, unknown = parser.parse_known_args()
url = args.url or os.getenv("DAILY_SAMPLE_ROOM_URL")
key = args.apikey or os.getenv("DAILY_API_KEY")
if not url:
raise Exception(
"No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL."
)
if not key:
raise Exception(
"No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers."
)
daily_rest_helper = DailyRESTHelper(
daily_api_key=key,
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
aiohttp_session=aiohttp_session,
)
# Create a meeting token for the given room with an expiration 1 hour in
# the future.
expiry_time: float = 60 * 60
token = await daily_rest_helper.get_token(url, expiry_time)
return (url, token)

View File

@@ -0,0 +1,139 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import argparse
import os
import subprocess
from contextlib import asynccontextmanager
import aiohttp
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, RedirectResponse
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper, DailyRoomParams
MAX_BOTS_PER_ROOM = 1
# Bot sub-process dict for status reporting and concurrency control
bot_procs = {}
daily_helpers = {}
load_dotenv(override=True)
def cleanup():
# Clean up function, just to be extra safe
for entry in bot_procs.values():
proc = entry[0]
proc.terminate()
proc.wait()
@asynccontextmanager
async def lifespan(app: FastAPI):
aiohttp_session = aiohttp.ClientSession()
daily_helpers["rest"] = DailyRESTHelper(
daily_api_key=os.getenv("DAILY_API_KEY", ""),
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
aiohttp_session=aiohttp_session,
)
yield
await aiohttp_session.close()
cleanup()
app = FastAPI(lifespan=lifespan)
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
@app.get("/")
async def start_agent(request: Request):
print(f"!!! Creating room")
room = await daily_helpers["rest"].create_room(DailyRoomParams())
print(f"!!! Room URL: {room.url}")
# Ensure the room property is present
if not room.url:
raise HTTPException(
status_code=500,
detail="Missing 'room' property in request data. Cannot start agent without a target room!",
)
# Check if there is already an existing process running in this room
num_bots_in_room = sum(
1 for proc in bot_procs.values() if proc[1] == room.url and proc[0].poll() is None
)
if num_bots_in_room >= MAX_BOTS_PER_ROOM:
raise HTTPException(status_code=500, detail=f"Max bot limited reach for room: {room.url}")
# Get the token for the room
token = await daily_helpers["rest"].get_token(room.url)
if not token:
raise HTTPException(status_code=500, detail=f"Failed to get token for room: {room.url}")
# Spawn a new agent, and join the user session
# Note: this is mostly for demonstration purposes (refer to 'deployment' in README)
try:
proc = subprocess.Popen(
[f"python3 -m bot -u {room.url} -t {token}"],
shell=True,
bufsize=1,
cwd=os.path.dirname(os.path.abspath(__file__)),
)
bot_procs[proc.pid] = (proc, room.url)
except Exception as e:
raise HTTPException(status_code=500, detail=f"Failed to start subprocess: {e}")
return RedirectResponse(room.url)
@app.get("/status/{pid}")
def get_status(pid: int):
# Look up the subprocess
proc = bot_procs.get(pid)
# If the subprocess doesn't exist, return an error
if not proc:
raise HTTPException(status_code=404, detail=f"Bot with process id: {pid} not found")
# Check the status of the subprocess
if proc[0].poll() is None:
status = "running"
else:
status = "finished"
return JSONResponse({"bot_id": pid, "status": status})
if __name__ == "__main__":
import uvicorn
default_host = os.getenv("HOST", "0.0.0.0")
default_port = int(os.getenv("FAST_API_PORT", "7860"))
parser = argparse.ArgumentParser(description="Daily Storyteller FastAPI server")
parser.add_argument("--host", type=str, default=default_host, help="Host address")
parser.add_argument("--port", type=int, default=default_port, help="Port number")
parser.add_argument("--reload", action="store_true", help="Reload code on change")
config = parser.parse_args()
uvicorn.run(
"server:app",
host=config.host,
port=config.port,
reload=config.reload,
)

View File

@@ -0,0 +1,39 @@
# Daily Custom Tracks
This example shows how to send and receive Daily custom tracks. We will run a simple `daily-python` application to send an audio file with a custom track (named "pipecat") to a room. Then, the Pipecat bot will mirror that custom track into another custom track (named "pipecat-mirror") in the same room.
## Get started
```python
python3 -m venv venv
source venv/bin/activate
pip install -r requirements.txt
```
## Run the bot
Start the bot by giving it a Daily room URL.
```bash
python bot.py -u ROOM_URL
```
The bot will wait for the first participant to join. Then, it will mirror a custom track named "pipecat" into a new custom track named "pipecat-mirror".
## Run the sender
Now, run the custom track sender. This is a simple `daily-python` application that opens and audio file and sends it as a custom track to the same Daily room.
```bash
python custom_track_sender.py -u ROOM_URL -i office-ambience-mono-16000.mp3
```
## Open client
Finally, open the client so you can hear both custom tracks.
```bash
open index.html
```
Once the client is opened, copy the URL of the Daily room and join it. You should be able to select which custom track you want to hear.

View File

@@ -0,0 +1,87 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import sys
import aiohttp
from loguru import logger
from runner import configure
from pipecat.frames.frames import Frame, InputAudioRawFrame, OutputAudioRawFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.transports.services.daily import DailyParams, DailyTransport
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
class CustomTrackMirrorProcessor(FrameProcessor):
def __init__(self, transport_destination: str, **kwargs):
super().__init__(**kwargs)
self._transport_destination = transport_destination
async def process_frame(self, frame: Frame, direction: FrameDirection):
await super().process_frame(frame, direction)
if isinstance(frame, InputAudioRawFrame) and frame.transport_source:
output_frame = OutputAudioRawFrame(
audio=frame.audio,
sample_rate=frame.sample_rate,
num_channels=frame.num_channels,
)
output_frame.transport_destination = self._transport_destination
await self.push_frame(output_frame)
else:
await self.push_frame(frame, direction)
async def main():
async with aiohttp.ClientSession() as session:
(room_url, _) = await configure(session)
transport = DailyTransport(
room_url,
None,
"Custom tracks mirror",
DailyParams(
audio_in_enabled=True,
audio_out_enabled=True,
microphone_out_enabled=False, # Disable since we just use custom tracks
audio_out_destinations=["pipecat-mirror"],
),
)
pipeline = Pipeline(
[
transport.input(), # Transport user input
CustomTrackMirrorProcessor("pipecat-mirror"),
transport.output(), # Transport bot output
]
)
task = PipelineTask(
pipeline,
params=PipelineParams(
audio_in_sample_rate=16000,
audio_out_sample_rate=16000,
),
)
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
await transport.capture_participant_audio(participant["id"], audio_source="pipecat")
runner = PipelineRunner()
await runner.run(task)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -0,0 +1,74 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import argparse
import time
from daily import CallClient, CustomAudioSource, Daily
from pydub import AudioSegment
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
parser.add_argument("-u", "--url", type=str, required=True, help="URL of the Daily room to join")
parser.add_argument(
"-i", "--input", type=str, required=True, help="Input audio file (needs 16000 sample rate)"
)
args, _ = parser.parse_known_args()
audio = AudioSegment.from_mp3(args.input)
raw_bytes = audio.raw_data
sample_rate = audio.frame_rate
channels = audio.channels
print(f"Length: {len(raw_bytes)} bytes")
print(f"Sample rate: {sample_rate}, Channels: {channels}")
# Initialize the Daily context & create call client
Daily.init()
client = CallClient()
# Join the room and indicate we have a custom track named "pipecat".
client.join(
args.url,
client_settings={
"publishing": {
"camera": False,
"microphone": False,
"customAudio": {"pipecat": True},
},
},
)
# Just sleep for a couple of seconds. To do this well we should really use
# completions.
time.sleep(2)
# Create the custom audio source. This is where we will write our audio.
audio_source = CustomAudioSource(sample_rate, channels)
# Create an audio track and assign it our audio source.
client.add_custom_audio_track("pipecat", audio_source)
# Just sleep for a second. To do this well we should really use completions.
time.sleep(1)
try:
# Just write one second of audio until we have read all the file.
chunk_size = sample_rate * channels * 2
while len(raw_bytes) > 0:
chunk = raw_bytes[:chunk_size]
raw_bytes = raw_bytes[chunk_size:]
audio_source.write_frames(chunk)
except KeyboardInterrupt:
client.leave()
# Just sleep for a second. To do this well we should really use completions.
time.sleep(1)
client.release()

View File

@@ -0,0 +1,173 @@
<html>
<head>
<title>daily custom tracks</title>
</head>
<script crossorigin src="https://unpkg.com/@daily-co/daily-js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/fomantic-ui/2.8.6/semantic.min.js"></script>
<link
rel="stylesheet"
type="text/css"
href="https://cdnjs.cloudflare.com/ajax/libs/fomantic-ui/2.8.6/semantic.min.css"
/>
<script>
function enableButton(buttonId, enable) {
const button = document.getElementById(buttonId);
button.disabled = !enable;
}
function enableJoinButton(enable) {
enableButton("join-button", enable);
}
function enableLeaveButton(enable) {
enableButton("leave-button", enable);
}
function destroyPlayers(query) {
const items = document.querySelectorAll(query);
if (items) {
for (const item of items) {
item.remove();
}
}
}
function destroyParticipantPlayers(participantId) {
destroyPlayers(`audio[data-participant-id="${participantId}"]`);
destroyPlayers(`button[data-participant-id="${participantId}"]`);
}
async function startPlayer(player, track) {
player.muted = false;
player.autoplay = true;
if (track != null) {
player.srcObject = new MediaStream([track]);
}
}
async function buildAudioPlayer(track, participantId) {
const audioContainer = document.getElementById("audio-container");
const player = document.createElement("audio");
player.dataset.participantId = participantId;
// Create a new button for controlling audio
const audioControlButton = document.createElement("button");
audioControlButton.className = "ui primary green button"
audioControlButton.innerText = track._mediaTag == "cam-audio" ? "english" : track._mediaTag;
audioControlButton.dataset.participantId = participantId;
audioControlButton.onclick = () => {
if (player.paused) {
player.play();
audioControlButton.className = "ui primary red button"
} else {
player.pause();
audioControlButton.className = "ui primary green button"
}
};
audioContainer.appendChild(player);
audioContainer.appendChild(audioControlButton);
await startPlayer(player, track);
player.pause()
return player;
}
function subscribeToTracks(participantId) {
console.log(`subscribing to track`);
if (participantId === "local") {
return;
}
callObject.updateParticipant(participantId, {
setSubscribedTracks: {
audio: true,
video: false,
custom: true,
},
});
}
function startDaily() {
enableJoinButton(true);
enableLeaveButton(false);
window.callObject = window.DailyIframe.createCallObject({});
callObject.on("participant-joined", (e) => {
if (!e.participant.local) {
console.log("participant-joined", e.participant);
subscribeToTracks(e.participant.session_id);
}
});
callObject.on("participant-left", (e) => {
console.log("participant-left", e.participant.session_id);
destroyParticipantPlayers(e.participant.session_id);
});
callObject.on("track-started", async (e) => {
console.log("track-started", e.track);
if (e.track.kind === "audio") {
await buildAudioPlayer(e.track, e.participant.session_id);
}
});
}
async function joinRoom() {
enableJoinButton(false);
enableLeaveButton(true);
const meetingUrl = document.getElementById("meeting-url").value;
callObject.join({
url: meetingUrl,
startVideoOff: true,
startAudioOff: true,
subscribeToTracksAutomatically: false,
receiveSettings: {
base: { video: { layer: 0 } },
},
});
}
async function leaveRoom() {
enableJoinButton(true);
enableLeaveButton(false);
callObject.leave();
const audioContainer = document.getElementById("audio-container");
audioContainer.replaceChildren();
}
</script>
<body onload="startDaily()">
<div class="ui centered page grid" style="margin-top: 30px">
<div class="ten wide column">
<div class="ui form" style="margin-top: 30px">
<div class="field">
<label>Meeting URL</label>
<input id="meeting-url" value="" />
</div>
</div>
</div>
</div>
<div class="ui centered aligned header" style="margin-top: 30px">
<button id="join-button" class="ui primary button" onclick="joinRoom()">
Join
</button>
<button id="leave-button" class="ui button" onclick="leaveRoom()">
Leave
</button>
</div>
<div id="tile" class="ui container" style="margin-top: 30px">
<div id="tile" class="ui center aligned grid">
<div id="audio-container"></div><br/>
</div>
</div>
</body>
</html>

View File

@@ -0,0 +1,2 @@
pydub
pipecat-ai[daily]

View File

@@ -0,0 +1,55 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import argparse
import os
import aiohttp
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
async def configure(aiohttp_session: aiohttp.ClientSession):
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
parser.add_argument(
"-u", "--url", type=str, required=False, help="URL of the Daily room to join"
)
parser.add_argument(
"-k",
"--apikey",
type=str,
required=False,
help="Daily API Key (needed to create an owner token for the room)",
)
args, unknown = parser.parse_known_args()
url = args.url or os.getenv("DAILY_SAMPLE_ROOM_URL")
key = args.apikey or os.getenv("DAILY_API_KEY")
if not url:
raise Exception(
"No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL."
)
if not key:
raise Exception(
"No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers."
)
daily_rest_helper = DailyRESTHelper(
daily_api_key=key,
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
aiohttp_session=aiohttp_session,
)
# Create a meeting token for the given room with an expiration 1 hour in
# the future.
expiry_time: float = 60 * 60
token = await daily_rest_helper.get_token(url, expiry_time)
return (url, token)

View File

@@ -0,0 +1,15 @@
FROM python:3.10-bullseye
RUN mkdir /app
RUN mkdir /app/assets
RUN mkdir /app/utils
COPY *.py /app/
COPY requirements.txt /app/
WORKDIR /app
RUN pip3 install -r requirements.txt
EXPOSE 7860
CMD ["python3", "server.py"]

View File

@@ -0,0 +1,39 @@
# Daily Multi Translation
This example shows how to use Daily to stream multiple simultaneous translations using a single transport. Daily provides custom tracks and in this example we will simultaneously translate incoming audio in English to Spanish, French and German, each of them being sent to a custom track.
## Get started
```python
python3 -m venv venv
source venv/bin/activate
pip install -r requirements.txt
cp env.example .env # and add your credentials
```
## Run the server
```bash
python server.py
```
Then, visit `http://localhost:7860/` in your browser. This will open a Daily Prebuilt room where you will speak in English (make sure you are not muted).
## Open client
Next, you need to open the client that will listen to the translations.
```bash
open index.html
```
Once the client is opened, copy the URL of the Daily room created above and join it. You should be able to select which translation you want to hear.
## Build and test the Docker image
```
docker build -t daily-multi-translation .
docker run --env-file .env -p 7860:7860 daily-multi-translation
```

View File

@@ -0,0 +1,165 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import os
import sys
import aiohttp
from dotenv import load_dotenv
from loguru import logger
from runner import configure
from pipecat.audio.mixers.soundfile_mixer import SoundfileMixer
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.observers.loggers.transcription_log_observer import TranscriptionLogObserver
from pipecat.pipeline.parallel_pipeline import ParallelPipeline
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.cartesia.tts import CartesiaTTSService
from pipecat.services.deepgram.stt import DeepgramSTTService
from pipecat.services.openai.llm import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
load_dotenv(override=True)
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
BACKGROUND_SOUND_FILE = "office-ambience-mono-16000.mp3"
async def main():
async with aiohttp.ClientSession() as session:
(room_url, token) = await configure(session)
transport = DailyTransport(
room_url,
token,
"Multi translation bot",
DailyParams(
audio_in_enabled=True,
audio_out_enabled=True,
audio_out_mixer={
"spanish": SoundfileMixer(
sound_files={"office": BACKGROUND_SOUND_FILE}, default_sound="office"
),
"french": SoundfileMixer(
sound_files={"office": BACKGROUND_SOUND_FILE}, default_sound="office"
),
"german": SoundfileMixer(
sound_files={"office": BACKGROUND_SOUND_FILE}, default_sound="office"
),
},
audio_out_destinations=["spanish", "french", "german"],
microphone_out_enabled=False, # Disable since we just use custom tracks
vad_analyzer=SileroVADAnalyzer(),
),
)
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
tts_spanish = CartesiaTTSService(
api_key=os.getenv("CARTESIA_API_KEY"),
voice_id="cefcb124-080b-4655-b31f-932f3ee743de",
transport_destination="spanish",
)
tts_french = CartesiaTTSService(
api_key=os.getenv("CARTESIA_API_KEY"),
voice_id="8832a0b5-47b2-4751-bb22-6a8e2149303d",
transport_destination="french",
)
tts_german = CartesiaTTSService(
api_key=os.getenv("CARTESIA_API_KEY"),
voice_id="38aabb6a-f52b-4fb0-a3d1-988518f4dc06",
transport_destination="german",
)
messages_spanish = [
{
"role": "system",
"content": "You will be provided with a sentence in English, and your task is to only translate it into Spanish.",
},
]
messages_french = [
{
"role": "system",
"content": "You will be provided with a sentence in English, and your task is to only translate it into French.",
},
]
messages_german = [
{
"role": "system",
"content": "You will be provided with a sentence in English, and your task is to only translate it into German.",
},
]
llm_spanish = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
llm_french = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
llm_german = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
context_spanish = OpenAILLMContext(messages_spanish)
context_aggregator_spanish = llm_spanish.create_context_aggregator(context_spanish)
context_french = OpenAILLMContext(messages_french)
context_aggregator_french = llm_french.create_context_aggregator(context_french)
context_german = OpenAILLMContext(messages_german)
context_aggregator_german = llm_german.create_context_aggregator(context_german)
pipeline = Pipeline(
[
transport.input(), # Transport user input
stt,
ParallelPipeline(
# Spanish pipeline.
[
context_aggregator_spanish.user(),
llm_spanish,
tts_spanish,
context_aggregator_spanish.assistant(),
],
# French pipeline.
[
context_aggregator_french.user(),
llm_french,
tts_french,
context_aggregator_french.assistant(),
],
# German pipeline.
[
context_aggregator_german.user(),
llm_german,
tts_german,
context_aggregator_german.assistant(),
],
),
transport.output(), # Transport bot output
]
)
task = PipelineTask(
pipeline,
params=PipelineParams(
audio_in_sample_rate=16000,
audio_out_sample_rate=16000,
allow_interruptions=True,
enable_metrics=True,
enable_usage_metrics=True,
report_only_initial_ttfb=True,
),
observers=[TranscriptionLogObserver()],
)
runner = PipelineRunner()
await runner.run(task)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -0,0 +1,5 @@
DAILY_SAMPLE_ROOM_URL=https://yourdomain.daily.co/yourroom # (for joining the bot to the same room repeatedly for local dev)
DAILY_API_KEY=7df...
OPENAI_API_KEY=sk-PL...
DEEPGRAM_API_KEY=efb...
CARTESIA_API_KEY=aeb...

View File

@@ -0,0 +1,202 @@
<html>
<head>
<title>daily multi translation</title>
</head>
<script crossorigin src="https://unpkg.com/@daily-co/daily-js"></script>
<script
src="https://code.jquery.com/jquery-3.1.1.min.js"
integrity="sha256-hVVnYaiADRTO2PzUGmuLJr8BLUSjGIZsDYGmIJLv2b8="
crossorigin="anonymous"
></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/fomantic-ui/2.8.6/semantic.min.js"></script>
<link
rel="stylesheet"
type="text/css"
href="https://cdnjs.cloudflare.com/ajax/libs/fomantic-ui/2.8.6/semantic.min.css"
/>
<script>
function enableButton(buttonId, enable) {
const button = document.getElementById(buttonId);
button.disabled = !enable;
}
function enableJoinButton(enable) {
enableButton("join-button", enable);
}
function enableLeaveButton(enable) {
enableButton("leave-button", enable);
}
function destroyPlayers(query) {
const items = document.querySelectorAll(query);
if (items) {
for (const item of items) {
item.remove();
}
}
}
function destroyParticipantPlayers(participantId) {
destroyPlayers(`video[data-participant-id="${participantId}"]`);
destroyPlayers(`audio[data-participant-id="${participantId}"]`);
destroyPlayers(`button[data-participant-id="${participantId}"]`);
}
async function startPlayer(player, track) {
player.muted = false;
player.autoplay = true;
if (track != null) {
player.srcObject = new MediaStream([track]);
}
}
async function buildVideoPlayer(track, participantId) {
const videoContainer = document.getElementById("video-container");
const player = document.createElement("video");
player.dataset.participantId = participantId;
videoContainer.appendChild(player);
await startPlayer(player, track);
await player.play();
return player;
}
async function buildAudioPlayer(track, participantId) {
const audioContainer = document.getElementById("audio-container");
const player = document.createElement("audio");
player.dataset.participantId = participantId;
// Create a new button for controlling audio
const audioControlButton = document.createElement("button");
audioControlButton.className = "ui primary green button"
audioControlButton.innerText = track._mediaTag == "cam-audio" ? "english" : track._mediaTag;
audioControlButton.dataset.participantId = participantId;
audioControlButton.onclick = () => {
if (player.paused) {
player.play();
audioControlButton.className = "ui primary red button"
} else {
player.pause();
audioControlButton.className = "ui primary green button"
}
};
audioContainer.appendChild(player);
audioContainer.appendChild(audioControlButton);
await startPlayer(player, track);
player.pause()
return player;
}
function subscribeToTracks(participantId) {
console.log(`subscribing to track`);
if (participantId === "local") {
return;
}
callObject.updateParticipant(participantId, {
setSubscribedTracks: {
audio: true,
video: true,
custom: true,
},
});
}
function startDaily() {
enableJoinButton(true);
enableLeaveButton(false);
window.callObject = window.DailyIframe.createCallObject({});
callObject.on("participant-joined", (e) => {
if (!e.participant.local) {
console.log("participant-joined", e.participant);
subscribeToTracks(e.participant.session_id);
}
});
callObject.on("participant-left", (e) => {
console.log("participant-left", e.participant.session_id);
destroyParticipantPlayers(e.participant.session_id);
});
callObject.on("track-started", async (e) => {
console.log("track-started", e.track);
if (e.track.kind === "video") {
await buildVideoPlayer(e.track, e.participant.session_id);
} else if (e.track.kind === "audio") {
await buildAudioPlayer(e.track, e.participant.session_id);
}
});
}
async function joinRoom() {
enableJoinButton(false);
enableLeaveButton(true);
const meetingUrl = document.getElementById("meeting-url").value;
callObject.join({
url: meetingUrl,
startVideoOff: true,
startAudioOff: true,
subscribeToTracksAutomatically: false,
receiveSettings: {
base: { video: { layer: 0 } },
},
});
}
async function leaveRoom() {
enableJoinButton(true);
enableLeaveButton(false);
callObject.leave();
const videoContainer = document.getElementById("video-container");
videoContainer.replaceChildren();
const audioContainer = document.getElementById("audio-container");
audioContainer.replaceChildren();
}
</script>
<body onload="startDaily()">
<div class="ui centered page grid" style="margin-top: 30px">
<div class="ten wide column">
<div class="ui form" style="margin-top: 30px">
<div class="field">
<label>Meeting URL</label>
<input id="meeting-url" value="" />
</div>
</div>
</div>
</div>
<div class="ui centered aligned header" style="margin-top: 30px">
<button id="join-button" class="ui primary button" onclick="joinRoom()">
Join
</button>
<button id="leave-button" class="ui button" onclick="leaveRoom()">
Leave
</button>
</div>
<div id="tile" class="ui container" style="margin-top: 30px">
<div id="tile" class="ui center aligned grid">
<div id="audio-container"></div><br/>
</div>
</div>
<div id="tile" class="ui container" style="margin-top: 30px">
<div id="tile" class="ui center aligned grid">
<div id="video-container" class="ui segment"></div>
</div>
</div>
</body>
</html>

View File

@@ -0,0 +1,5 @@
aiofiles
python-dotenv
fastapi[all]
uvicorn
pipecat-ai[daily,deepgram,openai,silero,cartesia]

View File

@@ -0,0 +1,55 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import argparse
import os
import aiohttp
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
async def configure(aiohttp_session: aiohttp.ClientSession):
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
parser.add_argument(
"-u", "--url", type=str, required=False, help="URL of the Daily room to join"
)
parser.add_argument(
"-k",
"--apikey",
type=str,
required=False,
help="Daily API Key (needed to create an owner token for the room)",
)
args, unknown = parser.parse_known_args()
url = args.url or os.getenv("DAILY_SAMPLE_ROOM_URL")
key = args.apikey or os.getenv("DAILY_API_KEY")
if not url:
raise Exception(
"No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL."
)
if not key:
raise Exception(
"No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers."
)
daily_rest_helper = DailyRESTHelper(
daily_api_key=key,
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
aiohttp_session=aiohttp_session,
)
# Create a meeting token for the given room with an expiration 1 hour in
# the future.
expiry_time: float = 60 * 60
token = await daily_rest_helper.get_token(url, expiry_time)
return (url, token)

View File

@@ -0,0 +1,139 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import argparse
import os
import subprocess
from contextlib import asynccontextmanager
import aiohttp
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, RedirectResponse
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper, DailyRoomParams
MAX_BOTS_PER_ROOM = 1
# Bot sub-process dict for status reporting and concurrency control
bot_procs = {}
daily_helpers = {}
load_dotenv(override=True)
def cleanup():
# Clean up function, just to be extra safe
for entry in bot_procs.values():
proc = entry[0]
proc.terminate()
proc.wait()
@asynccontextmanager
async def lifespan(app: FastAPI):
aiohttp_session = aiohttp.ClientSession()
daily_helpers["rest"] = DailyRESTHelper(
daily_api_key=os.getenv("DAILY_API_KEY", ""),
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
aiohttp_session=aiohttp_session,
)
yield
await aiohttp_session.close()
cleanup()
app = FastAPI(lifespan=lifespan)
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
@app.get("/")
async def start_agent(request: Request):
print(f"!!! Creating room")
room = await daily_helpers["rest"].create_room(DailyRoomParams())
print(f"!!! Room URL: {room.url}")
# Ensure the room property is present
if not room.url:
raise HTTPException(
status_code=500,
detail="Missing 'room' property in request data. Cannot start agent without a target room!",
)
# Check if there is already an existing process running in this room
num_bots_in_room = sum(
1 for proc in bot_procs.values() if proc[1] == room.url and proc[0].poll() is None
)
if num_bots_in_room >= MAX_BOTS_PER_ROOM:
raise HTTPException(status_code=500, detail=f"Max bot limited reach for room: {room.url}")
# Get the token for the room
token = await daily_helpers["rest"].get_token(room.url)
if not token:
raise HTTPException(status_code=500, detail=f"Failed to get token for room: {room.url}")
# Spawn a new agent, and join the user session
# Note: this is mostly for demonstration purposes (refer to 'deployment' in README)
try:
proc = subprocess.Popen(
[f"python3 -m bot -u {room.url} -t {token}"],
shell=True,
bufsize=1,
cwd=os.path.dirname(os.path.abspath(__file__)),
)
bot_procs[proc.pid] = (proc, room.url)
except Exception as e:
raise HTTPException(status_code=500, detail=f"Failed to start subprocess: {e}")
return RedirectResponse(room.url)
@app.get("/status/{pid}")
def get_status(pid: int):
# Look up the subprocess
proc = bot_procs.get(pid)
# If the subprocess doesn't exist, return an error
if not proc:
raise HTTPException(status_code=404, detail=f"Bot with process id: {pid} not found")
# Check the status of the subprocess
if proc[0].poll() is None:
status = "running"
else:
status = "finished"
return JSONResponse({"bot_id": pid, "status": status})
if __name__ == "__main__":
import uvicorn
default_host = os.getenv("HOST", "0.0.0.0")
default_port = int(os.getenv("FAST_API_PORT", "7860"))
parser = argparse.ArgumentParser(description="Daily Storyteller FastAPI server")
parser.add_argument("--host", type=str, default=default_host, help="Host address")
parser.add_argument("--port", type=int, default=default_port, help="Port number")
parser.add_argument("--reload", action="store_true", help="Reload code on change")
config = parser.parse_args()
uvicorn.run(
"server:app",
host=config.host,
port=config.port,
reload=config.reload,
)

View File

@@ -0,0 +1,13 @@
FROM python:3.11-bullseye
# Open port 7860 for http service
ENV FAST_API_PORT=7860
EXPOSE 7860
# Install Python dependencies
COPY *.py .
COPY ./requirements.txt requirements.txt
RUN pip3 install --no-cache-dir --upgrade -r requirements.txt
# Start the FastAPI server
CMD python3 bot_runner.py --port ${FAST_API_PORT}

View File

@@ -0,0 +1,39 @@
# Fly.io deployment example
This project modifies the `bot_runner.py` server to launch a new machine for each user session. This is a recommended approach for production vs. running shell processess as your deployment will quickly run out of system resources under load.
For this example, we are using Daily as a WebRTC transport and provisioning a new room and token for each session. You can use another transport, such as WebSockets, by modifying the `bot.py` and `bot_runner.py` files accordingly.
## Setting up your fly.io deployment
### Create your fly.toml file
You can copy the `example-fly.toml` as a reference. Be sure to change the app name to something unique.
### Create your .env file
Copy the base `env.example` to `.env` and enter the necessary API keys.
`FLY_APP_NAME` should match that in the `fly.toml` file.
### Launch a new fly.io project
`fly launch` or `fly launch --org your-org-name`
### Set the necessary app secrets from your .env
Note: you can do this manually via the fly.io dashboard under the "secrets" sub-section of your deployment (e.g. "https://fly.io/apps/fly-app-name/secrets") or run the following terminal command:
`cat .env | tr '\n' ' ' | xargs flyctl secrets set`
### Deploy your machine
`fly deploy`
## Connecting to your bot
Send a post request to your running fly.io instance:
`curl --location --request POST 'https://YOUR_FLY_APP_NAME/'`
This request will wait until the machine enters into a `starting` state, before returning the a room URL and token to join.

Some files were not shown because too many files have changed in this diff Show More