Compare commits
1 Commits
mb/test-ci
...
aleix/para
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
644babe241 |
@@ -144,7 +144,7 @@ class InputParams(BaseModel):
|
||||
|
||||
#### Examples
|
||||
|
||||
Validated against `examples/07-interruptible.py`:
|
||||
Validated against `examples/foundational/07-interruptible.py`:
|
||||
|
||||
- Proper `create_transport()` usage
|
||||
- Correct pipeline structure
|
||||
|
||||
30
.dockerignore
Normal file
30
.dockerignore
Normal file
@@ -0,0 +1,30 @@
|
||||
# flyctl launch added from .gitignore
|
||||
**/.vscode
|
||||
**/env
|
||||
**/__pycache__
|
||||
**/*~
|
||||
**/venv
|
||||
#*#
|
||||
|
||||
# Distribution / packaging
|
||||
**/.Python
|
||||
**/build
|
||||
**/develop-eggs
|
||||
**/dist
|
||||
**/downloads
|
||||
**/eggs
|
||||
**/.eggs
|
||||
**/lib
|
||||
**/lib64
|
||||
**/parts
|
||||
**/sdist
|
||||
**/var
|
||||
**/wheels
|
||||
**/share/python-wheels
|
||||
**/*.egg-info
|
||||
**/.installed.cfg
|
||||
**/*.egg
|
||||
**/MANIFEST
|
||||
**/.DS_Store
|
||||
**/.env
|
||||
fly.toml
|
||||
6
.github/workflows/format.yaml
vendored
6
.github/workflows/format.yaml
vendored
@@ -32,7 +32,7 @@ jobs:
|
||||
run: uv python install 3.12
|
||||
|
||||
- name: Install development dependencies
|
||||
run: uv sync --group dev --extra daily --extra tracing
|
||||
run: uv sync --group dev
|
||||
|
||||
- name: Ruff formatter
|
||||
id: ruff-format
|
||||
@@ -41,7 +41,3 @@ jobs:
|
||||
- name: Ruff linter (all rules)
|
||||
id: ruff-check
|
||||
run: uv run ruff check
|
||||
|
||||
- name: Type check (pyright)
|
||||
id: pyright
|
||||
run: uv run pyright
|
||||
|
||||
4
.github/workflows/python-compatibility.yaml
vendored
4
.github/workflows/python-compatibility.yaml
vendored
@@ -14,7 +14,7 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: ['3.11.15', '3.12.13', '3.13.12', '3.14.3']
|
||||
python-version: ['3.10.19', '3.11.14', '3.12.12', '3.13.12']
|
||||
|
||||
name: Python ${{ matrix.python-version }}
|
||||
steps:
|
||||
@@ -42,7 +42,7 @@ jobs:
|
||||
|
||||
- name: Test uv sync with all extras
|
||||
run: |
|
||||
uv sync --group dev --all-extras
|
||||
uv sync --group dev --all-extras --no-extra krisp
|
||||
|
||||
- name: Verify installation
|
||||
run: |
|
||||
|
||||
51
.github/workflows/sync-quickstart.yaml
vendored
Normal file
51
.github/workflows/sync-quickstart.yaml
vendored
Normal file
@@ -0,0 +1,51 @@
|
||||
name: Sync Quickstart to pipecat-quickstart repo
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'examples/quickstart/**'
|
||||
workflow_dispatch: # Manual trigger
|
||||
|
||||
jobs:
|
||||
sync-quickstart:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout main repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Checkout quickstart repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
repository: pipecat-ai/pipecat-quickstart
|
||||
token: ${{ secrets.QUICKSTART_SYNC_TOKEN }}
|
||||
path: quickstart-repo
|
||||
|
||||
- name: Sync files (excluding uv.lock and README.md)
|
||||
run: |
|
||||
# Copy all files except uv.lock and README.md
|
||||
find examples/quickstart -type f \
|
||||
-not -name "README.md" \
|
||||
-not -name "uv.lock" \
|
||||
-exec cp {} quickstart-repo/ \;
|
||||
|
||||
- name: Commit and push changes
|
||||
run: |
|
||||
cd quickstart-repo
|
||||
git config user.name "GitHub Action"
|
||||
git config user.email "action@github.com"
|
||||
git add .
|
||||
|
||||
# Only commit if there are changes
|
||||
if ! git diff --staged --quiet; then
|
||||
git commit -m "Sync from pipecat main repo
|
||||
|
||||
Updated files from examples/quickstart/
|
||||
Commit: ${{ github.sha }}
|
||||
"
|
||||
git push
|
||||
else
|
||||
echo "No changes to sync"
|
||||
fi
|
||||
1
.github/workflows/update-docs.yml
vendored
1
.github/workflows/update-docs.yml
vendored
@@ -114,7 +114,6 @@ jobs:
|
||||
GH_TOKEN=$DOCS_SYNC_TOKEN gh pr create \
|
||||
--repo pipecat-ai/docs \
|
||||
--label auto-docs \
|
||||
--label pipecat \
|
||||
--title "docs: update for pipecat PR #${{ steps.pr.outputs.number }}" \
|
||||
--body "$(cat <<'BODY'
|
||||
Automated documentation update for [pipecat PR #${{ steps.pr.outputs.number }}](https://github.com/pipecat-ai/pipecat/pull/${{ steps.pr.outputs.number }}).
|
||||
|
||||
@@ -1,13 +1,8 @@
|
||||
repos:
|
||||
- repo: local
|
||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||
rev: v0.12.1
|
||||
hooks:
|
||||
- id: ruff
|
||||
name: ruff
|
||||
entry: uv run ruff check --fix
|
||||
language: system
|
||||
types: [python]
|
||||
language_version: python3
|
||||
args: [--fix]
|
||||
- id: ruff-format
|
||||
name: ruff-format
|
||||
entry: uv run ruff format
|
||||
language: system
|
||||
types: [python]
|
||||
|
||||
@@ -11,7 +11,7 @@ build:
|
||||
jobs:
|
||||
post_install:
|
||||
- pip install uv
|
||||
- UV_PROJECT_ENVIRONMENT=$READTHEDOCS_VIRTUALENV_PATH uv sync --group docs --all-extras --no-extra gstreamer --no-extra local_smart_turn --no-extra moondream --no-extra mlx-whisper
|
||||
- UV_PROJECT_ENVIRONMENT=$READTHEDOCS_VIRTUALENV_PATH uv sync --group docs --all-extras --no-extra krisp --no-extra gstreamer --no-extra local_smart_turn --no-extra moondream --no-extra riva --no-extra mlx-whisper
|
||||
|
||||
sphinx:
|
||||
configuration: docs/api/conf.py
|
||||
|
||||
1318
CHANGELOG.md
1318
CHANGELOG.md
File diff suppressed because it is too large
Load Diff
62
CHANGELOG.md.template
Normal file
62
CHANGELOG.md.template
Normal file
@@ -0,0 +1,62 @@
|
||||
# Changelog
|
||||
|
||||
All notable changes to the **<project name>** SDK will be documented in this file.
|
||||
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
Please make sure to add your changes to the appropriate categories:
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
### Added
|
||||
|
||||
<!-- for new functionality -->
|
||||
|
||||
- n/a
|
||||
|
||||
### Changed
|
||||
|
||||
<!-- for changed functionality -->
|
||||
|
||||
- n/a
|
||||
|
||||
### Deprecated
|
||||
|
||||
<!-- for soon-to-be removed functionality -->
|
||||
|
||||
- n/a
|
||||
|
||||
### Removed
|
||||
|
||||
<!-- for removed functionality -->
|
||||
|
||||
- n/a
|
||||
|
||||
### Fixed
|
||||
|
||||
<!-- for fixed bugs -->
|
||||
|
||||
- n/a
|
||||
|
||||
### Performance
|
||||
|
||||
<!-- for performance-relevant changes -->
|
||||
|
||||
- n/a
|
||||
|
||||
### Security
|
||||
|
||||
<!-- for security-relevant changes -->
|
||||
|
||||
- n/a
|
||||
|
||||
### Other
|
||||
|
||||
<!-- for everything else -->
|
||||
|
||||
- n/a
|
||||
|
||||
## [0.1.0] - YYYY-MM-DD
|
||||
|
||||
Initial release.
|
||||
@@ -10,7 +10,7 @@ Pipecat is an open-source Python framework for building real-time voice and mult
|
||||
|
||||
```bash
|
||||
# Setup development environment
|
||||
uv sync --group dev --all-extras --no-extra gstreamer
|
||||
uv sync --group dev --all-extras --no-extra gstreamer --no-extra krisp
|
||||
|
||||
# Install pre-commit hooks
|
||||
uv run pre-commit install
|
||||
|
||||
@@ -23,7 +23,7 @@ Create your integration following the patterns and examples shown in the "Integr
|
||||
Your repository must contain these components:
|
||||
|
||||
- **Source code** - Complete implementation following Pipecat patterns
|
||||
- **Foundational example** - Single file example showing basic usage (see [Pipecat examples](https://github.com/pipecat-ai/pipecat/tree/main/examples))
|
||||
- **Foundational example** - Single file example showing basic usage (see [Pipecat examples](https://github.com/pipecat-ai/pipecat/tree/main/examples/foundational))
|
||||
- **README.md** - Must include:
|
||||
- Introduction and explanation of your integration
|
||||
- Installation instructions
|
||||
@@ -225,17 +225,6 @@ Vision services process images and provide analysis such as descriptions, object
|
||||
|
||||
### Naming Conventions
|
||||
|
||||
#### Package and Repository Naming
|
||||
|
||||
Use the `pipecat-{vendor}` naming convention for your PyPI package and repository:
|
||||
|
||||
- `pipecat-{vendor}` — for single-service integrations (e.g., `pipecat-deepdub`)
|
||||
- `pipecat-{vendor}-{type}` — when a vendor offers multiple service types (e.g., `pipecat-upliftai-stt`, `pipecat-upliftai-tts`)
|
||||
|
||||
This convention makes community packages easily discoverable via PyPI search and clearly identifies them as part of the Pipecat ecosystem.
|
||||
|
||||
#### Class Naming
|
||||
|
||||
- **STT:** `VendorSTTService`
|
||||
- **LLM:** `VendorLLMService`
|
||||
- **TTS:**
|
||||
@@ -417,9 +406,8 @@ Use Pipecat's tracing decorators:
|
||||
|
||||
### Packaging and Distribution
|
||||
|
||||
- Name your package `pipecat-{vendor}` (see [Naming Conventions](#naming-conventions))
|
||||
- Use [uv](https://docs.astral.sh/uv/) for packaging (encouraged)
|
||||
- Publish to PyPI for easier installation
|
||||
- Consider releasing to PyPI for easier installation
|
||||
- Follow semantic versioning principles
|
||||
- Maintain a changelog
|
||||
|
||||
|
||||
49
README.md
49
README.md
@@ -8,7 +8,7 @@
|
||||
|
||||
**Pipecat** is an open-source Python framework for building real-time voice and multimodal conversational agents. Orchestrate audio and video, AI services, different transports, and conversation pipelines effortlessly—so you can focus on what makes your agent unique.
|
||||
|
||||
> Want to dive right in? Run `pipecat init quickstart` or follow the [quickstart guide](https://docs.pipecat.ai/getting-started/quickstart).
|
||||
> Want to dive right in? Try the [quickstart](https://docs.pipecat.ai/getting-started/quickstart).
|
||||
|
||||
## 🚀 What You Can Build
|
||||
|
||||
@@ -28,10 +28,6 @@
|
||||
|
||||
## 🌐 Pipecat Ecosystem
|
||||
|
||||
### 🧩 Multi-agent systems
|
||||
|
||||
Need multiple AI agents working together? [Pipecat Subagents](https://github.com/pipecat-ai/pipecat-subagents) lets you build distributed multi-agent systems where each agent runs its own pipeline and communicates through a shared message bus. Hand off conversations between specialists, dispatch background tasks, and scale agents across processes or machines.
|
||||
|
||||
### 📱 Client SDKs
|
||||
|
||||
Building client applications? You can connect to Pipecat from any platform using our official SDKs:
|
||||
@@ -71,7 +67,7 @@ and install any of the available plugins.
|
||||
|
||||
### 🧩 Community Integrations
|
||||
|
||||
Build and share your own Pipecat service integrations! Browse existing [community integrations](https://docs.pipecat.ai/api-reference/server/services/community-integrations) or check out our [guide](COMMUNITY_INTEGRATIONS.md) to create your own.
|
||||
Build and share your own Pipecat service integrations! Browse existing [community integrations](https://docs.pipecat.ai/server/services/community-integrations) or check out our [guide](COMMUNITY_INTEGRATIONS.md) to create your own.
|
||||
|
||||
### 📺️ Pipecat TV Channel
|
||||
|
||||
@@ -83,28 +79,28 @@ Catch new features, interviews, and how-tos on our [Pipecat TV](https://www.yout
|
||||
<a href="https://github.com/pipecat-ai/pipecat-examples/tree/main/simple-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat-examples/main/simple-chatbot/image.png" width="400" /></a>
|
||||
<a href="https://github.com/pipecat-ai/pipecat-examples/tree/main/storytelling-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat-examples/main/storytelling-chatbot/image.png" width="400" /></a>
|
||||
<br/>
|
||||
<a href="https://github.com/pipecat-ai/pipecat-examples/tree/main/daily-multi-translation"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat-examples/main/daily-multi-translation/image.png" width="400" /></a>
|
||||
<a href="https://github.com/pipecat-ai/pipecat/blob/main/examples/vision/vision-moondream.py"><img src="https://github.com/pipecat-ai/pipecat/blob/main/examples/assets/moondream.png" width="400" /></a>
|
||||
<a href="https://github.com/pipecat-ai/pipecat-examples/tree/main/translation-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat-examples/main/translation-chatbot/image.png" width="400" /></a>
|
||||
<a href="https://github.com/pipecat-ai/pipecat/blob/main/examples/foundational/12-describe-video.py"><img src="https://github.com/pipecat-ai/pipecat/blob/main/examples/foundational/assets/moondream.png" width="400" /></a>
|
||||
</p>
|
||||
|
||||
## 🧩 Available services
|
||||
|
||||
| Category | Services |
|
||||
| ------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/api-reference/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/api-reference/server/services/stt/aws), [Azure](https://docs.pipecat.ai/api-reference/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/api-reference/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/api-reference/server/services/stt/deepgram), [ElevenLabs](https://docs.pipecat.ai/api-reference/server/services/stt/elevenlabs), [Fal Wizper](https://docs.pipecat.ai/api-reference/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/api-reference/server/services/stt/gladia), [Google](https://docs.pipecat.ai/api-reference/server/services/stt/google), [Gradium](https://docs.pipecat.ai/api-reference/server/services/stt/gradium), [Groq (Whisper)](https://docs.pipecat.ai/api-reference/server/services/stt/groq), [Mistral](https://docs.pipecat.ai/api-reference/server/services/stt/mistral), [NVIDIA](https://docs.pipecat.ai/api-reference/server/services/stt/nvidia), [OpenAI (Whisper)](https://docs.pipecat.ai/api-reference/server/services/stt/openai), [Sarvam](https://docs.pipecat.ai/api-reference/server/services/stt/sarvam), [Soniox](https://docs.pipecat.ai/api-reference/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/api-reference/server/services/stt/speechmatics), [Whisper](https://docs.pipecat.ai/api-reference/server/services/stt/whisper), [xAI](https://docs.pipecat.ai/api-reference/server/services/stt/xai) |
|
||||
| LLMs | [Anthropic](https://docs.pipecat.ai/api-reference/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/api-reference/server/services/llm/aws), [Azure](https://docs.pipecat.ai/api-reference/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/api-reference/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/api-reference/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/api-reference/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/api-reference/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/api-reference/server/services/llm/grok), [Groq](https://docs.pipecat.ai/api-reference/server/services/llm/groq), [Mistral](https://docs.pipecat.ai/api-reference/server/services/llm/mistral), [Nebius](https://docs.pipecat.ai/api-reference/server/services/llm/nebius), [Novita](https://docs.pipecat.ai/api-reference/server/services/llm/novita), [NVIDIA NIM](https://docs.pipecat.ai/api-reference/server/services/llm/nvidia), [Ollama](https://docs.pipecat.ai/api-reference/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/api-reference/server/services/llm/openai), [OpenAI Responses](https://docs.pipecat.ai/api-reference/server/services/llm/openai-responses), [OpenRouter](https://docs.pipecat.ai/api-reference/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/api-reference/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/api-reference/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/api-reference/server/services/llm/sambanova), [Sarvam](https://docs.pipecat.ai/api-reference/server/services/llm/sarvam), [Together AI](https://docs.pipecat.ai/api-reference/server/services/llm/together) |
|
||||
| Text-to-Speech | [Async](https://docs.pipecat.ai/api-reference/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/api-reference/server/services/tts/aws), [Azure](https://docs.pipecat.ai/api-reference/server/services/tts/azure), [Camb AI](https://docs.pipecat.ai/api-reference/server/services/tts/camb), [Cartesia](https://docs.pipecat.ai/api-reference/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/api-reference/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/api-reference/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/api-reference/server/services/tts/fish), [Google](https://docs.pipecat.ai/api-reference/server/services/tts/google), [Gradium](https://docs.pipecat.ai/api-reference/server/services/tts/gradium), [Groq](https://docs.pipecat.ai/api-reference/server/services/tts/groq), [Hume](https://docs.pipecat.ai/api-reference/server/services/tts/hume), [Inworld](https://docs.pipecat.ai/api-reference/server/services/tts/inworld), [Kokoro](https://docs.pipecat.ai/api-reference/server/services/tts/kokoro), [LMNT](https://docs.pipecat.ai/api-reference/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/api-reference/server/services/tts/minimax), [Mistral](https://docs.pipecat.ai/api-reference/server/services/tts/mistral), [Neuphonic](https://docs.pipecat.ai/api-reference/server/services/tts/neuphonic), [NVIDIA](https://docs.pipecat.ai/api-reference/server/services/tts/nvidia), [OpenAI](https://docs.pipecat.ai/api-reference/server/services/tts/openai), [Piper](https://docs.pipecat.ai/api-reference/server/services/tts/piper), [Resemble](https://docs.pipecat.ai/api-reference/server/services/tts/resemble), [Rime](https://docs.pipecat.ai/api-reference/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/api-reference/server/services/tts/sarvam), [Smallest](https://docs.pipecat.ai/api-reference/server/services/tts/smallest), [Soniox](https://docs.pipecat.ai/api-reference/server/services/tts/soniox), [Speechmatics](https://docs.pipecat.ai/api-reference/server/services/tts/speechmatics), [xAI](https://docs.pipecat.ai/api-reference/server/services/tts/xai), [XTTS](https://docs.pipecat.ai/api-reference/server/services/tts/xtts) |
|
||||
| Speech-to-Speech | [AWS Nova Sonic](https://docs.pipecat.ai/api-reference/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/api-reference/server/services/s2s/gemini), [Grok Voice Agent](https://docs.pipecat.ai/api-reference/server/services/s2s/grok), [OpenAI Realtime](https://docs.pipecat.ai/api-reference/server/services/s2s/openai), [Ultravox](https://docs.pipecat.ai/api-reference/server/services/s2s/ultravox), |
|
||||
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/api-reference/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/api-reference/server/services/transport/fastapi-websocket), [LiveKit (WebRTC)](https://docs.pipecat.ai/api-reference/server/services/transport/livekit), [SmallWebRTCTransport](https://docs.pipecat.ai/api-reference/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/api-reference/server/services/transport/websocket-server), [WhatsApp](https://docs.pipecat.ai/api-reference/server/services/transport/whatsapp), Local |
|
||||
| Serializers | [Exotel](https://docs.pipecat.ai/api-reference/server/services/serializers/exotel), [Genesys](https://docs.pipecat.ai/api-reference/server/services/serializers/genesys), [Plivo](https://docs.pipecat.ai/api-reference/server/services/serializers/plivo), [Twilio](https://docs.pipecat.ai/api-reference/server/services/serializers/twilio), [Telnyx](https://docs.pipecat.ai/api-reference/server/services/serializers/telnyx), [Vonage](https://docs.pipecat.ai/api-reference/server/services/serializers/vonage) |
|
||||
| Video | [HeyGen](https://docs.pipecat.ai/api-reference/server/services/video/heygen), [LemonSlice](https://docs.pipecat.ai/api-reference/server/services/transport/lemonslice), [Tavus](https://docs.pipecat.ai/api-reference/server/services/video/tavus), [Simli](https://docs.pipecat.ai/api-reference/server/services/video/simli) |
|
||||
| Memory | [mem0](https://docs.pipecat.ai/api-reference/server/services/memory/mem0) |
|
||||
| Vision & Image | [fal](https://docs.pipecat.ai/api-reference/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/api-reference/server/services/image-generation/google-imagen), [Moondream](https://docs.pipecat.ai/api-reference/server/services/vision/moondream) |
|
||||
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/api-reference/server/utilities/audio/silero-vad-analyzer), [Krisp Viva](https://docs.pipecat.ai/guides/features/krisp-viva), [Koala](https://docs.pipecat.ai/api-reference/server/utilities/audio/koala-filter), [ai-coustics](https://docs.pipecat.ai/api-reference/server/utilities/audio/aic-filter), [RNNoise](https://docs.pipecat.ai/api-reference/server/utilities/audio/rnnoise-filter) |
|
||||
| Analytics & Metrics | [OpenTelemetry](https://docs.pipecat.ai/api-reference/server/utilities/opentelemetry), [Sentry](https://docs.pipecat.ai/api-reference/server/services/analytics/sentry) |
|
||||
| Community | [Browse community integrations →](https://docs.pipecat.ai/api-reference/server/services/community-integrations) |
|
||||
| Category | Services |
|
||||
| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/stt/elevenlabs), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Gradium](https://docs.pipecat.ai/server/services/stt/gradium), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [NVIDIA Riva](https://docs.pipecat.ai/server/services/stt/riva), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [Sarvam](https://docs.pipecat.ai/server/services/stt/sarvam), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) |
|
||||
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [Mistral](https://docs.pipecat.ai/server/services/llm/mistral), [Novita](https://docs.pipecat.ai/server/services/llm/novita), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nvidia), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova), [Sarvam](https://docs.pipecat.ai/server/services/llm/sarvam), [Together AI](https://docs.pipecat.ai/server/services/llm/together) |
|
||||
| Text-to-Speech | [Async](https://docs.pipecat.ai/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Camb AI](https://docs.pipecat.ai/server/services/tts/camb), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [Gradium](https://docs.pipecat.ai/server/services/tts/gradium), [Groq](https://docs.pipecat.ai/server/services/tts/groq), [Hume](https://docs.pipecat.ai/server/services/tts/hume), [Inworld](https://docs.pipecat.ai/server/services/tts/inworld), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [NVIDIA Riva](https://docs.pipecat.ai/server/services/tts/riva), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [Resemble](https://docs.pipecat.ai/server/services/tts/resemble), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [Smallest](https://docs.pipecat.ai/server/services/tts/smallest), [Speechmatics](https://docs.pipecat.ai/server/services/tts/speechmatics), [xAI](https://docs.pipecat.ai/server/services/tts/xai), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
|
||||
| Speech-to-Speech | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [Grok Voice Agent](https://docs.pipecat.ai/server/services/s2s/grok), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai), [Ultravox](https://docs.pipecat.ai/server/services/s2s/ultravox), |
|
||||
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local |
|
||||
| Serializers | [Exotel](https://docs.pipecat.ai/server/utilities/serializers/exotel), [Plivo](https://docs.pipecat.ai/server/utilities/serializers/plivo), [Twilio](https://docs.pipecat.ai/server/utilities/serializers/twilio), [Telnyx](https://docs.pipecat.ai/server/utilities/serializers/telnyx), [Vonage](https://docs.pipecat.ai/server/utilities/serializers/vonage) |
|
||||
| Video | [HeyGen](https://docs.pipecat.ai/server/services/video/heygen), [LemonSlice](https://docs.pipecat.ai/server/services/video/lemonslice), [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) |
|
||||
| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) |
|
||||
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/google-imagen), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) |
|
||||
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [ai-coustics](https://docs.pipecat.ai/server/utilities/audio/aic-filter) |
|
||||
| Analytics & Metrics | [OpenTelemetry](https://docs.pipecat.ai/server/utilities/opentelemetry), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) |
|
||||
| Community | [Browse community integrations →](https://docs.pipecat.ai/server/services/community-integrations) |
|
||||
|
||||
📚 [View full services documentation →](https://docs.pipecat.ai/api-reference/server/services/supported-services)
|
||||
📚 [View full services documentation →](https://docs.pipecat.ai/server/services/supported-services)
|
||||
|
||||
## ⚡ Getting started
|
||||
|
||||
@@ -146,15 +142,15 @@ You can get started with Pipecat running on your local machine, then move your a
|
||||
|
||||
## 🧪 Code examples
|
||||
|
||||
- [Foundational](https://github.com/pipecat-ai/pipecat/tree/main/examples) — small snippets that build on each other, introducing one or two concepts at a time
|
||||
- [Foundational](https://github.com/pipecat-ai/pipecat/tree/main/examples/foundational) — small snippets that build on each other, introducing one or two concepts at a time
|
||||
- [Example apps](https://github.com/pipecat-ai/pipecat-examples) — complete applications that you can use as starting points for development
|
||||
|
||||
## 🛠️ Contributing to the framework
|
||||
|
||||
### Prerequisites
|
||||
|
||||
**Minimum Python Version:** 3.11
|
||||
**Recommended Python Version:** >= 3.12
|
||||
**Minimum Python Version:** 3.10
|
||||
**Recommended Python Version:** 3.12
|
||||
|
||||
### Setup Steps
|
||||
|
||||
@@ -170,6 +166,7 @@ You can get started with Pipecat running on your local machine, then move your a
|
||||
```bash
|
||||
uv sync --group dev --all-extras \
|
||||
--no-extra gstreamer \
|
||||
--no-extra krisp \
|
||||
--no-extra local \
|
||||
```
|
||||
|
||||
|
||||
1
changelog/3978.added.md
Normal file
1
changelog/3978.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `SarvamLLMService` with support for `sarvam-30b`, `sarvam-30b-16k`, `sarvam-105b` and `sarvam-105b-32k`
|
||||
1
changelog/4013.added.md
Normal file
1
changelog/4013.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `on_turn_context_created(context_id)` hook to `TTSService`. Override this to perform provider-specific setup (e.g. eagerly opening a server-side context) before text starts flowing. Called each time a new turn context ID is created.
|
||||
1
changelog/4013.changed.md
Normal file
1
changelog/4013.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added context prewarming path for `InworldTTSService` to improve first audio latency
|
||||
1
changelog/4022.changed.md
Normal file
1
changelog/4022.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `KrispVivaVadAnalyzer` for Voice Activity Detection using the Krisp VIVA SDK (requires `krisp_audio`).
|
||||
1
changelog/4028.changed.md
Normal file
1
changelog/4028.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Modeified `InworldTTSService` to close context at end of turn instead of relying on idle timeout
|
||||
1
changelog/4031.added.md
Normal file
1
changelog/4031.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `XAIHttpTTSService` for text-to-speech using xAI's HTTP TTS API.
|
||||
1
changelog/4078.changed.md
Normal file
1
changelog/4078.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added Gemini 3 support to the Gemini Live service.
|
||||
1
changelog/4084.changed.md
Normal file
1
changelog/4084.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- `TTSService`: the default `stop_frame_timeout_s` (idle time before an automatic `TTSStoppedFrame` is pushed when `push_stop_frames=True`) has changed from `2.0` to `3.0` seconds.
|
||||
1
changelog/4089.added.md
Normal file
1
changelog/4089.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added support for "developer" role messages in conversation context across all LLM adapters. For non-OpenAI services (Anthropic, Google, AWS Bedrock), "developer" messages are converted to "user" messages (use `system_instruction` to set the system instruction). For OpenAI services, "developer" messages pass through in conversation history. For the Responses API, they are kept as "developer" role (matching the existing "system" → "developer" conversion).
|
||||
1
changelog/4089.changed.md
Normal file
1
changelog/4089.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- ⚠️ `GeminiLLMAdapter` now only treats `messages[0]` as the initial system message, matching all other adapters. Previously it searched for the first "system" message anywhere in the conversation history. A "system" message appearing later in the list will now be converted to "user" instead of being extracted as the system instruction.
|
||||
1
changelog/4089.fixed.2.md
Normal file
1
changelog/4089.fixed.2.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed Gemini Live (`GoogleGeminiLiveLLMService`) not honoring `settings.system_instruction`. The system instruction was being read from a deprecated constructor parameter instead of the settings object, causing it to be silently ignored.
|
||||
1
changelog/4089.fixed.md
Normal file
1
changelog/4089.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed `AWSBedrockLLMAdapter` sending an empty message list to the API when the only message in context was a system message. The lone system message is now converted to "user" role instead of being extracted, matching the existing Anthropic adapter behavior.
|
||||
1
changelog/4092.added.md
Normal file
1
changelog/4092.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `SmallestTTSService`, a WebSocket-based TTS service integration with Smallest AI's Waves API. Supports the Lightning v2 and v3.1 models with configurable voice, language, speed, consistency, similarity, and enhancement settings.
|
||||
1
changelog/4113.changed.md
Normal file
1
changelog/4113.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed `InworldTtsService` to fallback to full text when TTS timestamps are not received
|
||||
1
changelog/4115.added.md
Normal file
1
changelog/4115.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added warnings in turn stop strategies when `VADParams.stop_secs` differs from the recommended default (0.2s) or when `stop_secs >= STT p99 latency`, which collapses the STT wait timeout to 0s and may cause delayed turn detection. The warnings guide developers to re-run the [stt-benchmark](https://github.com/pipecat-ai/stt-benchmark) with their VAD settings.
|
||||
1
changelog/4117.added.md
Normal file
1
changelog/4117.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `domain` parameter to `AssemblyAISTTSettings` for specialized recognition modes such as Medical Mode (`domain="medical-v1"`).
|
||||
1
changelog/4119.added.md
Normal file
1
changelog/4119.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `NovitaLLMService` for using Novita AI's LLM models via their OpenAI-compatible API.
|
||||
1
changelog/4120.added.md
Normal file
1
changelog/4120.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `cleanup()` method to `VADAnalyzer` and `VADController` so VAD analyzer resources are properly released when no longer needed. Custom `VADAnalyzer` subclasses can override `cleanup()` to free any held resources.
|
||||
1
changelog/4125.fixed.md
Normal file
1
changelog/4125.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed Gemini Live pipeline hanging indefinitely when an `EndFrame` was deferred while waiting for the bot to finish responding and `turn_complete` never arrived. As a possible root-cause fix, `turn_complete` messages are now handled even if they lack `usage_metadata`. As a fallback, the deferred `EndFrame` now has a 30-second safety timeout.
|
||||
1
changelog/4126.fixed.md
Normal file
1
changelog/4126.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed ElevenLabs WebSocket disconnections (1008 "Maximum simultaneous contexts exceeded") caused by rapid user interruptions. When interruptions arrived before any TTS text was generated, phantom contexts were created on the ElevenLabs server that were never closed, eventually exceeding the 5-context limit.
|
||||
1
changelog/4127.fixed.md
Normal file
1
changelog/4127.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed the final sentence being dropped from the conversation context when using RTVI text input with non-word-timestamp TTS services. The `LLMFullResponseEndFrame` was racing ahead of the last `TTSTextFrame`, causing the `LLMAssistantAggregator` to finalize the context before the final sentence arrived.
|
||||
1
changelog/4128.added.md
Normal file
1
changelog/4128.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `on_end_of_turn` event handler to `AssemblyAISTTService`. This fires after the final transcript is pushed, providing a reliable hook for end-of-turn logic that doesn't race with `TranscriptionFrame`. Works in both Pipecat and AssemblyAI turn detection modes.
|
||||
1
changelog/4130.changed.md
Normal file
1
changelog/4130.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- ⚠️ Realtime services (Gemini Live, OpenAI Realtime, Grok Realtime, Nova Sonic) now prefer `system_instruction` from service settings over an initial system message in the LLM context, matching the behavior of non-realtime services. Previously, context-provided system instructions took precedence. A warning is now logged when both are set.
|
||||
1
changelog/4135.fixed.md
Normal file
1
changelog/4135.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed audio crackling and popping in recordings when both user and bot are speaking. `AudioBufferProcessor` no longer injects silence into a track's buffer while that track is actively producing audio, preventing mid-utterance interruptions in the recorded output.
|
||||
1
changelog/4136.changed.2.md
Normal file
1
changelog/4136.changed.2.md
Normal file
@@ -0,0 +1 @@
|
||||
- Bumped `nvidia-riva-client` minimum version to `>=2.25.1`.
|
||||
1
changelog/4136.changed.md
Normal file
1
changelog/4136.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Upgraded `protobuf` from 5.x to 6.x (`>=6.31.1,<7`).
|
||||
1
changelog/4137.changed.md
Normal file
1
changelog/4137.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Unrecognized language strings (e.g. Deepgram's `"multi"`) no longer produce a warning at startup. The log message has been downgraded to debug level since these are valid service-specific values that are passed through correctly.
|
||||
1
changelog/4142.changed.md
Normal file
1
changelog/4142.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- `GrokLLMService` and `GrokRealtimeLLMService` now live in the `pipecat.services.xai` module alongside `XAIHttpTTSService`, since all three use the same xAI API. Update imports from `pipecat.services.grok.*` to `pipecat.services.xai.*` (e.g. `from pipecat.services.xai.llm import GrokLLMService`).
|
||||
1
changelog/4142.deprecated.md
Normal file
1
changelog/4142.deprecated.md
Normal file
@@ -0,0 +1 @@
|
||||
- `pipecat.services.grok.llm`, `pipecat.services.grok.realtime.llm`, and `pipecat.services.grok.realtime.events` are deprecated. The old import paths still work but emit a `DeprecationWarning`; use `pipecat.services.xai.llm`, `pipecat.services.xai.realtime.llm`, and `pipecat.services.xai.realtime.events` instead.
|
||||
1
changelog/4143.added.md
Normal file
1
changelog/4143.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `DeepgramFluxSageMakerSTTService` for running Deepgram Flux speech-to-text on AWS SageMaker endpoints. Use with `ExternalUserTurnStrategies` to take advantage of Flux's turn detection.
|
||||
1
changelog/4145.fixed.2.md
Normal file
1
changelog/4145.fixed.2.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed websocket TTS word timestamps so interrupted contexts cannot leak stale words or backward PTS values into later turns.
|
||||
1
changelog/4145.fixed.md
Normal file
1
changelog/4145.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed a race condition in `InterruptibleTTSService` where, if `run_tts` had been invoked but `BotStartedSpeakingFrame` had not yet been received, a user interruption could allow stale audio to leak through.
|
||||
1
changelog/4145.removed.md
Normal file
1
changelog/4145.removed.md
Normal file
@@ -0,0 +1 @@
|
||||
- ⚠️ `TTSService.add_word_timestamps()` no longer supports the `"Reset"` and `"TTSStoppedFrame"` sentinel strings. If you have a custom TTS service that called `await self.add_word_timestamps([("Reset", 0)])` or `await self.add_word_timestamps([("TTSStoppedFrame", 0), ("Reset", 0)], ctx_id)`, replace them with `await self.append_to_audio_context(ctx_id, TTSStoppedFrame(context_id=ctx_id))` and let `_handle_audio_context` manage the word-timestamp reset automatically.
|
||||
1
changelog/4146.fixed.md
Normal file
1
changelog/4146.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed Gemini Live local VAD mode (`GeminiVADParams(disabled=True)` with external VAD) not working. The bot now correctly detects user speech and signals turn boundaries to the Gemini API.
|
||||
1
changelog/4147.fixed.md
Normal file
1
changelog/4147.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed Gemini Live message handling to process all `server_content` fields independently. Gemini 3.x can bundle multiple fields (e.g. `model_turn` and `output_transcription`) on the same message, but the previous `elif` chain only processed the first match, silently dropping the rest.
|
||||
1
changelog/4149.fixed.md
Normal file
1
changelog/4149.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed `ServiceSwitcher` with `ServiceSwitcherStrategyFailover` incorrectly triggering failover when `ErrorFrame`s from other pipeline stages (e.g. TTS) propagated upstream through the switcher. Previously, any non-fatal error passing through would be misattributed to the active service and trigger an unwanted service switch. Now only errors originating from the switcher's own managed services trigger failover.
|
||||
1
changelog/4151.fixed.md
Normal file
1
changelog/4151.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed `LiveKitOutputTransport` not clearing the `rtc.AudioSource` internal buffer on interruption, causing the bot to continue speaking for several seconds after being interrupted.
|
||||
1
changelog/4152.fixed.md
Normal file
1
changelog/4152.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed a crash in OpenAI LLM processing when the provider returns `chunk.choices[0].delta.audio = None`, which caused `'NoneType' object has no attribute 'get'` errors during audio transcript handling.
|
||||
1
changelog/4153.fixed.md
Normal file
1
changelog/4153.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed error floods in `DeepgramSTTService` when the WebSocket connection drops. With Deepgram SDK 6.x, `send_media()` raises exceptions on a dead connection instead of silently failing, causing every queued audio frame to log an error. Now `send_media()` failures are caught gracefully — a single warning is logged and audio frames are skipped until the existing reconnection logic restores the connection.
|
||||
1
changelog/4154.removed.md
Normal file
1
changelog/4154.removed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Removed `SambaNovaSTTService`. SambaNova no longer offers speech-to-text audio models. Use another STT provider instead.
|
||||
1
changelog/4156.added.md
Normal file
1
changelog/4156.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `Mem0MemoryService.get_memories()` convenience method for retrieving all stored memories outside the pipeline (e.g. to build a personalized greeting at connection time). This avoids the need to manually handle client type branching, filter construction, and async wrapping.
|
||||
1
changelog/4156.changed.md
Normal file
1
changelog/4156.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- ⚠️ Bumped `mem0ai` dependency from `~=0.1.94` to `>=1.0.8,<2`. Users of the `mem0` extra will need to update their mem0ai package.
|
||||
1
changelog/4156.fixed.2.md
Normal file
1
changelog/4156.fixed.2.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed `Mem0MemoryService` failing to store messages when the context contained system or developer role messages. The Mem0 API only accepts user and assistant roles, so other roles are now filtered out before storing.
|
||||
1
changelog/4156.fixed.md
Normal file
1
changelog/4156.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- `Mem0MemoryService` no longer blocks the event loop during memory storage and retrieval. All Mem0 API calls now run in a background thread, and message storage is fire-and-forget so it doesn't delay downstream processing.
|
||||
1
changelog/4161.fixed.md
Normal file
1
changelog/4161.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added missing `on_dtmf_event` callback to `LemonSliceTransportClient.setup()` `DailyCallbacks` construction, fixing a `ValidationError` at pipeline setup time.
|
||||
1
changelog/4167.fixed.2.md
Normal file
1
changelog/4167.fixed.2.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed an issue in `InworldTTSService` where, in cases of fast interruption, we would continue receiving audio from the previous context.
|
||||
1
changelog/4167.fixed.md
Normal file
1
changelog/4167.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed a word timestamp interleaving issue in `InworldTTSService` when processing multiple sentences.
|
||||
1
changelog/4172.fixed.md
Normal file
1
changelog/4172.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed duplicate `TTSStoppedFrame` being pushed in TTS services using `push_stop_frames=True`. When the stop-frame timeout fired, a second `TTSStoppedFrame` could be pushed after the normal one at context completion.
|
||||
1
changelog/4172.performance.md
Normal file
1
changelog/4172.performance.md
Normal file
@@ -0,0 +1 @@
|
||||
- `RimeTTSService` now handles Rime's `done` WebSocket message to complete audio contexts immediately, eliminating the 3-second idle timeout that previously added latency at the end of each utterance.
|
||||
1
changelog/4174.fixed.md
Normal file
1
changelog/4174.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- ⚠️ Fixed `DeepgramSTTService` compatibility with deepgram-sdk 6.1.0. The SDK now requires explicit message objects for `send_keep_alive()`, `send_close_stream()`, and `send_finalize()`. The minimum deepgram-sdk version is now 6.1.0.
|
||||
1
changelog/4176.fixed.md
Normal file
1
changelog/4176.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed RTVI events not being delivered to clients when using WebSocket transports. `ProtobufFrameSerializer` now sets `ignore_rtvi_messages=False` by default.
|
||||
@@ -1 +0,0 @@
|
||||
- Added a `session_id` field to `RunnerArguments` so bots can log or trace a per-session identifier in local development the same way they can in Pipecat Cloud. The development runner now mints a UUID at every construction site, and paths that already returned a `sessionId` to the caller (Daily `/start`, dial-in webhook) share that same UUID with the runner args instead of generating two. The SmallWebRTC `/api/offer` endpoint also accepts an optional `session_id` query parameter so the `/sessions/{session_id}/...` proxy can thread it through.
|
||||
@@ -1 +0,0 @@
|
||||
- Updated the default `SonioxTTSService` model from `tts-rt-v1-preview` to the generally available `tts-rt-v1`.
|
||||
@@ -5,7 +5,7 @@
|
||||
|
||||
{% for text, values in sections[section][category].items() %}
|
||||
{{ text }}
|
||||
(PR {{ values|join(', ') }})
|
||||
(PR {{ values|join(', ') }})
|
||||
|
||||
{% endfor %}
|
||||
{% endfor %}
|
||||
|
||||
@@ -1,60 +1,108 @@
|
||||
# Pipecat API Documentation
|
||||
# Pipecat Documentation
|
||||
|
||||
This directory contains the source files for auto-generating Pipecat's API reference documentation.
|
||||
This directory contains the source files for auto-generating Pipecat's server API reference documentation.
|
||||
|
||||
## Setup
|
||||
|
||||
1. Install documentation dependencies:
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
2. Make the build scripts executable:
|
||||
|
||||
```bash
|
||||
chmod +x build-docs.sh rtd-test.py
|
||||
```
|
||||
|
||||
## Building Documentation
|
||||
|
||||
From this directory:
|
||||
From this directory, you can build the documentation in several ways:
|
||||
|
||||
### Local Build
|
||||
|
||||
```bash
|
||||
# Build docs (warnings shown but don't fail the build)
|
||||
cd docs/api && uv run ./build-docs.sh
|
||||
# Using the build script (automatically opens docs when done)
|
||||
./build-docs.sh
|
||||
|
||||
# Build with strict mode (warnings treated as errors)
|
||||
cd docs/api && uv run ./build-docs.sh --strict
|
||||
# Or directly with sphinx-build
|
||||
sphinx-build -b html . _build/html -W --keep-going
|
||||
```
|
||||
|
||||
The build script will:
|
||||
### ReadTheDocs Test Build
|
||||
|
||||
1. Install documentation dependencies via `uv sync --group docs`
|
||||
2. Clean previous build output
|
||||
3. Run `sphinx-build` to generate HTML documentation
|
||||
4. Open the result in your browser (macOS)
|
||||
To test the documentation build process exactly as it would run on ReadTheDocs:
|
||||
|
||||
```bash
|
||||
./rtd-test.py
|
||||
```
|
||||
|
||||
This script:
|
||||
|
||||
- Creates a fresh virtual environment
|
||||
- Installs all dependencies as specified in requirements files
|
||||
- Handles conflicting dependencies (like grpcio versions for Riva)
|
||||
- Builds the documentation in an isolated environment
|
||||
- Provides detailed logging of the build process
|
||||
|
||||
Use this script to verify your documentation will build correctly on ReadTheDocs before pushing changes.
|
||||
|
||||
## Viewing Documentation
|
||||
|
||||
The built documentation will be available at `_build/html/index.html`. To open:
|
||||
|
||||
```bash
|
||||
# On MacOS
|
||||
open _build/html/index.html
|
||||
|
||||
# On Linux
|
||||
xdg-open _build/html/index.html
|
||||
|
||||
# On Windows
|
||||
start _build/html/index.html
|
||||
```
|
||||
|
||||
## Directory Structure
|
||||
|
||||
```
|
||||
.
|
||||
├── api/ # Auto-generated API documentation (created during build)
|
||||
├── _build/ # Built documentation output
|
||||
├── conf.py # Sphinx configuration (mock imports, extensions, etc.)
|
||||
├── api/ # Auto-generated API documentation
|
||||
├── _build/ # Built documentation
|
||||
├── _static/ # Static files (images, css, etc.)
|
||||
├── conf.py # Sphinx configuration
|
||||
├── index.rst # Main documentation entry point
|
||||
├── requirements-base.txt # Base documentation dependencies
|
||||
├── requirements-riva.txt # Riva-specific dependencies
|
||||
├── build-docs.sh # Local build script
|
||||
└── rtd-test.sh # ReadTheDocs test build script (uses pip, not uv)
|
||||
└── rtd-test.py # ReadTheDocs test build script
|
||||
```
|
||||
|
||||
## How It Works
|
||||
## Notes
|
||||
|
||||
- `conf.py` runs `sphinx-apidoc` during Sphinx's `setup()` phase to generate `.rst` files from Python source
|
||||
- Sphinx autodoc imports each module to extract docstrings
|
||||
- Modules with unavailable dependencies are listed in `autodoc_mock_imports` in `conf.py`
|
||||
- Napoleon extension converts Google-style docstrings to reStructuredText
|
||||
- Documentation is auto-generated from Python docstrings
|
||||
- Service modules are automatically detected and included
|
||||
- The build process matches our ReadTheDocs configuration
|
||||
- Warnings are treated as errors (-W flag) to maintain consistency
|
||||
- The --keep-going flag ensures all errors are reported
|
||||
- Dependencies are split into multiple requirements files to handle version conflicts
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
**Module not appearing in docs:**
|
||||
If you encounter missing service modules:
|
||||
|
||||
1. Check the build output for `autodoc: failed to import` warnings
|
||||
2. If the module has an unresolvable import dependency, add it to `autodoc_mock_imports` in `conf.py`
|
||||
3. Verify the module is importable: `uv run python -c "import pipecat.module.name"`
|
||||
1. Verify the service is installed with its extras: `pip install pipecat-ai[service-name]`
|
||||
2. Check the build logs for import errors
|
||||
3. Ensure the service module is properly initialized in the package
|
||||
4. Run `./rtd-test.py` to test in an isolated environment matching ReadTheDocs
|
||||
|
||||
**Duplicate object warnings:**
|
||||
For dependency conflicts:
|
||||
|
||||
These come from re-export modules or Sphinx discovering the same class through multiple import paths. Usually cosmetic.
|
||||
1. Check the requirements files for version specifications
|
||||
2. Use `rtd-test.py` to verify dependency resolution
|
||||
3. Consider adding service-specific requirements files if needed
|
||||
|
||||
**Docstring formatting warnings:**
|
||||
For more information:
|
||||
|
||||
Docstrings use reStructuredText, not Markdown. Common issues:
|
||||
- Use `Example::` with indented code blocks, not `` ```python ``
|
||||
- Ensure blank lines between directive content and subsequent sections
|
||||
- Use `Parameters:` (not `Attributes:`) for dataclass field documentation to avoid duplicate entries
|
||||
- [ReadTheDocs Configuration](.readthedocs.yaml)
|
||||
- [Sphinx Documentation](https://www.sphinx-doc.org/)
|
||||
|
||||
@@ -1,16 +1,8 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Usage: ./build-docs.sh [--strict]
|
||||
# --strict: Treat warnings as errors (default: warnings only)
|
||||
|
||||
SPHINX_OPTS=""
|
||||
if [ "$1" = "--strict" ]; then
|
||||
SPHINX_OPTS="-W --keep-going"
|
||||
fi
|
||||
|
||||
# Build docs using uv
|
||||
echo "Installing dependencies with uv..."
|
||||
uv sync --group docs --all-extras --no-extra gstreamer --no-extra local_smart_turn --no-extra moondream --no-extra mlx-whisper
|
||||
uv sync --group docs --all-extras --no-extra krisp --no-extra gstreamer --no-extra local_smart_turn --no-extra moondream --no-extra riva --no-extra mlx-whisper
|
||||
|
||||
# Check if sphinx-build is available
|
||||
if ! uv run sphinx-build --version &> /dev/null; then
|
||||
@@ -22,7 +14,8 @@ fi
|
||||
rm -rf _build
|
||||
|
||||
echo "Building documentation..."
|
||||
uv run sphinx-build -b html -d _build/doctrees . _build/html $SPHINX_OPTS
|
||||
# Build docs matching ReadTheDocs configuration
|
||||
uv run sphinx-build -b html -d _build/doctrees . _build/html -W --keep-going
|
||||
|
||||
if [ $? -eq 0 ]; then
|
||||
echo "Documentation built successfully!"
|
||||
|
||||
@@ -4,19 +4,6 @@ import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
# Fix Pydantic v2 + Sphinx autodoc incompatibility: ConfigDict(extra="allow") fails
|
||||
# during Sphinx's import because __pydantic_extra__ annotation on BaseModel resolves to
|
||||
# `Dict[str, Any] | None` whose get_origin() is Union, not dict. Patch the check to
|
||||
# accept Union-wrapped dict types (i.e., Optional[Dict[str, Any]]).
|
||||
import pydantic._internal._generate_schema as _pydantic_gs
|
||||
|
||||
_ORIG_DICT_TYPES = _pydantic_gs.DICT_TYPES
|
||||
# Expand the accepted types to include Union (Optional[Dict[str, Any]])
|
||||
import types
|
||||
import typing
|
||||
|
||||
_pydantic_gs.DICT_TYPES = [*_ORIG_DICT_TYPES, typing.Union, types.UnionType]
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
||||
logger = logging.getLogger("sphinx-build")
|
||||
@@ -61,6 +48,8 @@ autodoc_default_options = {
|
||||
# Mock imports for optional dependencies
|
||||
autodoc_mock_imports = [
|
||||
# Krisp - has build issues on some platforms
|
||||
"pipecat_ai_krisp",
|
||||
"krisp",
|
||||
"krisp_audio",
|
||||
# System-specific GUI libraries
|
||||
"_tkinter",
|
||||
@@ -89,6 +78,16 @@ autodoc_mock_imports = [
|
||||
"einops",
|
||||
"intel_extension_for_pytorch",
|
||||
"huggingface_hub",
|
||||
# riva dependencies
|
||||
"riva",
|
||||
"riva.client",
|
||||
"riva.client.Auth",
|
||||
"riva.client.ASRService",
|
||||
"riva.client.StreamingRecognitionConfig",
|
||||
"riva.client.RecognitionConfig",
|
||||
"riva.client.AudioEncoding",
|
||||
"riva.client.proto.riva_tts_pb2",
|
||||
"riva.client.SpeechSynthesisService",
|
||||
# MLX dependencies (Apple Silicon specific)
|
||||
"mlx",
|
||||
"mlx_whisper", # Note: might need underscore format too
|
||||
@@ -99,6 +98,7 @@ autodoc_mock_imports = [
|
||||
"cartesia",
|
||||
"camb",
|
||||
"sarvamai",
|
||||
"openpipe",
|
||||
"openai.types.beta.realtime",
|
||||
"langchain_core",
|
||||
"langchain_core.messages",
|
||||
@@ -110,8 +110,6 @@ autodoc_mock_imports = [
|
||||
"fastapi.middleware",
|
||||
"fastapi.responses",
|
||||
"uvicorn",
|
||||
# Deepgram dependencies
|
||||
"deepgram",
|
||||
]
|
||||
|
||||
# HTML output settings
|
||||
@@ -138,8 +136,6 @@ def import_core_modules():
|
||||
"pipecat.runner",
|
||||
"pipecat.serializers",
|
||||
"pipecat.transcriptions",
|
||||
"pipecat.turns",
|
||||
"pipecat.extensions",
|
||||
"pipecat.utils",
|
||||
]
|
||||
|
||||
@@ -184,6 +180,7 @@ def setup(app):
|
||||
logger.info(f"Source directory: {source_dir}")
|
||||
|
||||
excludes = [
|
||||
str(project_root / "src/pipecat/pipeline/to_be_updated"),
|
||||
str(project_root / "src/pipecat/examples"),
|
||||
str(project_root / "src/pipecat/tests"),
|
||||
"**/test_*.py",
|
||||
|
||||
@@ -32,5 +32,4 @@ Quick Links
|
||||
Services <api/pipecat.services>
|
||||
Transcriptions <api/pipecat.transcriptions>
|
||||
Transports <api/pipecat.transports>
|
||||
Turns <api/pipecat.turns>
|
||||
Utils <api/pipecat.utils>
|
||||
|
||||
16
env.example
16
env.example
@@ -1,5 +1,5 @@
|
||||
# AI-COUSTICS
|
||||
AIC_LICENSE_KEY=...
|
||||
AICOUSTICS_LICENSE_KEY=...
|
||||
|
||||
# Anthropic
|
||||
ANTHROPIC_API_KEY=...
|
||||
@@ -121,9 +121,6 @@ MINIMAX_GROUP_ID=...
|
||||
# Mistral
|
||||
MISTRAL_API_KEY=...
|
||||
|
||||
# Nebius
|
||||
NEBIUS_API_KEY=...
|
||||
|
||||
# Neuphonic
|
||||
NEUPHONIC_API_KEY=...
|
||||
|
||||
@@ -136,6 +133,9 @@ NVIDIA_API_KEY=...
|
||||
# OpenAI
|
||||
OPENAI_API_KEY=...
|
||||
|
||||
# OpenPipe
|
||||
OPENPIPE_API_KEY=...
|
||||
|
||||
# OpenRouter
|
||||
OPENROUTER_API_KEY=...
|
||||
|
||||
@@ -214,10 +214,4 @@ WHATSAPP_PHONE_NUMBER_ID=...
|
||||
WHATSAPP_APP_SECRET=...
|
||||
|
||||
# xAI / Grok
|
||||
XAI_API_KEY=...
|
||||
|
||||
# PIPECAT_SCTP_MAX_CHUNK_SIZE controls the maximum SCTP DATA-chunk payload
|
||||
# size (bytes) used by aiortc's data channel. The default is 1100.
|
||||
# All the details here:
|
||||
# https://docs.pipecat.ai/api-reference/server/services/transport/small-webrtc#pipecat_sctp_max_chunk_size
|
||||
#PIPECAT_SCTP_MAX_CHUNK_SIZE=1100
|
||||
XAI_API_KEY=...
|
||||
@@ -1,150 +1,31 @@
|
||||
# Pipecat Examples
|
||||
|
||||
This directory contains examples showing how to build voice and multimodal agents with Pipecat.
|
||||
This directory contains examples to help you learn how to build with Pipecat.
|
||||
|
||||
## Setup
|
||||
## Getting Started
|
||||
|
||||
1. Follow the [README](https://github.com/pipecat-ai/pipecat/blob/main/README.md#%EF%B8%8F-contributing-to-the-framework) steps to get your local environment configured.
|
||||
New to Pipecat? Start here:
|
||||
|
||||
> **Run from root directory**: Make sure you are running the steps from the root directory.
|
||||
- **[Quickstart](quickstart/)** - Get your first voice AI bot running in 5 minutes _(coming soon)_
|
||||
- **[Client/Server Web](client-server-web/)** - Learn to build web applications with Pipecat's client SDKs _(coming soon)_
|
||||
- **[Phone Bot with Twilio](phone-bot-twilio/)** - Connect your bot to a phone number _(coming soon)_
|
||||
|
||||
> **Using local audio?**: The `LocalAudioTransport` requires a system dependency for `portaudio`. Install the dependency to use the transport.
|
||||
## Foundational Examples
|
||||
|
||||
2. Copy the [`env.example`](../env.example) file and add API keys for services you plan to use:
|
||||
Single-file examples that introduce core Pipecat concepts one at a time. These examples:
|
||||
|
||||
```bash
|
||||
cp env.example .env
|
||||
# Edit .env with your API keys
|
||||
```
|
||||
- Build on each other progressively
|
||||
- Focus on specific features or integrations
|
||||
- Are used for testing with every Pipecat release
|
||||
|
||||
3. Run any example:
|
||||
See the **[Foundational Examples README](foundational/)** for the complete list.
|
||||
|
||||
```bash
|
||||
uv run python getting-started/01-say-one-thing.py
|
||||
```
|
||||
## More Advanced Examples
|
||||
|
||||
4. Open the web interface at http://localhost:7860/client/ and click "Connect"
|
||||
Ready to explore complex use cases? Visit **[pipecat-examples](https://github.com/pipecat-ai/pipecat-examples)** for:
|
||||
|
||||
## Running examples with other transports
|
||||
|
||||
Most examples support running with other transports, like Twilio or Daily.
|
||||
|
||||
### Daily
|
||||
|
||||
You need to create a Daily account at https://dashboard.daily.co/u/signup. Once signed up, you can create your own room from the dashboard and set the environment variables `DAILY_ROOM_URL` and `DAILY_API_KEY`. Alternatively, you can let the example create a room for you (still needs `DAILY_API_KEY` environment variable). Then, start any example with `-t daily`:
|
||||
|
||||
```bash
|
||||
uv run getting-started/06-voice-agent.py -t daily
|
||||
```
|
||||
|
||||
### Twilio
|
||||
|
||||
It is also possible to run the example through a Twilio phone number. You will need to setup a few things:
|
||||
|
||||
1. Install and run [ngrok](https://ngrok.com/download).
|
||||
|
||||
```bash
|
||||
ngrok http 7860
|
||||
```
|
||||
|
||||
2. Configure your Twilio phone number. One way is to setup a TwiML app and set the request URL to the ngrok URL from step (1). Then, set your phone number to use the new TwiML app.
|
||||
|
||||
Then, run the example with:
|
||||
|
||||
```bash
|
||||
uv run getting-started/06-voice-agent.py -t twilio -x NGROK_HOST_NAME
|
||||
```
|
||||
|
||||
## Directory Structure
|
||||
|
||||
### [`getting-started/`](./getting-started/)
|
||||
|
||||
Progressive introduction to Pipecat, from minimal TTS to a full voice agent with function calling.
|
||||
|
||||
### [`voice/`](./voice/)
|
||||
|
||||
Full STT + LLM + TTS voice agent pipelines showcasing different speech service providers (Deepgram, ElevenLabs, Cartesia, etc.)
|
||||
|
||||
### [`function-calling/`](./function-calling/)
|
||||
|
||||
Function calling with different LLM providers (OpenAI, Anthropic, Google, etc.)
|
||||
|
||||
### [`transcription/`](./transcription/)
|
||||
|
||||
Speech-to-text examples with various STT providers.
|
||||
|
||||
### [`vision/`](./vision/)
|
||||
|
||||
Image description and vision capabilities with different multimodal LLMs.
|
||||
|
||||
### [`realtime/`](./realtime/)
|
||||
|
||||
Realtime and multimodal live APIs (OpenAI Realtime, Gemini Live, AWS Nova Sonic, Ultravox, Grok).
|
||||
|
||||
### [`persistent-context/`](./persistent-context/)
|
||||
|
||||
Maintaining conversation context across sessions with different providers.
|
||||
|
||||
### [`context-summarization/`](./context-summarization/)
|
||||
|
||||
Summarizing conversation context to manage token limits.
|
||||
|
||||
### [`update-settings/`](./update-settings/)
|
||||
|
||||
Changing service settings at runtime, organized by service type:
|
||||
|
||||
- **[`stt/`](./update-settings/stt/)** — Speech-to-text settings
|
||||
- **[`tts/`](./update-settings/tts/)** — Text-to-speech settings
|
||||
- **[`llm/`](./update-settings/llm/)** — LLM settings
|
||||
|
||||
### [`turn-management/`](./turn-management/)
|
||||
|
||||
Turn detection, interruption handling, and user input management.
|
||||
|
||||
### [`thinking-and-mcp/`](./thinking-and-mcp/)
|
||||
|
||||
LLM thinking/reasoning modes and MCP (Model Context Protocol) tool server integration.
|
||||
|
||||
### [`transports/`](./transports/)
|
||||
|
||||
Transport layer examples (WebRTC, Daily, LiveKit).
|
||||
|
||||
### [`video-avatar/`](./video-avatar/)
|
||||
|
||||
Video avatar integrations (Tavus, HeyGen, Simli, LemonSlice).
|
||||
|
||||
### [`video-processing/`](./video-processing/)
|
||||
|
||||
Video processing, mirroring, GStreamer, and custom video tracks.
|
||||
|
||||
### [`audio/`](./audio/)
|
||||
|
||||
Audio recording, background sounds, and sound effects.
|
||||
|
||||
### [`observability/`](./observability/)
|
||||
|
||||
Pipeline monitoring: observers, heartbeats, and Sentry metrics.
|
||||
|
||||
### [`rag/`](./rag/)
|
||||
|
||||
Retrieval-augmented generation, grounding, and long-term memory (Mem0, Gemini).
|
||||
|
||||
### [`features/`](./features/)
|
||||
|
||||
Miscellaneous features: wake phrases, live translation, service switching, voice switching, and more.
|
||||
|
||||
## Advanced Usage
|
||||
|
||||
### Customizing Network Settings
|
||||
|
||||
```bash
|
||||
uv run python <example-name> --host 0.0.0.0 --port 8080
|
||||
```
|
||||
|
||||
### Troubleshooting
|
||||
|
||||
- **No audio/video**: Check browser permissions for microphone and camera
|
||||
- **Connection errors**: Verify API keys in `.env` file
|
||||
- **Port conflicts**: Use `--port` to change the port
|
||||
|
||||
For more examples, visit the [pipecat-examples repository](https://github.com/pipecat-ai/pipecat-examples).
|
||||
- Production-ready applications
|
||||
- Multi-platform client implementations
|
||||
- Telephony integrations
|
||||
- Multimodal and creative applications
|
||||
- Deployment and monitoring examples
|
||||
|
||||
71
examples/foundational/01-say-one-thing-piper.py
Normal file
71
examples/foundational/01-say-one-thing-piper.py
Normal file
@@ -0,0 +1,71 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import os
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.frames.frames import EndFrame, TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.piper.tts import PiperHttpTTSService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(audio_out_enabled=True),
|
||||
"twilio": lambda: FastAPIWebsocketParams(audio_out_enabled=True),
|
||||
"webrtc": lambda: TransportParams(audio_out_enabled=True),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
# Create an HTTP session
|
||||
async with aiohttp.ClientSession() as session:
|
||||
tts = PiperHttpTTSService(
|
||||
base_url=os.getenv("PIPER_BASE_URL"),
|
||||
aiohttp_session=session,
|
||||
sample_rate=24000,
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
Pipeline([tts, transport.output()]),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
# Register an event handler so we can play the audio when the client joins
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
await task.queue_frames([TTSSpeakFrame(f"Hello there!"), EndFrame()])
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
72
examples/foundational/01-say-one-thing-rime.py
Normal file
72
examples/foundational/01-say-one-thing-rime.py
Normal file
@@ -0,0 +1,72 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import os
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.frames.frames import EndFrame, TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.rime.tts import RimeHttpTTSService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(audio_out_enabled=True),
|
||||
"twilio": lambda: FastAPIWebsocketParams(audio_out_enabled=True),
|
||||
"webrtc": lambda: TransportParams(audio_out_enabled=True),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
# Create an HTTP session
|
||||
async with aiohttp.ClientSession() as session:
|
||||
tts = RimeHttpTTSService(
|
||||
api_key=os.getenv("RIME_API_KEY", ""),
|
||||
aiohttp_session=session,
|
||||
settings=RimeHttpTTSService.Settings(
|
||||
voice="rex",
|
||||
),
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
Pipeline([tts, transport.output()]),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
# Register an event handler so we can play the audio when the client joins
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
await task.queue_frames([TTSSpeakFrame(f"Hello there!"), EndFrame()])
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
@@ -36,7 +36,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.environ["CARTESIA_API_KEY"],
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
settings=CartesiaTTSService.Settings(
|
||||
voice="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
),
|
||||
@@ -28,7 +28,7 @@ async def main():
|
||||
transport = LocalAudioTransport(LocalAudioTransportParams(audio_out_enabled=True))
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.environ["CARTESIA_API_KEY"],
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
settings=CartesiaTTSService.Settings(
|
||||
voice="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
),
|
||||
64
examples/foundational/01b-livekit-audio.py
Normal file
64
examples/foundational/01b-livekit-audio.py
Normal file
@@ -0,0 +1,64 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.runner.livekit import configure
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.transports.livekit.transport import LiveKitParams, LiveKitTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
(url, token, room_name) = await configure()
|
||||
|
||||
transport = LiveKitTransport(
|
||||
url=url,
|
||||
token=token,
|
||||
room_name=room_name,
|
||||
params=LiveKitParams(audio_out_enabled=True),
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
settings=CartesiaTTSService.Settings(
|
||||
voice="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
),
|
||||
)
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
task = PipelineTask(Pipeline([tts, transport.output()]))
|
||||
|
||||
# Register an event handler so we can play the audio when the
|
||||
# participant joins.
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant_id):
|
||||
await asyncio.sleep(1)
|
||||
await task.queue_frame(
|
||||
TTSSpeakFrame(
|
||||
"Hello there! How are you doing today? Would you like to talk about the weather?"
|
||||
)
|
||||
)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
64
examples/foundational/01c-nvidia-riva-tts.py
Normal file
64
examples/foundational/01c-nvidia-riva-tts.py
Normal file
@@ -0,0 +1,64 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.frames.frames import EndFrame, TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.nvidia.tts import NvidiaTTSService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(audio_out_enabled=True),
|
||||
"twilio": lambda: FastAPIWebsocketParams(audio_out_enabled=True),
|
||||
"webrtc": lambda: TransportParams(audio_out_enabled=True),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
tts = NvidiaTTSService(api_key=os.getenv("NVIDIA_API_KEY"))
|
||||
|
||||
task = PipelineTask(
|
||||
Pipeline([tts, transport.output()]),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
# Register an event handler so we can play the audio when the client joins
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
await task.queue_frames([TTSSpeakFrame(f"Hello there!"), EndFrame()])
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
@@ -38,14 +38,14 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.environ["CARTESIA_API_KEY"],
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
settings=CartesiaTTSService.Settings(
|
||||
voice="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.environ["OPENAI_API_KEY"],
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
settings=OpenAILLMService.Settings(
|
||||
system_instruction="You are a helpful assistant in a voice conversation. Your responses will be spoken aloud, so avoid emojis, bullet points, or other formatting that can't be spoken. Respond to what the user said in a creative, helpful, and brief way.",
|
||||
),
|
||||
84
examples/foundational/03-still-frame.py
Normal file
84
examples/foundational/03-still-frame.py
Normal file
@@ -0,0 +1,84 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import os
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.frames.frames import TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.fal.image import FalImageGenService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
video_out_enabled=True,
|
||||
video_out_width=1024,
|
||||
video_out_height=1024,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
video_out_enabled=True,
|
||||
video_out_width=1024,
|
||||
video_out_height=1024,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
# Create an HTTP session
|
||||
async with aiohttp.ClientSession() as session:
|
||||
imagegen = FalImageGenService(
|
||||
settings=FalImageGenService.Settings(
|
||||
image_size="square_hd",
|
||||
),
|
||||
aiohttp_session=session,
|
||||
key=os.getenv("FAL_KEY"),
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
Pipeline([imagegen, transport.output()]),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
# Register an event handler so we can play the audio when the client joins
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
await task.queue_frame(TextFrame("a cat in the style of picasso"))
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
@@ -42,7 +42,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
imagegen = GoogleImageGenService(
|
||||
api_key=os.environ["GOOGLE_API_KEY"],
|
||||
api_key=os.getenv("GOOGLE_API_KEY"),
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
@@ -8,6 +8,7 @@ import argparse
|
||||
import asyncio
|
||||
import os
|
||||
from contextlib import asynccontextmanager
|
||||
from typing import Dict
|
||||
|
||||
import uvicorn
|
||||
from dotenv import load_dotenv
|
||||
@@ -38,7 +39,7 @@ load_dotenv(override=True)
|
||||
app = FastAPI()
|
||||
|
||||
# Store connections by pc_id
|
||||
pcs_map: dict[str, SmallWebRTCConnection] = {}
|
||||
pcs_map: Dict[str, SmallWebRTCConnection] = {}
|
||||
|
||||
ice_servers = [
|
||||
IceServer(
|
||||
@@ -62,17 +63,17 @@ async def run_example(webrtc_connection: SmallWebRTCConnection):
|
||||
),
|
||||
)
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.environ["DEEPGRAM_API_KEY"])
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.environ["CARTESIA_API_KEY"],
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
settings=CartesiaTTSService.Settings(
|
||||
voice="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.environ["OPENAI_API_KEY"],
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
settings=OpenAILLMService.Settings(
|
||||
system_instruction="You are a helpful assistant in a voice conversation. Your responses will be spoken aloud, so avoid emojis, bullet points, or other formatting that can't be spoken. Respond to what the user said in a creative, helpful, and brief way.",
|
||||
),
|
||||
@@ -49,14 +49,14 @@ async def main():
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.environ["CARTESIA_API_KEY"],
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
settings=CartesiaTTSService.Settings(
|
||||
voice="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.environ["OPENAI_API_KEY"],
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
settings=OpenAILLMService.Settings(
|
||||
system_instruction="You are a helpful assistant in a voice conversation. Your responses will be spoken aloud, so avoid emojis, bullet points, or other formatting that can't be spoken. Respond to what the user said in a creative, helpful, and brief way.",
|
||||
),
|
||||
@@ -53,17 +53,17 @@ async def main():
|
||||
),
|
||||
)
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.environ["DEEPGRAM_API_KEY"])
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.environ["OPENAI_API_KEY"],
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
settings=OpenAILLMService.Settings(
|
||||
system_instruction="You are a helpful assistant in a voice conversation. Your responses will be spoken aloud, so avoid emojis, bullet points, or other formatting that can't be spoken. Respond to what the user said in a creative, helpful, and brief way.",
|
||||
),
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.environ["CARTESIA_API_KEY"],
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
settings=CartesiaTTSService.Settings(
|
||||
voice="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
),
|
||||
@@ -108,10 +108,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
|
||||
# Create an HTTP session for API calls
|
||||
async with aiohttp.ClientSession() as session:
|
||||
llm = OpenAILLMService(api_key=os.environ["OPENAI_API_KEY"])
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
tts = CartesiaHttpTTSService(
|
||||
api_key=os.environ["CARTESIA_API_KEY"],
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
settings=CartesiaHttpTTSService.Settings(
|
||||
voice="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
),
|
||||
155
examples/foundational/06-listen-and-respond.py
Normal file
155
examples/foundational/06-listen-and-respond.py
Normal file
@@ -0,0 +1,155 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import Frame, LLMRunFrame, MetricsFrame
|
||||
from pipecat.metrics.metrics import (
|
||||
LLMUsageMetricsData,
|
||||
ProcessingMetricsData,
|
||||
TTFBMetricsData,
|
||||
TTSUsageMetricsData,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
class MetricsLogger(FrameProcessor):
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, MetricsFrame):
|
||||
for d in frame.data:
|
||||
if isinstance(d, TTFBMetricsData):
|
||||
print(f"!!! MetricsFrame: {frame}, ttfb: {d.value}")
|
||||
elif isinstance(d, ProcessingMetricsData):
|
||||
print(f"!!! MetricsFrame: {frame}, processing: {d.value}")
|
||||
elif isinstance(d, LLMUsageMetricsData):
|
||||
tokens = d.value
|
||||
print(
|
||||
f"!!! MetricsFrame: {frame}, tokens: {tokens.prompt_tokens}, characters: {tokens.completion_tokens}"
|
||||
)
|
||||
elif isinstance(d, TTSUsageMetricsData):
|
||||
print(f"!!! MetricsFrame: {frame}, characters: {d.value}")
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
settings=CartesiaTTSService.Settings(
|
||||
voice="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
settings=OpenAILLMService.Settings(
|
||||
system_instruction="You are a helpful assistant in a voice conversation. Your responses will be spoken aloud, so avoid emojis, bullet points, or other formatting that can't be spoken. Respond to what the user said in a creative, helpful, and brief way.",
|
||||
),
|
||||
)
|
||||
|
||||
ml = MetricsLogger()
|
||||
|
||||
context = LLMContext()
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
user_aggregator,
|
||||
llm,
|
||||
tts,
|
||||
ml,
|
||||
transport.output(),
|
||||
assistant_aggregator,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message(
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
@@ -96,17 +96,17 @@ transport_params = {
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.environ["DEEPGRAM_API_KEY"])
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.environ["CARTESIA_API_KEY"],
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
settings=CartesiaTTSService.Settings(
|
||||
voice="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.environ["OPENAI_API_KEY"],
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
settings=OpenAILLMService.Settings(
|
||||
system_instruction="You are a helpful assistant in a voice conversation. Your responses will be spoken aloud, so avoid emojis, bullet points, or other formatting that can't be spoken. Respond to what the user said in a creative, helpful, and brief way.",
|
||||
),
|
||||
@@ -119,8 +119,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
)
|
||||
|
||||
image_sync_aggregator = ImageSyncAggregator(
|
||||
os.path.join(os.path.dirname(__file__), "..", "assets", "speaking.png"),
|
||||
os.path.join(os.path.dirname(__file__), "..", "assets", "waiting.png"),
|
||||
os.path.join(os.path.dirname(__file__), "assets", "speaking.png"),
|
||||
os.path.join(os.path.dirname(__file__), "assets", "waiting.png"),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
@@ -54,10 +54,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
stt = CartesiaSTTService(api_key=os.environ["CARTESIA_API_KEY"])
|
||||
stt = CartesiaSTTService(api_key=os.getenv("CARTESIA_API_KEY"))
|
||||
|
||||
tts = CartesiaHttpTTSService(
|
||||
api_key=os.environ["CARTESIA_API_KEY"],
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
aiohttp_session=session,
|
||||
settings=CartesiaHttpTTSService.Settings(
|
||||
voice="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
@@ -65,7 +65,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.environ["OPENAI_API_KEY"],
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
settings=OpenAILLMService.Settings(
|
||||
system_instruction="You are a helpful assistant in a voice conversation. Your responses will be spoken aloud, so avoid emojis, bullet points, or other formatting that can't be spoken. Respond to what the user said in a creative, helpful, and brief way.",
|
||||
),
|
||||
@@ -51,17 +51,17 @@ transport_params = {
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.environ["DEEPGRAM_API_KEY"])
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.environ["CARTESIA_API_KEY"],
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
settings=CartesiaTTSService.Settings(
|
||||
voice="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
),
|
||||
)
|
||||
|
||||
llm = OpenAIResponsesLLMService(
|
||||
api_key=os.environ["OPENAI_API_KEY"],
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
settings=OpenAIResponsesLLMService.Settings(
|
||||
system_instruction="You are a helpful assistant in a voice conversation. Your responses will be spoken aloud, so avoid emojis, bullet points, or other formatting that can't be spoken. Respond to what the user said in a creative, helpful, and brief way.",
|
||||
),
|
||||
@@ -51,17 +51,17 @@ transport_params = {
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.environ["DEEPGRAM_API_KEY"])
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.environ["CARTESIA_API_KEY"],
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
settings=CartesiaTTSService.Settings(
|
||||
voice="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.environ["OPENAI_API_KEY"],
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
settings=OpenAILLMService.Settings(
|
||||
system_instruction="You are a helpful assistant in a voice conversation. Your responses will be spoken aloud, so avoid emojis, bullet points, or other formatting that can't be spoken. Respond to what the user said in a creative, helpful, and brief way.",
|
||||
),
|
||||
@@ -91,7 +91,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
async with aiohttp.ClientSession() as session:
|
||||
stt = SpeechmaticsSTTService(
|
||||
api_key=os.environ["SPEECHMATICS_API_KEY"],
|
||||
api_key=os.getenv("SPEECHMATICS_API_KEY"),
|
||||
settings=SpeechmaticsSTTService.Settings(
|
||||
language=Language.EN,
|
||||
turn_detection_mode=SpeechmaticsSTTService.TurnDetectionMode.ADAPTIVE,
|
||||
@@ -102,7 +102,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
)
|
||||
|
||||
tts = SpeechmaticsTTSService(
|
||||
api_key=os.environ["SPEECHMATICS_API_KEY"],
|
||||
api_key=os.getenv("SPEECHMATICS_API_KEY"),
|
||||
settings=SpeechmaticsTTSService.Settings(
|
||||
voice="sarah",
|
||||
),
|
||||
@@ -110,7 +110,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.environ["OPENAI_API_KEY"],
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
settings=OpenAILLMService.Settings(
|
||||
temperature=0.75,
|
||||
system_instruction="You are a helpful British assistant called Sarah in a voice conversation. Your responses will be spoken aloud, so avoid emojis, bullet points, or other formatting that can't be spoken. Always include punctuation in your responses. Give very short replies - do not give longer replies unless strictly necessary. Respond to what the user said in a concise, funny, creative and helpful way. Use `<Sn/>` tags to identify different speakers - do not use tags in your replies. Do not respond to speakers within `<PASSIVE/>` tags unless explicitly asked to.",
|
||||
@@ -74,7 +74,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
stt = SpeechmaticsSTTService(
|
||||
api_key=os.environ["SPEECHMATICS_API_KEY"],
|
||||
api_key=os.getenv("SPEECHMATICS_API_KEY"),
|
||||
settings=SpeechmaticsSTTService.Settings(
|
||||
language=Language.EN,
|
||||
speaker_active_format="<{speaker_id}>{text}</{speaker_id}>",
|
||||
@@ -82,7 +82,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
)
|
||||
|
||||
tts = SpeechmaticsTTSService(
|
||||
api_key=os.environ["SPEECHMATICS_API_KEY"],
|
||||
api_key=os.getenv("SPEECHMATICS_API_KEY"),
|
||||
settings=SpeechmaticsTTSService.Settings(
|
||||
voice="sarah",
|
||||
),
|
||||
@@ -90,7 +90,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.environ["OPENAI_API_KEY"],
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
settings=OpenAILLMService.Settings(
|
||||
temperature=0.75,
|
||||
system_instruction="You are a helpful British assistant called Sarah in a voice conversation. Your responses will be spoken aloud, so avoid emojis, bullet points, or other formatting that can't be spoken. Always include punctuation in your responses. Give very short replies - do not give longer replies unless strictly necessary. Respond to what the user said in a concise, funny, creative and helpful way. Use `<Sn/>` tags to identify different speakers - do not use tags in your replies. Do not respond to speakers within `<PASSIVE/>` tags unless explicitly asked to.",
|
||||
@@ -8,15 +8,15 @@
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
|
||||
from langchain_community.chat_message_histories import ChatMessageHistory
|
||||
from langchain_core.chat_history import BaseChatMessageHistory
|
||||
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
|
||||
from langchain_core.runnables.history import RunnableWithMessageHistory
|
||||
from langchain_openai import ChatOpenAI
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.frames.frames import LLMMessagesUpdateFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
@@ -67,10 +67,10 @@ transport_params = {
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.environ["DEEPGRAM_API_KEY"])
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.environ["CARTESIA_API_KEY"],
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
settings=CartesiaTTSService.Settings(
|
||||
voice="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
),
|
||||
@@ -129,10 +129,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
# An `LLMContextFrame` will be picked up by the LangchainProcessor using
|
||||
# only the content of the last message to inject it in the prompt defined
|
||||
# above. So no role is required here.
|
||||
context.add_message(
|
||||
{"role": "developer", "content": "Please briefly introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
messages = [({"content": "Please briefly introduce yourself to the user."})]
|
||||
await task.queue_frames([LLMMessagesUpdateFrame(messages, run_llm=True)])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
@@ -59,8 +59,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
# - AWS credentials configured (via environment variables or AWS CLI)
|
||||
# - A deployed SageMaker endpoint with Deepgram Flux model
|
||||
stt = DeepgramFluxSageMakerSTTService(
|
||||
endpoint_name=os.environ["SAGEMAKER_STT_ENDPOINT_NAME"],
|
||||
region=os.environ["AWS_REGION"],
|
||||
endpoint_name=os.getenv("SAGEMAKER_STT_ENDPOINT_NAME"),
|
||||
region=os.getenv("AWS_REGION"),
|
||||
settings=DeepgramFluxSageMakerSTTService.Settings(
|
||||
min_confidence=0.3,
|
||||
),
|
||||
@@ -71,8 +71,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
# - AWS credentials configured (via environment variables or AWS CLI)
|
||||
# - A deployed SageMaker endpoint with Deepgram TTS model
|
||||
tts = DeepgramSageMakerTTSService(
|
||||
endpoint_name=os.environ["SAGEMAKER_TTS_ENDPOINT_NAME"],
|
||||
region=os.environ["AWS_REGION"],
|
||||
endpoint_name=os.getenv("SAGEMAKER_TTS_ENDPOINT_NAME"),
|
||||
region=os.getenv("AWS_REGION"),
|
||||
settings=DeepgramSageMakerTTSService.Settings(
|
||||
voice="aura-2-andromeda-en",
|
||||
),
|
||||
@@ -55,21 +55,21 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = DeepgramFluxSTTService(
|
||||
api_key=os.environ["DEEPGRAM_API_KEY"],
|
||||
api_key=os.getenv("DEEPGRAM_API_KEY"),
|
||||
settings=DeepgramFluxSTTService.Settings(
|
||||
min_confidence=0.3,
|
||||
),
|
||||
)
|
||||
|
||||
tts = DeepgramTTSService(
|
||||
api_key=os.environ["DEEPGRAM_API_KEY"],
|
||||
api_key=os.getenv("DEEPGRAM_API_KEY"),
|
||||
settings=DeepgramTTSService.Settings(
|
||||
voice="aura-2-andromeda-en",
|
||||
),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.environ["OPENAI_API_KEY"],
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
settings=OpenAILLMService.Settings(
|
||||
system_instruction="You are a helpful assistant in a voice conversation. Your responses will be spoken aloud, so avoid emojis, bullet points, or other formatting that can't be spoken. Respond to what the user said in a creative, helpful, and brief way.",
|
||||
),
|
||||
@@ -55,10 +55,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
stt = DeepgramSTTService(api_key=os.environ["DEEPGRAM_API_KEY"])
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = DeepgramHttpTTSService(
|
||||
api_key=os.environ["DEEPGRAM_API_KEY"],
|
||||
api_key=os.getenv("DEEPGRAM_API_KEY"),
|
||||
settings=DeepgramHttpTTSService.Settings(
|
||||
voice="aura-2-andromeda-en",
|
||||
),
|
||||
@@ -66,7 +66,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.environ["OPENAI_API_KEY"],
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
settings=OpenAILLMService.Settings(
|
||||
system_instruction="You are a helpful assistant in a voice conversation. Your responses will be spoken aloud, so avoid emojis, bullet points, or other formatting that can't be spoken. Respond to what the user said in a creative, helpful, and brief way.",
|
||||
),
|
||||
@@ -58,8 +58,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
# - AWS credentials configured (via environment variables or AWS CLI)
|
||||
# - A deployed SageMaker endpoint with Deepgram model
|
||||
stt = DeepgramSageMakerSTTService(
|
||||
endpoint_name=os.environ["SAGEMAKER_STT_ENDPOINT_NAME"],
|
||||
region=os.environ["AWS_REGION"],
|
||||
endpoint_name=os.getenv("SAGEMAKER_STT_ENDPOINT_NAME"),
|
||||
region=os.getenv("AWS_REGION"),
|
||||
)
|
||||
|
||||
# Initialize Deepgram SageMaker TTS Service
|
||||
@@ -67,8 +67,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
# - AWS credentials configured (via environment variables or AWS CLI)
|
||||
# - A deployed SageMaker endpoint with Deepgram TTS model
|
||||
tts = DeepgramSageMakerTTSService(
|
||||
endpoint_name=os.environ["SAGEMAKER_TTS_ENDPOINT_NAME"],
|
||||
region=os.environ["AWS_REGION"],
|
||||
endpoint_name=os.getenv("SAGEMAKER_TTS_ENDPOINT_NAME"),
|
||||
region=os.getenv("AWS_REGION"),
|
||||
settings=DeepgramSageMakerTTSService.Settings(
|
||||
voice="aura-2-andromeda-en",
|
||||
),
|
||||
133
examples/foundational/07c-interruptible-deepgram-vad.py
Normal file
133
examples/foundational/07c-interruptible-deepgram-vad.py
Normal file
@@ -0,0 +1,133 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.deepgram.tts import DeepgramTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_turn_strategies import ExternalUserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = DeepgramSTTService(
|
||||
api_key=os.getenv("DEEPGRAM_API_KEY"),
|
||||
settings=DeepgramSTTService.Settings(
|
||||
vad_events=True,
|
||||
utterance_end_ms="1000",
|
||||
),
|
||||
)
|
||||
|
||||
tts = DeepgramTTSService(
|
||||
api_key=os.getenv("DEEPGRAM_API_KEY"),
|
||||
settings=DeepgramTTSService.Settings(
|
||||
voice="aura-2-andromeda-en",
|
||||
),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
settings=OpenAILLMService.Settings(
|
||||
system_instruction="You are a helpful assistant in a voice conversation. Your responses will be spoken aloud, so avoid emojis, bullet points, or other formatting that can't be spoken. Respond to what the user said in a creative, helpful, and brief way.",
|
||||
),
|
||||
)
|
||||
|
||||
context = LLMContext()
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(user_turn_strategies=ExternalUserTurnStrategies()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
user_aggregator, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
assistant_aggregator, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message(
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user