Compare commits

..

44 Commits

Author SHA1 Message Date
Mark Backman
4036c2aed5 Replace audioop for python 3.13+ 2024-12-16 19:57:07 -05:00
Mark Backman
f3112a8638 Merge pull request #866 from pipecat-ai/mb/readme-links
Fix a bunch of README docs links
2024-12-16 10:51:01 -05:00
Mark Backman
0293d40e4e Merge pull request #870 from pipecat-ai/mb/dotenv
Add python-dotenv to dev-requirements.txt
2024-12-16 10:50:46 -05:00
Mark Backman
64038442ed Add python-dotenv to dev-requirements.txt 2024-12-16 09:23:12 -05:00
Mark Backman
f90cbe8086 Fix a bunch of README docs links 2024-12-15 14:30:20 -05:00
Mark Backman
09a611d44b Merge pull request #856 from pipecat-ai/mb/daily-rest-helpers
Remove default 5 min exp time for created rooms, add docstrings
2024-12-13 12:08:58 -05:00
Mark Backman
16d7fb2c4a Remove default 5 min exp time for created rooms, add docstrings 2024-12-13 12:02:26 -05:00
Aleix Conchillo Flaqué
643160c960 Merge pull request #858 from pipecat-ai/aleix/fastpitch-timeout
riva: make sure we don't block on fastpitch
2024-12-13 08:20:38 -08:00
Aleix Conchillo Flaqué
aac907aadb riva: make sure we don't block on fastpitch 2024-12-13 07:32:51 -08:00
Aleix Conchillo Flaqué
8f24ca4e58 Merge pull request #857 from pipecat-ai/aleix/fix-riva-tts-audio-stuttering
riva: fix FastPitchTTSService audio stuttering
2024-12-12 22:20:00 -08:00
Aleix Conchillo Flaqué
420ce16807 riva: fix FastPitchTTSService audio stuttering 2024-12-12 22:15:44 -08:00
Aleix Conchillo Flaqué
2b8c35c681 Merge pull request #855 from pipecat-ai/aleix/transport-services-disconnect-fixes
transports(services): disconnect client first
2024-12-12 19:40:03 -08:00
Mark Backman
3d96369193 Merge pull request #852 from pipecat-ai/mb/readme-docs-badge
Add docs badge to README
2024-12-12 22:21:41 -05:00
Aleix Conchillo Flaqué
d44b36a07c Merge pull request #854 from pipecat-ai/aleix/aiservice-add-missing-process-frame
AIService: add missing super().process_frame()
2024-12-12 19:10:21 -08:00
Aleix Conchillo Flaqué
ccc96994e9 pyproject: update livekit 2024-12-12 19:09:36 -08:00
Aleix Conchillo Flaqué
337d421338 transports: disconnect client first 2024-12-12 19:09:06 -08:00
Aleix Conchillo Flaqué
752720b4d5 AIService: add missing super().process_frame() 2024-12-12 17:25:38 -08:00
Aleix Conchillo Flaqué
f8e69cfa00 Merge pull request #853 from pipecat-ai/revert-849-aleix/no-need-for-super-process-frame
Revert "no longer necessary to call super().process_frame(frame, direction)"
2024-12-12 17:21:20 -08:00
Aleix Conchillo Flaqué
6d11911d83 Revert "no longer necessary to call super().process_frame(frame, direction)" 2024-12-12 17:03:40 -08:00
Mark Backman
ec6e71c8ea Add docs badge to README 2024-12-12 18:08:24 -05:00
Aleix Conchillo Flaqué
10f854aeba Merge pull request #846 from pipecat-ai/aleix/base-output-transport-audio-sync
transport(output): fix non-audio frames sync after audio frames
2024-12-12 14:29:42 -08:00
Aleix Conchillo Flaqué
d8caf007b0 Merge pull request #849 from pipecat-ai/aleix/no-need-for-super-process-frame
no longer necessary to call super().process_frame(frame, direction)
2024-12-12 14:29:10 -08:00
Mark Backman
26ea64ef12 Merge pull request #850 from pipecat-ai/mb/fix-docs-builds
Fix docs generation build issues
2024-12-12 17:27:00 -05:00
Mark Backman
19c178ebc7 Fix docs generation build issues 2024-12-12 17:18:04 -05:00
Aleix Conchillo Flaqué
3c3fd67d96 no longer necessary to call super().process_frame(frame, direction) 2024-12-12 13:03:41 -08:00
Mark Backman
7bbc0ee8df Merge pull request #845 from pipecat-ai/mb/more-docs-updates
Docs auto-gen improvements
2024-12-12 15:42:34 -05:00
Mark Backman
67804edce6 Remove formats from .readthedocs.yaml 2024-12-12 15:41:11 -05:00
Mark Backman
ec082d0888 Remove deprecated VAD module 2024-12-12 15:32:38 -05:00
Mark Backman
8631d71d5a Fix more missing docs 2024-12-12 15:16:37 -05:00
Aleix Conchillo Flaqué
db7eaed980 transport(output): fix non-audio frames sync after audio frames 2024-12-12 10:56:02 -08:00
Mark Backman
44c5220104 Update README 2024-12-12 13:28:05 -05:00
Mark Backman
276fd86ecb More fixes for missing packages 2024-12-12 13:25:13 -05:00
Mark Backman
2de0737056 Merge pull request #844 from pipecat-ai/cb-gemini-example-fix
Update requirements.txt for simple-chatbot
2024-12-12 11:18:58 -05:00
Mark Backman
b5d5a0e923 Add special cases for displaying some names 2024-12-12 11:15:36 -05:00
Mark Backman
f3ed12c30b Clean up module and package display names 2024-12-12 11:11:53 -05:00
Mark Backman
e14399727b Add README and build script for local testing 2024-12-12 11:06:53 -05:00
Mark Backman
414dcf9810 Improve TOC in sidebar, fix missing services 2024-12-12 11:06:09 -05:00
chadbailey59
88d530e840 Update requirements.txt for simple-chatbot
The gemini example doesn't actually work from a fresh install, because the requirements.txt file doesn't include google :)
2024-12-12 09:31:15 -06:00
Aleix Conchillo Flaqué
af821d8e95 Merge pull request #841 from pipecat-ai/aleix/aws-to-polly
polly: renamed AWSTTSService to PollyTTSService
2024-12-11 18:13:02 -08:00
Aleix Conchillo Flaqué
133e1aff6c polly: renamed AWSTTSService to PollyTTSService 2024-12-11 17:56:43 -08:00
Aleix Conchillo Flaqué
def415f476 Merge pull request #840 from pipecat-ai/aleix/11labs-playht-more-languages
tts: support more languages in playht and elevenlabs
2024-12-11 14:58:03 -08:00
Aleix Conchillo Flaqué
a34d16dabe tts: support more languages in playht and elevenlabs 2024-12-11 14:53:24 -08:00
Mark Backman
ec7260b237 Merge pull request #839 from pipecat-ai/mb/bump-versions
Bump openai and aiohttp package versions
2024-12-11 17:06:15 -05:00
Mark Backman
96c6c71d5b Bump openai and aiohttp package versions 2024-12-11 16:48:36 -05:00
33 changed files with 899 additions and 485 deletions

View File

@@ -1,47 +0,0 @@
name: Generate API Documentation
on:
release:
types: [published] # Run on new release
workflow_dispatch: # Manual trigger
jobs:
update-docs:
runs-on: ubuntu-latest
permissions:
contents: write
pull-requests: write
steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.12'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r docs/api/requirements.txt
pip install .
- name: Generate API documentation
run: |
cd docs/api
python generate_docs.py
- name: Create Pull Request
uses: peter-evans/create-pull-request@v5
with:
commit-message: 'docs: Update API documentation'
title: 'docs: Update API documentation'
body: |
Automated PR to update API documentation.
- Generated using `docs/api/generate_docs.py`
- Triggered by: ${{ github.event_name }}
branch: update-api-docs
delete-branch: true
labels: |
documentation

View File

@@ -4,12 +4,33 @@ build:
os: ubuntu-22.04
tools:
python: '3.12'
apt_packages:
- portaudio19-dev
- python3-dev
- libasound2-dev
jobs:
pre_build:
- python -m pip install --upgrade pip
- pip install wheel setuptools
post_build:
- echo "Build completed"
sphinx:
configuration: docs/api/conf.py
fail_on_warning: false
python:
install:
- requirements: docs/api/requirements.txt
- method: pip
path: .
search:
ranking:
api/*: 5
getting-started/*: 4
guides/*: 3
submodules:
include: all
recursive: true

View File

@@ -5,6 +5,34 @@ All notable changes to **Pipecat** will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [Unreleased]
### Added
- Add support for more languages to ElevenLabs (Arabic, Croatian, Filipino,
Tamil) and PlayHT (Afrikans, Albanian, Amharic, Arabic, Bengali, Croatian,
Galician, Hebrew, Mandarin, Serbian, Tagalog, Urdu, Xhosa).
### Changed
- Changed: Room expiration (`exp`) in `DailyRoomProperties` is now optional
(None) by default instead of automatically setting a 5-minute expiration
time. You must explicitly set expiration time if desired.
### Deprecated
- `AWSTTSService` is now deprecated, use `PollyTTSService` instead.
### Fixed
- Fixed an audio stuttering issue in `FastPitchTTSService`.
- Fixed a `BaseOutputTransport` issue that was causing non-audio frames being
processed before the previous audio frames were played. This will allow, for
example, sending a frame `A` after a `TTSSpeakFrame` and the frame `A` will
only be pushed downstream after the audio generated from `TTSSpeakFrame` has
been spoken.
## [0.0.50] - 2024-12-11
### Added

View File

@@ -2,7 +2,7 @@
 <img alt="pipecat" width="300px" height="auto" src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/pipecat.png">
</div></h1>
[![PyPI](https://img.shields.io/pypi/v/pipecat-ai)](https://pypi.org/project/pipecat-ai) [![Discord](https://img.shields.io/discord/1239284677165056021)](https://discord.gg/pipecat) <a href="https://app.commanddash.io/agent/github_pipecat-ai_pipecat"><img src="https://img.shields.io/badge/AI-Code%20Agent-EB9FDA"></a>
[![PyPI](https://img.shields.io/pypi/v/pipecat-ai)](https://pypi.org/project/pipecat-ai) [![Docs](https://img.shields.io/badge/Documentation-blue)](https://docs.pipecat.ai) [![Discord](https://img.shields.io/discord/1239284677165056021)](https://discord.gg/pipecat) <a href="https://app.commanddash.io/agent/github_pipecat-ai_pipecat"><img src="https://img.shields.io/badge/AI-Code%20Agent-EB9FDA"></a>
Pipecat is an open source Python framework for building voice and multimodal conversational agents. It handles the complex orchestration of AI services, network transport, audio processing, and multimodal interactions, letting you focus on creating engaging experiences.
@@ -55,19 +55,19 @@ pip install "pipecat-ai[option,...]"
Available options include:
| Category | Services | Install Command Example |
| ------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------- |
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/api-reference/services/stt/assemblyai), [Azure](https://docs.pipecat.ai/api-reference/services/stt/azure), [Deepgram](https://docs.pipecat.ai/api-reference/services/stt/deepgram), [Gladia](https://docs.pipecat.ai/api-reference/services/stt/gladia), [Whisper](https://docs.pipecat.ai/api-reference/services/stt/whisper) | `pip install "pipecat-ai[deepgram]"` |
| LLMs | [Anthropic](https://docs.pipecat.ai/api-reference/services/llm/anthropic), [Azure](https://docs.pipecat.ai/api-reference/services/llm/azure), [Fireworks AI](https://docs.pipecat.ai/api-reference/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/api-reference/services/llm/gemini), [Grok](https://docs.pipecat.ai/api-reference/services/llm/grok), [Groq](https://docs.pipecat.ai/api-reference/services/llm/groq), [NVIDIA NIM](https://docs.pipecat.ai/api-reference/services/llm/nim), [Ollama](https://docs.pipecat.ai/api-reference/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/api-reference/services/llm/openai), [Together AI](https://docs.pipecat.ai/api-reference/services/llm/together) | `pip install "pipecat-ai[openai]"` |
| Text-to-Speech | [AWS](https://docs.pipecat.ai/api-reference/services/tts/aws), [Azure](https://docs.pipecat.ai/api-reference/services/tts/azure), [Cartesia](https://docs.pipecat.ai/api-reference/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/api-reference/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/api-reference/services/tts/elevenlabs), [Google](https://docs.pipecat.ai/api-reference/services/tts/google), [LMNT](https://docs.pipecat.ai/api-reference/services/tts/lmnt), [OpenAI](https://docs.pipecat.ai/api-reference/services/tts/openai), [PlayHT](https://docs.pipecat.ai/api-reference/services/tts/playht), [Rime](https://docs.pipecat.ai/api-reference/services/tts/rime), [XTTS](https://docs.pipecat.ai/api-reference/services/tts/xtts) | `pip install "pipecat-ai[cartesia]"` |
| Speech-to-Speech | [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/api-reference/services/s2s/openai) | `pip install "pipecat-ai[openai]"` |
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/api-reference/services/transport/daily), WebSocket, Local | `pip install "pipecat-ai[daily]"` |
| Video | [Tavus](https://docs.pipecat.ai/api-reference/services/video/tavus), [Simli](https://docs.pipecat.ai/api-reference/services/video/simli) | `pip install "pipecat-ai[tavus,simli]"` |
| Vision & Image | [Moondream](https://docs.pipecat.ai/api-reference/services/vision/moondream), [fal](https://docs.pipecat.ai/api-reference/services/image-generation/fal) | `pip install "pipecat-ai[moondream]"` |
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/api-reference/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/api-reference/utilities/audio/krisp-filter), [Noisereduce](https://docs.pipecat.ai/api-reference/utilities/audio/noisereduce-filter) | `pip install "pipecat-ai[silero]"` |
| Analytics & Metrics | [Canonical AI](https://docs.pipecat.ai/api-reference/services/analytics/canonical), [Sentry](https://docs.pipecat.ai/api-reference/services/analytics/sentry) | `pip install "pipecat-ai[canonical]"` |
| Category | Services | Install Command Example |
| ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------- |
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) | `pip install "pipecat-ai[deepgram]"` |
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [Together AI](https://docs.pipecat.ai/server/services/llm/together) | `pip install "pipecat-ai[openai]"` |
| Text-to-Speech | [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Google](https://docs.pipecat.ai/server/services/tts/google), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) | `pip install "pipecat-ai[cartesia]"` |
| Speech-to-Speech | [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai) | `pip install "pipecat-ai[openai]"` |
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), WebSocket, Local | `pip install "pipecat-ai[daily]"` |
| Video | [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) | `pip install "pipecat-ai[tavus,simli]"` |
| Vision & Image | [Moondream](https://docs.pipecat.ai/server/services/vision/moondream), [fal](https://docs.pipecat.ai/server/services/image-generation/fal) | `pip install "pipecat-ai[moondream]"` |
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Noisereduce](https://docs.pipecat.ai/server/utilities/audio/noisereduce-filter) | `pip install "pipecat-ai[silero]"` |
| Analytics & Metrics | [Canonical AI](https://docs.pipecat.ai/server/services/analytics/canonical), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) | `pip install "pipecat-ai[canonical]"` |
📚 [View full services documentation →](https://docs.pipecat.ai/api-reference/services/supported-services)
📚 [View full services documentation →](https://docs.pipecat.ai/server/services/supported-services)
## Code examples

View File

@@ -6,3 +6,4 @@ pytest~=8.3.2
ruff~=0.6.7
setuptools~=72.2.0
setuptools_scm~=8.1.0
python-dotenv~=1.0.1

109
docs/api/README.md Normal file
View File

@@ -0,0 +1,109 @@
# Pipecat Documentation
This directory contains the source files for auto-generating Pipecat's server API reference documentation.
## Setup
1. Install documentation dependencies:
```bash
pip install -r requirements.txt
```
2. Make the build scripts executable:
```bash
chmod +x build-docs.sh rtd-test.py
```
## Building Documentation
From this directory, you can build the documentation in several ways:
### Local Build
```bash
# Using the build script (automatically opens docs when done)
./build-docs.sh
# Or directly with sphinx-build
sphinx-build -b html . _build/html -W --keep-going
```
### ReadTheDocs Test Build
To test the documentation build process exactly as it would run on ReadTheDocs:
```bash
./rtd-test.py
```
This script:
- Creates a fresh virtual environment
- Installs all dependencies as specified in requirements files
- Handles conflicting dependencies (like grpcio versions for Riva and PlayHT)
- Builds the documentation in an isolated environment
- Provides detailed logging of the build process
Use this script to verify your documentation will build correctly on ReadTheDocs before pushing changes.
## Viewing Documentation
The built documentation will be available at `_build/html/index.html`. To open:
```bash
# On MacOS
open _build/html/index.html
# On Linux
xdg-open _build/html/index.html
# On Windows
start _build/html/index.html
```
## Directory Structure
```
.
├── api/ # Auto-generated API documentation
├── _build/ # Built documentation
├── _static/ # Static files (images, css, etc.)
├── conf.py # Sphinx configuration
├── index.rst # Main documentation entry point
├── requirements-base.txt # Base documentation dependencies
├── requirements-riva.txt # Riva-specific dependencies
├── requirements-playht.txt # PlayHT-specific dependencies
├── build-docs.sh # Local build script
└── rtd-test.py # ReadTheDocs test build script
```
## Notes
- Documentation is auto-generated from Python docstrings
- Service modules are automatically detected and included
- The build process matches our ReadTheDocs configuration
- Warnings are treated as errors (-W flag) to maintain consistency
- The --keep-going flag ensures all errors are reported
- Dependencies are split into multiple requirements files to handle version conflicts
## Troubleshooting
If you encounter missing service modules:
1. Verify the service is installed with its extras: `pip install pipecat-ai[service-name]`
2. Check the build logs for import errors
3. Ensure the service module is properly initialized in the package
4. Run `./rtd-test.py` to test in an isolated environment matching ReadTheDocs
For dependency conflicts:
1. Check the requirements files for version specifications
2. Use `rtd-test.py` to verify dependency resolution
3. Consider adding service-specific requirements files if needed
For more information:
- [ReadTheDocs Configuration](.readthedocs.yaml)
- [Sphinx Documentation](https://www.sphinx-doc.org/)

10
docs/api/build-docs.sh Executable file
View File

@@ -0,0 +1,10 @@
#!/bin/bash
# Clean previous build
rm -rf _build
# Build docs matching ReadTheDocs configuration
sphinx-build -b html -d _build/doctrees . _build/html -W --keep-going
# Open docs (MacOS)
open _build/html/index.html

View File

@@ -1,6 +1,11 @@
import logging
import sys
from pathlib import Path
# Configure logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger("sphinx-build")
# Add source directory to path
docs_dir = Path(__file__).parent
project_root = docs_dir.parent.parent
@@ -32,13 +37,150 @@ autodoc_default_options = {
"undoc-members": True,
"exclude-members": "__weakref__",
"no-index": True,
"show-inheritance": True,
}
# Mock imports for optional dependencies
autodoc_mock_imports = [
"riva",
"livekit",
"pyht", # Base PlayHT package
"pyht.async_client", # PlayHT specific imports
"pyht.client",
"pyht.protos",
"pyht.protos.api_pb2",
"pipecat_ai_playht", # PlayHT wrapper
"anthropic",
"assemblyai",
"boto3",
"azure",
"cartesia",
"deepgram",
"elevenlabs",
"fal",
"gladia",
"google",
"krisp",
"langchain",
"lmnt",
"noisereduce",
"openai",
"openpipe",
"simli",
"soundfile",
# Existing mocks
"pipecat_ai_krisp",
"pyaudio",
"_tkinter",
"tkinter",
"daily",
"daily_python",
"pydantic.BaseModel",
"pydantic.Field",
"pydantic._internal._model_construction",
"pydantic._internal._fields",
]
# HTML output settings
html_theme = "sphinx_rtd_theme"
html_static_path = ["_static"]
autodoc_typehints = "description"
html_show_sphinx = False # Remove "Built with Sphinx"
html_show_sphinx = False
def verify_modules():
"""Verify that required modules are available."""
required_modules = {
"services": [
"assemblyai",
"aws",
"cartesia",
"deepgram",
"google",
"lmnt",
"riva",
"simli",
],
"serializers": ["livekit"],
"vad": ["silero", "vad_analyzer"],
"transports": {
"services": ["daily", "livekit"],
"local": ["audio", "tk"],
"network": ["fastapi_websocket", "websocket_server"],
},
}
missing = []
for category, modules in required_modules.items():
if isinstance(modules, dict):
# Handle nested structure
for subcategory, submodules in modules.items():
for module in submodules:
try:
__import__(f"pipecat.{category}.{subcategory}.{module}")
logger.info(
f"Successfully imported pipecat.{category}.{subcategory}.{module}"
)
except (ImportError, TypeError, NameError) as e:
missing.append(f"pipecat.{category}.{subcategory}.{module}")
logger.warning(
f"Optional module not available: pipecat.{category}.{subcategory}.{module} - {str(e)}"
)
else:
# Handle flat structure
for module in modules:
try:
__import__(f"pipecat.{category}.{module}")
logger.info(f"Successfully imported pipecat.{category}.{module}")
except (ImportError, TypeError, NameError) as e:
missing.append(f"pipecat.{category}.{module}")
logger.warning(
f"Optional module not available: pipecat.{category}.{module} - {str(e)}"
)
if missing:
logger.warning(f"Some optional modules are not available: {missing}")
def clean_title(title: str) -> str:
"""Automatically clean module titles."""
# Remove everything after space (like 'module', 'processor', etc.)
title = title.split(" ")[0]
# Get the last part of the dot-separated path
parts = title.split(".")
title = parts[-1]
# Special cases for service names and common acronyms
special_cases = {
"ai": "AI",
"aws": "AWS",
"api": "API",
"vad": "VAD",
"assemblyai": "AssemblyAI",
"deepgram": "Deepgram",
"elevenlabs": "ElevenLabs",
"openai": "OpenAI",
"openpipe": "OpenPipe",
"playht": "PlayHT",
"xtts": "XTTS",
"lmnt": "LMNT",
}
# Check if the entire title is a special case
if title.lower() in special_cases:
return special_cases[title.lower()]
# Otherwise, capitalize each word
words = title.split("_")
cleaned_words = []
for word in words:
if word.lower() in special_cases:
cleaned_words.append(special_cases[word.lower()])
else:
cleaned_words.append(word.capitalize())
return " ".join(cleaned_words)
def setup(app):
@@ -55,24 +197,56 @@ def setup(app):
import shutil
shutil.rmtree(output_dir)
logger.info(f"Cleaned existing documentation in {output_dir}")
print(f"Generating API documentation...")
print(f"Output directory: {output_dir}")
print(f"Source directory: {source_dir}")
logger.info(f"Generating API documentation...")
logger.info(f"Output directory: {output_dir}")
logger.info(f"Source directory: {source_dir}")
# Similar exclusions as in your generate_docs.py
excludes = [
str(project_root / "src/pipecat/pipeline/to_be_updated"),
str(project_root / "src/pipecat/processors/gstreamer"),
str(project_root / "src/pipecat/transports/network"),
str(project_root / "src/pipecat/transports/services"),
str(project_root / "src/pipecat/transports/local"),
str(project_root / "src/pipecat/services/to_be_updated"),
str(project_root / "src/pipecat/vad"), # deprecated
"**/test_*.py",
"**/tests/*.py",
]
try:
main(["-f", "-e", "-M", "--no-toc", "-o", output_dir, source_dir] + excludes)
print("API documentation generated successfully!")
main(
[
"-f", # Force overwriting
"-e", # Don't generate empty files
"-M", # Put module documentation before submodule documentation
"--no-toc", # Don't create a table of contents file
"--separate", # Put documentation for each module in its own page
"--module-first", # Module documentation before submodule documentation
"--implicit-namespaces", # Added: Handle implicit namespace packages
"-o",
output_dir,
source_dir,
]
+ excludes
)
logger.info("API documentation generated successfully!")
# Process generated RST files to update titles
for rst_file in Path(output_dir).glob("**/*.rst"): # Changed to recursive glob
content = rst_file.read_text()
lines = content.split("\n")
# Find and clean up the title
if lines and "=" in lines[1]: # Title is typically the first line
old_title = lines[0]
new_title = clean_title(old_title)
content = content.replace(old_title, new_title)
rst_file.write_text(content)
logger.info(f"Updated title: {old_title} -> {new_title}")
except Exception as e:
print(f"Error generating API documentation: {e}")
logger.error(f"Error generating API documentation: {e}", exc_info=True)
# Run module verification
verify_modules()

View File

@@ -1,104 +0,0 @@
#!/usr/bin/env python3
import shutil
import subprocess
from pathlib import Path
def run_command(command: list[str]) -> None:
"""Run a command and exit if it fails."""
print(f"Running: {' '.join(command)}")
try:
subprocess.run(command, check=True)
except subprocess.CalledProcessError as e:
print(f"Warning: Command failed: {' '.join(command)}")
print(f"Error: {e}")
def main():
docs_dir = Path(__file__).parent
project_root = docs_dir.parent.parent
# Install documentation requirements
requirements_file = docs_dir / "requirements.txt"
run_command(["pip", "install", "-r", str(requirements_file)])
# Install from project root, not docs directory
run_command(["pip", "install", "-e", str(project_root)])
# Install all service dependencies
services = [
"anthropic",
"assemblyai",
"aws",
"azure",
"canonical",
"cartesia",
# "daily",
"deepgram",
"elevenlabs",
"fal",
"fireworks",
"gladia",
"google",
"grok",
"groq",
"langchain",
# "livekit",
"lmnt",
"moondream",
"nim",
"noisereduce",
"openai",
"openpipe",
"playht",
"silero",
"soundfile",
"websocket",
"whisper",
]
extras = ",".join(services)
try:
run_command(["pip", "install", "-e", f"{str(project_root)}[{extras}]"])
except Exception as e:
print(f"Warning: Some dependencies failed to install: {e}")
# Clean old files
api_dir = docs_dir / "api"
build_dir = docs_dir / "_build"
for dir in [api_dir, build_dir]:
if dir.exists():
shutil.rmtree(dir)
# Generate API documentation
run_command(
[
"sphinx-apidoc",
"-f", # Force overwrite
"-e", # Put each module on its own page
"-M", # Put module documentation before submodule
"--no-toc", # Don't generate modules.rst (cleaner structure)
"-o",
str(api_dir), # Output directory
str(project_root / "src/pipecat"),
# Exclude problematic files and directories
str(project_root / "src/pipecat/processors/gstreamer"), # Optional gstreamer
str(project_root / "src/pipecat/transports/network"), # Pydantic issues
str(project_root / "src/pipecat/transports/services"), # Pydantic issues
str(project_root / "src/pipecat/transports/local"), # Optional dependencies
str(project_root / "src/pipecat/services/to_be_updated"), # Exclude to_be_updated
"**/test_*.py", # Test files
"**/tests/*.py", # Test files
]
)
# Build HTML documentation
run_command(["sphinx-build", "-b", "html", str(docs_dir), str(build_dir / "html")])
print("\nDocumentation generated successfully!")
print(f"HTML docs: {build_dir}/html/index.html")
if __name__ == "__main__":
main()

View File

@@ -13,61 +13,61 @@ Quick Links
* `GitHub Repository <https://github.com/pipecat-ai/pipecat>`_
* `Website <https://pipecat.ai>`_
API Reference
-------------
Core Components
~~~~~~~~~~~~~~~
* :mod:`pipecat.frames`
* :mod:`pipecat.processors`
* :mod:`pipecat.pipeline`
* :mod:`Frames <pipecat.frames>`
* :mod:`Processors <pipecat.processors>`
* :mod:`Pipeline <pipecat.pipeline>`
Audio Processing
~~~~~~~~~~~~~~~~
* :mod:`pipecat.audio`
* :mod:`pipecat.vad`
* :mod:`Audio <pipecat.audio>`
Services
~~~~~~~~
* :mod:`pipecat.services`
* :mod:`Services <pipecat.services>`
Transport & Serialization
~~~~~~~~~~~~~~~~~~~~~~~~~
* :mod:`pipecat.transports`
* :mod:`pipecat.serializers`
* :mod:`Transports <pipecat.transports>`
* :mod:`Local <pipecat.transports.local>`
* :mod:`Network <pipecat.transports.network>`
* :mod:`Services <pipecat.transports.services>`
* :mod:`Serializers <pipecat.serializers>`
Utilities
~~~~~~~~~
* :mod:`pipecat.clocks`
* :mod:`pipecat.metrics`
* :mod:`pipecat.sync`
* :mod:`pipecat.transcriptions`
* :mod:`pipecat.utils`
* :mod:`Clocks <pipecat.clocks>`
* :mod:`Metrics <pipecat.metrics>`
* :mod:`Sync <pipecat.sync>`
* :mod:`Transcriptions <pipecat.transcriptions>`
* :mod:`Utils <pipecat.utils>`
.. toctree::
:maxdepth: 2
:maxdepth: 3
:caption: API Reference
:hidden:
api/pipecat.audio
api/pipecat.clocks
api/pipecat.frames
api/pipecat.metrics
api/pipecat.pipeline
api/pipecat.processors
api/pipecat.serializers
api/pipecat.services
api/pipecat.sync
api/pipecat.transcriptions
api/pipecat.transports
api/pipecat.utils
api/pipecat.vad
Audio <api/pipecat.audio>
Clocks <api/pipecat.clocks>
Frames <api/pipecat.frames>
Metrics <api/pipecat.metrics>
Pipeline <api/pipecat.pipeline>
Processors <api/pipecat.processors>
Serializers <api/pipecat.serializers>
Services <api/pipecat.services>
Sync <api/pipecat.sync>
Transcriptions <api/pipecat.transcriptions>
Transports <api/pipecat.transports>
Utils <api/pipecat.utils>
Indices and tables
==================

View File

@@ -1,6 +1,40 @@
# Sphinx dependencies
sphinx>=8.1.3
sphinx-rtd-theme
sphinx-markdown-builder
sphinx-autodoc-typehints
toml
pipecat-ai[anthropic,assemblyai,aws,azure,canonical,cartesia,deepgram,elevenlabs,fal,fireworks,gladia,google,grok,groq,krisp,langchain,lmnt,moondream,nim,noisereduce,openai,openpipe,playht,silero,soundfile,websocket,whisper]
# Install all extras individually to ensure they're properly resolved
pipecat-ai[anthropic]
pipecat-ai[assemblyai]
pipecat-ai[aws]
pipecat-ai[azure]
pipecat-ai[canonical]
pipecat-ai[cartesia]
pipecat-ai[daily]
pipecat-ai[deepgram]
pipecat-ai[elevenlabs]
pipecat-ai[fal]
pipecat-ai[fireworks]
pipecat-ai[gladia]
pipecat-ai[google]
pipecat-ai[grok]
pipecat-ai[groq]
# pipecat-ai[krisp] # Mocked instead
pipecat-ai[langchain]
pipecat-ai[livekit]
pipecat-ai[lmnt]
pipecat-ai[local]
pipecat-ai[moondream]
pipecat-ai[nim]
pipecat-ai[noisereduce]
pipecat-ai[openai]
# pipecat-ai[openpipe]
# pipecat-ai[playht] # Mocked due to grpcio conflict with riva
pipecat-ai[riva]
pipecat-ai[silero]
pipecat-ai[simli]
pipecat-ai[soundfile]
pipecat-ai[websocket]
pipecat-ai[whisper]

38
docs/api/rtd-test.sh Executable file
View File

@@ -0,0 +1,38 @@
#!/bin/bash
set -e
# Configuration
DOCS_DIR=$(pwd)
PROJECT_ROOT=$(cd ../../ && pwd)
TEST_DIR="/tmp/rtd-test-$(date +%Y%m%d_%H%M%S)"
echo "Creating test directory: $TEST_DIR"
mkdir -p "$TEST_DIR"
cd "$TEST_DIR"
# Create virtual environment
python -m venv venv
source venv/bin/activate
echo "Installing build dependencies..."
pip install --upgrade pip wheel setuptools
echo "Installing documentation dependencies..."
pip install -r "$DOCS_DIR/requirements.txt"
echo "Building documentation..."
cd "$DOCS_DIR"
sphinx-build -b html . "_build/html"
echo "Build complete. Check _build/html directory for output."
# Print summary
echo -e "\n=== Build Summary ==="
echo "Documentation: $DOCS_DIR/_build/html"
echo "Test environment: $TEST_DIR"
echo -e "\nTo view the documentation:"
echo "open $DOCS_DIR/_build/html/index.html"
# Print installed packages for verification
echo -e "\n=== Installed Packages ==="
pip freeze | grep -E "sphinx|pipecat"

View File

@@ -19,7 +19,7 @@ from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.aws import AWSTTSService
from pipecat.services.aws import PollyTTSService
from pipecat.services.deepgram import DeepgramSTTService
from pipecat.services.openai import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
@@ -48,12 +48,12 @@ async def main():
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
tts = AWSTTSService(
tts = PollyTTSService(
api_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
region=os.getenv("AWS_REGION"),
voice_id="Amy",
params=AWSTTSService.InputParams(engine="neural", language="en-GB", rate="1.05"),
params=PollyTTSService.InputParams(engine="neural", language="en-GB", rate="1.05"),
)
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")

View File

@@ -24,12 +24,12 @@ This repository demonstrates a simple AI chatbot with real-time audio/video inte
2. **JavaScript**
- Basic implementation using [Pipecat JavaScript SDK](https://docs.pipecat.ai/client/reference/js/introduction)
- Basic implementation using [Pipecat JavaScript SDK](https://docs.pipecat.ai/client/js/introduction)
- No framework dependencies
- Good for learning the fundamentals
3. **React**
- Basic impelmentation using [Pipecat React SDK](https://docs.pipecat.ai/client/reference/react/introduction)
- Basic impelmentation using [Pipecat React SDK](https://docs.pipecat.ai/client/react/introduction)
- Demonstrates the basic client principles with Pipecat React
## Quick Start

View File

@@ -1,6 +1,6 @@
# JavaScript Implementation
Basic implementation using the [Pipecat JavaScript SDK](https://docs.pipecat.ai/client/reference/js/introduction).
Basic implementation using the [Pipecat JavaScript SDK](https://docs.pipecat.ai/client/js/introduction).
## Setup

View File

@@ -1,6 +1,6 @@
# React Implementation
Basic implementation using the [Pipecat React SDK](https://docs.pipecat.ai/client/reference/react/introduction).
Basic implementation using the [Pipecat React SDK](https://docs.pipecat.ai/client/react/introduction).
## Setup

View File

@@ -1,4 +1,4 @@
python-dotenv
fastapi[all]
uvicorn
pipecat-ai[daily,elevenlabs,openai,silero]
pipecat-ai[daily,elevenlabs,openai,silero,google]

View File

@@ -1,4 +1,4 @@
DAILY_SAMPLE_ROOM_URL= # Follow instructions here and put your https://YOURDOMAIN.daily.co/YOURROOM (Instructions: https://docs.pipecat.ai/quickstart#preparing-your-environment)
DAILY_SAMPLE_ROOM_URL= # Follow instructions here and put your https://YOURDOMAIN.daily.co/YOURROOM (Instructions: https://docs.pipecat.ai/getting-started/installation)
DAILY_API_KEY= # Create here: https://dashboard.daily.co/developers
OPENAI_API_KEY= # Create here: https://platform.openai.com/docs/overview
CARTESIA_API_KEY= # Create here: https://play.cartesia.ai/console

View File

@@ -20,7 +20,7 @@ classifiers = [
"Topic :: Scientific/Engineering :: Artificial Intelligence"
]
dependencies = [
"aiohttp~=3.11.9",
"aiohttp~=3.11.10",
"loguru~=0.7.2",
"Markdown~=3.7",
"numpy~=1.26.4",
@@ -38,8 +38,9 @@ Website = "https://pipecat.ai"
[project.optional-dependencies]
anthropic = [ "anthropic~=0.40.0" ]
assemblyai = [ "assemblyai~=0.34.0" ]
audio = [ "soundfile~=0.12.1" ]
aws = [ "boto3~=1.35.27" ]
azure = [ "azure-cognitiveservices-speech~=1.41.1", "openai~=1.50.2" ]
azure = [ "azure-cognitiveservices-speech~=1.41.1", "openai~=1.57.2" ]
canonical = [ "aiofiles~=24.1.0" ]
cartesia = [ "cartesia~=1.0.13", "websockets~=13.1" ]
daily = [ "daily-python~=0.13.0" ]
@@ -49,28 +50,28 @@ examples = [ "python-dotenv~=1.0.1", "flask~=3.0.3", "flask_cors~=4.0.1" ]
fal = [ "fal-client~=0.4.1" ]
gladia = [ "websockets~=13.1" ]
google = [ "google-generativeai~=0.8.3", "google-cloud-texttospeech~=2.21.1" ]
grok = [ "openai~=1.50.2" ]
groq = [ "openai~=1.50.2" ]
grok = [ "openai~=1.57.2" ]
groq = [ "openai~=1.57.2" ]
gstreamer = [ "pygobject~=3.48.2" ]
fireworks = [ "openai~=1.50.2" ]
fireworks = [ "openai~=1.57.2" ]
krisp = [ "pipecat-ai-krisp~=0.3.0" ]
langchain = [ "langchain~=0.2.14", "langchain-community~=0.2.12", "langchain-openai~=0.1.20" ]
livekit = [ "livekit~=0.17.5", "livekit-api~=0.7.1", "tenacity~=8.5.0" ]
livekit = [ "livekit~=0.18.2", "livekit-api~=0.8.0", "tenacity~=8.5.0" ]
lmnt = [ "lmnt~=1.1.4" ]
local = [ "pyaudio~=0.2.14" ]
moondream = [ "einops~=0.8.0", "timm~=1.0.8", "transformers~=4.44.0" ]
nim = [ "openai~=1.50.2" ]
nim = [ "openai~=1.57.2" ]
noisereduce = [ "noisereduce~=3.0.3" ]
openai = [ "openai~=1.50.2", "websockets~=13.1", "python-deepcompare~=1.0.1" ]
openai = [ "openai~=1.57.2", "websockets~=13.1", "python-deepcompare~=1.0.1" ]
openpipe = [ "openpipe~=4.38.0" ]
playht = [ "pyht~=0.1.8", "websockets~=13.1" ]
riva = [ "nvidia-riva-client~=2.17.0" ]
silero = [ "onnxruntime~=1.20.1" ]
simli = [ "simli-ai~=0.1.7"]
soundfile = [ "soundfile~=0.12.1" ]
together = [ "openai~=1.50.2" ]
together = [ "openai~=1.57.2" ]
websocket = [ "websockets~=13.1", "fastapi~=0.115.0" ]
whisper = [ "faster-whisper~=1.1.0" ]
simli = [ "simli-ai~=0.1.7"]
[tool.setuptools.packages.find]
# All the following settings are optional:

View File

@@ -4,7 +4,8 @@
# SPDX-License-Identifier: BSD 2-Clause License
#
import audioop
import sys
import numpy as np
import pyloudnorm as pyln
import resampy
@@ -74,21 +75,50 @@ def exp_smoothing(value: float, prev_value: float, factor: float) -> float:
return prev_value + factor * (value - prev_value)
def ulaw_to_pcm(ulaw_bytes: bytes, in_sample_rate: int, out_sample_rate: int):
# Convert μ-law to PCM
in_pcm_bytes = audioop.ulaw2lin(ulaw_bytes, 2)
if sys.version_info >= (3, 13):
try:
import io
# Resample
out_pcm_bytes = audioop.ratecv(in_pcm_bytes, 2, 1, in_sample_rate, out_sample_rate, None)[0]
import soundfile as sf
return out_pcm_bytes
def ulaw_to_pcm(ulaw_bytes: bytes, in_sample_rate: int, out_sample_rate: int) -> bytes:
with io.BytesIO(ulaw_bytes) as buf:
data, _ = sf.read(
buf, channels=1, samplerate=in_sample_rate, format="RAW", subtype="ULAW"
)
if in_sample_rate != out_sample_rate:
data = resampy.resample(data, in_sample_rate, out_sample_rate)
return (data * 32767).astype(np.int16).tobytes()
def pcm_to_ulaw(pcm_bytes: bytes, in_sample_rate: int, out_sample_rate: int) -> bytes:
data = np.frombuffer(pcm_bytes, dtype=np.int16).astype(np.float32) / 32767.0
if in_sample_rate != out_sample_rate:
data = resampy.resample(data, in_sample_rate, out_sample_rate)
with io.BytesIO() as buf:
sf.write(buf, data, out_sample_rate, format="RAW", subtype="ULAW")
return buf.getvalue()
except ImportError:
raise ImportError(
"For Python 3.13+, please install soundfile: pip install pipecat-ai[audio]"
)
def pcm_to_ulaw(pcm_bytes: bytes, in_sample_rate: int, out_sample_rate: int):
# Resample
in_pcm_bytes = audioop.ratecv(pcm_bytes, 2, 1, in_sample_rate, out_sample_rate, None)[0]
else:
import audioop
# Convert PCM to μ-law
ulaw_bytes = audioop.lin2ulaw(in_pcm_bytes, 2)
def ulaw_to_pcm(ulaw_bytes: bytes, in_sample_rate: int, out_sample_rate: int):
# Convert μ-law to PCM
in_pcm_bytes = audioop.ulaw2lin(ulaw_bytes, 2)
return ulaw_bytes
# Resample
out_pcm_bytes = audioop.ratecv(in_pcm_bytes, 2, 1, in_sample_rate, out_sample_rate, None)[0]
return out_pcm_bytes
def pcm_to_ulaw(pcm_bytes: bytes, in_sample_rate: int, out_sample_rate: int):
# Resample
in_pcm_bytes = audioop.ratecv(pcm_bytes, 2, 1, in_sample_rate, out_sample_rate, None)[0]
# Convert PCM to μ-law
ulaw_bytes = audioop.lin2ulaw(in_pcm_bytes, 2)
return ulaw_bytes

View File

@@ -6,13 +6,24 @@
import base64
import json
import sys
from pydantic import BaseModel
from pipecat.audio.utils import ulaw_to_pcm, pcm_to_ulaw
from pipecat.frames.frames import AudioRawFrame, Frame, InputAudioRawFrame, StartInterruptionFrame
from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializerType
if sys.version_info >= (3, 13):
try:
from pipecat.audio.utils import pcm_to_ulaw, ulaw_to_pcm
except ImportError:
raise ImportError(
"Audio processing support required for TwilioFrameSerializer. "
"Please install with: pip install pipecat-ai[audio]"
)
else:
from pipecat.audio.utils import pcm_to_ulaw, ulaw_to_pcm
class TwilioFrameSerializer(FrameSerializer):
class InputParams(BaseModel):

View File

@@ -11,7 +11,7 @@ import json
import re
from asyncio import CancelledError
from dataclasses import dataclass
from typing import Any, Dict, List, Optional
from typing import Any, Dict, List, Optional, Union
from loguru import logger
from PIL import Image
@@ -75,8 +75,7 @@ class AnthropicContextAggregatorPair:
class AnthropicLLMService(LLMService):
"""
This class implements inference with Anthropic's AI models.
"""This class implements inference with Anthropic's AI models.
Can provide a custom client via the `client` kwarg, allowing you to
use `AsyncAnthropicBedrock` and `AsyncAnthropicVertex` clients
@@ -328,7 +327,7 @@ class AnthropicLLMContext(OpenAILLMContext):
tools: list[dict] | None = None,
tool_choice: dict | None = None,
*,
system: str | NotGiven = NOT_GIVEN,
system: Union[str, NotGiven] = NOT_GIVEN,
):
super().__init__(messages=messages, tools=tools, tool_choice=tool_choice)

View File

@@ -1,3 +1,9 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
from typing import AsyncGenerator
@@ -67,8 +73,7 @@ class AssemblyAISTTService(STTService):
await self._disconnect()
async def run_stt(self, audio: bytes) -> AsyncGenerator[Frame, None]:
"""
Process an audio chunk for STT transcription.
"""Process an audio chunk for STT transcription.
This method streams the audio data to AssemblyAI for real-time transcription.
Transcription results are handled asynchronously via callback functions.
@@ -83,8 +88,7 @@ class AssemblyAISTTService(STTService):
yield None
async def _connect(self):
"""
Establish a connection to the AssemblyAI real-time transcription service.
"""Establish a connection to the AssemblyAI real-time transcription service.
This method sets up the necessary callback functions and initializes the
AssemblyAI transcriber.
@@ -95,8 +99,7 @@ class AssemblyAISTTService(STTService):
logger.info(f"{self}: Connected to AssemblyAI")
def on_data(transcript: aai.RealtimeTranscript):
"""
Callback for handling incoming transcription data.
"""Callback for handling incoming transcription data.
This function runs in a separate thread from the main asyncio event loop.
It creates appropriate transcription frames and schedules them to be
@@ -121,8 +124,7 @@ class AssemblyAISTTService(STTService):
asyncio.run_coroutine_threadsafe(self.push_frame(frame), self._loop)
def on_error(error: aai.RealtimeError):
"""
Callback for handling errors from AssemblyAI.
"""Callback for handling errors from AssemblyAI.
Like on_data, this runs in a separate thread and schedules error
handling in the main event loop.

View File

@@ -108,7 +108,7 @@ def language_to_aws_language(language: Language) -> str | None:
return language_map.get(language)
class AWSTTSService(TTSService):
class PollyTTSService(TTSService):
class InputParams(BaseModel):
engine: Optional[str] = None
language: Optional[Language] = Language.EN
@@ -244,3 +244,16 @@ class AWSTTSService(TTSService):
finally:
yield TTSStoppedFrame()
class AWSTTSService(PollyTTSService):
def __init__(self, **kwargs):
super().__init__(**kwargs)
import warnings
with warnings.catch_warnings():
warnings.simplefilter("always")
warnings.warn(
"'AWSTTSService' is deprecated, use 'PollyTTSService' instead.", DeprecationWarning
)

View File

@@ -44,6 +44,7 @@ ElevenLabsOutputFormat = Literal["pcm_16000", "pcm_22050", "pcm_24000", "pcm_441
def language_to_elevenlabs_language(language: Language) -> str | None:
BASE_LANGUAGES = {
Language.AR: "ar",
Language.BG: "bg",
Language.CS: "cs",
Language.DA: "da",
@@ -52,8 +53,10 @@ def language_to_elevenlabs_language(language: Language) -> str | None:
Language.EN: "en",
Language.ES: "es",
Language.FI: "fi",
Language.FIL: "fil",
Language.FR: "fr",
Language.HI: "hi",
Language.HR: "hr",
Language.HU: "hu",
Language.ID: "id",
Language.IT: "it",
@@ -68,6 +71,7 @@ def language_to_elevenlabs_language(language: Language) -> str | None:
Language.RU: "ru",
Language.SK: "sk",
Language.SV: "sv",
Language.TA: "ta",
Language.TR: "tr",
Language.UK: "uk",
Language.VI: "vi",

View File

@@ -47,23 +47,24 @@ except ModuleNotFoundError as e:
def language_to_playht_language(language: Language) -> str | None:
language_map = {
BASE_LANGUAGES = {
Language.AF: "afrikans",
Language.AM: "amharic",
Language.AR: "arabic",
Language.BN: "bengali",
Language.BG: "bulgarian",
Language.CA: "catalan",
Language.CS: "czech",
Language.DA: "danish",
Language.DE: "german",
Language.EL: "greek",
Language.EN: "english",
Language.EN_US: "english",
Language.EN_GB: "english",
Language.EN_AU: "english",
Language.EN_NZ: "english",
Language.EN_IN: "english",
Language.ES: "spanish",
Language.FR: "french",
Language.FR_CA: "french",
Language.EL: "greek",
Language.GL: "galician",
Language.HE: "hebrew",
Language.HI: "hindi",
Language.HR: "croatian",
Language.HU: "hungarian",
Language.ID: "indonesian",
Language.IT: "italian",
@@ -73,14 +74,30 @@ def language_to_playht_language(language: Language) -> str | None:
Language.NL: "dutch",
Language.PL: "polish",
Language.PT: "portuguese",
Language.PT_BR: "portuguese",
Language.RU: "russian",
Language.SQ: "albanian",
Language.SR: "serbian",
Language.SV: "swedish",
Language.TH: "thai",
Language.TL: "tagalog",
Language.TR: "turkish",
Language.UK: "ukrainian",
Language.UR: "urdu",
Language.XH: "xhosa",
Language.ZH: "mandarin",
}
return language_map.get(language)
result = BASE_LANGUAGES.get(language)
# If not found in base languages, try to find the base language from a variant
if not result:
# Convert enum value to string and get the base language part (e.g. es-ES -> es)
lang_str = str(language.value)
base_code = lang_str.split("-")[0].lower()
# Look up the base code in our supported languages
result = base_code if base_code in BASE_LANGUAGES.values() else None
return result
class PlayHTTTSService(TTSService):

View File

@@ -1,3 +1,9 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
from typing import AsyncGenerator, Optional
import aiohttp

View File

@@ -8,7 +8,7 @@ import asyncio
from typing import AsyncGenerator, Optional
from loguru import logger
from pydantic.main import BaseModel
from pydantic import BaseModel
from pipecat.frames.frames import (
CancelFrame,
@@ -35,6 +35,8 @@ except ModuleNotFoundError as e:
)
raise Exception(f"Missing module: {e}")
FASTPITCH_TIMEOUT_SECS = 5
class FastPitchTTSService(TTSService):
class InputParams(BaseModel):
@@ -76,7 +78,10 @@ class FastPitchTTSService(TTSService):
)
async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
def read_audio_responses():
def read_audio_responses(queue: asyncio.Queue):
def add_response(r):
asyncio.run_coroutine_threadsafe(queue.put(r), self.get_event_loop())
try:
responses = self._service.synthesize_online(
text,
@@ -87,26 +92,35 @@ class FastPitchTTSService(TTSService):
quality=self._quality,
custom_dictionary={},
)
return responses
for r in responses:
add_response(r)
add_response(None)
except Exception as e:
logger.error(f"{self} exception: {e}")
return []
add_response(None)
await self.start_ttfb_metrics()
yield TTSStartedFrame()
logger.debug(f"Generating TTS: [{text}]")
responses = await asyncio.to_thread(read_audio_responses)
for resp in responses:
await self.stop_ttfb_metrics()
try:
queue = asyncio.Queue()
await asyncio.to_thread(read_audio_responses, queue)
frame = TTSAudioRawFrame(
audio=resp.audio,
sample_rate=self._sample_rate,
num_channels=1,
)
yield frame
# Wait for the thread to start.
resp = await asyncio.wait_for(queue.get(), FASTPITCH_TIMEOUT_SECS)
while resp:
await self.stop_ttfb_metrics()
frame = TTSAudioRawFrame(
audio=resp.audio,
sample_rate=self._sample_rate,
num_channels=1,
)
yield frame
resp = await asyncio.wait_for(queue.get(), FASTPITCH_TIMEOUT_SECS)
except asyncio.TimeoutError:
logger.error(f"{self} timeout waiting for audio response")
await self.start_tts_usage_metrics(text)
yield TTSStoppedFrame()

View File

@@ -15,7 +15,6 @@ from PIL import Image
from pipecat.audio.vad.vad_analyzer import VAD_STOP_SECS
from pipecat.frames.frames import (
AudioRawFrame,
BotSpeakingFrame,
BotStartedSpeakingFrame,
BotStoppedSpeakingFrame,
@@ -52,9 +51,7 @@ class BaseOutputTransport(FrameProcessor):
self._sink_clock_task = None
# Task to write/send audio and image frames.
self._audio_out_task = None
self._camera_out_task = None
self._running_out_tasks = True
# These are the images that we should send to the camera at our desired
# framerate.
@@ -77,22 +74,31 @@ class BaseOutputTransport(FrameProcessor):
# Start audio mixer.
if self._params.audio_out_mixer:
await self._params.audio_out_mixer.start(self._params.audio_out_sample_rate)
self._create_output_tasks()
self._create_camera_task()
self._create_sink_tasks()
async def stop(self, frame: EndFrame):
# We can't cancel output tasks because there might still be audio
# buffered to be played.
await self._stop_output_tasks()
# Stop audio mixer.
if self._params.audio_out_mixer:
await self._params.audio_out_mixer.stop()
# Let the sink tasks process the queue until they reach this EndFrame.
await self._sink_clock_queue.put((sys.maxsize, frame.id, frame))
await self._sink_queue.put(frame)
# At this point we have enqueued an EndFrame and we need to wait for
# that EndFrame to be processed by the sink tasks. We also need to wait
# for these tasks before cancelling the camera and audio tasks below
# because they might be still rendering.
if self._sink_task:
await self._sink_task
if self._sink_clock_task:
await self._sink_clock_task
# We can now cancel the camera task.
await self._cancel_camera_task()
async def cancel(self, frame: CancelFrame):
# Since we are cancelling everything it doesn't matter if we cancel sink
# tasks first or not.
await self._cancel_sink_tasks()
await self._cancel_output_tasks()
await self._cancel_camera_task()
async def send_message(self, frame: TransportMessageFrame | TransportMessageUrgentFrame):
pass
@@ -103,6 +109,12 @@ class BaseOutputTransport(FrameProcessor):
async def write_raw_audio_frames(self, frames: bytes):
pass
async def send_audio(self, frame: OutputAudioRawFrame):
await self.queue_frame(frame, FrameDirection.DOWNSTREAM)
async def send_image(self, frame: OutputImageRawFrame | SpriteFrame):
await self.queue_frame(frame, FrameDirection.DOWNSTREAM)
#
# Frame processor
#
@@ -132,11 +144,8 @@ class BaseOutputTransport(FrameProcessor):
await self.push_frame(frame, direction)
# Control frames.
elif isinstance(frame, EndFrame):
# Process sink tasks.
await self._stop_sink_tasks(frame)
# Now we can stop.
await self.stop(frame)
# We finally push EndFrame down so PipelineTask stops nicely.
# Keep pushing EndFrame down so all the pipeline stops nicely.
await self.push_frame(frame, direction)
elif isinstance(frame, MixerControlFrame) and self._params.audio_out_mixer:
await self._params.audio_out_mixer.process_frame(frame)
@@ -151,30 +160,16 @@ class BaseOutputTransport(FrameProcessor):
else:
await self._sink_queue.put(frame)
async def _stop_sink_tasks(self, frame: EndFrame):
# Let the sink tasks process the queue until they reach this EndFrame.
await self._sink_clock_queue.put((sys.maxsize, frame.id, frame))
await self._sink_queue.put(frame)
# At this point we have enqueued an EndFrame and we need to wait for
# that EndFrame to be processed by the sink tasks. We also need to wait
# for these tasks before cancelling the camera and audio tasks below
# because they might be still rendering.
if self._sink_task:
await self._sink_task
if self._sink_clock_task:
await self._sink_clock_task
async def _handle_interruptions(self, frame: Frame):
if not self.interruptions_allowed:
return
if isinstance(frame, StartInterruptionFrame):
# Cancel sink and output tasks.
# Cancel sink and camera tasks.
await self._cancel_sink_tasks()
await self._cancel_output_tasks()
# Create sink and output tasks.
self._create_output_tasks()
await self._cancel_camera_task()
# Create sink and camera tasks.
self._create_camera_task()
self._create_sink_tasks()
# Let's send a bot stopped speaking if we have to.
await self._bot_stopped_speaking()
@@ -183,19 +178,16 @@ class BaseOutputTransport(FrameProcessor):
if not self._params.audio_out_enabled:
return
if self._params.audio_out_is_live:
await self._audio_out_queue.put(frame)
else:
cls = type(frame)
self._audio_buffer.extend(frame.audio)
while len(self._audio_buffer) >= self._audio_chunk_size:
chunk = cls(
bytes(self._audio_buffer[: self._audio_chunk_size]),
sample_rate=frame.sample_rate,
num_channels=frame.num_channels,
)
await self._sink_queue.put(chunk)
self._audio_buffer = self._audio_buffer[self._audio_chunk_size :]
cls = type(frame)
self._audio_buffer.extend(frame.audio)
while len(self._audio_buffer) >= self._audio_chunk_size:
chunk = cls(
bytes(self._audio_buffer[: self._audio_chunk_size]),
sample_rate=frame.sample_rate,
num_channels=frame.num_channels,
)
await self._sink_queue.put(chunk)
self._audio_buffer = self._audio_buffer[self._audio_chunk_size :]
async def _handle_image(self, frame: OutputImageRawFrame | SpriteFrame):
if not self._params.camera_out_enabled:
@@ -244,9 +236,7 @@ class BaseOutputTransport(FrameProcessor):
self._sink_clock_task = None
async def _sink_frame_handler(self, frame: Frame):
if isinstance(frame, OutputAudioRawFrame):
await self._audio_out_queue.put(frame)
elif isinstance(frame, OutputImageRawFrame):
if isinstance(frame, OutputImageRawFrame):
await self._set_camera_image(frame)
elif isinstance(frame, SpriteFrame):
await self._set_camera_images(frame.images)
@@ -256,19 +246,6 @@ class BaseOutputTransport(FrameProcessor):
elif not isinstance(frame, EndFrame):
await self.push_frame(frame)
async def _sink_task_handler(self):
running = True
while running:
try:
frame = await self._sink_queue.get()
await self._sink_frame_handler(frame)
running = not isinstance(frame, EndFrame)
self._sink_queue.task_done()
except asyncio.CancelledError:
break
except Exception as e:
logger.exception(f"{self} error processing sink queue: {e}")
async def _sink_clock_task_handler(self):
running = True
while running:
@@ -294,48 +271,93 @@ class BaseOutputTransport(FrameProcessor):
except Exception as e:
logger.exception(f"{self} error processing sink clock queue: {e}")
def _next_frame(self) -> AsyncGenerator[Frame, None]:
async def without_mixer(vad_stop_secs: float) -> AsyncGenerator[Frame, None]:
while True:
try:
frame = await asyncio.wait_for(self._sink_queue.get(), timeout=vad_stop_secs)
yield frame
except asyncio.TimeoutError:
# Notify the bot stopped speaking upstream if necessary.
await self._bot_stopped_speaking()
async def with_mixer(vad_stop_secs: float) -> AsyncGenerator[Frame, None]:
last_frame_time = 0
silence = b"\x00" * self._audio_chunk_size
while True:
try:
frame = self._sink_queue.get_nowait()
if isinstance(frame, OutputAudioRawFrame):
frame.audio = await self._params.audio_out_mixer.mix(frame.audio)
last_frame_time = time.time()
yield frame
except asyncio.QueueEmpty:
# Notify the bot stopped speaking upstream if necessary.
diff_time = time.time() - last_frame_time
if diff_time > vad_stop_secs:
await self._bot_stopped_speaking()
# Generate an audio frame with only the mixer's part.
frame = OutputAudioRawFrame(
audio=await self._params.audio_out_mixer.mix(silence),
sample_rate=self._params.audio_out_sample_rate,
num_channels=self._params.audio_out_channels,
)
yield frame
vad_stop_secs = (
self._params.vad_analyzer.params.stop_secs
if self._params.vad_analyzer
else VAD_STOP_SECS
)
if self._params.audio_out_mixer:
return with_mixer(vad_stop_secs)
else:
return without_mixer(vad_stop_secs)
async def _sink_task_handler(self):
try:
async for frame in self._next_frame():
# Notify the bot started speaking upstream if necessary and that
# it's actually speaking.
if isinstance(frame, TTSAudioRawFrame):
await self._bot_started_speaking()
await self.push_frame(BotSpeakingFrame())
await self.push_frame(BotSpeakingFrame(), FrameDirection.UPSTREAM)
# Handle frame.
await self._sink_frame_handler(frame)
# Also, push frame downstream in case anyone else needs it.
await self.push_frame(frame)
# Send audio.
if isinstance(frame, OutputAudioRawFrame):
await self.write_raw_audio_frames(frame.audio)
if isinstance(frame, EndFrame):
break
except asyncio.CancelledError:
pass
except Exception as e:
logger.exception(f"{self} error writing to microphone: {e}")
#
# Output tasks
# Camera task
#
def _create_output_tasks(self):
def _create_camera_task(self):
loop = self.get_event_loop()
# Create camera output queue and task if needed.
if self._params.camera_out_enabled:
self._camera_out_queue = asyncio.Queue()
self._camera_out_task = loop.create_task(self._camera_out_task_handler())
# Create audio output queue and task if needed.
if self._params.audio_out_enabled:
self._audio_out_queue = asyncio.Queue()
self._audio_out_task = loop.create_task(self._audio_out_task_handler())
async def _stop_output_tasks(self):
self._running_out_tasks = False
# Stop camera output task.
if self._camera_out_task and self._params.camera_out_enabled:
await self._camera_out_task
# Stop audio output task.
if self._audio_out_task and self._params.audio_out_enabled:
await self._audio_out_task
async def _cancel_output_tasks(self):
async def _cancel_camera_task(self):
# Stop camera output task.
if self._camera_out_task and self._params.camera_out_enabled:
self._camera_out_task.cancel()
await self._camera_out_task
self._camera_out_task = None
# Stop audio output task.
if self._audio_out_task and self._params.audio_out_enabled:
self._audio_out_task.cancel()
await self._audio_out_task
self._audio_out_task = None
#
# Camera out
#
async def send_image(self, frame: OutputImageRawFrame | SpriteFrame):
await self.queue_frame(frame, FrameDirection.DOWNSTREAM)
async def _draw_image(self, frame: OutputImageRawFrame):
desired_size = (self._params.camera_out_width, self._params.camera_out_height)
@@ -361,7 +383,7 @@ class BaseOutputTransport(FrameProcessor):
self._camera_out_frame_index = 0
self._camera_out_frame_duration = 1 / self._params.camera_out_framerate
self._camera_out_frame_reset = self._camera_out_frame_duration * 5
while self._running_out_tasks:
while True:
try:
if self._params.camera_out_is_live:
await self._camera_out_is_live_handler()
@@ -400,74 +422,3 @@ class BaseOutputTransport(FrameProcessor):
await self._draw_image(image)
self._camera_out_queue.task_done()
#
# Audio out
#
async def send_audio(self, frame: OutputAudioRawFrame):
await self.queue_frame(frame, FrameDirection.DOWNSTREAM)
def _next_audio_frame(self) -> AsyncGenerator[AudioRawFrame, None]:
async def without_mixer(vad_stop_secs: float) -> AsyncGenerator[AudioRawFrame, None]:
while self._running_out_tasks or self._bot_speaking:
try:
frame = await asyncio.wait_for(
self._audio_out_queue.get(), timeout=vad_stop_secs
)
yield frame
except asyncio.TimeoutError:
# Notify the bot stopped speaking upstream if necessary.
await self._bot_stopped_speaking()
async def with_mixer(vad_stop_secs: float) -> AsyncGenerator[AudioRawFrame, None]:
last_frame_time = 0
silence = b"\x00" * self._audio_chunk_size
while self._running_out_tasks or self._bot_speaking:
try:
frame = self._audio_out_queue.get_nowait()
frame.audio = await self._params.audio_out_mixer.mix(frame.audio)
last_frame_time = time.time()
yield frame
except asyncio.QueueEmpty:
# Notify the bot stopped speaking upstream if necessary.
diff_time = time.time() - last_frame_time
if diff_time > vad_stop_secs:
await self._bot_stopped_speaking()
# Generate an audio frame with only the mixer's part.
frame = OutputAudioRawFrame(
audio=await self._params.audio_out_mixer.mix(silence),
sample_rate=self._params.audio_out_sample_rate,
num_channels=self._params.audio_out_channels,
)
yield frame
vad_stop_secs = (
self._params.vad_analyzer.params.stop_secs
if self._params.vad_analyzer
else VAD_STOP_SECS
)
if self._params.audio_out_mixer:
return with_mixer(vad_stop_secs)
else:
return without_mixer(vad_stop_secs)
async def _audio_out_task_handler(self):
try:
async for frame in self._next_audio_frame():
# Notify the bot started speaking upstream if necessary and that
# it's actually speaking.
if isinstance(frame, TTSAudioRawFrame):
await self._bot_started_speaking()
await self.push_frame(BotSpeakingFrame())
await self.push_frame(BotSpeakingFrame(), FrameDirection.UPSTREAM)
# Also, push frame downstream in case anyone else needs it.
await self.push_frame(frame)
# Send audio.
await self.write_raw_audio_frames(frame.audio)
except asyncio.CancelledError:
pass
except Exception as e:
logger.exception(f"{self} error writing to microphone: {e}")

View File

@@ -69,18 +69,18 @@ class WebsocketServerInputTransport(BaseInputTransport):
self._stop_server_event = asyncio.Event()
async def start(self, frame: StartFrame):
self._server_task = self.get_event_loop().create_task(self._server_task_handler())
await super().start(frame)
self._server_task = self.get_event_loop().create_task(self._server_task_handler())
async def stop(self, frame: EndFrame):
await super().stop(frame)
self._stop_server_event.set()
await self._server_task
await super().stop(frame)
async def cancel(self, frame: CancelFrame):
await super().cancel(frame)
self._stop_server_event.set()
await self._server_task
await super().cancel(frame)
async def _server_task_handler(self):
logger.info(f"Starting websocket server on {self._host}:{self._port}")

View File

@@ -694,19 +694,17 @@ class DailyInputTransport(BaseInputTransport):
self._audio_in_task = self.get_event_loop().create_task(self._audio_in_task_handler())
async def stop(self, frame: EndFrame):
# Leave the room.
await self._client.leave()
# Stop audio thread.
if self._audio_in_task and (self._params.audio_in_enabled or self._params.vad_enabled):
self._audio_in_task.cancel()
await self._audio_in_task
self._audio_in_task = None
# Parent stop.
await super().stop(frame)
# Leave the room.
await self._client.leave()
# Stop audio thread.
if self._audio_in_task and (self._params.audio_in_enabled or self._params.vad_enabled):
self._audio_in_task.cancel()
await self._audio_in_task
self._audio_in_task = None
async def cancel(self, frame: CancelFrame):
# Parent stop.
await super().cancel(frame)
# Leave the room.
await self._client.leave()
# Stop audio thread.
@@ -714,6 +712,8 @@ class DailyInputTransport(BaseInputTransport):
self._audio_in_task.cancel()
await self._audio_in_task
self._audio_in_task = None
# Parent stop.
await super().cancel(frame)
async def cleanup(self):
await super().cleanup()
@@ -817,16 +817,16 @@ class DailyOutputTransport(BaseOutputTransport):
await self._client.join()
async def stop(self, frame: EndFrame):
# Leave the room.
await self._client.leave()
# Parent stop.
await super().stop(frame)
# Leave the room.
await self._client.leave()
async def cancel(self, frame: CancelFrame):
# Parent stop.
await super().cancel(frame)
# Leave the room.
await self._client.leave()
# Parent stop.
await super().cancel(frame)
async def cleanup(self):
await super().cleanup()

View File

@@ -4,23 +4,29 @@
# SPDX-License-Identifier: BSD 2-Clause License
#
"""
Daily REST Helpers
"""Daily REST Helpers.
Methods that wrap the Daily API to create rooms, check room URLs, and get meeting tokens.
"""
import aiohttp
import time
from typing import Literal, Optional
from urllib.parse import urlparse
from pydantic import Field, BaseModel, ValidationError
from typing import Literal, Optional
import aiohttp
from pydantic import BaseModel, Field, ValidationError
class DailyRoomSipParams(BaseModel):
"""SIP configuration parameters for Daily rooms.
Attributes:
display_name: Name shown for the SIP endpoint
video: Whether video is enabled for SIP
sip_mode: SIP connection mode, typically 'dial-in'
num_endpoints: Number of allowed SIP endpoints
"""
display_name: str = "sw-sip-dialin"
video: bool = False
sip_mode: str = "dial-in"
@@ -28,7 +34,19 @@ class DailyRoomSipParams(BaseModel):
class DailyRoomProperties(BaseModel, extra="allow"):
exp: float = Field(default_factory=lambda: time.time() + 5 * 60)
"""Properties for configuring a Daily room.
Attributes:
exp: Optional Unix epoch timestamp for room expiration (e.g., time.time() + 300 for 5 minutes)
enable_chat: Whether chat is enabled in the room
enable_emoji_reactions: Whether emoji reactions are enabled
eject_at_room_exp: Whether to remove participants when room expires
enable_dialout: Whether SIP dial-out is enabled
sip: SIP configuration parameters
sip_uri: SIP URI information returned by Daily
"""
exp: Optional[float] = None
enable_chat: bool = False
enable_emoji_reactions: bool = False
eject_at_room_exp: bool = True
@@ -38,6 +56,11 @@ class DailyRoomProperties(BaseModel, extra="allow"):
@property
def sip_endpoint(self) -> str:
"""Get the SIP endpoint URI if available.
Returns:
str: SIP endpoint URI or empty string if not available
"""
if not self.sip_uri:
return ""
else:
@@ -45,12 +68,32 @@ class DailyRoomProperties(BaseModel, extra="allow"):
class DailyRoomParams(BaseModel):
"""Parameters for creating a Daily room.
Attributes:
name: Optional custom name for the room
privacy: Room privacy setting ('private' or 'public')
properties: Room configuration properties
"""
name: Optional[str] = None
privacy: Literal["private", "public"] = "public"
properties: DailyRoomProperties = Field(default_factory=DailyRoomProperties)
class DailyRoomObject(BaseModel):
"""Represents a Daily room returned by the API.
Attributes:
id: Unique room identifier
name: Room name
api_created: Whether room was created via API
privacy: Room privacy setting ('private' or 'public')
url: Full URL for joining the room
created_at: Timestamp of room creation in ISO 8601 format (e.g., "2019-01-26T09:01:22.000Z").
config: Room configuration properties
"""
id: str
name: str
api_created: bool
@@ -61,6 +104,16 @@ class DailyRoomObject(BaseModel):
class DailyRESTHelper:
"""Helper class for interacting with Daily's REST API.
Provides methods for creating, managing, and accessing Daily rooms.
Args:
daily_api_key: Your Daily API key
daily_api_url: Daily API base URL (e.g. "https://api.daily.co/v1")
aiohttp_session: Async HTTP session for making requests
"""
def __init__(
self,
*,
@@ -73,13 +126,40 @@ class DailyRESTHelper:
self.aiohttp_session = aiohttp_session
def get_name_from_url(self, room_url: str) -> str:
"""Extract room name from a Daily room URL.
Args:
room_url: Full Daily room URL
Returns:
str: Room name portion of the URL
"""
return urlparse(room_url).path[1:]
async def get_room_from_url(self, room_url: str) -> DailyRoomObject:
"""Get room details from a Daily room URL.
Args:
room_url: Full Daily room URL
Returns:
DailyRoomObject: DailyRoomObject instance for the room
"""
room_name = self.get_name_from_url(room_url)
return await self._get_room_from_name(room_name)
async def create_room(self, params: DailyRoomParams) -> DailyRoomObject:
"""Create a new Daily room.
Args:
params: Room configuration parameters
Returns:
DailyRoomObject: DailyRoomObject instance for the created room
Raises:
Exception: If room creation fails or response is invalid
"""
headers = {"Authorization": f"Bearer {self.daily_api_key}"}
json = {**params.model_dump(exclude_none=True)}
async with self.aiohttp_session.post(
@@ -101,6 +181,19 @@ class DailyRESTHelper:
async def get_token(
self, room_url: str, expiry_time: float = 60 * 60, owner: bool = True
) -> str:
"""Generate a meeting token for user to join a Daily room.
Args:
room_url: Daily room URL
expiry_time: Token validity duration in seconds (default: 1 hour)
owner: Whether token has owner privileges
Returns:
str: Meeting token
Raises:
Exception: If token generation fails or room URL is missing
"""
if not room_url:
raise Exception(
"No Daily room specified. You must specify a Daily room in order a token to be generated."
@@ -124,10 +217,29 @@ class DailyRESTHelper:
return data["token"]
async def delete_room_by_url(self, room_url: str) -> bool:
"""Delete a room using its URL.
Args:
room_url: Daily room URL
Returns:
bool: True if deletion was successful
"""
room_name = self.get_name_from_url(room_url)
return await self.delete_room_by_name(room_name)
async def delete_room_by_name(self, room_name: str) -> bool:
"""Delete a room using its name.
Args:
room_name: Name of the room to delete
Returns:
bool: True if deletion was successful
Raises:
Exception: If deletion fails (excluding 404 Not Found)
"""
headers = {"Authorization": f"Bearer {self.daily_api_key}"}
async with self.aiohttp_session.delete(
f"{self.daily_api_url}/rooms/{room_name}", headers=headers
@@ -139,6 +251,17 @@ class DailyRESTHelper:
return True
async def _get_room_from_name(self, room_name: str) -> DailyRoomObject:
"""Internal method to get room details by name.
Args:
room_name: Name of the room
Returns:
DailyRoomObject: DailyRoomObject instance for the room
Raises:
Exception: If room is not found or response is invalid
"""
headers = {"Authorization": f"Bearer {self.daily_api_key}"}
async with self.aiohttp_session.get(
f"{self.daily_api_url}/rooms/{room_name}", headers=headers

View File

@@ -324,28 +324,19 @@ class LiveKitInputTransport(BaseInputTransport):
logger.info("LiveKitInputTransport started")
async def stop(self, frame: EndFrame):
await self._client.disconnect()
if self._audio_in_task:
self._audio_in_task.cancel()
try:
await self._audio_in_task
except asyncio.CancelledError:
pass
await self._audio_in_task
await super().stop(frame)
await self._client.disconnect()
logger.info("LiveKitInputTransport stopped")
async def process_frame(self, frame: Frame, direction: FrameDirection):
if isinstance(frame, EndFrame):
await self.stop(frame)
else:
await super().process_frame(frame, direction)
async def cancel(self, frame: CancelFrame):
await super().cancel(frame)
await self._client.disconnect()
if self._audio_in_task and (self._params.audio_in_enabled or self._params.vad_enabled):
self._audio_in_task.cancel()
await self._audio_in_task
await super().cancel(frame)
def vad_analyzer(self) -> VADAnalyzer | None:
return self._vad_analyzer
@@ -407,19 +398,13 @@ class LiveKitOutputTransport(BaseOutputTransport):
logger.info("LiveKitOutputTransport started")
async def stop(self, frame: EndFrame):
await super().stop(frame)
await self._client.disconnect()
await super().stop(frame)
logger.info("LiveKitOutputTransport stopped")
async def process_frame(self, frame: Frame, direction: FrameDirection):
if isinstance(frame, EndFrame):
await self.stop(frame)
else:
await super().process_frame(frame, direction)
async def cancel(self, frame: CancelFrame):
await super().cancel(frame)
await self._client.disconnect()
await super().cancel(frame)
async def send_message(self, frame: TransportMessageFrame | TransportMessageUrgentFrame):
if isinstance(frame, (LiveKitTransportMessageFrame, LiveKitTransportMessageUrgentFrame)):
@@ -526,12 +511,6 @@ class LiveKitTransport(BaseTransport):
async def _on_disconnected(self):
await self._call_event_handler("on_disconnected")
# Attempt to reconnect
try:
await self._client.connect()
await self._call_event_handler("on_connected")
except Exception as e:
logger.error(f"Failed to reconnect: {e}")
async def _on_participant_connected(self, participant_id: str):
await self._call_event_handler("on_participant_connected", participant_id)