Compare commits

..

5 Commits

Author SHA1 Message Date
James Hush
a11fa05370 Save progress 2025-01-02 08:35:05 +08:00
James Hush
ed7b512f40 Change logic to launch 4 bots per min 2024-12-20 15:40:48 +08:00
James Hush
6377825cff Add way to specify phone number 2024-12-19 14:40:54 +08:00
James Hush
875ecf7fcb Add command line args
Fix
2024-12-19 14:40:50 +08:00
James Hush
8fb75cf6a7 test: batch dialout demo
Rename

Save changes

Batches of 50

No sort

Only write dialout false

Add rate limit handling

Save

Improvements

Save progress with recording

Fixed stop

Add backoff

Save logs

Make table nicer

Move to new folder
2024-12-19 14:40:47 +08:00
978 changed files with 20346 additions and 108457 deletions

View File

@@ -1,87 +0,0 @@
name: Bug report
description: Report a bug or unexpected behavior
type: Bug
body:
- type: markdown
attributes:
value: |
## Bug Report
Thank you for taking the time to fill out this bug report.
- type: markdown
attributes:
value: |
### Environment
- type: input
id: pipecat-version
attributes:
label: pipecat version
description: Which version are you using?
placeholder: e.g., 0.0.63
validations:
required: true
- type: input
id: python-version
attributes:
label: Python version
description: Which Python version are you using?
placeholder: e.g., 3.12.8
validations:
required: true
- type: input
id: os
attributes:
label: Operating System
description: Which OS are you using?
placeholder: e.g., Ubuntu 24.04, Windows 11, macOS 12.5
validations:
required: true
- type: textarea
id: description
attributes:
label: Issue description
description: Provide a clear description of the issue.
validations:
required: true
- type: textarea
id: repro
attributes:
label: Reproduction steps
description: List the steps to reproduce the issue.
placeholder: |
1. Do this...
2. Then do that...
3. Observe the error...
validations:
required: true
- type: textarea
id: expected
attributes:
label: Expected behavior
description: What did you expect to happen?
validations:
required: true
- type: textarea
id: actual
attributes:
label: Actual behavior
description: What actually happened?
validations:
required: true
- type: textarea
id: logs
attributes:
label: Logs
description: If applicable, include any relevant logs or error messages
render: shell
validations:
required: false

View File

@@ -1,67 +0,0 @@
name: Question
description: Ask a question or get help
type: Question
body:
- type: markdown
attributes:
value: |
## Question
Use this form to ask a question about pipecat.
- type: markdown
attributes:
value: |
### Environment (if applicable)
- type: input
id: pipecat-version
attributes:
label: pipecat version
description: Which version are you using? (if applicable)
placeholder: e.g., 0.0.63
validations:
required: false
- type: input
id: python-version
attributes:
label: Python version
description: Which Python version are you using? (if applicable)
placeholder: e.g., 3.12.8
validations:
required: false
- type: input
id: os
attributes:
label: Operating System
description: Which OS are you using? (if applicable)
placeholder: e.g., Ubuntu 24.04, Windows 11, macOS 12.5
validations:
required: false
- type: textarea
id: question
attributes:
label: Question
description: Provide your question in detail here.
validations:
required: true
- type: textarea
id: tried
attributes:
label: What I've tried
description: Describe what you've already tried or research you've done.
placeholder: I've looked at the documentation and tried...
validations:
required: false
- type: textarea
id: context
attributes:
label: Context
description: Any additional context or information that might help others understand your question better.
validations:
required: false

View File

@@ -1,52 +0,0 @@
name: Feature request
description: Suggest an enhancement or new feature
type: Enhancement
body:
- type: markdown
attributes:
value: |
## Feature Request
Thank you for suggesting an enhancement to pipecat.
- type: textarea
id: problem
attributes:
label: Problem Statement
description: A clear description of the problem this feature would solve.
placeholder: I'm always frustrated when...
validations:
required: true
- type: textarea
id: solution
attributes:
label: Proposed Solution
description: A clear and concise description of what you want to happen.
validations:
required: true
- type: textarea
id: alternatives
attributes:
label: Alternative Solutions
description: Any alternative solutions or features you've considered.
validations:
required: false
- type: textarea
id: context
attributes:
label: Additional Context
description: Add any other context, mockups, or screenshots about the feature request here.
placeholder: You can drag and drop images here to include them.
validations:
required: false
- type: checkboxes
id: contribution
attributes:
label: Would you be willing to help implement this feature?
options:
- label: Yes, I'd like to contribute
- label: No, I'm just suggesting

View File

@@ -1,82 +0,0 @@
name: Service Issue
description: An issue with a third-party service
type: Service Issue
body:
- type: markdown
attributes:
value: |
## Service Issue
Use this form to report an issue with a third-party service integration.
- type: input
id: pipecat-version
attributes:
label: pipecat version
description: Which version are you using?
placeholder: e.g., 0.0.63
validations:
required: true
- type: input
id: service-name
attributes:
label: Service Name
description: Which third-party service is having issues?
placeholder: e.g., OpenAI, ElevenLabs, Anthropic
validations:
required: true
- type: input
id: service-version
attributes:
label: Service or model version
description: Which version of the service API or model are you using?
placeholder: e.g., v1, gpt-4.1
validations:
required: false
- type: textarea
id: description
attributes:
label: Issue Description
description: Provide a clear description of the service issue.
validations:
required: true
- type: textarea
id: reproduction
attributes:
label: Reproduction Steps
description: Provide steps to reproduce the issue.
placeholder: |
1. Configure service X
2. Call method Y
3. See error Z
validations:
required: true
- type: textarea
id: expected
attributes:
label: Expected Behavior
description: What did you expect to happen?
validations:
required: true
- type: textarea
id: actual
attributes:
label: Actual Behavior
description: What actually happened?
validations:
required: true
- type: textarea
id: logs
attributes:
label: Error Logs
description: If available, include any error messages or logs.
render: shell
validations:
required: false

View File

@@ -1,56 +0,0 @@
name: New Service
description: Request to support a new third-party service
type: New Service
body:
- type: markdown
attributes:
value: |
## New Service Request
Use this form to request support for a new third-party service in pipecat.
- type: input
id: service-name
attributes:
label: Service Name
description: What is the name of the third-party service?
placeholder: e.g., NewAPI, SomeService
validations:
required: true
- type: input
id: service-website
attributes:
label: Service Website
description: Link to the service's website or documentation
placeholder: e.g., https://newapi.com
validations:
required: true
- type: textarea
id: service-description
attributes:
label: Service Description
description: Briefly describe what this service does and how it works.
validations:
required: true
- type: textarea
id: api-info
attributes:
label: API Information
description: If available, provide details about the service's API.
placeholder: |
- API documentation link
- Authentication method
- Key endpoints you'd like supported
validations:
required: false
- type: checkboxes
id: contribution
attributes:
label: Would you be willing to help implement this service?
options:
- label: Yes, I'd like to contribute
- label: No, I'm just suggesting

View File

@@ -1,74 +0,0 @@
name: Dependency Issue
description: An issue with a Pipecat dependency (not a third-party service)
type: Dependency Issue
body:
- type: markdown
attributes:
value: |
## Dependency Issue
Use this form to report an issue with a Pipecat dependency.
- type: input
id: pipecat-version
attributes:
label: pipecat version
description: Which version are you using?
placeholder: e.g., 0.0.63
validations:
required: true
- type: input
id: dependency-name
attributes:
label: Dependency Name
description: Which Pipecat dependency is causing the issue?
placeholder: e.g., openai, anthropic, fastapi
validations:
required: true
- type: input
id: dependency-version
attributes:
label: Dependency Version
description: Which version of the dependency are you using?
placeholder: e.g., 1.2.3
validations:
required: true
- type: textarea
id: description
attributes:
label: Issue Description
description: Provide a clear description of the dependency issue.
validations:
required: true
- type: textarea
id: impact
attributes:
label: Impact
description: How is this dependency issue affecting your usage of pipecat?
validations:
required: true
- type: textarea
id: reproduction
attributes:
label: Reproduction Steps
description: If applicable, provide steps to reproduce the issue.
placeholder: |
1. Install dependency X
2. Run command Y
3. See error Z
validations:
required: false
- type: textarea
id: logs
attributes:
label: Error Logs
description: If applicable, include any relevant error messages or logs.
render: shell
validations:
required: false

View File

@@ -1,70 +0,0 @@
name: Troubleshooting
description: Help with a specific use case
type: Troubleshooting
body:
- type: markdown
attributes:
value: |
## Troubleshooting Request
Use this form to get help with a specific use case or implementation.
- type: input
id: pipecat-version
attributes:
label: pipecat version
description: Which version are you using?
placeholder: e.g., 0.0.63
validations:
required: true
- type: input
id: python-version
attributes:
label: Python version
description: Which version of Python are you using?
placeholder: e.g., 3.12.8
validations:
required: true
- type: input
id: os
attributes:
label: Operating System
description: Which OS are you using?
placeholder: e.g., Ubuntu 24.04, Windows 11, macOS 12.5
validations:
required: true
- type: textarea
id: use-case
attributes:
label: Use Case Description
description: Describe what you're trying to accomplish with pipecat.
validations:
required: true
- type: textarea
id: current-approach
attributes:
label: Current Approach
description: What have you tried so far? Include code snippets if relevant.
render: python
validations:
required: true
- type: textarea
id: errors
attributes:
label: Errors or Unexpected Behavior
description: Describe any errors or unexpected behavior you're encountering.
validations:
required: true
- type: textarea
id: additional-context
attributes:
label: Additional Context
description: Any other information that might help us understand your situation.
validations:
required: false

View File

@@ -1 +0,0 @@
blank_issues_enabled: false

View File

@@ -1,48 +0,0 @@
name: android
on:
push:
branches:
- main
paths:
- "examples/simple-chatbot/client/android/**"
pull_request:
branches:
- "**"
paths:
- "examples/simple-chatbot/client/android/**"
workflow_dispatch:
inputs:
sdk_git_ref:
type: string
description: "Which git ref of the app to build"
concurrency:
group: build-android-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
sdk:
name: "Simple chatbot demo"
runs-on: ubuntu-latest
steps:
- name: Checkout repo
uses: actions/checkout@v4
with:
ref: ${{ github.event.inputs.sdk_git_ref || github.ref }}
- name: "Install Java"
uses: actions/setup-java@v4
with:
distribution: 'temurin'
java-version: '17'
- name: Build demo app
working-directory: examples/simple-chatbot/client/android
run: ./gradlew :simple-chatbot-client:assembleDebug
- name: Upload demo APK
uses: actions/upload-artifact@v4
with:
name: Simple Chatbot Android Client
path: examples/simple-chatbot/client/android/simple-chatbot-client/build/outputs/apk/debug/simple-chatbot-client-debug.apk

View File

@@ -1,54 +0,0 @@
name: coverage
on:
workflow_dispatch:
push:
branches:
- main
pull_request:
branches:
- "**"
paths-ignore:
- "docs/**"
jobs:
coverage:
name: "Coverage"
runs-on: ubuntu-latest
steps:
- name: Checkout repo
uses: actions/checkout@v4
- name: Set up Python
id: setup_python
uses: actions/setup-python@v4
with:
python-version: "3.10"
- name: Cache virtual environment
uses: actions/cache@v3
with:
# We are hashing dev-requirements.txt and test-requirements.txt which
# contain all dependencies needed to run the tests.
key: venv-${{ runner.os }}-${{ steps.setup_python.outputs.python-version}}-${{ hashFiles('dev-requirements.txt') }}-${{ hashFiles('test-requirements.txt') }}
path: .venv
- name: Install system packages
id: install_system_packages
run: |
sudo apt-get install -y portaudio19-dev
- name: Setup virtual environment
run: |
python -m venv .venv
- name: Install basic Python dependencies
run: |
source .venv/bin/activate
python -m pip install --upgrade pip
pip install -r dev-requirements.txt -r test-requirements.txt
- name: Run tests with coverage
run: |
source .venv/bin/activate
coverage run
coverage xml
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v5
with:
token: ${{ secrets.CODECOV_TOKEN }}
slug: pipecat-ai/pipecat

View File

@@ -1,4 +1,4 @@
name: tests
name: test
on:
workflow_dispatch:
@@ -49,4 +49,4 @@ jobs:
- name: Test with pytest
run: |
source .venv/bin/activate
pytest
pytest --ignore-glob="*to_be_updated*" --ignore-glob=*pipeline_source* src tests

21
.gitignore vendored
View File

@@ -7,7 +7,7 @@ venv
/.idea
#*#
# Distribution / Packaging
# Distribution / packaging
.Python
build/
develop-eggs/
@@ -30,23 +30,8 @@ MANIFEST
.env
fly.toml
# Examples
examples/telnyx-chatbot/templates/streams.xml
examples/twilio-chatbot/templates/streams.xml
examples/**/node_modules/
examples/**/.expo/
examples/**/dist/
examples/**/npm-debug.*
examples/**/*.jks
examples/**/*.p8
examples/**/*.p12
examples/**/*.key
examples/**/*.mobileprovision
examples/**/*.orig.*
examples/**/web-build/
# macOS
.DS_Store
# Example files
pipecat/examples/twilio-chatbot/templates/streams.xml
# Documentation
docs/api/_build/

View File

@@ -1,8 +0,0 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.9.7
hooks:
- id: ruff
language_version: python3
args: [ --select, I, ]
- id: ruff-format

File diff suppressed because it is too large Load Diff

View File

@@ -26,52 +26,11 @@ git commit -m "Description of your changes"
git push origin your-branch-name
```
8. **Submit a Pull Request (PR)**: Open a PR from your forked repository to the main branch of this repo.
> Important: Describe the changes you've made clearly!
9. **Submit a Pull Request (PR)**: Open a PR from your forked repository to the main branch of this repo.
> Important: Describe the changes you've made clearly!
Our maintainers will review your PR, and once everything is good, your contributions will be merged!
## Code Style and Documentation
### Python Code Style
We use Ruff for code linting and formatting. Please ensure your code passes all linting checks before submitting a PR.
### Docstring Conventions
We follow Google-style docstrings with these specific conventions:
- Class docstrings should fully document all parameters used in `__init__`
- We don't require separate docstrings for `__init__` methods when parameters are documented in the class docstring
- Property methods should have docstrings explaining their purpose and return value
Example of correctly documented class:
```python
class MyClass:
"""Class description.
Additional details about the class.
Args:
param1: Description of first parameter.
param2: Description of second parameter.
"""
def __init__(self, param1, param2):
# No docstring required here as parameters are documented above
self.param1 = param1
self.param2 = param2
@property
def some_property(self) -> str:
"""Get the formatted property value.
Returns:
A string representation of the property.
"""
return f"Property: {self.param1}"
```
# Contributor Covenant Code of Conduct
@@ -92,23 +51,23 @@ diverse, inclusive, and healthy community.
Examples of behavior that contributes to a positive environment for our
community include:
- Demonstrating empathy and kindness toward other people
- Being respectful of differing opinions, viewpoints, and experiences
- Giving and gracefully accepting constructive feedback
- Accepting responsibility and apologizing to those affected by our mistakes,
* Demonstrating empathy and kindness toward other people
* Being respectful of differing opinions, viewpoints, and experiences
* Giving and gracefully accepting constructive feedback
* Accepting responsibility and apologizing to those affected by our mistakes,
and learning from the experience
- Focusing on what is best not just for us as individuals, but for the overall
* Focusing on what is best not just for us as individuals, but for the overall
community
Examples of unacceptable behavior include:
- The use of sexualized language or imagery, and sexual attention or advances of
* The use of sexualized language or imagery, and sexual attention or advances of
any kind
- Trolling, insulting or derogatory comments, and personal or political attacks
- Public or private harassment
- Publishing others' private information, such as a physical or email address,
* Trolling, insulting or derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or email address,
without their explicit permission
- Other conduct which could reasonably be considered inappropriate in a
* Other conduct which could reasonably be considered inappropriate in a
professional setting
## Enforcement Responsibilities
@@ -203,4 +162,4 @@ For answers to common questions about this code of conduct, see the FAQ at
[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
[Mozilla CoC]: https://github.com/mozilla/diversity
[FAQ]: https://www.contributor-covenant.org/faq
[translations]: https://www.contributor-covenant.org/translations
[translations]: https://www.contributor-covenant.org/translations

View File

@@ -1,6 +1,6 @@
BSD 2-Clause License
Copyright (c) 20242025, Daily
Copyright (c) 2024, Daily
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

230
README.md
View File

@@ -1,72 +1,43 @@
<h1><div align="center">
<img alt="pipecat" width="300px" height="auto" src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/pipecat.png">
 <img alt="pipecat" width="300px" height="auto" src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/pipecat.png">
</div></h1>
[![PyPI](https://img.shields.io/pypi/v/pipecat-ai)](https://pypi.org/project/pipecat-ai) ![Tests](https://github.com/pipecat-ai/pipecat/actions/workflows/tests.yaml/badge.svg) [![codecov](https://codecov.io/gh/pipecat-ai/pipecat/graph/badge.svg?token=LNVUIVO4Y9)](https://codecov.io/gh/pipecat-ai/pipecat) [![Docs](https://img.shields.io/badge/Documentation-blue)](https://docs.pipecat.ai) [![Discord](https://img.shields.io/discord/1239284677165056021)](https://discord.gg/pipecat)
[![PyPI](https://img.shields.io/pypi/v/pipecat-ai)](https://pypi.org/project/pipecat-ai) [![Docs](https://img.shields.io/badge/Documentation-blue)](https://docs.pipecat.ai) [![Discord](https://img.shields.io/discord/1239284677165056021)](https://discord.gg/pipecat) <a href="https://app.commanddash.io/agent/github_pipecat-ai_pipecat"><img src="https://img.shields.io/badge/AI-Code%20Agent-EB9FDA"></a>
# 🎙️ Pipecat: Real-Time Voice & Multimodal AI Agents
Pipecat is an open source Python framework for building voice and multimodal conversational agents. It handles the complex orchestration of AI services, network transport, audio processing, and multimodal interactions, letting you focus on creating engaging experiences.
**Pipecat** is an open-source Python framework for building real-time voice and multimodal conversational agents. Orchestrate audio and video, AI services, different transports, and conversation pipelines effortlessly—so you can focus on what makes your agent unique.
## What you can build
## 🚀 What You Can Build
- **Voice Assistants**: [Natural, real-time conversations with AI](https://demo.dailybots.ai/)
- **Interactive Agents**: Personal coaches and meeting assistants
- **Multimodal Apps**: Combine voice, video, images, and text
- **Creative Tools**: [Story-telling experiences](https://storytelling-chatbot.fly.dev/) and social companions
- **Business Solutions**: [Customer intake flows](https://www.youtube.com/watch?v=lDevgsp9vn0) and support bots
- **Complex conversational flows**: [Refer to Pipecat Flows](https://github.com/pipecat-ai/pipecat-flows) to learn more
- **Voice Assistants** natural, streaming conversations with AI
- **AI Companions** coaches, meeting assistants, characters
- **Multimodal Interfaces** voice, video, images, and more
- **Interactive Storytelling** creative tools with generative media
- **Business Agents** customer intake, support bots, guided flows
- **Complex Dialog Systems** design logic with structured conversations
🧭 Looking to build structured conversations? Check out [Pipecat Flows](https://github.com/pipecat-ai/pipecat-flows) for managing complex conversational states and transitions.
## 🧠 Why Pipecat?
- **Voice-first**: Integrates speech recognition, text-to-speech, and conversation handling
- **Pluggable**: Supports many AI services and tools
- **Composable Pipelines**: Build complex behavior from modular components
- **Real-Time**: Ultra-low latency interaction with different transports (e.g. WebSockets or WebRTC)
## 🎬 See it in action
## See it in action
<p float="left">
<a href="https://github.com/pipecat-ai/pipecat/tree/main/examples/simple-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/examples/simple-chatbot/image.png" width="400" /></a>&nbsp;
<a href="https://github.com/pipecat-ai/pipecat/tree/main/examples/storytelling-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/examples/storytelling-chatbot/image.png" width="400" /></a>
<a href="https://github.com/pipecat-ai/pipecat/tree/main/examples/simple-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/examples/simple-chatbot/image.png" width="280" /></a>&nbsp;
<a href="https://github.com/pipecat-ai/pipecat/tree/main/examples/storytelling-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/examples/storytelling-chatbot/image.png" width="280" /></a>
<br/>
<a href="https://github.com/pipecat-ai/pipecat/tree/main/examples/translation-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/examples/translation-chatbot/image.png" width="400" /></a>&nbsp;
<a href="https://github.com/pipecat-ai/pipecat/tree/main/examples/moondream-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/examples/moondream-chatbot/image.png" width="400" /></a>
<a href="https://github.com/pipecat-ai/pipecat/tree/main/examples/translation-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/examples/translation-chatbot/image.png" width="280" /></a>&nbsp;
<a href="https://github.com/pipecat-ai/pipecat/tree/main/examples/moondream-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/examples/moondream-chatbot/image.png" width="280" /></a>
</p>
## 📱 Client SDKs
## Key features
You can connect to Pipecat from any platform using our official SDKs:
- **Voice-first Design**: Built-in speech recognition, TTS, and conversation handling
- **Flexible Integration**: Works with popular AI services (OpenAI, ElevenLabs, etc.)
- **Pipeline Architecture**: Build complex apps from simple, reusable components
- **Real-time Processing**: Frame-based pipeline architecture for fluid interactions
- **Production Ready**: Enterprise-grade WebRTC and Websocket support
| Platform | SDK Repo | Description |
| -------- | ------------------------------------------------------------------------------ | -------------------------------- |
| Web | [pipecat-client-web](https://github.com/pipecat-ai/pipecat-client-web) | JavaScript and React client SDKs |
| iOS | [pipecat-client-ios](https://github.com/pipecat-ai/pipecat-client-ios) | Swift SDK for iOS |
| Android | [pipecat-client-android](https://github.com/pipecat-ai/pipecat-client-android) | Kotlin SDK for Android |
| C++ | [pipecat-client-cxx](https://github.com/pipecat-ai/pipecat-client-cxx) | C++ client SDK |
💡 Looking to build structured conversations? Check out [Pipecat Flows](https://github.com/pipecat-ai/pipecat-flows) for managing complex conversational states and transitions.
## 🧩 Available services
## Getting started
| Category | Services |
|---------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [Parakeet (NVIDIA)](https://docs.pipecat.ai/server/services/stt/parakeet), [Ultravox](https://docs.pipecat.ai/server/services/stt/ultravox), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) |
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [Together AI](https://docs.pipecat.ai/server/services/llm/together) |
| Text-to-Speech | [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [FastPitch (NVIDIA)](https://docs.pipecat.ai/server/services/tts/fastpitch), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
| Speech-to-Speech | [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai) |
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local |
| Video | [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) |
| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) |
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/fal), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) |
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [Noisereduce](https://docs.pipecat.ai/server/utilities/audio/noisereduce-filter) |
| Analytics & Metrics | [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) |
📚 [View full services documentation →](https://docs.pipecat.ai/server/services/supported-services)
## ⚡ Getting started
You can get started with Pipecat running on your local machine, then move your agent processes to the cloud when youre ready.
You can get started with Pipecat running on your local machine, then move your agent processes to the cloud when youre ready. You can also add a 📞 telephone number, 🖼️ image output, 📺 video input, use different LLMs, and more.
```shell
# Install the module
@@ -82,71 +53,141 @@ To keep things lightweight, only the core framework is included by default. If y
pip install "pipecat-ai[option,...]"
```
## 🧪 Code examples
Available options include:
| Category | Services | Install Command Example |
| ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------- |
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) | `pip install "pipecat-ai[deepgram]"` |
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [Together AI](https://docs.pipecat.ai/server/services/llm/together) | `pip install "pipecat-ai[openai]"` |
| Text-to-Speech | [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Google](https://docs.pipecat.ai/server/services/tts/google), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) | `pip install "pipecat-ai[cartesia]"` |
| Speech-to-Speech | [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai) | `pip install "pipecat-ai[openai]"` |
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), WebSocket, Local | `pip install "pipecat-ai[daily]"` |
| Video | [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) | `pip install "pipecat-ai[tavus,simli]"` |
| Vision & Image | [Moondream](https://docs.pipecat.ai/server/services/vision/moondream), [fal](https://docs.pipecat.ai/server/services/image-generation/fal) | `pip install "pipecat-ai[moondream]"` |
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Noisereduce](https://docs.pipecat.ai/server/utilities/audio/noisereduce-filter) | `pip install "pipecat-ai[silero]"` |
| Analytics & Metrics | [Canonical AI](https://docs.pipecat.ai/server/services/analytics/canonical), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) | `pip install "pipecat-ai[canonical]"` |
📚 [View full services documentation →](https://docs.pipecat.ai/server/services/supported-services)
## Code examples
- [Foundational](https://github.com/pipecat-ai/pipecat/tree/main/examples/foundational) — small snippets that build on each other, introducing one or two concepts at a time
- [Example apps](https://github.com/pipecat-ai/pipecat/tree/main/examples/) — complete applications that you can use as starting points for development
## 🛠️ Hacking on the framework itself
## A simple voice agent running locally
1. Set up a virtual environment before following these instructions. From the root of the repo:
Here is a very basic Pipecat bot that greets a user when they join a real-time session. We'll use [Daily](https://daily.co) for real-time media transport, and [Cartesia](https://cartesia.ai/) for text-to-speech.
```shell
python3 -m venv venv
source venv/bin/activate
```
```python
import asyncio
2. Install the development dependencies:
from pipecat.frames.frames import EndFrame, TextFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.task import PipelineTask
from pipecat.pipeline.runner import PipelineRunner
from pipecat.services.cartesia import CartesiaTTSService
from pipecat.transports.services.daily import DailyParams, DailyTransport
```shell
pip install -r dev-requirements.txt
```
async def main():
# Use Daily as a real-time media transport (WebRTC)
transport = DailyTransport(
room_url=...,
token="", # leave empty. Note: token is _not_ your api key
bot_name="Bot Name",
params=DailyParams(audio_out_enabled=True))
3. Install the git pre-commit hooks (these help ensure your code follows project rules):
# Use Cartesia for Text-to-Speech
tts = CartesiaTTSService(
api_key=...,
voice_id=...
)
```shell
pre-commit install
```
# Simple pipeline that will process text to speech and output the result
pipeline = Pipeline([tts, transport.output()])
4. Install the `pipecat-ai` package locally in editable mode:
# Create Pipecat processor that can run one or more pipelines tasks
runner = PipelineRunner()
```shell
pip install -e .
```
# Assign the task callable to run the pipeline
task = PipelineTask(pipeline)
> The `-e` or `--editable` option allows you to modify the code without reinstalling.
# Register an event handler to play audio when a
# participant joins the transport WebRTC session
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
participant_name = participant.get("info", {}).get("userName", "")
# Queue a TextFrame that will get spoken by the TTS service (Cartesia)
await task.queue_frame(TextFrame(f"Hello there, {participant_name}!"))
5. Include optional dependencies as needed. For example:
# Register an event handler to exit the application when the user leaves.
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
```shell
pip install -e ".[daily,deepgram,cartesia,openai,silero]"
```
# Run the pipeline task
await runner.run(task)
6. (Optional) If you want to use this package from another directory:
if __name__ == "__main__":
asyncio.run(main())
```
```shell
pip install "path_to_this_repo[option,...]"
```
### Running tests
Install the test dependencies:
Run it with:
```shell
pip install -r test-requirements.txt
python app.py
```
Daily provides a prebuilt WebRTC user interface. While the app is running, you can visit at `https://<yourdomain>.daily.co/<room_url>` and listen to the bot say hello!
## WebRTC for production use
WebSockets are fine for server-to-server communication or for initial development. But for production use, youll need client-server audio to use a protocol designed for real-time media transport. (For an explanation of the difference between WebSockets and WebRTC, see [this post.](https://www.daily.co/blog/how-to-talk-to-an-llm-with-your-voice/#webrtc))
One way to get up and running quickly with WebRTC is to sign up for a Daily developer account. Daily gives you SDKs and global infrastructure for audio (and video) routing. Every account gets 10,000 audio/video/transcription minutes free each month.
Sign up [here](https://dashboard.daily.co/u/signup) and [create a room](https://docs.daily.co/reference/rest-api/rooms) in the developer Dashboard.
## Hacking on the framework itself
_Note that you may need to set up a virtual environment before following the instructions below. For instance, you might need to run the following from the root of the repo:_
```shell
python3 -m venv venv
source venv/bin/activate
```
From the root of this repo, run the following:
```shell
pip install -r dev-requirements.txt
python -m build
```
This builds the package. To use the package locally (e.g. to run sample files), run
```shell
pip install --editable ".[option,...]"
```
If you want to use this package from another directory, you can run:
```shell
pip install "path_to_this_repo[option,...]"
```
### Running tests
From the root directory, run:
```shell
pytest
pytest --doctest-modules --ignore-glob="*to_be_updated*" --ignore-glob=*pipeline_source* src tests
```
### Setting up your editor
## Setting up your editor
This project uses strict [PEP 8](https://peps.python.org/pep-0008/) formatting via [Ruff](https://github.com/astral-sh/ruff).
#### Emacs
### Emacs
You can use [use-package](https://github.com/jwiegley/use-package) to install [emacs-lazy-ruff](https://github.com/christophermadsen/emacs-lazy-ruff) package and configure `ruff` arguments:
@@ -168,7 +209,7 @@ You can use [use-package](https://github.com/jwiegley/use-package) to install [e
:hook ((python-mode . pyvenv-auto-run)))
```
#### Visual Studio Code
### Visual Studio Code
Install the
[Ruff](https://marketplace.visualstudio.com/items?itemName=charliermarsh.ruff) extension. Then edit the user settings (_Ctrl-Shift-P_ `Open User Settings (JSON)`) and set it as the default Python formatter, and enable formatting on save:
@@ -180,17 +221,16 @@ Install the
}
```
#### PyCharm
`ruff` was installed in the `venv` environment described before, now to enable autoformatting on save, go to `File` -> `Settings` -> `Tools` -> `File Watchers` and add a new watcher with the following settings:
### PyCharm
`ruff` was installed in the `venv` environment described before, now to enable autoformatting on save, go to `File` -> `Settings` -> `Tools` -> `File Watchers` and add a new watcher with the following settings:
1. **Name**: `Ruff formatter`
2. **File type**: `Python`
3. **Working directory**: `$ContentRoot$`
4. **Arguments**: `format $FilePath$`
5. **Program**: `$PyInterpreterDirectory$/ruff`
## 🤝 Contributing
## Contributing
We welcome contributions from the community! Whether you're fixing bugs, improving documentation, or adding new features, here's how you can help:
@@ -203,7 +243,7 @@ Before submitting a pull request, please check existing issues and PRs to avoid
We aim to review all contributions promptly and provide constructive feedback to help get your changes merged.
## 🛟 Getting help
## Getting help
➡️ [Join our Discord](https://discord.gg/pipecat)

View File

@@ -1,11 +0,0 @@
coverage:
range: 50..90 # coverage lower than 50 is red, higher than 90 green, between color code
status:
project:
default:
target: auto # auto % coverage target
threshold: 5% # allow for 5% reduction of coverage without failing
# do not run coverage on patch nor changes
patch: false

View File

@@ -1,13 +1,9 @@
build~=1.2.2
coverage~=7.6.12
grpcio-tools~=1.67.1
grpcio-tools~=1.68.1
pip-tools~=7.4.1
pre-commit~=4.0.1
pyright~=1.1.397
pyright~=1.1.390
pytest~=8.3.4
pytest-asyncio~=0.25.3
pytest-aiohttp==1.1.0
ruff~=0.11.1
setuptools~=70.0.0
ruff~=0.8.3
setuptools~=75.6.0
setuptools_scm~=8.1.0
python-dotenv~=1.0.1

22
docs/ISSUE_TEMPLATE.md Normal file
View File

@@ -0,0 +1,22 @@
# Description
Is this reporting a bug or feature request?
If reporting a bug, please fill out the following:
### Environment
- pipecat-ai version:
- python version:
- OS:
### Issue description
Provide a clear description of the issue.
### Repro steps
List the steps to reproduce the issue.
### Expected behavior
### Actual behavior
### Logs

View File

@@ -50,13 +50,6 @@ autodoc_mock_imports = [
"pyht.protos",
"pyht.protos.api_pb2",
"pipecat_ai_playht", # PlayHT wrapper
"aiortc",
"aiortc.mediastreams",
"cv2",
"av",
"pyneuphonic",
"mem0",
"mlx_whisper",
"anthropic",
"assemblyai",
"boto3",
@@ -75,6 +68,7 @@ autodoc_mock_imports = [
"openpipe",
"simli",
"soundfile",
# Existing mocks
"pipecat_ai_krisp",
"pyaudio",
"_tkinter",
@@ -85,66 +79,6 @@ autodoc_mock_imports = [
"pydantic.Field",
"pydantic._internal._model_construction",
"pydantic._internal._fields",
# Moondream dependencies
"torch",
"transformers",
"intel_extension_for_pytorch",
# Ultravox dependencies
"huggingface_hub",
"vllm",
"vllm.engine.arg_utils",
"transformers.AutoTokenizer",
# Langchain dependencies
"langchain_core",
"langchain_core.messages",
"langchain_core.runnables",
"langchain_core.messages.AIMessageChunk",
"langchain_core.runnables.Runnable",
# LiveKit dependencies
"livekit",
"livekit.rtc",
"livekit_api",
"livekit_protocol",
"tenacity",
"tenacity.retry",
"tenacity.stop_after_attempt",
"tenacity.wait_exponential",
"rtc",
"rtc.Room",
"rtc.RoomOptions",
"rtc.AudioSource",
"rtc.LocalAudioTrack",
"rtc.TrackPublishOptions",
"rtc.TrackSource",
"rtc.AudioStream",
"rtc.AudioFrameEvent",
"rtc.AudioFrame",
"rtc.Track",
"rtc.TrackKind",
"rtc.RemoteParticipant",
"rtc.RemoteTrackPublication",
"rtc.DataPacket",
# Riva dependencies
"riva",
"riva.client",
"riva.client.Auth",
"riva.client.ASRService",
"riva.client.StreamingRecognitionConfig",
"riva.client.RecognitionConfig",
"riva.client.AudioEncoding",
"riva.client.proto.riva_tts_pb2",
"riva.client.SpeechSynthesisService",
# Local CoreML Smart Turn dependencies
"coremltools",
"coremltools.models",
"coremltools.models.MLModel",
"torch",
"torch.nn",
"torch.nn.functional",
"transformers",
"transformers.AutoFeatureExtractor",
# Also add specific classes that are imported
"AutoFeatureExtractor",
]
# HTML output settings
@@ -176,25 +110,12 @@ def verify_modules():
},
}
# Skip importing modules that are in autodoc_mock_imports
skipped_modules = set(autodoc_mock_imports)
missing = []
for category, modules in required_modules.items():
if isinstance(modules, dict):
# Handle nested structure
for subcategory, submodules in modules.items():
for module in submodules:
# Check if module is in autodoc_mock_imports
if (
f"pipecat.{category}.{subcategory}.{module}" in skipped_modules
or module in skipped_modules
):
logger.info(
f"Skipping import of mocked module: pipecat.{category}.{subcategory}.{module}"
)
continue
try:
__import__(f"pipecat.{category}.{subcategory}.{module}")
logger.info(
@@ -208,11 +129,6 @@ def verify_modules():
else:
# Handle flat structure
for module in modules:
# Check if module is in autodoc_mock_imports
if f"pipecat.{category}.{module}" in skipped_modules or module in skipped_modules:
logger.info(f"Skipping import of mocked module: pipecat.{category}.{module}")
continue
try:
__import__(f"pipecat.{category}.{module}")
logger.info(f"Successfully imported pipecat.{category}.{module}")

View File

@@ -45,10 +45,8 @@ Transport & Serialization
Utilities
~~~~~~~~~
* :mod:`Adapters <pipecat.adapters>`
* :mod:`Clocks <pipecat.clocks>`
* :mod:`Metrics <pipecat.metrics>`
* :mod:`Observers <pipecat.observers>`
* :mod:`Sync <pipecat.sync>`
* :mod:`Transcriptions <pipecat.transcriptions>`
* :mod:`Utils <pipecat.utils>`
@@ -58,12 +56,10 @@ Utilities
:caption: API Reference
:hidden:
Adapters <api/pipecat.adapters>
Audio <api/pipecat.audio>
Clocks <api/pipecat.clocks>
Frames <api/pipecat.frames>
Metrics <api/pipecat.metrics>
Observers <api/pipecat.observers>
Pipeline <api/pipecat.pipeline>
Processors <api/pipecat.processors>
Serializers <api/pipecat.serializers>

View File

@@ -10,44 +10,31 @@ pipecat-ai[anthropic]
pipecat-ai[assemblyai]
pipecat-ai[aws]
pipecat-ai[azure]
pipecat-ai[canonical]
pipecat-ai[cartesia]
pipecat-ai[cerebras]
pipecat-ai[deepseek]
pipecat-ai[daily]
pipecat-ai[deepgram]
pipecat-ai[elevenlabs]
pipecat-ai[fal]
pipecat-ai[fireworks]
pipecat-ai[fish]
pipecat-ai[gladia]
pipecat-ai[google]
pipecat-ai[grok]
pipecat-ai[groq]
# pipecat-ai[krisp] # Mocked
pipecat-ai[koala]
# pipecat-ai[langchain] # Mocked
# pipecat-ai[livekit] # Mocked
# pipecat-ai[krisp] # Mocked instead
pipecat-ai[langchain]
pipecat-ai[livekit]
pipecat-ai[lmnt]
pipecat-ai[local]
# pipecat-ai[local-smart-turn] # Mocked
# pipecat-ai[mem0] # Mocked
# pipecat-ai[mlx-whisper] # Mocked
# pipecat-ai[moondream] # Mocked
pipecat-ai[moondream]
pipecat-ai[nim]
# pipecat-ai[neuphonic] # Mocked
pipecat-ai[noisereduce]
pipecat-ai[openai]
# pipecat-ai[openpipe]
# pipecat-ai[playht] # Mocked due to grpcio conflict with riva
pipecat-ai[qwen]
pipecat-ai[remote-smart-turn]
# pipecat-ai[riva] # Mocked
pipecat-ai[riva]
pipecat-ai[silero]
pipecat-ai[simli]
pipecat-ai[soundfile]
pipecat-ai[tavus]
pipecat-ai[together]
# pipecat-ai[ultravox] # Mocked
# pipecat-ai[webrtc] # Mocked
pipecat-ai[websocket]
pipecat-ai[whisper]

View File

@@ -18,9 +18,6 @@ AZURE_DALLE_API_KEY=...
AZURE_DALLE_ENDPOINT=https://...
AZURE_DALLE_MODEL=...
# Cartesia
CARTESIA_API_KEY=...
# Daily
DAILY_API_KEY=...
DAILY_SAMPLE_ROOM_URL=https://...
@@ -29,9 +26,6 @@ DAILY_SAMPLE_ROOM_URL=https://...
ELEVENLABS_API_KEY=...
ELEVENLABS_VOICE_ID=...
# Neuphonic
NEUPHONIC_API_KEY=...
# Fal
FAL_KEY=...
@@ -66,38 +60,3 @@ SIMLI_FACE_ID=...
# Krisp
KRISP_MODEL_PATH=...
# DeepSeek
DEEPSEEK_API_KEY=...
# Groq
GROQ_API_KEY=...
# Grok
GROK_API_KEY=...
# Together.ai
TOGETHER_API_KEY=...
# Cerebras
CEREBRAS_API_KEY=...
# Fish Audio
FISH_API_KEY=...
# Assembly AI
ASSEMBLYAI_API_KEY=...
# OpenRouter
OPENROUTER_API_KEY=...
# Piper
PIPER_BASE_URL=...
# Smart turn
LOCAL_SMART_TURN_MODEL_PATH=
FAL_SMART_TURN_API_KEY=...
# Twilio
TWILIO_ACCOUNT_SID=
TWILIO_AUTH_TOKEN=

View File

@@ -39,10 +39,10 @@ Next, follow the steps in the README for each demo.
| [Translation Chatbot](translation-chatbot) | Listens for user speech, then translates that speech to Spanish and speaks the translation back. Demonstrates multi-participant use-cases. | Deepgram, Azure, OpenAI, Daily, Daily Prebuilt UI |
| [Moondream Chatbot](moondream-chatbot) | Demonstrates how to add vision capabilities to GPT4. **Note: works best with a GPU** | Deepgram, ElevenLabs, OpenAI, Moondream, Daily, Daily Prebuilt UI |
| [Patient intake](patient-intake) | A chatbot that can call functions in response to user input. | Deepgram, ElevenLabs, OpenAI, Daily, Daily Prebuilt UI |
| [Phone Chatbot](phone-chatbot) | A chatbot that connects to PSTN/SIP phone calls, powered by Daily or Twilio. | Deepgram, ElevenLabs, OpenAI, Daily, Twilio |
| [Dialin Chatbot](dialin-chatbot) | A chatbot that connects to an incoming phone call from Daily or Twilio. | Deepgram, ElevenLabs, OpenAI, Daily, Twilio |
| [Twilio Chatbot](twilio-chatbot) | A chatbot that connects to an incoming phone call from Twilio. | Deepgram, ElevenLabs, OpenAI, Daily, Twilio |
| [studypal](studypal) | A chatbot to have a conversation about any article on the web | |
| [WebSocket Chatbot Server](websocket-server) | A real-time websocket server that handles audio streaming and bot interactions with speech-to-text and text-to-speech capabilities. | Cartesia, Deepgram, OpenAI, Websockets |
| [WebSocket Chatbot Server](websocket-server) | A real-time websocket server that handles audio streaming and bot interactions with speech-to-text and text-to-speech capabilities | `python-websockets`, `openai`, `deepgram`, `silero-tts`, `numpy` |
> [!IMPORTANT]
> These example projects use Daily as a WebRTC transport and can be joined using their hosted Prebuilt UI.

View File

@@ -0,0 +1,21 @@
import json
with open("logs.json", "r") as file:
logs = json.load(file)
# Find IDs with duration less than 245 seconds
ids_with_short_duration = [
[entry["id"], entry["mtgSessionId"], entry["duration"]]
for entry in logs["data"]
if entry["duration"] < 245
]
print("\n=== Entries with Duration < 245 seconds ===")
print(f"Total entries found: {len(ids_with_short_duration)} / 144")
print("-" * 100)
print(f"{'Recording ID':36} | {'Session ID':36} | {'Duration':10}")
print("-" * 100)
for rec_id, session_id, duration in ids_with_short_duration:
print(f"{rec_id:20} | {session_id:20} | {duration:8} seconds")
print("-" * 100 + "\n")

View File

@@ -0,0 +1,217 @@
import argparse
import asyncio
import os
import sys
import aiohttp
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame, LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.cartesia import CartesiaTTSService
from pipecat.services.deepgram import DeepgramTTSService
from pipecat.services.openai import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
logger.remove(0)
logger.add(sys.stderr, level="INFO")
daily_api_key = os.getenv("DAILY_API_KEY", "")
daily_api_url = os.getenv("DAILY_API_URL", "https://api.daily.co/v1")
class DialoutBot:
def __init__(self, room_url: str, token: str, callId: int, run_number: int, phone_number: str):
self.recording_id = None
self.room_url = room_url
self.token = token
self.callId = callId
self.run_number = run_number
self.phone_number = phone_number
async def run(self):
transport = DailyTransport(
self.room_url,
self.token,
"Chatbot",
DailyParams(
api_url=daily_api_url,
api_key=daily_api_key,
audio_in_enabled=True,
audio_out_enabled=True,
camera_out_enabled=False,
vad_enabled=True,
vad_analyzer=SileroVADAnalyzer(),
transcription_enabled=True,
),
)
tts = CartesiaTTSService(
api_key=os.getenv("CARTESIA_API_KEY"),
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
)
tts = DeepgramTTSService(
api_key=os.getenv("DEEPGRAM_API_KEY"),
voice="aura-helios-en",
)
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
messages = [
{
"role": "system",
"content": "You are Chatbot, a friendly, helpful robot. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by saying 'Oh, hello! Who dares dial me at this hour?!'.",
},
]
context = OpenAILLMContext(messages)
context_aggregator = llm.create_context_aggregator(context)
pipeline = Pipeline(
[
transport.input(),
context_aggregator.user(),
llm,
tts,
transport.output(),
context_aggregator.assistant(),
]
)
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
def get_phone_number(callId: int, run_number: int) -> str:
# if self.phone_number:
# return self.phone_number
if run_number % 2 == 0:
phone_numbers = [
"+12097135124", # James
"+12097135125", # James
self.phone_number,
# "+19499870006", # Varun
]
return phone_numbers[callId % len(phone_numbers)]
else:
phone_numbers = [
"+14155204406", # James
"+18187229086", # James (Avoca)
self.phone_number,
# "+16673870006", # Varun
]
return phone_numbers[callId % len(phone_numbers)]
@transport.event_handler("on_call_state_updated")
async def on_call_state_updated(transport, state):
logger.info(f"on_call_state_updated, state: {state}")
# dialout_id = None
if state == "joined":
logger.info(f"on_call_state_updated {state}")
backoff_time = 1 # Initial backoff time in seconds
for _ in range(5):
try:
phone_number = get_phone_number(self.callId, self.run_number)
logger.info(f"Starting dialout to {phone_number}")
settings = {
"phoneNumber": phone_number,
"display_name": "Dialout User",
}
await transport.start_dialout(settings)
break # Break out of the loop if start_dialout is successful
except Exception as e:
logger.error(f"Error starting dialout: {e}")
await asyncio.sleep(backoff_time) # Wait for the current backoff time
backoff_time *= 2 # Double the backoff time for the next attempt
if state == "left":
logger.info(f"on_call_state_updated {state}")
# await transport.stop_dialout(dialout_id)
async with aiohttp.ClientSession() as aiohttp_session:
print(f"Deleting room: {self.room_url}")
rest = DailyRESTHelper(
daily_api_key=os.getenv("DAILY_API_KEY", ""),
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
aiohttp_session=aiohttp_session,
)
await rest.delete_room_by_url(self.room_url)
# @transport.event_handler("on_first_participant_joined")
# async def on_first_participant_joined(transport, participant):
# await transport.capture_participant_transcription(participant["id"])
# await task.queue_frames([LLMMessagesFrame(messages)])
@transport.event_handler("on_dialout_answered")
async def on_dialout_answered(transport, participant):
logger.info(f"on_dialout_answered {participant["participantId"]}")
streaming_settings = {
"minIdleTimeOut": 10,
"layout": {
"preset": "audio-only",
},
}
backoff_time = 1 # Initial backoff time in seconds
for _ in range(5):
try:
await transport.start_recording(streaming_settings=streaming_settings)
break # Break out of the loop if start_dialout is successful
except Exception as e:
logger.error(f"Error starting recording: {e}")
await asyncio.sleep(backoff_time) # Wait for the current backoff time
backoff_time *= 2 # Double the backoff time for the next attempt
await transport.capture_participant_transcription(participant["participantId"])
await task.queue_frames([LLMMessagesFrame(messages)])
@transport.event_handler("on_recording_started")
async def on_recording_started(transport, stream_id):
self.recording_id = stream_id["streamId"]
logger.info(f"Recording started: {self.recording_id}")
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
logger.info(f"Participant left: {participant}, reason: {reason}")
logger.info(f"Stopping recording: {self.recording_id}")
await transport.stop_recording(self.recording_id)
await task.queue_frame(EndFrame())
@transport.event_handler("on_recording_error")
async def on_recording_error(transport, error):
logger.error(f"Recording error: {error}")
await task.queue_frame(EndFrame())
@transport.event_handler("on_dialout_error")
async def on_dialout_error(transport, error):
logger.error(f"Dialout error: {error}")
await task.queue_frame(EndFrame())
runner = PipelineRunner()
await runner.run(task)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Pipecat Simple ChatBot")
parser.add_argument("-u", type=str, help="Room URL")
parser.add_argument("-t", type=str, help="Token")
parser.add_argument("-i", type=str, help="Call ID")
parser.add_argument("-r", type=str, help="Run Number")
parser.add_argument("-p", type=str, help="Phone Number")
config = parser.parse_args()
bot = DialoutBot(config.u, config.t, int(config.i), int(config.r), config.p)
try:
asyncio.run(bot.run())
except Exception as e:
logger.error(f"++++++++++++++ Error: {e}")
sys.exit(1)

View File

@@ -0,0 +1,109 @@
import argparse
import asyncio
import csv
import os
import subprocess
import time
from datetime import datetime
import aiohttp
from pipecat.transports.services.helpers.daily_rest import (
DailyRESTHelper,
DailyRoomParams,
DailyRoomProperties,
)
async def run_bot(id: int, run_number: int, bot_run_time: int, phone_number: str, csv_writer):
async with aiohttp.ClientSession() as aiohttp_session:
print(f"Starting bot number: {id}")
rest = DailyRESTHelper(
daily_api_key=os.getenv("DAILY_API_KEY", ""),
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
aiohttp_session=aiohttp_session,
)
# Create daily.co room with dialin and dialout enabled
exp = time.time() + bot_run_time + 600 + 60
room_params = DailyRoomParams(
properties=DailyRoomProperties(
exp=exp,
enable_dialout=True,
eject_at_room_exp=True,
enable_recording="cloud",
)
)
try:
# Create the room with the specified parameters
room = await rest.create_room(room_params)
# Create token with owner permissions
token = await rest.get_token(
room_url=room.url,
expiry_time=60 * 60,
owner=True, # Ensure the token has owner permissions
)
# print(f"{id}: Room Token: {token}")
room_info = await rest.get_room_from_url(room.url)
# print(f"{id}: Room Info: {room_info}")
current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
# Other party joined or not and start dialout joined
csv_writer.writerow([id, room_info.config.enable_dialout, current_time])
except Exception as e:
print(f"Error creating room for bot {id}: {e}")
print("Sleeping for 10 seconds")
await asyncio.sleep(10)
csv_writer.writerow(
[id, "Rate Limit Error", datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]]
)
await asyncio.sleep(15)
bot_proc = f"python3 -m batch_dialout_bot -u {room.url} -t {token} -i {id} -r {run_number} -p {phone_number}"
try:
subprocess.Popen(
[bot_proc], shell=True, bufsize=1, cwd=os.path.dirname(os.path.abspath(__file__))
)
except Exception as e:
print(f"Failed to start subprocess: {e}")
async def main():
parser = argparse.ArgumentParser(description="Pipecat Simple Dialout Bot")
parser.add_argument("-b", type=int, help="Number of bots per run")
parser.add_argument("-r", type=int, help="Number of runs")
parser.add_argument("-t", type=int, help="Time to run the bot in seconds")
parser.add_argument("-p", type=str, help="Phone Number")
config = parser.parse_args()
number_of_batches = int(config.r)
number_of_bots = int(config.b)
bot_run_time = int(config.t)
phone_number = config.p
# Open the CSV file in append mode
with open("output.csv", mode="w", newline="") as file:
csv_writer = csv.writer(file)
# Write the header row
csv_writer.writerow(["bot_id", "enable_dialout", "timestamp"])
for run_number in range(number_of_batches):
print(f"-- Starting batch run number: {run_number} of {number_of_batches}")
bots = [
run_bot(i, run_number, bot_run_time, phone_number, csv_writer)
for i in range(number_of_bots)
]
print(f"-- Number of bots: {len(bots)}")
await asyncio.gather(*bots)
print(f"-- Waiting {bot_run_time} seconds...")
await asyncio.sleep(bot_run_time)
print(f"-- Finished waiting {bot_run_time} seconds...")
if __name__ == "__main__":
try:
asyncio.run(main())
except KeyboardInterrupt:
print("Parent process interrupted")

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,41 @@
bot_id,enable_dialout,timestamp
0,True,2024-12-20 17:18:47.197
1,True,2024-12-20 17:18:47.247
0,True,2024-12-20 17:20:03.366
1,True,2024-12-20 17:20:03.392
1,True,2024-12-20 17:21:19.477
0,True,2024-12-20 17:21:19.514
0,True,2024-12-20 17:22:35.613
1,True,2024-12-20 17:22:35.637
3,True,2024-12-20 17:10:15.329
2,True,2024-12-20 17:10:15.356
1,True,2024-12-20 17:10:15.359
0,True,2024-12-20 17:10:15.389
3,True,2024-12-20 17:11:31.428
0,True,2024-12-20 17:11:31.469
2,True,2024-12-20 17:11:31.511
1,True,2024-12-20 17:11:31.670
0,True,2024-12-20 17:12:47.680
3,True,2024-12-20 17:12:47.753
1,True,2024-12-20 17:12:47.770
2,True,2024-12-20 17:12:47.779
3,True,2024-12-20 17:14:03.844
1,True,2024-12-20 17:14:03.847
2,True,2024-12-20 17:14:03.883
0,True,2024-12-20 17:14:03.977
3,True,2024-12-20 17:15:20.026
0,True,2024-12-20 17:15:20.069
1,True,2024-12-20 17:15:20.071
2,True,2024-12-20 17:15:20.127
0,True,2024-12-20 17:16:36.155
1,True,2024-12-20 17:16:36.166
3,True,2024-12-20 17:16:36.217
2,True,2024-12-20 17:16:36.248
3,True,2024-12-20 17:17:52.320
2,True,2024-12-20 17:17:52.357
0,True,2024-12-20 17:17:52.401
1,True,2024-12-20 17:17:52.477
1,True,2024-12-20 17:19:08.543
3,True,2024-12-20 17:19:08.553
2,True,2024-12-20 17:19:08.578
0,True,2024-12-20 17:19:08.656
1 bot_id enable_dialout timestamp
2 0 True 2024-12-20 17:18:47.197
3 1 True 2024-12-20 17:18:47.247
4 0 True 2024-12-20 17:20:03.366
5 1 True 2024-12-20 17:20:03.392
6 1 True 2024-12-20 17:21:19.477
7 0 True 2024-12-20 17:21:19.514
8 0 True 2024-12-20 17:22:35.613
9 1 True 2024-12-20 17:22:35.637
10 3 True 2024-12-20 17:10:15.329
11 2 True 2024-12-20 17:10:15.356
12 1 True 2024-12-20 17:10:15.359
13 0 True 2024-12-20 17:10:15.389
14 3 True 2024-12-20 17:11:31.428
15 0 True 2024-12-20 17:11:31.469
16 2 True 2024-12-20 17:11:31.511
17 1 True 2024-12-20 17:11:31.670
18 0 True 2024-12-20 17:12:47.680
19 3 True 2024-12-20 17:12:47.753
20 1 True 2024-12-20 17:12:47.770
21 2 True 2024-12-20 17:12:47.779
22 3 True 2024-12-20 17:14:03.844
23 1 True 2024-12-20 17:14:03.847
24 2 True 2024-12-20 17:14:03.883
25 0 True 2024-12-20 17:14:03.977
26 3 True 2024-12-20 17:15:20.026
27 0 True 2024-12-20 17:15:20.069
28 1 True 2024-12-20 17:15:20.071
29 2 True 2024-12-20 17:15:20.127
30 0 True 2024-12-20 17:16:36.155
31 1 True 2024-12-20 17:16:36.166
32 3 True 2024-12-20 17:16:36.217
33 2 True 2024-12-20 17:16:36.248
34 3 True 2024-12-20 17:17:52.320
35 2 True 2024-12-20 17:17:52.357
36 0 True 2024-12-20 17:17:52.401
37 1 True 2024-12-20 17:17:52.477
38 1 True 2024-12-20 17:19:08.543
39 3 True 2024-12-20 17:19:08.553
40 2 True 2024-12-20 17:19:08.578
41 0 True 2024-12-20 17:19:08.656

View File

@@ -1,45 +0,0 @@
# Bot ready signaling
A simple Pipecat example demonstrating how to handle signaling between the client and the bot,
ensuring that the bot starts sending audio only when the client is available,
thereby avoiding the risk of cutting off the beginning of the audio.
## Quick Start
### First, start the bot server:
1. Navigate to the server directory:
```bash
cd server
```
2. Create and activate a virtual environment:
```bash
python3 -m venv venv
source venv/bin/activate # On Windows: venv\Scripts\activate
```
3. Install requirements:
```bash
pip install -r requirements.txt
```
4. Copy env.example to .env and configure:
- Add your API keys
5. Start the server:
```bash
python server.py
```
### Next, connect using the client app:
For client-side setup, refer to the [JavaScript Guide](client/javascript/README.md).
## Important Note
Ensure the bot server is running before using any client implementations.
## Requirements
- Python 3.10+
- Node.js 16+ (for JavaScript)
- Daily API key
- Cartesia API key
- Modern web browser with WebRTC support

View File

@@ -1,27 +0,0 @@
# JavaScript Implementation
Basic implementation using the [Pipecat JavaScript SDK](https://docs.pipecat.ai/client/js/introduction).
## Setup
1. Run the bot server. See the [server README](../../README).
2. Navigate to the `client/javascript` directory:
```bash
cd client/javascript
```
3. Install dependencies:
```bash
npm install
```
4. Run the client app:
```
npm run dev
```
5. Visit http://localhost:5173 in your browser.

View File

@@ -1,34 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>AI Chatbot</title>
</head>
<body>
<div class="container">
<div class="status-bar">
<div class="status">
Status: <span id="connection-status">Disconnected</span>
</div>
<div class="controls">
<button id="connect-btn">Connect</button>
<button id="disconnect-btn" disabled>Disconnect</button>
</div>
</div>
<audio id="bot-audio" autoplay></audio>
<div class="debug-panel">
<h3>Debug Info</h3>
<div id="debug-log"></div>
</div>
</div>
<script type="module" src="/src/app.js"></script>
<link rel="stylesheet" href="/src/style.css">
</body>
</html>

File diff suppressed because it is too large Load Diff

View File

@@ -1,216 +0,0 @@
/**
* Copyright (c) 20242025, Daily
*
* SPDX-License-Identifier: BSD 2-Clause License
*/
import Daily from "@daily-co/daily-js";
/**
* ChatbotClient handles the connection and media management for a real-time
* voice interaction with an AI bot.
*/
class ChatbotClient {
constructor() {
// Initialize client state
this.dailyCallObject = null;
this.setupDOMElements();
this.setupEventListeners();
}
/**
* Set up references to DOM elements and create necessary media elements
*/
setupDOMElements() {
// Get references to UI control elements
this.connectBtn = document.getElementById('connect-btn');
this.disconnectBtn = document.getElementById('disconnect-btn');
this.statusSpan = document.getElementById('connection-status');
this.debugLog = document.getElementById('debug-log');
// Create an audio element for bot's voice output
this.botAudio = document.createElement('audio');
this.botAudio.autoplay = true;
this.botAudio.playsInline = true;
document.body.appendChild(this.botAudio);
}
/**
* Set up event listeners for connect/disconnect buttons
*/
setupEventListeners() {
this.connectBtn.addEventListener('click', () => this.connect());
this.disconnectBtn.addEventListener('click', () => this.disconnect());
}
/**
* Add a timestamped message to the debug log
*/
log(message) {
const entry = document.createElement('div');
entry.textContent = `${new Date().toISOString()} - ${message}`;
// Add styling based on message type
if (message.startsWith('User: ')) {
entry.style.color = '#2196F3'; // blue for user
} else if (message.startsWith('Bot: ')) {
entry.style.color = '#4CAF50'; // green for bot
}
this.debugLog.appendChild(entry);
this.debugLog.scrollTop = this.debugLog.scrollHeight;
console.log(message);
}
/**
* Update the connection status display
*/
updateStatus(status) {
this.statusSpan.textContent = status;
this.log(`Status: ${status}`);
}
handleEventToConsole (evt) {
this.log(`Received event: ${evt.action}`);
};
/**
* Set up listeners for track events (start/stop)
* This handles new tracks being added during the session
*/
setupTrackListeners() {
if (!this.dailyCallObject) return;
this.dailyCallObject.on("joined-meeting", () => {
this.updateStatus('Connected');
this.connectBtn.disabled = true;
this.disconnectBtn.disabled = false;
this.log('Client connected');
});
this.dailyCallObject.on("track-started", (evt) => {
if (evt.track.kind === "audio" && evt.participant.local === false) {
this.log("Audio track started.")
this.setupAudioTrack(evt.track);
}
});
this.dailyCallObject.on("track-stopped", this.handleEventToConsole.bind(this));
this.dailyCallObject.on("participant-joined", this.handleEventToConsole.bind(this));
this.dailyCallObject.on("participant-updated", this.handleEventToConsole.bind(this));
this.dailyCallObject.on("participant-left", () => {
// When the bot leaves, we are also disconnecting from the call
this.disconnect()
});
this.dailyCallObject.on("left-meeting", () => {
this.updateStatus('Disconnected');
this.connectBtn.disabled = false;
this.disconnectBtn.disabled = true;
this.log('Client disconnected');
});
this.dailyCallObject.on("error", this.handleEventToConsole.bind(this));
}
/**
* Set up an audio track for playback
* Handles both initial setup and track updates
*/
setupAudioTrack(track) {
this.log(`Setting up audio track, track state: ${track.readyState}, muted: ${track.muted}`);
// Check if we're already playing this track
if (this.botAudio.srcObject) {
const oldTrack = this.botAudio.srcObject.getAudioTracks()[0];
if (oldTrack?.id === track.id) return;
}
// Create a new MediaStream with the track and set it as the audio source
this.botAudio.srcObject = new MediaStream([track]);
this.botAudio.onplaying = async (event) => {
this.log("onplaying")
this.log("Will send the audio message to play the audio at the next tick")
this.dailyCallObject.sendAppMessage("playable")
}
}
async fetchRoomInfo() {
let connectUrl = '/connect'
let res = await fetch(connectUrl, {
method: "POST",
mode: "cors",
headers: new Headers({
"Content-Type": "application/json"
}),
})
if (res.ok) {
return res.json();
}
}
/**
* Initialize and connect to the bot
* This sets up the RTVI client, initializes devices, and establishes the connection
*/
async connect() {
try {
// Initialize the client
this.dailyCallObject = Daily.createCallObject({
subscribeToTracksAutomatically: true,
});
// Set up listeners for media track events
this.setupTrackListeners();
this.log('Creating the bot...');
let roomInfo = await this.fetchRoomInfo()
// Connect to the bot
this.log('Connecting to bot...');
// Only for making debugger easier
window.callObject = this.dailyCallObject;
await this.dailyCallObject.join({
url: roomInfo.room_url,
});
this.log('Connection complete');
} catch (error) {
// Handle any errors during connection
this.log(`Error connecting: ${error.message}`);
this.log(`Error stack: ${error.stack}`);
this.updateStatus('Error');
// Clean up if there's an error
if (this.dailyCallObject) {
try {
await this.dailyCallObject.leave();
} catch (disconnectError) {
this.log(`Error during disconnect: ${disconnectError.message}`);
}
}
}
}
/**
* Disconnect from the bot and clean up media resources
*/
async disconnect() {
if (this.dailyCallObject) {
try {
// Disconnect the RTVI client
await this.dailyCallObject.leave();
await this.dailyCallObject.destroy();
this.dailyCallObject = null;
// Clean up audio
if (this.botAudio.srcObject) {
this.botAudio.srcObject.getTracks().forEach((track) => track.stop());
this.botAudio.srcObject = null;
}
} catch (error) {
this.log(`Error disconnecting: ${error.message}`);
}
}
}
}
// Initialize the client when the page loads
window.addEventListener('DOMContentLoaded', () => {
new ChatbotClient();
});

View File

@@ -1,13 +0,0 @@
import { defineConfig } from 'vite';
export default defineConfig({
server: {
proxy: {
// Proxy /api requests to the backend server
'/connect': {
target: 'http://0.0.0.0:7860', // Replace with your backend URL
changeOrigin: true,
},
},
},
});

View File

@@ -1,60 +0,0 @@
# React Native Implementation
Basic implementation using the [Pipecat React Native SDK](https://docs.pipecat.ai/client/react-native/introduction).
## Usage
### Expo requirements
This project cannot be used with an [Expo Go](https://docs.expo.dev/workflow/expo-go/) app because [it requires custom native code](https://docs.expo.io/workflow/customizing/).
When a project requires custom native code or a config plugin, we need to transition from using [Expo Go](https://docs.expo.dev/workflow/expo-go/)
to a [development build](https://docs.expo.dev/development/introduction/).
More details about the custom native code used by this demo can be found in [rn-daily-js-expo-config-plugin](https://github.com/daily-co/rn-daily-js-expo-config-plugin).
### Building remotely
If you do not have experience with Xcode and Android Studio builds or do not have them installed locally on your computer, you will need to follow [this guide from Expo to use EAS Build](https://docs.expo.dev/development/create-development-builds/#create-and-install-eas-build).
### Building locally
You will need to have installed locally on your computer:
- [Xcode](https://developer.apple.com/xcode/) to build for iOS;
- [Android Studio](https://developer.android.com/studio) to build for Android;
#### Install the demo dependencies
```bash
# Use the version of node specified in .nvmrc
nvm i
# Install dependencies
npm i
# Before a native app can be compiled, the native source code must be generated.
npx expo prebuild
# Configure the environment variable to connect to the local server
cp env.example .env
# edit .env and add your local ip address, for example: http://192.168.1.16:7860
```
#### Running on Android
After plugging in an Android device [configured for debugging](https://developer.android.com/studio/debug/dev-options), run the following command:
```
npm run android
```
#### Running on iOS
Run the following command:
```
npm run ios
```
#### Connect to the server
Use the http://localhost:5173 in your app.

View File

@@ -1,75 +0,0 @@
{
"expo": {
"name": "bot-ready-rn",
"slug": "bot-ready-rn",
"version": "1.0.0",
"orientation": "portrait",
"icon": "./assets/icon.png",
"userInterfaceStyle": "light",
"splash": {
"image": "./assets/splash.png",
"resizeMode": "contain",
"backgroundColor": "#ffffff"
},
"updates": {
"fallbackToCacheTimeout": 0
},
"assetBundlePatterns": [
"**/*"
],
"ios": {
"supportsTablet": true,
"bitcode": false,
"bundleIdentifier": "co.daily.expo.BotReady",
"infoPlist": {
"UIBackgroundModes": [
"voip"
]
},
"appleTeamId": "EEBGKV9N3N"
},
"android": {
"adaptiveIcon": {
"foregroundImage": "./assets/adaptive-icon.png",
"backgroundColor": "#FFFFFF"
},
"package": "co.daily.expo.BotReady",
"permissions": [
"android.permission.ACCESS_NETWORK_STATE",
"android.permission.BLUETOOTH",
"android.permission.CAMERA",
"android.permission.INTERNET",
"android.permission.MODIFY_AUDIO_SETTINGS",
"android.permission.RECORD_AUDIO",
"android.permission.SYSTEM_ALERT_WINDOW",
"android.permission.WAKE_LOCK",
"android.permission.FOREGROUND_SERVICE",
"android.permission.FOREGROUND_SERVICE_CAMERA",
"android.permission.FOREGROUND_SERVICE_MICROPHONE",
"android.permission.FOREGROUND_SERVICE_MEDIA_PROJECTION",
"android.permission.POST_NOTIFICATIONS"
]
},
"web": {
"favicon": "./assets/favicon.png"
},
"plugins": [
"@config-plugins/react-native-webrtc",
"@daily-co/config-plugin-rn-daily-js",
[
"expo-build-properties",
{
"android": {
"minSdkVersion": 24,
"compileSdkVersion": 35,
"targetSdkVersion": 34,
"buildToolsVersion": "35.0.0"
},
"ios": {
"deploymentTarget": "15.1"
}
}
]
]
}
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 17 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.4 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 22 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 46 KiB

View File

@@ -1,7 +0,0 @@
module.exports = function(api) {
api.cache(true);
return {
presets: ['babel-preset-expo'],
plugins: [["module:react-native-dotenv"]],
};
};

View File

@@ -1 +0,0 @@
API_BASE_URL=http://YOUR_LOCAL_IP:7860

View File

@@ -1,7 +0,0 @@
import { registerRootComponent } from "expo";
import App from "./src/App";
// registerRootComponent calls AppRegistry.registerComponent('main', () => App);
// It also ensures that the environment is set up appropriately
registerRootComponent(App);

View File

@@ -1,4 +0,0 @@
// Learn more https://docs.expo.io/guides/customizing-metro
const { getDefaultConfig } = require('expo/metro-config');
module.exports = getDefaultConfig(__dirname);

File diff suppressed because it is too large Load Diff

View File

@@ -1,31 +0,0 @@
{
"name": "bot-ready-rn",
"version": "1.0.0",
"scripts": {
"start": "expo start --dev-client",
"android": "expo run:android --device",
"ios": "expo run:ios --device",
"web": "expo start --web"
},
"dependencies": {
"@config-plugins/react-native-webrtc": "^10.0.0",
"@daily-co/config-plugin-rn-daily-js": "0.0.7",
"@daily-co/react-native-daily-js": "^0.70.0",
"@daily-co/react-native-webrtc": "^118.0.3-daily.2",
"@react-native-async-storage/async-storage": "1.23.1",
"expo": "^52.0.0",
"expo-build-properties": "~0.13.1",
"expo-dev-client": "~5.0.5",
"expo-splash-screen": "~0.29.16",
"expo-status-bar": "~2.0.0",
"react": "18.3.1",
"react-native": "0.76.3",
"react-native-background-timer": "^2.4.1",
"react-native-dotenv": "^3.4.11",
"react-native-get-random-values": "^1.11.0"
},
"devDependencies": {
"@babel/core": "^7.12.9"
},
"private": true
}

View File

@@ -1,121 +0,0 @@
import React, { useState, useEffect } from 'react';
import {SafeAreaView, View, Text, Button, StyleSheet, ScrollView} from 'react-native';
import Daily from "@daily-co/react-native-daily-js";
import { API_BASE_URL } from "@env";
const CallScreen = () => {
const [connectionStatus, setConnectionStatus] = useState('Disconnected');
const [isConnected, setIsConnected] = useState(false);
const [callObject, setCallObject] = useState(null);
const [logs, setLogs] = useState([]);
useEffect(() => {
if (callObject) {
setupTrackListeners(callObject);
}
}, [callObject]);
const log = (message) => {
setLogs((prevLogs) => [...prevLogs, `${new Date().toISOString()} - ${message}`]);
console.log(message);
};
const setupTrackListeners = (callObject) => {
callObject.on("joined-meeting", () => {
setConnectionStatus('Connected');
setIsConnected(true);
log('Client connected');
});
callObject.on("left-meeting", () => {
setConnectionStatus('Disconnected');
setIsConnected(false);
log('Client disconnected');
});
callObject.on("participant-left", () => {
// When the bot leaves, we are also disconnecting from the call
disconnect().catch((err) => {
log(`Failed to disconnect ${err}`);
})
});
// Trigger so the bot can start sending audio
callObject.on("track-started", (evt) => {
if (evt.track.kind === "audio" && evt.participant.local === false) {
handleEventToConsole(evt)
log("Sending the message that will trigger the bot to play the audio.")
callObject.sendAppMessage("playable")
}
});
callObject.on("error", (evt) => log(`Error: ${evt.error}`));
// Other events just for awareness
callObject.on("track-stopped", handleEventToConsole);
callObject.on("participant-joined", handleEventToConsole);
callObject.on("participant-updated", handleEventToConsole);
};
const handleEventToConsole = (evt) => {
log(`Received event: ${evt.action}`);
};
const connect = async () => {
try {
const callObject = Daily.createCallObject({ subscribeToTracksAutomatically: true });
setCallObject(callObject);
const connectionUrl = `${API_BASE_URL}/connect`
const res = await fetch(connectionUrl, { method: "POST", headers: { "Content-Type": "application/json" } });
const roomInfo = await res.json();
await callObject.join({ url: roomInfo.room_url });
} catch (error) {
log(`Error connecting: ${error.message}`);
}
};
const disconnect = async () => {
if (callObject) {
try {
await callObject.leave();
await callObject.destroy();
setCallObject(null);
} catch (error) {
log(`Error disconnecting: ${error.message}`);
}
}
};
return (
<SafeAreaView style={styles.safeArea}>
<View style={styles.container}>
<View style={styles.statusBar}>
<Text>Status: <Text style={styles.status}>{connectionStatus}</Text></Text>
<View style={styles.controls}>
<Button
title={isConnected ? "Disconnect" : "Connect"}
onPress={isConnected ? disconnect : connect}
/>
</View>
</View>
<View style={styles.debugPanel}>
<Text style={styles.debugTitle}>Debug Info</Text>
<ScrollView style={styles.debugLog}>
{logs.map((logEntry, index) => (
<Text key={index} style={styles.logText}>{logEntry}</Text>
))}
</ScrollView>
</View>
</View>
</SafeAreaView>
);
};
const styles = StyleSheet.create({
safeArea: { flex: 1, backgroundColor: '#f0f0f0', padding: 20 },
container: { flex: 1, margin: 20 },
statusBar: { flexDirection: 'row', justifyContent: 'space-between', alignItems: 'center', padding: 10, backgroundColor: '#fff', borderRadius: 8, marginBottom: 20 },
status: { fontWeight: 'bold' },
controls: { flexDirection: 'row', gap: 10 },
debugPanel: { height: '80%', backgroundColor: '#fff', borderRadius: 8, padding: 20},
debugTitle: { fontSize: 16, fontWeight: 'bold' },
debugLog: { height: '100%', overflow: 'scroll', backgroundColor: '#f8f8f8', padding: 10, borderRadius: 4, fontFamily: 'monospace', fontSize: 12, lineHeight: 1.4 },
});
export default CallScreen;

View File

@@ -1,50 +0,0 @@
# Bot ready signaling Server
A FastAPI server that manages bot instances and provide endpoint for Pipecat client connections.
## Endpoints
- `POST /connect` - Pipecat client connection endpoint
## Environment Variables
Copy `env.example` to `.env` and configure:
```ini
# Required API Keys
DAILY_API_KEY= # Your Daily API key
CARTESIA_API_KEY= # Your Cartesia API key
# Optional Configuration
DAILY_API_URL= # Optional: Daily API URL (defaults to https://api.daily.co/v1)
DAILY_SAMPLE_ROOM_URL= # Optional: Fixed room URL for development
HOST= # Optional: Host address (defaults to 0.0.0.0)
FAST_API_PORT= # Optional: Port number (defaults to 7860)
```
## Running the Server
Set up and activate your virtual environment:
```bash
python3 -m venv venv
source venv/bin/activate # On Windows: venv\Scripts\activate
```
Install dependencies:
```bash
pip install -r requirements.txt
```
If you want to use the local version of `pipecat` in this repo rather than the last published version, also run:
```bash
pip install --editable "../../../[daily,cartesia,openai]"
```
Run the server:
```bash
python server.py
```

View File

@@ -1,3 +0,0 @@
DAILY_SAMPLE_ROOM_URL=https://yourdomain.daily.co/yourroom # (for joining the bot to the same room repeatedly for local dev)
DAILY_API_KEY=
CARTESIA_API_KEY=

View File

@@ -1,4 +0,0 @@
python-dotenv
fastapi[all]
uvicorn
pipecat-ai[daily,cartesia,openai]

View File

@@ -1,147 +0,0 @@
#
# Copyright (c) 2025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import argparse
import os
import subprocess
from contextlib import asynccontextmanager
from typing import Any, Dict
import aiohttp
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper, DailyRoomParams
# Load environment variables from .env file
load_dotenv(override=True)
# Dictionary to track bot processes: {pid: (process, room_url)}
bot_procs = {}
# Store Daily API helpers
daily_helpers = {}
def cleanup():
"""Cleanup function to terminate all bot processes.
Called during server shutdown.
"""
for entry in bot_procs.values():
proc = entry[0]
proc.terminate()
proc.wait()
@asynccontextmanager
async def lifespan(app: FastAPI):
"""FastAPI lifespan manager that handles startup and shutdown tasks.
- Creates aiohttp session
- Initializes Daily API helper
- Cleans up resources on shutdown
"""
aiohttp_session = aiohttp.ClientSession()
daily_helpers["rest"] = DailyRESTHelper(
daily_api_key=os.getenv("DAILY_API_KEY", ""),
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
aiohttp_session=aiohttp_session,
)
yield
await aiohttp_session.close()
cleanup()
# Initialize FastAPI app with lifespan manager
app = FastAPI(lifespan=lifespan)
# Configure CORS to allow requests from any origin
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
async def create_room_and_token() -> tuple[str, str]:
"""Helper function to create a Daily room and generate an access token.
Returns:
tuple[str, str]: A tuple containing (room_url, token)
Raises:
HTTPException: If room creation or token generation fails
"""
room = await daily_helpers["rest"].create_room(DailyRoomParams())
if not room.url:
raise HTTPException(status_code=500, detail="Failed to create room")
token = await daily_helpers["rest"].get_token(room.url)
if not token:
raise HTTPException(status_code=500, detail=f"Failed to get token for room: {room.url}")
return room.url, token
@app.post("/connect")
async def bot_connect(request: Request) -> Dict[Any, Any]:
"""Connect endpoint that creates a room and returns connection credentials.
This endpoint is called by client to establish a connection.
Returns:
Dict[Any, Any]: Authentication bundle containing room_url and token
Raises:
HTTPException: If room creation, token generation, or bot startup fails
"""
print("Creating room for RTVI connection")
room_url, token = await create_room_and_token()
print(f"Room URL: {room_url}")
# Start the bot process
try:
bot_file = "signalling_bot"
proc = subprocess.Popen(
[f"python3 -m {bot_file} -u {room_url} -t {token}"],
shell=True,
bufsize=1,
cwd=os.path.dirname(os.path.abspath(__file__)),
)
bot_procs[proc.pid] = (proc, room_url)
except Exception as e:
raise HTTPException(status_code=500, detail=f"Failed to start subprocess: {e}")
# Return the authentication bundle in format expected by DailyTransport
return {"room_url": room_url, "token": token}
if __name__ == "__main__":
import uvicorn
# Parse command line arguments for server configuration
default_host = os.getenv("HOST", "0.0.0.0")
default_port = int(os.getenv("FAST_API_PORT", "7860"))
parser = argparse.ArgumentParser(description="Daily Travel Companion FastAPI server")
parser.add_argument("--host", type=str, default=default_host, help="Host address")
parser.add_argument("--port", type=int, default=default_port, help="Port number")
parser.add_argument("--reload", action="store_true", help="Reload code on change")
config = parser.parse_args()
# Start the FastAPI server
uvicorn.run(
"server:app",
host=config.host,
port=config.port,
reload=config.reload,
)

View File

@@ -1,95 +0,0 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import os
import sys
from dataclasses import dataclass
import aiohttp
from dotenv import load_dotenv
from loguru import logger
from runner import configure
from pipecat.frames.frames import AudioRawFrame, EndFrame, OutputAudioRawFrame, TTSSpeakFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineTask
from pipecat.services.cartesia.tts import CartesiaTTSService
from pipecat.transports.services.daily import DailyParams, DailyTransport
load_dotenv(override=True)
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
@dataclass
class SilenceFrame(OutputAudioRawFrame):
def __init__(
self,
*,
sample_rate: int,
duration: float,
):
# Initialize the parent class with the silent frame's data
super().__init__(
audio=self.create_silent_audio_frame(sample_rate, 1, duration).audio,
sample_rate=sample_rate,
num_channels=1,
)
@staticmethod
def create_silent_audio_frame(
sample_rate: int, num_channels: int, duration: float
) -> AudioRawFrame:
"""Create an AudioRawFrame containing silence."""
frame_size = num_channels * 2 # 2 bytes per sample for 16-bit audio
total_frames = int(sample_rate * duration)
total_bytes = total_frames * frame_size
silent_audio = bytes(total_bytes) # Create a byte array filled with zeros
return AudioRawFrame(audio=silent_audio, sample_rate=sample_rate, num_channels=num_channels)
async def main():
async with aiohttp.ClientSession() as session:
(room_url, _) = await configure(session)
transport = DailyTransport(
room_url, None, "Say One Thing", DailyParams(audio_out_enabled=True)
)
tts = CartesiaTTSService(
api_key=os.getenv("CARTESIA_API_KEY"),
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
)
runner = PipelineRunner()
task = PipelineTask(Pipeline([tts, transport.output()]))
# Register an event handler so we can play the audio when we receive a specific message
@transport.event_handler("on_app_message")
async def on_app_message(transport, message, sender):
logger.debug(f"Received app message: {message} - {sender}")
if "playable" not in message:
return
await task.queue_frames(
[
SilenceFrame(
sample_rate=task.params.audio_out_sample_rate,
duration=0.5,
),
TTSSpeakFrame(f"Hello there, how are you doing today ?"),
EndFrame(),
]
)
await runner.run(task)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -2,7 +2,7 @@
__pycache__/
*.py[cod]
*$py.class
recordings/
# C extensions
*.so

View File

@@ -1,12 +1,7 @@
FROM python:3.10-bullseye
RUN mkdir /app
RUN mkdir /app/assets
RUN mkdir /app/utils
COPY *.py /app/
COPY requirements.txt /app/
WORKDIR /app
RUN pip3 install -r requirements.txt

View File

@@ -0,0 +1,66 @@
# Chatbot with canonical-metrics
This project implements a chatbot using a pipeline architecture that integrates audio processing, transcription, and a language model for conversational interactions. The chatbot operates within a daily communication environment, utilizing various services for text-to-speech and language model responses.
## Features
- **Audio Input and Output**: Captures microphone input and plays back audio responses.
- **Voice Activity Detection**: Utilizes Silero VAD to manage audio input intelligently.
- **Text-to-Speech**: Integrates ElevenLabs TTS service to convert text responses into audio.
- **Language Model Interaction**: Uses OpenAI's GPT-4 model to generate responses based on user input.
- **Transcription Services**: Captures and transcribes participant speech for analytics.
- **Metrics Collection**: Sends audio data for analysis via Canonical Metrics Service.
## Requirements
- Python 3.10+
- `python-dotenv`
- Additional libraries from the `pipecat` package.
## Setup
1. Clone the repository.
2. Install the required packages.
3. Set up environment variables for API keys:
- `OPENAI_API_KEY`
- `ELEVENLABS_API_KEY`
- `CANONICAL_API_KEY`
- `CANONICAL_API_URL`
4. Run the script.
## Usage
The chatbot introduces itself and engages in conversations, providing brief and creative responses. Designed for flexibility, it can support multiple languages with appropriate configuration.
## Events
- Participants joining or leaving the call are handled dynamically, adjusting the chatbot's behavior accordingly.
The first time, things might take extra time to get started since VAD (Voice Activity Detection) model needs to be downloaded.
## Get started
```python
python3 -m venv venv
source venv/bin/activate
pip install -r requirements.txt
cp env.example .env # and add your credentials
```
## Run the server
```bash
python server.py
```
Then, visit `http://localhost:7860/` in your browser to start a chatbot session.
## Build and test the Docker image
```
docker build -t chatbot .
docker run --env-file .env -p 7860:7860 chatbot
```

View File

@@ -0,0 +1,145 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import os
import sys
import uuid
import aiohttp
from dotenv import load_dotenv
from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame, LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
from pipecat.services.canonical import CanonicalMetricsService
from pipecat.services.elevenlabs import ElevenLabsTTSService
from pipecat.services.openai import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
load_dotenv(override=True)
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
async def main():
async with aiohttp.ClientSession() as session:
(room_url, token) = await configure(session)
transport = DailyTransport(
room_url,
token,
"Chatbot",
DailyParams(
audio_out_enabled=True,
audio_in_enabled=True,
camera_out_enabled=False,
vad_enabled=True,
vad_audio_passthrough=True,
vad_analyzer=SileroVADAnalyzer(),
transcription_enabled=True,
#
# Spanish
#
# transcription_settings=DailyTranscriptionSettings(
# language="es",
# tier="nova",
# model="2-general"
# )
),
)
tts = ElevenLabsTTSService(
api_key=os.getenv("ELEVENLABS_API_KEY"),
#
# English
#
voice_id="cgSgspJ2msm6clMCkdW9",
aiohttp_session=session,
#
# Spanish
#
# model="eleven_multilingual_v2",
# voice_id="gD1IexrzCvsXPHUuT0s3",
)
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
messages = [
{
"role": "system",
#
# English
#
"content": "You are Chatbot, a friendly, helpful robot. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by introducing yourself. Keep all your responses to 12 words or fewer.",
#
# Spanish
#
# "content": "Eres Chatbot, un amigable y útil robot. Tu objetivo es demostrar tus capacidades de una manera breve. Tus respuestas se convertiran a audio así que nunca no debes incluir caracteres especiales. Contesta a lo que el usuario pregunte de una manera creativa, útil y breve. Empieza por presentarte a ti mismo.",
},
]
context = OpenAILLMContext(messages)
context_aggregator = llm.create_context_aggregator(context)
"""
CanonicalMetrics uses AudioBufferProcessor under the hood to buffer the audio. On
call completion, CanonicalMetrics will send the audio buffer to Canonical for
analysis. Visit https://voice.canonical.chat to learn more.
"""
audio_buffer_processor = AudioBufferProcessor()
canonical = CanonicalMetricsService(
audio_buffer_processor=audio_buffer_processor,
aiohttp_session=session,
api_key=os.getenv("CANONICAL_API_KEY"),
call_id=str(uuid.uuid4()),
assistant="pipecat-chatbot",
assistant_speaks_first=True,
)
pipeline = Pipeline(
[
transport.input(), # microphone
context_aggregator.user(),
llm,
tts,
transport.output(),
audio_buffer_processor, # captures audio into a buffer
canonical, # uploads audio buffer to Canonical AI for metrics
context_aggregator.assistant(),
]
)
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
await transport.capture_participant_transcription(participant["id"])
await task.queue_frames([LLMMessagesFrame(messages)])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
print(f"Participant left: {participant}")
await task.queue_frame(EndFrame())
@transport.event_handler("on_call_state_updated")
async def on_call_state_updated(transport, state):
if state == "left":
await task.queue_frame(EndFrame())
runner = PipelineRunner()
await runner.run(task)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -1,4 +1,6 @@
DAILY_SAMPLE_ROOM_URL=https://yourdomain.daily.co/yourroom # (for joining the bot to the same room repeatedly for local dev)
DAILY_API_KEY=7df...
OPENAI_API_KEY=sk-PL...
ELEVENLABS_API_KEY=aeb...
ELEVENLABS_API_KEY=aeb...
CANONICAL_API_KEY=can...
CANONICAL_API_URL=

View File

@@ -0,0 +1,5 @@
python-dotenv
fastapi[all]
uvicorn
pipecat-ai[daily,openai,silero,elevenlabs,canonical]

View File

@@ -1,5 +1,5 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
@@ -53,3 +53,4 @@ async def configure(aiohttp_session: aiohttp.ClientSession):
token = await daily_rest_helper.get_token(url, expiry_time)
return (url, token)
return (url, token)

View File

@@ -1,5 +1,5 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

View File

@@ -1,5 +1,5 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
@@ -18,13 +18,14 @@ from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame, LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
from pipecat.services.openai.llm import OpenAILLMService
from pipecat.services.elevenlabs import ElevenLabsTTSService
from pipecat.services.openai import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
load_dotenv(override=True)
@@ -32,16 +33,10 @@ load_dotenv(override=True)
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
# Create the recordings directory if it doesn't exist
os.makedirs("recordings", exist_ok=True)
async def save_audio(audio: bytes, sample_rate: int, num_channels: int, name: str):
async def save_audio(audio: bytes, sample_rate: int, num_channels: int):
if len(audio) > 0:
filename = os.path.join(
"recordings",
f"{name}_conversation_recording{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.wav",
)
filename = f"conversation_recording{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.wav"
with io.BytesIO() as buffer:
with wave.open(buffer, "wb") as wf:
wf.setsampwidth(2)
@@ -66,7 +61,9 @@ async def main():
DailyParams(
audio_out_enabled=True,
audio_in_enabled=True,
video_out_enabled=False,
camera_out_enabled=False,
vad_enabled=True,
vad_audio_passthrough=True,
vad_analyzer=SileroVADAnalyzer(),
transcription_enabled=True,
#
@@ -86,6 +83,7 @@ async def main():
# English
#
voice_id="cgSgspJ2msm6clMCkdW9",
aiohttp_session=session,
#
# Spanish
#
@@ -93,7 +91,7 @@ async def main():
# voice_id="gD1IexrzCvsXPHUuT0s3",
)
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
messages = [
{
@@ -112,9 +110,8 @@ async def main():
context = OpenAILLMContext(messages)
context_aggregator = llm.create_context_aggregator(context)
# NOTE: Watch out! This will save all the conversation in memory. You
# can pass `buffer_size` to get periodic callbacks.
audiobuffer = AudioBufferProcessor(enable_turn_audio=True)
# Save audio every 10 seconds.
audiobuffer = AudioBufferProcessor(buffer_size=480000)
pipeline = Pipeline(
[
@@ -128,30 +125,21 @@ async def main():
]
)
task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
@audiobuffer.event_handler("on_audio_data")
async def on_audio_data(buffer, audio, sample_rate, num_channels):
await save_audio(audio, sample_rate, num_channels, "full")
@audiobuffer.event_handler("on_user_turn_audio_data")
async def on_user_turn_audio_data(buffer, audio, sample_rate, num_channels):
await save_audio(audio, sample_rate, num_channels, "user")
@audiobuffer.event_handler("on_bot_turn_audio_data")
async def on_bot_turn_audio_data(buffer, audio, sample_rate, num_channels):
await save_audio(audio, sample_rate, num_channels, "bot")
await save_audio(audio, sample_rate, num_channels)
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
await audiobuffer.start_recording()
await transport.capture_participant_transcription(participant["id"])
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMMessagesFrame(messages)])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
print(f"Participant left: {participant}")
await task.cancel()
await task.queue_frame(EndFrame())
runner = PipelineRunner()

View File

@@ -1,5 +1,5 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
@@ -53,3 +53,4 @@ async def configure(aiohttp_session: aiohttp.ClientSession):
token = await daily_rest_helper.get_token(url, expiry_time)
return (url, token)
return (url, token)

View File

@@ -1,5 +1,5 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

View File

@@ -1,39 +0,0 @@
# Daily Custom Tracks
This example shows how to send and receive Daily custom tracks. We will run a simple `daily-python` application to send an audio file with a custom track (named "pipecat") to a room. Then, the Pipecat bot will mirror that custom track into another custom track (named "pipecat-mirror") in the same room.
## Get started
```python
python3 -m venv venv
source venv/bin/activate
pip install -r requirements.txt
```
## Run the bot
Start the bot by giving it a Daily room URL.
```bash
python bot.py -u ROOM_URL
```
The bot will wait for the first participant to join. Then, it will mirror a custom track named "pipecat" into a new custom track named "pipecat-mirror".
## Run the sender
Now, run the custom track sender. This is a simple `daily-python` application that opens and audio file and sends it as a custom track to the same Daily room.
```bash
python custom_track_sender.py -u ROOM_URL -i office-ambience-mono-16000.mp3
```
## Open client
Finally, open the client so you can hear both custom tracks.
```bash
open index.html
```
Once the client is opened, copy the URL of the Daily room and join it. You should be able to select which custom track you want to hear.

View File

@@ -1,87 +0,0 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import sys
import aiohttp
from loguru import logger
from runner import configure
from pipecat.frames.frames import Frame, InputAudioRawFrame, OutputAudioRawFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.transports.services.daily import DailyParams, DailyTransport
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
class CustomTrackMirrorProcessor(FrameProcessor):
def __init__(self, transport_destination: str, **kwargs):
super().__init__(**kwargs)
self._transport_destination = transport_destination
async def process_frame(self, frame: Frame, direction: FrameDirection):
await super().process_frame(frame, direction)
if isinstance(frame, InputAudioRawFrame) and frame.transport_source:
output_frame = OutputAudioRawFrame(
audio=frame.audio,
sample_rate=frame.sample_rate,
num_channels=frame.num_channels,
)
output_frame.transport_destination = self._transport_destination
await self.push_frame(output_frame)
else:
await self.push_frame(frame, direction)
async def main():
async with aiohttp.ClientSession() as session:
(room_url, _) = await configure(session)
transport = DailyTransport(
room_url,
None,
"Custom tracks mirror",
DailyParams(
audio_in_enabled=True,
audio_out_enabled=True,
microphone_out_enabled=False, # Disable since we just use custom tracks
audio_out_destinations=["pipecat-mirror"],
),
)
pipeline = Pipeline(
[
transport.input(), # Transport user input
CustomTrackMirrorProcessor("pipecat-mirror"),
transport.output(), # Transport bot output
]
)
task = PipelineTask(
pipeline,
params=PipelineParams(
audio_in_sample_rate=16000,
audio_out_sample_rate=16000,
),
)
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
await transport.capture_participant_audio(participant["id"], audio_source="pipecat")
runner = PipelineRunner()
await runner.run(task)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -1,74 +0,0 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import argparse
import time
from daily import CallClient, CustomAudioSource, Daily
from pydub import AudioSegment
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
parser.add_argument("-u", "--url", type=str, required=True, help="URL of the Daily room to join")
parser.add_argument(
"-i", "--input", type=str, required=True, help="Input audio file (needs 16000 sample rate)"
)
args, _ = parser.parse_known_args()
audio = AudioSegment.from_mp3(args.input)
raw_bytes = audio.raw_data
sample_rate = audio.frame_rate
channels = audio.channels
print(f"Length: {len(raw_bytes)} bytes")
print(f"Sample rate: {sample_rate}, Channels: {channels}")
# Initialize the Daily context & create call client
Daily.init()
client = CallClient()
# Join the room and indicate we have a custom track named "pipecat".
client.join(
args.url,
client_settings={
"publishing": {
"camera": False,
"microphone": False,
"customAudio": {"pipecat": True},
},
},
)
# Just sleep for a couple of seconds. To do this well we should really use
# completions.
time.sleep(2)
# Create the custom audio source. This is where we will write our audio.
audio_source = CustomAudioSource(sample_rate, channels)
# Create an audio track and assign it our audio source.
client.add_custom_audio_track("pipecat", audio_source)
# Just sleep for a second. To do this well we should really use completions.
time.sleep(1)
try:
# Just write one second of audio until we have read all the file.
chunk_size = sample_rate * channels * 2
while len(raw_bytes) > 0:
chunk = raw_bytes[:chunk_size]
raw_bytes = raw_bytes[chunk_size:]
audio_source.write_frames(chunk)
except KeyboardInterrupt:
client.leave()
# Just sleep for a second. To do this well we should really use completions.
time.sleep(1)
client.release()

View File

@@ -1,173 +0,0 @@
<html>
<head>
<title>daily custom tracks</title>
</head>
<script crossorigin src="https://unpkg.com/@daily-co/daily-js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/fomantic-ui/2.8.6/semantic.min.js"></script>
<link
rel="stylesheet"
type="text/css"
href="https://cdnjs.cloudflare.com/ajax/libs/fomantic-ui/2.8.6/semantic.min.css"
/>
<script>
function enableButton(buttonId, enable) {
const button = document.getElementById(buttonId);
button.disabled = !enable;
}
function enableJoinButton(enable) {
enableButton("join-button", enable);
}
function enableLeaveButton(enable) {
enableButton("leave-button", enable);
}
function destroyPlayers(query) {
const items = document.querySelectorAll(query);
if (items) {
for (const item of items) {
item.remove();
}
}
}
function destroyParticipantPlayers(participantId) {
destroyPlayers(`audio[data-participant-id="${participantId}"]`);
destroyPlayers(`button[data-participant-id="${participantId}"]`);
}
async function startPlayer(player, track) {
player.muted = false;
player.autoplay = true;
if (track != null) {
player.srcObject = new MediaStream([track]);
}
}
async function buildAudioPlayer(track, participantId) {
const audioContainer = document.getElementById("audio-container");
const player = document.createElement("audio");
player.dataset.participantId = participantId;
// Create a new button for controlling audio
const audioControlButton = document.createElement("button");
audioControlButton.className = "ui primary green button"
audioControlButton.innerText = track._mediaTag == "cam-audio" ? "english" : track._mediaTag;
audioControlButton.dataset.participantId = participantId;
audioControlButton.onclick = () => {
if (player.paused) {
player.play();
audioControlButton.className = "ui primary red button"
} else {
player.pause();
audioControlButton.className = "ui primary green button"
}
};
audioContainer.appendChild(player);
audioContainer.appendChild(audioControlButton);
await startPlayer(player, track);
player.pause()
return player;
}
function subscribeToTracks(participantId) {
console.log(`subscribing to track`);
if (participantId === "local") {
return;
}
callObject.updateParticipant(participantId, {
setSubscribedTracks: {
audio: true,
video: false,
custom: true,
},
});
}
function startDaily() {
enableJoinButton(true);
enableLeaveButton(false);
window.callObject = window.DailyIframe.createCallObject({});
callObject.on("participant-joined", (e) => {
if (!e.participant.local) {
console.log("participant-joined", e.participant);
subscribeToTracks(e.participant.session_id);
}
});
callObject.on("participant-left", (e) => {
console.log("participant-left", e.participant.session_id);
destroyParticipantPlayers(e.participant.session_id);
});
callObject.on("track-started", async (e) => {
console.log("track-started", e.track);
if (e.track.kind === "audio") {
await buildAudioPlayer(e.track, e.participant.session_id);
}
});
}
async function joinRoom() {
enableJoinButton(false);
enableLeaveButton(true);
const meetingUrl = document.getElementById("meeting-url").value;
callObject.join({
url: meetingUrl,
startVideoOff: true,
startAudioOff: true,
subscribeToTracksAutomatically: false,
receiveSettings: {
base: { video: { layer: 0 } },
},
});
}
async function leaveRoom() {
enableJoinButton(true);
enableLeaveButton(false);
callObject.leave();
const audioContainer = document.getElementById("audio-container");
audioContainer.replaceChildren();
}
</script>
<body onload="startDaily()">
<div class="ui centered page grid" style="margin-top: 30px">
<div class="ten wide column">
<div class="ui form" style="margin-top: 30px">
<div class="field">
<label>Meeting URL</label>
<input id="meeting-url" value="" />
</div>
</div>
</div>
</div>
<div class="ui centered aligned header" style="margin-top: 30px">
<button id="join-button" class="ui primary button" onclick="joinRoom()">
Join
</button>
<button id="leave-button" class="ui button" onclick="leaveRoom()">
Leave
</button>
</div>
<div id="tile" class="ui container" style="margin-top: 30px">
<div id="tile" class="ui center aligned grid">
<div id="audio-container"></div><br/>
</div>
</div>
</body>
</html>

View File

@@ -1,2 +0,0 @@
pydub
pipecat-ai[daily]

View File

@@ -1,15 +0,0 @@
FROM python:3.10-bullseye
RUN mkdir /app
RUN mkdir /app/assets
RUN mkdir /app/utils
COPY *.py /app/
COPY requirements.txt /app/
WORKDIR /app
RUN pip3 install -r requirements.txt
EXPOSE 7860
CMD ["python3", "server.py"]

View File

@@ -1,39 +0,0 @@
# Daily Multi Translation
This example shows how to use Daily to stream multiple simultaneous translations using a single transport. Daily provides custom tracks and in this example we will simultaneously translate incoming audio in English to Spanish, French and German, each of them being sent to a custom track.
## Get started
```python
python3 -m venv venv
source venv/bin/activate
pip install -r requirements.txt
cp env.example .env # and add your credentials
```
## Run the server
```bash
python server.py
```
Then, visit `http://localhost:7860/` in your browser. This will open a Daily Prebuilt room where you will speak in English (make sure you are not muted).
## Open client
Next, you need to open the client that will listen to the translations.
```bash
open index.html
```
Once the client is opened, copy the URL of the Daily room created above and join it. You should be able to select which translation you want to hear.
## Build and test the Docker image
```
docker build -t daily-multi-translation .
docker run --env-file .env -p 7860:7860 daily-multi-translation
```

View File

@@ -1,165 +0,0 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import os
import sys
import aiohttp
from dotenv import load_dotenv
from loguru import logger
from runner import configure
from pipecat.audio.mixers.soundfile_mixer import SoundfileMixer
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.observers.loggers.transcription_log_observer import TranscriptionLogObserver
from pipecat.pipeline.parallel_pipeline import ParallelPipeline
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.cartesia.tts import CartesiaTTSService
from pipecat.services.deepgram.stt import DeepgramSTTService
from pipecat.services.openai.llm import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
load_dotenv(override=True)
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
BACKGROUND_SOUND_FILE = "office-ambience-mono-16000.mp3"
async def main():
async with aiohttp.ClientSession() as session:
(room_url, token) = await configure(session)
transport = DailyTransport(
room_url,
token,
"Multi translation bot",
DailyParams(
audio_in_enabled=True,
audio_out_enabled=True,
audio_out_mixer={
"spanish": SoundfileMixer(
sound_files={"office": BACKGROUND_SOUND_FILE}, default_sound="office"
),
"french": SoundfileMixer(
sound_files={"office": BACKGROUND_SOUND_FILE}, default_sound="office"
),
"german": SoundfileMixer(
sound_files={"office": BACKGROUND_SOUND_FILE}, default_sound="office"
),
},
audio_out_destinations=["spanish", "french", "german"],
microphone_out_enabled=False, # Disable since we just use custom tracks
vad_analyzer=SileroVADAnalyzer(),
),
)
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
tts_spanish = CartesiaTTSService(
api_key=os.getenv("CARTESIA_API_KEY"),
voice_id="cefcb124-080b-4655-b31f-932f3ee743de",
transport_destination="spanish",
)
tts_french = CartesiaTTSService(
api_key=os.getenv("CARTESIA_API_KEY"),
voice_id="8832a0b5-47b2-4751-bb22-6a8e2149303d",
transport_destination="french",
)
tts_german = CartesiaTTSService(
api_key=os.getenv("CARTESIA_API_KEY"),
voice_id="38aabb6a-f52b-4fb0-a3d1-988518f4dc06",
transport_destination="german",
)
messages_spanish = [
{
"role": "system",
"content": "You will be provided with a sentence in English, and your task is to only translate it into Spanish.",
},
]
messages_french = [
{
"role": "system",
"content": "You will be provided with a sentence in English, and your task is to only translate it into French.",
},
]
messages_german = [
{
"role": "system",
"content": "You will be provided with a sentence in English, and your task is to only translate it into German.",
},
]
llm_spanish = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
llm_french = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
llm_german = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
context_spanish = OpenAILLMContext(messages_spanish)
context_aggregator_spanish = llm_spanish.create_context_aggregator(context_spanish)
context_french = OpenAILLMContext(messages_french)
context_aggregator_french = llm_french.create_context_aggregator(context_french)
context_german = OpenAILLMContext(messages_german)
context_aggregator_german = llm_german.create_context_aggregator(context_german)
pipeline = Pipeline(
[
transport.input(), # Transport user input
stt,
ParallelPipeline(
# Spanish pipeline.
[
context_aggregator_spanish.user(),
llm_spanish,
tts_spanish,
context_aggregator_spanish.assistant(),
],
# French pipeline.
[
context_aggregator_french.user(),
llm_french,
tts_french,
context_aggregator_french.assistant(),
],
# German pipeline.
[
context_aggregator_german.user(),
llm_german,
tts_german,
context_aggregator_german.assistant(),
],
),
transport.output(), # Transport bot output
]
)
task = PipelineTask(
pipeline,
params=PipelineParams(
audio_in_sample_rate=16000,
audio_out_sample_rate=16000,
allow_interruptions=True,
enable_metrics=True,
enable_usage_metrics=True,
report_only_initial_ttfb=True,
),
observers=[TranscriptionLogObserver()],
)
runner = PipelineRunner()
await runner.run(task)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -1,5 +0,0 @@
DAILY_SAMPLE_ROOM_URL=https://yourdomain.daily.co/yourroom # (for joining the bot to the same room repeatedly for local dev)
DAILY_API_KEY=7df...
OPENAI_API_KEY=sk-PL...
DEEPGRAM_API_KEY=efb...
CARTESIA_API_KEY=aeb...

View File

@@ -1,202 +0,0 @@
<html>
<head>
<title>daily multi translation</title>
</head>
<script crossorigin src="https://unpkg.com/@daily-co/daily-js"></script>
<script
src="https://code.jquery.com/jquery-3.1.1.min.js"
integrity="sha256-hVVnYaiADRTO2PzUGmuLJr8BLUSjGIZsDYGmIJLv2b8="
crossorigin="anonymous"
></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/fomantic-ui/2.8.6/semantic.min.js"></script>
<link
rel="stylesheet"
type="text/css"
href="https://cdnjs.cloudflare.com/ajax/libs/fomantic-ui/2.8.6/semantic.min.css"
/>
<script>
function enableButton(buttonId, enable) {
const button = document.getElementById(buttonId);
button.disabled = !enable;
}
function enableJoinButton(enable) {
enableButton("join-button", enable);
}
function enableLeaveButton(enable) {
enableButton("leave-button", enable);
}
function destroyPlayers(query) {
const items = document.querySelectorAll(query);
if (items) {
for (const item of items) {
item.remove();
}
}
}
function destroyParticipantPlayers(participantId) {
destroyPlayers(`video[data-participant-id="${participantId}"]`);
destroyPlayers(`audio[data-participant-id="${participantId}"]`);
destroyPlayers(`button[data-participant-id="${participantId}"]`);
}
async function startPlayer(player, track) {
player.muted = false;
player.autoplay = true;
if (track != null) {
player.srcObject = new MediaStream([track]);
}
}
async function buildVideoPlayer(track, participantId) {
const videoContainer = document.getElementById("video-container");
const player = document.createElement("video");
player.dataset.participantId = participantId;
videoContainer.appendChild(player);
await startPlayer(player, track);
await player.play();
return player;
}
async function buildAudioPlayer(track, participantId) {
const audioContainer = document.getElementById("audio-container");
const player = document.createElement("audio");
player.dataset.participantId = participantId;
// Create a new button for controlling audio
const audioControlButton = document.createElement("button");
audioControlButton.className = "ui primary green button"
audioControlButton.innerText = track._mediaTag == "cam-audio" ? "english" : track._mediaTag;
audioControlButton.dataset.participantId = participantId;
audioControlButton.onclick = () => {
if (player.paused) {
player.play();
audioControlButton.className = "ui primary red button"
} else {
player.pause();
audioControlButton.className = "ui primary green button"
}
};
audioContainer.appendChild(player);
audioContainer.appendChild(audioControlButton);
await startPlayer(player, track);
player.pause()
return player;
}
function subscribeToTracks(participantId) {
console.log(`subscribing to track`);
if (participantId === "local") {
return;
}
callObject.updateParticipant(participantId, {
setSubscribedTracks: {
audio: true,
video: true,
custom: true,
},
});
}
function startDaily() {
enableJoinButton(true);
enableLeaveButton(false);
window.callObject = window.DailyIframe.createCallObject({});
callObject.on("participant-joined", (e) => {
if (!e.participant.local) {
console.log("participant-joined", e.participant);
subscribeToTracks(e.participant.session_id);
}
});
callObject.on("participant-left", (e) => {
console.log("participant-left", e.participant.session_id);
destroyParticipantPlayers(e.participant.session_id);
});
callObject.on("track-started", async (e) => {
console.log("track-started", e.track);
if (e.track.kind === "video") {
await buildVideoPlayer(e.track, e.participant.session_id);
} else if (e.track.kind === "audio") {
await buildAudioPlayer(e.track, e.participant.session_id);
}
});
}
async function joinRoom() {
enableJoinButton(false);
enableLeaveButton(true);
const meetingUrl = document.getElementById("meeting-url").value;
callObject.join({
url: meetingUrl,
startVideoOff: true,
startAudioOff: true,
subscribeToTracksAutomatically: false,
receiveSettings: {
base: { video: { layer: 0 } },
},
});
}
async function leaveRoom() {
enableJoinButton(true);
enableLeaveButton(false);
callObject.leave();
const videoContainer = document.getElementById("video-container");
videoContainer.replaceChildren();
const audioContainer = document.getElementById("audio-container");
audioContainer.replaceChildren();
}
</script>
<body onload="startDaily()">
<div class="ui centered page grid" style="margin-top: 30px">
<div class="ten wide column">
<div class="ui form" style="margin-top: 30px">
<div class="field">
<label>Meeting URL</label>
<input id="meeting-url" value="" />
</div>
</div>
</div>
</div>
<div class="ui centered aligned header" style="margin-top: 30px">
<button id="join-button" class="ui primary button" onclick="joinRoom()">
Join
</button>
<button id="leave-button" class="ui button" onclick="leaveRoom()">
Leave
</button>
</div>
<div id="tile" class="ui container" style="margin-top: 30px">
<div id="tile" class="ui center aligned grid">
<div id="audio-container"></div><br/>
</div>
</div>
<div id="tile" class="ui container" style="margin-top: 30px">
<div id="tile" class="ui center aligned grid">
<div id="video-container" class="ui segment"></div>
</div>
</div>
</body>
</html>

View File

@@ -1,5 +0,0 @@
aiofiles
python-dotenv
fastapi[all]
uvicorn
pipecat-ai[daily,deepgram,openai,silero,cartesia]

View File

@@ -1,55 +0,0 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import argparse
import os
import aiohttp
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
async def configure(aiohttp_session: aiohttp.ClientSession):
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
parser.add_argument(
"-u", "--url", type=str, required=False, help="URL of the Daily room to join"
)
parser.add_argument(
"-k",
"--apikey",
type=str,
required=False,
help="Daily API Key (needed to create an owner token for the room)",
)
args, unknown = parser.parse_known_args()
url = args.url or os.getenv("DAILY_SAMPLE_ROOM_URL")
key = args.apikey or os.getenv("DAILY_API_KEY")
if not url:
raise Exception(
"No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL."
)
if not key:
raise Exception(
"No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers."
)
daily_rest_helper = DailyRESTHelper(
daily_api_key=key,
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
aiohttp_session=aiohttp_session,
)
# Create a meeting token for the given room with an expiration 1 hour in
# the future.
expiry_time: float = 60 * 60
token = await daily_rest_helper.get_token(url, expiry_time)
return (url, token)

View File

@@ -1,139 +0,0 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import argparse
import os
import subprocess
from contextlib import asynccontextmanager
import aiohttp
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, RedirectResponse
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper, DailyRoomParams
MAX_BOTS_PER_ROOM = 1
# Bot sub-process dict for status reporting and concurrency control
bot_procs = {}
daily_helpers = {}
load_dotenv(override=True)
def cleanup():
# Clean up function, just to be extra safe
for entry in bot_procs.values():
proc = entry[0]
proc.terminate()
proc.wait()
@asynccontextmanager
async def lifespan(app: FastAPI):
aiohttp_session = aiohttp.ClientSession()
daily_helpers["rest"] = DailyRESTHelper(
daily_api_key=os.getenv("DAILY_API_KEY", ""),
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
aiohttp_session=aiohttp_session,
)
yield
await aiohttp_session.close()
cleanup()
app = FastAPI(lifespan=lifespan)
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
@app.get("/")
async def start_agent(request: Request):
print(f"!!! Creating room")
room = await daily_helpers["rest"].create_room(DailyRoomParams())
print(f"!!! Room URL: {room.url}")
# Ensure the room property is present
if not room.url:
raise HTTPException(
status_code=500,
detail="Missing 'room' property in request data. Cannot start agent without a target room!",
)
# Check if there is already an existing process running in this room
num_bots_in_room = sum(
1 for proc in bot_procs.values() if proc[1] == room.url and proc[0].poll() is None
)
if num_bots_in_room >= MAX_BOTS_PER_ROOM:
raise HTTPException(status_code=500, detail=f"Max bot limited reach for room: {room.url}")
# Get the token for the room
token = await daily_helpers["rest"].get_token(room.url)
if not token:
raise HTTPException(status_code=500, detail=f"Failed to get token for room: {room.url}")
# Spawn a new agent, and join the user session
# Note: this is mostly for demonstration purposes (refer to 'deployment' in README)
try:
proc = subprocess.Popen(
[f"python3 -m bot -u {room.url} -t {token}"],
shell=True,
bufsize=1,
cwd=os.path.dirname(os.path.abspath(__file__)),
)
bot_procs[proc.pid] = (proc, room.url)
except Exception as e:
raise HTTPException(status_code=500, detail=f"Failed to start subprocess: {e}")
return RedirectResponse(room.url)
@app.get("/status/{pid}")
def get_status(pid: int):
# Look up the subprocess
proc = bot_procs.get(pid)
# If the subprocess doesn't exist, return an error
if not proc:
raise HTTPException(status_code=404, detail=f"Bot with process id: {pid} not found")
# Check the status of the subprocess
if proc[0].poll() is None:
status = "running"
else:
status = "finished"
return JSONResponse({"bot_id": pid, "status": status})
if __name__ == "__main__":
import uvicorn
default_host = os.getenv("HOST", "0.0.0.0")
default_port = int(os.getenv("FAST_API_PORT", "7860"))
parser = argparse.ArgumentParser(description="Daily Storyteller FastAPI server")
parser.add_argument("--host", type=str, default=default_host, help="Host address")
parser.add_argument("--port", type=int, default=default_port, help="Port number")
parser.add_argument("--reload", action="store_true", help="Reload code on change")
config = parser.parse_args()
uvicorn.run(
"server:app",
host=config.host,
port=config.port,
reload=config.reload,
)

View File

@@ -1,9 +1,3 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import argparse
import asyncio
import os
@@ -13,13 +7,13 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame
from pipecat.frames.frames import EndFrame, LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
from pipecat.services.openai.llm import OpenAILLMService
from pipecat.services.elevenlabs import ElevenLabsTTSService
from pipecat.services.openai import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
load_dotenv(override=True)
@@ -41,7 +35,8 @@ async def main(room_url: str, token: str):
api_key=daily_api_key,
audio_in_enabled=True,
audio_out_enabled=True,
video_out_enabled=False,
camera_out_enabled=False,
vad_enabled=True,
vad_analyzer=SileroVADAnalyzer(),
transcription_enabled=True,
),
@@ -52,7 +47,7 @@ async def main(room_url: str, token: str):
voice_id=os.getenv("ELEVENLABS_VOICE_ID", ""),
)
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
messages = [
{
@@ -75,22 +70,20 @@ async def main(room_url: str, token: str):
]
)
task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
await transport.capture_participant_transcription(participant["id"])
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMMessagesFrame(messages)])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.cancel()
await task.queue_frame(EndFrame())
@transport.event_handler("on_call_state_updated")
async def on_call_state_updated(transport, state):
if state == "left":
# Here we don't want to cancel, we just want to finish sending
# whatever is queued, so we use an EndFrame().
await task.queue_frame(EndFrame())
runner = PipelineRunner()

View File

@@ -1,5 +1,5 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

View File

@@ -1,9 +1,3 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import os
import aiohttp

View File

@@ -1,9 +1,3 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import os
import sys
@@ -11,15 +5,6 @@ import sys
from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.cartesia.tts import CartesiaTTSService
from pipecat.services.openai.llm import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
load_dotenv(override=True)
logger.remove(0)
@@ -27,23 +12,33 @@ logger.add(sys.stderr, level="DEBUG")
async def main(room_url: str, token: str):
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame, LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.cartesia import CartesiaTTSService
from pipecat.services.openai import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
transport = DailyTransport(
room_url,
token,
"bot",
DailyParams(
audio_in_enabled=True,
audio_out_enabled=True,
transcription_enabled=True,
vad_enabled=True,
vad_analyzer=SileroVADAnalyzer(),
),
)
tts = CartesiaTTSService(
api_key=os.getenv("CARTESIA_API_KEY", ""), voice_id="71a7ad14-091c-4e8e-a314-022ece01c121"
api_key=os.getenv("CARTESIA_API_KEY", ""), voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22"
)
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
messages = [
{
@@ -68,7 +63,7 @@ async def main(room_url: str, token: str):
task = PipelineTask(
pipeline,
params=PipelineParams(
PipelineParams(
allow_interruptions=True,
enable_metrics=True,
enable_usage_metrics=True,
@@ -80,11 +75,11 @@ async def main(room_url: str, token: str):
async def on_first_participant_joined(transport, participant):
await transport.capture_participant_transcription(participant["id"])
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMMessagesFrame(messages)])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.cancel()
await task.queue_frame(EndFrame())
runner = PipelineRunner()

View File

@@ -1,4 +1,5 @@
python-dotenv==1.0.1
modal==0.71.3
pipecat-ai[daily,silero,cartesia,openai]
fastapi==0.115.6
modal==0.65.48
pipecat-ai[daily,silero,cartesia,openai]==0.0.48
fastapi==0.115.4
aiohttp==3.11.9

View File

@@ -1,178 +0,0 @@
# Handling PSTN/SIP Dial-in on Pipecat Cloud
This repository contains two server implementations for handling
the pinless dial-in workflow in Pipecat Cloud. This is the companion to the
Pipecat Cloud [pstn_sip starter image](https://github.com/daily-co/pipecat-cloud-images/tree/main/pipecat-starters/pstn_sip).
In addition you can use `/api/dial` to trigger dial-out, and
eventually, call-transfers.
1. [FastAPI Server](fastapi-webhook-server/README.md) -
A FastAPI implementation that handles PSTN (Public Switched Telephone
Network) and SIP (Session Initiation Protocol) calls using the Daily API.
2. [Next.js Serverless](nextjs-webhook-server/README.md) -
A Next.js API implementation designed for deployment on Vercel's
serverless platform.
Both implementations provide:
- HMAC signature validation for pinless webhook
- Structured logging
- Support for dial-in and dial-out settings
- Voicemail detection and call transfer functionality (coming soon)
- Test request handling
## Choosing an Implementation
- Use the **FastAPI Server** if you:
- Need a standalone server
- Prefer Python and FastAPI
- Want to deploy to traditional hosting platforms
- Use the **Next.js Serverless** implementation if you:
- Want serverless deployment
- Prefer JavaScript/TypeScript
- Already use Next.js and Vercel for other projects
- Need quick scaling and zero maintenance
## Prerequisites
### Environment Variables
Both implementations require similar environment variables:
- `PIPECAT_CLOUD_API_KEY`: Pipecat Cloud API Key, begins with pk\_\*
- `AGENT_NAME`: Your Daily agent name
- `PINLESS_HMAC_SECRET`: Your HMAC secret for request verification
- `LOG_LEVEL`: (Optional) Logging level (defaults to 'info')
See the individual README files in each implementation directory for
specific setup instructions.
### Phone number setup
You can buy a phone number through the Pipecat Cloud Dashboard:
1. Go to `Settings` > `Telephony`
2. Follow the UI to purchase a phone number
3. Configure the webhook URL to receive incoming calls (e.g. `https://my-webhook-url.com/api/dial`)
Or purchase the number using Daily's
[PhoneNumbers API](https://docs.daily.co/reference/rest-api/phone-numbers).
```bash
curl --request POST \
--url https://api.daily.co/v1/domain-dialin-config \
--header 'Authorization: Bearer $TOKEN' \
--header 'Content-Type: application/json' \
--data-raw '{
"type": "pinless_dialin",
"name_prefix": "Customer1",
"phone_number": "+1PURCHASED_NUM",
"room_creation_api": "https://example.com/api/dial",
"hold_music_url": "https://example.com/static/ringtone.mp3",
"timeout_config": {
"message": "No agent is available right now"
}
}'
```
The API will return a static SIP URI (`sip_uri`) that can be called
from other SIP services.
### `room_creation_api`
To make and receive calls currently you have to host a server that
handles incoming calls. In the coming weeks, incoming calls will be
directly handled within Daily and we will expose an endpoint similar
to `{service}/start` that will manage this for you.
In the meantime, the server described below serves as the webhook
handler for the `room_creation_api`. Configure your pinless phone
number or SIP interconnect to the `ngrok` tunnel or
the actual server URL, append `/api/dial` to the webhook URL.
## Example curl commands
Note: Replace `http://localhost:3000` with your actual server URL and
phone numbers with valid values for your use case.
### Dialin Request
The server will receive a request when a call is received from Daily.
### Dialout Request
Dial a number, will use any purchased number
```bash
curl -X POST http://localhost:3000/api/dial \
-H "Content-Type: application/json" \
-d '{
"dialout_settings": [
{
"phoneNumber": "+1234567890",
}
]
}'
```
Dial a number with callerId, which is the UUID of a purchased number.
```bash
curl -X POST http://localhost:3000/api/dial \
-H "Content-Type: application/json" \
-d '{
"dialout_settings": [
{
"phoneNumber": "+1234567890",
"callerId": "purchased_phone_uuid"
}
]
}'
```
Dial a number
```bash
curl -X POST http://localhost:3000/api/dial \
-H "Content-Type: application/json" \
-d '{
"dialout_settings": [
{
"phoneNumber": "+1234567890",
"callerId": "purchased_phone_uuid"
}
]
}'
```
### Advanced Request with Voicemail Detection
```bash
curl -X POST http://localhost:3000/api/dial \
-H "Content-Type: application/json" \
-d '{
"To": "+1234567890",
"From": "+1987654321",
"callId": "call-uuid-123",
"callDomain": "domain-uuid-456",
"dialout_settings": [
{
"phoneNumber": "+1234567890",
"callerId": "purchased_phone_uuid"
}
],
"voicemail_detection": {
"testInPrebuilt": true
},
"call_transfer": {
"mode": "dialout",
"speakSummary": true,
"storeSummary": true,
"operatorNumber": "+1234567890",
"testInPrebuilt": true
}
}'
```

View File

@@ -1,98 +0,0 @@
# FastAPI server for handling Daily PSTN/SIP Webhook
A FastAPI server that handles PSTN (Public Switched Telephone Network) and SIP (Session Initiation Protocol) calls using the Daily API.
## Setup
1. Clone the repository
2. Navigate to the `fastapi-webhook-server` directory:
```bash
cd fastapi-webhook-server
```
3. Install dependencies:
```bash
pip install -r requirements.txt
```
4. Copy `env.example` to `.env`:
```bash
cp env.example .env
```
5. Update `.env` with your credentials:
- `AGENT_NAME`: Your Daily agent name
- `PIPECAT_CLOUD_API_KEY`: Your Daily API key
- `PINLESS_HMAC_SECRET`: Your HMAC secret for request verification
## Running the Server
Start the server:
```bash
python server.py
```
The server will run on `http://localhost:7860` and you can expose it via ngrok for testing:
```bash
`ngrok http 7860`
```
> Tip: Use a subdomain for a consistent URL (e.g. `ngrok http -subdomain=mydomain http://localhost:7860`)
## API Endpoints
### GET /
Health check endpoint that returns a "Hello, World!" message.
### POST /api/dial
Initiates a PSTN/SIP call with the following request body format:
```json
{
"To": "+14152251493",
"From": "+14158483432",
"callId": "string-contains-uuid",
"callDomain": "string-contains-uuid",
"dialout_settings": [
{
"phoneNumber": "+14158483432",
"callerId": "+14152251493"
}
],
"voicemail_detection": {
"testInPrebuilt": true
},
"call_transfer": {
"mode": "dialout",
"speakSummary": true,
"storeSummary": true,
"operatorNumber": "+14152250006",
"testInPrebuilt": true
}
}
```
#### Response
Returns a JSON object containing:
- `status`: Success/failure status
- `data`: Response from Daily API
- `room_properties`: Properties of the created Daily room
## Error Handling
- 401: Invalid signature
- 400: Invalid authorization header (e.g. missing Daily API key in bot.py)
- 405: Method not allowed (e.g. incorrect route on the webhook URL)
- 500: Server errors (missing API key, network issues)
- Other status codes are passed through from the Daily API

View File

@@ -1,3 +0,0 @@
AGENT_NAME="your-agent-name"
PIPECAT_CLOUD_API_KEY="your-daily-api-key"
PINLESS_HMAC_SECRET="hmac-secret-pinless-dialin"

View File

@@ -1,6 +0,0 @@
fastapi
uvicorn
python-dotenv
requests
pydantic
loguru

View File

@@ -1,202 +0,0 @@
#
# Copyright (c) 2025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
# server.py
import base64 # for calculating hmac signature
import hmac
import os # for accessing environment variables
import time # for setting expiration time
from typing import Any, Dict, List, Optional
import requests
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException, Request
from loguru import logger
from pydantic import BaseModel, Field
load_dotenv(override=True)
app = FastAPI()
class RoomRequest(BaseModel):
test: Optional[str] = Field(None, alias="Test", description="Test field")
To: Optional[str] = Field(None, alias="to", description="Destination phone number")
From: Optional[str] = Field(None, alias="from", description="Source phone number")
callId: Optional[str] = Field(None, alias="call_id", description="Unique call identifier")
callDomain: Optional[str] = Field(
None, alias="call_domain", description="Call domain identifier"
)
dialout_settings: Optional[List[Dict[str, Any]]] = Field(
None, description="An array of phone numbers or SIP URIs to dialout to"
)
voicemail_detection: Optional[Dict[str, Any]] = Field(
None, description="A flag to perform voicemail or answeing-machine detection"
)
call_transfer: Optional[Dict[str, Any]] = Field(None, description="to initiate a call transfer")
class Config:
populate_by_name = True
alias_generator = None
"""
body can contain any fields, but for handling PSTN/SIP,
we recommend sending the following custom values:
dialin, dialout, voicemail detection, and call transfer
"To": "+14152251493",
"From": "+14158483432",
"callId": "string-contains-uuid",
"callDomain": "string-contains-uuid"
These need to be remapped to dialin_settings
"dialout_settings": [
{"phoneNumber": "+14158483432", "callerId": "+14152251493"},
{"sipUri": "sip:username@sip.hostname"}
],
},
voicemail_detection:{
testInPrebuilt: true
},
"call_transfer": {
"mode": "dialout",
"speakSummary": true,
"storeSummary": true,
"operatorNumber": "+14152250006",
"testInPrebuilt": true
}
"""
@app.get("/")
async def read_root():
return {"message": "Hello, World!"}
@app.post("/api/dial")
async def dial(request: RoomRequest, raw_request: Request):
logger.info("Incoming request to /dial:")
logger.info(f"Headers: {dict(raw_request.headers)}")
raw_body = await raw_request.body()
raw_body_str = raw_body.decode()
logger.info(f"Raw body: {raw_body_str}")
logger.info(f"Parsed body: {request.dict()}")
# calculate signature and compare/verify
hmac_secret = os.getenv("PINLESS_HMAC_SECRET")
timestamp = raw_request.headers.get("x-pinless-timestamp")
signature = raw_request.headers.get("x-pinless-signature")
if not hmac_secret:
logger.debug("Skipping HMAC validation - PINLESS_HMAC_SECRET not set")
elif timestamp and signature:
message = timestamp + "." + raw_body_str
base64_decoded_secret = base64.b64decode(hmac_secret)
computed_signature = base64.b64encode(
hmac.new(base64_decoded_secret, message.encode(), "sha256").digest()
).decode()
if computed_signature != signature:
logger.error(f"Invalid signature. Expected {signature}, got {computed_signature}")
raise HTTPException(status_code=401, detail="Invalid signature")
else:
logger.debug("Skipping HMAC validation - no signature headers present")
if request.test == "test":
logger.debug("Test request received")
return {"status": "success", "message": "Test request received"}
dialin_settings = None
# these fields are camelCase in the request
required_fields = ["To", "From", "callId", "callDomain"]
if all(
field in request.dict() and request.dict()[field] is not None for field in required_fields
):
# transform from camelCase to snake_case because daily-python expects snake_case
dialin_settings = {
"From": request.From,
"To": request.To,
"call_id": request.callId,
"call_domain": request.callDomain,
# transform from camelCase to snake_case
}
logger.debug(f"Populated dialin_settings from request: {dialin_settings}")
daily_room_properties = {
"enable_dialout": request.dialout_settings is not None,
}
if dialin_settings is not None:
sip_config = {
"display_name": request.From,
"sip_mode": "dial-in",
"num_endpoints": 2 if request.call_transfer is not None else 1,
"codecs": {"audio": ["OPUS"]},
}
daily_room_properties["sip"] = sip_config
# Setting default expiry to 5 minutes from now
daily_room_properties["exp"] = int(time.time()) + (5 * 60)
logger.debug(f"Daily room properties: {daily_room_properties}")
payload = {
"createDailyRoom": True,
"dailyRoomProperties": daily_room_properties,
"body": {
"dialin_settings": dialin_settings,
"dialout_settings": request.dialout_settings,
"voicemail_detection": request.voicemail_detection,
"call_transfer": request.call_transfer,
},
}
pcc_api_key = os.getenv("PIPECAT_CLOUD_API_KEY")
agent_name = os.getenv("AGENT_NAME", "my-first-agent")
if not pcc_api_key:
raise HTTPException(status_code=500, detail="DAILY_API_KEY environment variable is not set")
headers = {"Authorization": f"Bearer {pcc_api_key}", "Content-Type": "application/json"}
url = f"https://api.pipecat.daily.co/v1/public/{agent_name}/start"
logger.debug(f"Making API call to Daily: {url} {headers} {payload}")
try:
response = requests.post(url, json=payload, headers=headers)
response.raise_for_status()
response_data = response.json()
logger.debug(f"Response: {response_data}")
return {
"status": "success",
"data": response_data,
"room_properties": daily_room_properties,
}
except requests.exceptions.HTTPError as e:
# Pass through the status code and error details from the Daily API
status_code = e.response.status_code
error_detail = e.response.json() if e.response.content else str(e)
logger.error(f"HTTP error: {error_detail}")
raise HTTPException(status_code=status_code, detail=error_detail)
except requests.exceptions.RequestException as e:
logger.error(f"Request error: {str(e)}")
raise HTTPException(status_code=500, detail=str(e))
if __name__ == "__main__":
try:
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=7860)
except KeyboardInterrupt:
logger.info("Server stopped manually")

View File

@@ -1,53 +0,0 @@
# dependencies
/node_modules
/.pnp
.pnp.js
# testing
/coverage
# next.js
/.next/
/out/
# production
/build
# misc
.DS_Store
*.pem
# debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*
.pnpm-debug.log*
# local env files
.env*.local
# vercel
.vercel
# typescript
*.tsbuildinfo
next-env.d.ts
# IDE specific files
.idea/
.vscode/
*.swp
*.swo
# Logs
logs
*.log
# OS generated files
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db

View File

@@ -1,115 +0,0 @@
# Next.js server for handling Daily PSTN/SIP Webhook
Next.js API routes for handling Daily PSTN/SIP Pipecat requests.
## Features
- API endpoint for handling Daily PSTN/SIP Pipecat requests
- HMAC signature validation
- Structured logging with Pino
- Support for dial-in and dial-out settings
- Voicemail detection and call transfer functionality
- Test request handling
## Setup
1. Clone the repository
2. Navigate to the `nextjs-webhook-server` directory:
```bash
cd nextjs-webhook-server
```
3. Install dependencies:
```bash
npm install
```
4. Create `.env.local` file with your credentials:
```bash
cp env.local.example .env.local
```
5. Update your `.env` with your secrets:
```bash
PIPECAT_CLOUD_API_KEY=pk_*
AGENT_NAME=my-first-agent
PINLESS_HMAC_SECRET=your_hmac_secret
LOG_LEVEL=info
```
### Running the server
Run the development server:
```bash
npm run dev
```
The server will run on `http://localhost:7860` and you can expose it via ngrok for testing:
```bash
`ngrok http 7860`
```
> Tip: Use a subdomain for a consistent URL (e.g. `ngrok http -subdomain=mydomain http://localhost:7860`)
## API Endpoints
### GET /api
Returns a simple "Hello, World!" message with a cute cat emoji to verify the server is running.
### POST /api/dial
Handles dial-in and dial-out requests for Pipecat Cloud.
#### Test Requests
The endpoint handles test requests when a webhook is configured. Send a request with `"Test": "test"` to verify your setup:
```json
{
"Test": "test"
}
```
#### Production Request Format
```json
{
// for dial-in from webhook
"To": "+14152251493",
"From": "+14158483432",
"callId": "string-contains-uuid",
"callDomain": "string-contains-uuid",
// for making a dial out to a phone or SIP
"dialout_settings": [
{ "phoneNumber": "+14158483432", "callerId": "purchased_phone_uuid" },
{ "sipUri": "sip:username@sip.hostname.com" }
]
}
```
## Deployment
The application is configured for Vercel deployment:
1. Push your code to a Git repository
2. Import your project in Vercel dashboard
3. Configure environment variables:
- `PIPECAT_CLOUD_API_KEY`
- `AGENT_NAME`
- `PINLESS_HMAC_SECRET`
- `LOG_LEVEL` (optional, defaults to 'info')
4. Deploy!
## Security
- HMAC signature validation for request authentication
- Environment variables for sensitive credentials
- Method validation (POST only for /dial)

View File

@@ -1,4 +0,0 @@
AGENT_NAME=my-first-agent
PIPECAT_CLOUD_API_KEY=your_daily_api_key
PINLESS_HMAC_SECRET=your_hmac_secret
LOG_LEVEL="info"

View File

@@ -1,22 +0,0 @@
{
"name": "my-daily-app",
"version": "0.1.0",
"private": true,
"scripts": {
"dev": "next dev -p 7860",
"build": "next build",
"start": "next start -p 7860",
"lint": "next lint"
},
"dependencies": {
"axios": "^1.6.0",
"next": "^14.0.0",
"pino": "^8.15.0",
"react": "^18.2.0",
"react-dom": "^18.2.0"
},
"devDependencies": {
"eslint": "^8.46.0",
"eslint-config-next": "^14.0.0"
}
}

View File

@@ -1,176 +0,0 @@
import { logger } from '../../lib/utils';
import axios from 'axios';
import crypto from 'crypto';
const validateSignature = (body, signature, timestamp, secret) => {
// Skip if any required fields are missing
if (!signature || !timestamp || !secret) {
logger.warn('Missing required fields for HMAC validation');
return true;
}
try {
const decodedSecret = Buffer.from(secret, 'base64');
const hmac = crypto.createHmac('sha256', decodedSecret);
const signatureData = `${timestamp}.${body}`;
const computedSignature = hmac.update(signatureData).digest('base64');
logger.debug('Signature validation:', {
timestamp,
signatureData: signatureData.substring(0, 50) + '...',
computedSignature,
receivedSignature: signature
});
return computedSignature === signature;
} catch (error) {
logger.error('Error validating signature:', error);
return true; // Allow request to proceed on error
}
};
export default async function handler(req, res) {
// Only allow POST requests
if (req.method !== 'POST') {
return res.status(405).json({ error: 'Method not allowed' });
}
try {
logger.info('Incoming request to /api/dial:');
logger.info(`Headers: ${JSON.stringify(req.headers)}`);
const rawBody = JSON.stringify(req.body);
logger.info(`Raw body: ${rawBody}`);
const signature = req.headers['x-pinless-signature'];
const timestamp = req.headers['x-pinless-timestamp'];
if (signature && timestamp) {
logger.info('Validating HMAC signature');
if (!validateSignature(rawBody, signature, timestamp, process.env.PINLESS_HMAC_SECRET)) {
logger.error('Invalid HMAC signature', { signature, timestamp });
return res.status(401).json({
error: 'Invalid signature',
message: 'Invalid HMAC signature'
});
}
} else {
logger.info('Skipping HMAC validation - no signature headers present');
}
// Extract request data
const {
Test: test,
To,
From,
callId,
callDomain,
dialout_settings,
voicemail_detection,
call_transfer
} = req.body;
// Handle test requests when a webhook is configured
if (test === 'test') {
logger.debug('Test request received');
return res.status(200).json({ status: 'success', message: 'Test request received' });
}
// Process dialin settings
let dialin_settings = null;
const requiredFields = ['To', 'From', 'callId', 'callDomain'];
if (requiredFields.every(field => req.body[field] !== undefined && req.body[field] !== null)) {
dialin_settings = {
// snake_case because pipecat expects this format
From,
To,
call_id: callId,
call_domain: callDomain,
};
logger.debug(`Populated dialin_settings from request: ${JSON.stringify(dialin_settings)}`);
}
// Set up Daily room properties
const daily_room_properties = {
enable_dialout: dialout_settings !== undefined && dialout_settings !== null,
exp: Math.floor(Date.now() / 1000) + (5 * 60), // 5 minutes from now
};
// Configure SIP if dialin settings are provided
if (dialin_settings !== null) {
const sip_config = {
display_name: From,
sip_mode: 'dial-in',
num_endpoints: call_transfer !== null ? 2 : 1,
codecs: {"audio": ["OPUS"]},
};
daily_room_properties.sip = sip_config;
}
// Prepare payload for {service}/start API call
const payload = {
createDailyRoom: true,
dailyRoomProperties: daily_room_properties,
body: {
dialin_settings,
dialout_settings,
voicemail_detection,
call_transfer,
},
};
logger.debug(`Daily room properties: ${JSON.stringify(daily_room_properties)}`);
// Get Daily API key and agent name from environment variables
const pccApiKey = process.env.PIPECAT_CLOUD_API_KEY;
const agentName = process.env.AGENT_NAME || 'my-first-agent';
if (!pccApiKey) {
throw new Error('PIPECAT_CLOUD_API_KEY environment variable is not set');
}
// Set up headers for Daily API call
const headers = {
'Authorization': `Bearer ${pccApiKey}`,
'Content-Type': 'application/json',
};
const url = `https://api.pipecat.daily.co/v1/public/${agentName}/start`;
logger.debug(`Making API call to Daily: ${url} ${JSON.stringify(headers)} ${JSON.stringify(payload)}`);
try {
const response = await axios.post(url, payload, { headers });
logger.debug(`Response: ${JSON.stringify(response.data)}`);
return res.status(200).json({
status: 'success',
data: response.data,
room_properties: daily_room_properties,
});
} catch (error) {
if (error.response) {
// Pass through status code and error details from the Daily API
const statusCode = error.response.status;
const errorDetail = error.response.data || error.message;
logger.error(`HTTP error: ${JSON.stringify(errorDetail)}`);
return res.status(statusCode).json(errorDetail);
} else {
logger.error(`Request error: ${error.message}`);
return res.status(500).json({ error: error.message });
}
}
} catch (error) {
logger.error(`Unexpected error: ${error.message}`);
return res.status(500).json({ error: 'Internal server error', message: error.message });
}
}
// Configure body parser to preserve raw body text
export const config = {
api: {
bodyParser: {
sizeLimit: '1mb',
},
},
};

View File

@@ -1,6 +0,0 @@
import { logger } from '../../lib/utils';
export default function handler(req, res) {
logger.info('Received request to /api');
res.status(200).json({ message: 'Hello, World! from ᓚᘏᗢ' });
}

View File

@@ -1,6 +0,0 @@
module.exports = {
version: 2,
buildCommand: "next build",
outputDirectory: ".next",
cleanUrls: true
};

Some files were not shown because too many files have changed in this diff Show More