Compare commits
1 Commits
jpt/runner
...
update-mod
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
50b19a9e77 |
18
.github/workflows/android.yaml
vendored
@@ -6,13 +6,11 @@ on:
|
||||
- main
|
||||
paths:
|
||||
- "examples/simple-chatbot/client/android/**"
|
||||
- "examples/p2p-webrtc/video-transform/client/android/**"
|
||||
pull_request:
|
||||
branches:
|
||||
- "**"
|
||||
paths:
|
||||
- "examples/simple-chatbot/client/android/**"
|
||||
- "examples/p2p-webrtc/video-transform/client/android/**"
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
sdk_git_ref:
|
||||
@@ -25,7 +23,7 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
sdk:
|
||||
name: "Demo apps"
|
||||
name: "Simple chatbot demo"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repo
|
||||
@@ -39,22 +37,12 @@ jobs:
|
||||
distribution: 'temurin'
|
||||
java-version: '17'
|
||||
|
||||
- name: "Example app: Simple Chatbot"
|
||||
- name: Build demo app
|
||||
working-directory: examples/simple-chatbot/client/android
|
||||
run: ./gradlew :simple-chatbot-client:assembleDebug
|
||||
|
||||
- name: Upload Simple Chatbot APK
|
||||
- name: Upload demo APK
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: Simple Chatbot Android Client
|
||||
path: examples/simple-chatbot/client/android/simple-chatbot-client/build/outputs/apk/debug/simple-chatbot-client-debug.apk
|
||||
|
||||
- name: "Example app: Small WebRTC Client"
|
||||
working-directory: examples/p2p-webrtc/video-transform/client/android
|
||||
run: ./gradlew :small-webrtc-client:assembleDebug
|
||||
|
||||
- name: Upload Small WebRTC APK
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: Small WebRTC Android Client
|
||||
path: examples/p2p-webrtc/video-transform/client/android/small-webrtc-client/build/outputs/apk/debug/small-webrtc-client-debug.apk
|
||||
|
||||
6
.github/workflows/format.yaml
vendored
@@ -17,7 +17,7 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
ruff-format:
|
||||
name: "Code quality checks"
|
||||
name: "Formatting checker"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repo
|
||||
@@ -39,8 +39,8 @@ jobs:
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
ruff format --diff
|
||||
- name: Ruff linter (all rules)
|
||||
- name: Ruff import linter
|
||||
id: ruff-check
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
ruff check
|
||||
ruff check --select I
|
||||
|
||||
2
.github/workflows/publish.yaml
vendored
@@ -5,7 +5,7 @@ on:
|
||||
inputs:
|
||||
gitref:
|
||||
type: string
|
||||
description: "what git tag to build (e.g. v0.0.74)"
|
||||
description: "what git ref to build"
|
||||
required: true
|
||||
|
||||
jobs:
|
||||
|
||||
30
.gitignore
vendored
@@ -7,7 +7,7 @@ venv
|
||||
/.idea
|
||||
#*#
|
||||
|
||||
# Distribution / Packaging
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
@@ -30,24 +30,24 @@ MANIFEST
|
||||
.env
|
||||
fly.toml
|
||||
|
||||
# Examples
|
||||
examples/telnyx-chatbot/templates/streams.xml
|
||||
examples/twilio-chatbot/templates/streams.xml
|
||||
examples/**/node_modules/
|
||||
examples/**/.expo/
|
||||
examples/**/dist/
|
||||
examples/**/npm-debug.*
|
||||
examples/**/*.jks
|
||||
examples/**/*.p8
|
||||
examples/**/*.p12
|
||||
examples/**/*.key
|
||||
examples/**/*.mobileprovision
|
||||
examples/**/*.orig.*
|
||||
examples/**/web-build/
|
||||
# Example files
|
||||
pipecat/examples/twilio-chatbot/templates/streams.xml
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/node_modules/
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/.expo/
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/dist/
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/npm-debug.*
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/*.jks
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/*.p8
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/*.p12
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/*.key
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/*.mobileprovision
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/*.orig.*
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/web-build/
|
||||
|
||||
# macOS
|
||||
.DS_Store
|
||||
|
||||
|
||||
# Documentation
|
||||
docs/api/_build/
|
||||
docs/api/api
|
||||
@@ -4,5 +4,5 @@ repos:
|
||||
hooks:
|
||||
- id: ruff
|
||||
language_version: python3
|
||||
args: [--fix]
|
||||
args: [ --select, I, ]
|
||||
- id: ruff-format
|
||||
|
||||
1071
CHANGELOG.md
150
CONTRIBUTING.md
@@ -41,150 +41,36 @@ We use Ruff for code linting and formatting. Please ensure your code passes all
|
||||
|
||||
We follow Google-style docstrings with these specific conventions:
|
||||
|
||||
**Regular Classes:**
|
||||
- Class docstrings should fully document all parameters used in `__init__`
|
||||
- We don't require separate docstrings for `__init__` methods when parameters are documented in the class docstring
|
||||
- Property methods should have docstrings explaining their purpose and return value
|
||||
|
||||
- Class docstring describes the class purpose and key functionality
|
||||
- `__init__` method has its own docstring with complete `Args:` section documenting all parameters
|
||||
- All public methods must have docstrings with `Args:` and `Returns:` sections as appropriate
|
||||
|
||||
**Dataclasses:**
|
||||
|
||||
- Class docstring describes the purpose and documents all fields in a `Parameters:` section
|
||||
- No `__init__` docstring (auto-generated)
|
||||
|
||||
**Properties:**
|
||||
|
||||
- Must have docstrings with `Returns:` section
|
||||
|
||||
**Abstract Methods:**
|
||||
|
||||
- Must have docstrings explaining what subclasses should implement
|
||||
|
||||
**`__init__.py` Files:**
|
||||
|
||||
- **Skip docstrings** for pure import/re-export modules
|
||||
- **Add brief docstrings** for top-level packages or those with initialization logic
|
||||
|
||||
**Enums:**
|
||||
|
||||
- Class docstring describes the enumeration purpose
|
||||
- Use `Parameters:` section to document each enum value and its meaning
|
||||
- No `__init__` docstring (Enums don't have custom constructors)
|
||||
|
||||
**Code Examples in Docstrings:**
|
||||
|
||||
- Use `Examples:` as a section header for multiple examples
|
||||
- Use descriptive text followed by double colons (`::`) for each example
|
||||
- **Always include a blank line after the `::`**
|
||||
- Indent all code consistently within each block
|
||||
- Separate multiple examples with blank lines for readability
|
||||
|
||||
**Lists and Bullets in Docstrings:**
|
||||
|
||||
- Use dashes (`-`) for bullet points, not asterisks (`*`)
|
||||
- **Add a blank line before bullet lists** when they follow a colon
|
||||
- Use section headers like "Supported features:" or "Behavior:" before lists
|
||||
- For complex nested information, consider using paragraph format instead
|
||||
|
||||
**Deprecations:**
|
||||
|
||||
- Use `warnings.warn()` in code for runtime deprecation warnings
|
||||
- Add `.. deprecated::` directive in docstrings for documentation visibility
|
||||
- Include version information and describe current status
|
||||
- Describe parameters in present tense, use directive to indicate deprecation status
|
||||
|
||||
#### Examples:
|
||||
Example of correctly documented class:
|
||||
|
||||
```python
|
||||
# Regular class
|
||||
class MyService(BaseService):
|
||||
"""Description of what the service does.
|
||||
class MyClass:
|
||||
"""Class description.
|
||||
|
||||
Provides detailed explanation of the service's functionality,
|
||||
key features, and usage patterns.
|
||||
Additional details about the class.
|
||||
|
||||
Supported features:
|
||||
|
||||
- Feature one with detailed explanation
|
||||
- Feature two with additional context
|
||||
- Feature three for advanced use cases
|
||||
Args:
|
||||
param1: Description of first parameter.
|
||||
param2: Description of second parameter.
|
||||
"""
|
||||
|
||||
def __init__(self, param1: str, old_param: str = None, **kwargs):
|
||||
"""Initialize the service.
|
||||
|
||||
Args:
|
||||
param1: Description of param1.
|
||||
old_param: Controls legacy behavior.
|
||||
|
||||
.. deprecated:: 1.2.0
|
||||
This parameter no longer has any effect and will be removed in version 2.0.
|
||||
|
||||
**kwargs: Additional arguments passed to parent.
|
||||
"""
|
||||
if old_param is not None:
|
||||
import warnings
|
||||
warnings.warn(
|
||||
"Parameter 'old_param' is deprecated and will be removed in version 2.0.",
|
||||
DeprecationWarning,
|
||||
)
|
||||
super().__init__(**kwargs)
|
||||
def __init__(self, param1, param2):
|
||||
# No docstring required here as parameters are documented above
|
||||
self.param1 = param1
|
||||
self.param2 = param2
|
||||
|
||||
@property
|
||||
def sample_rate(self) -> int:
|
||||
"""Get the current sample rate.
|
||||
def some_property(self) -> str:
|
||||
"""Get the formatted property value.
|
||||
|
||||
Returns:
|
||||
The sample rate in Hz.
|
||||
A string representation of the property.
|
||||
"""
|
||||
return self._sample_rate
|
||||
|
||||
async def process_data(self, data: str) -> bool:
|
||||
"""Process the provided data.
|
||||
|
||||
Args:
|
||||
data: The data to process.
|
||||
|
||||
Returns:
|
||||
True if processing succeeded.
|
||||
"""
|
||||
pass
|
||||
|
||||
# Dataclass with code examples
|
||||
@dataclass
|
||||
class MessageFrame:
|
||||
"""Frame containing messages in OpenAI format.
|
||||
|
||||
Supports both simple and content list message formats.
|
||||
|
||||
Example::
|
||||
|
||||
[
|
||||
{"role": "user", "content": "Hello"},
|
||||
{"role": "assistant", "content": "Hi there!"}
|
||||
]
|
||||
|
||||
Parameters:
|
||||
messages: List of messages in OpenAI format.
|
||||
"""
|
||||
|
||||
messages: List[dict]
|
||||
|
||||
# Enum class
|
||||
class Status(Enum):
|
||||
"""Status codes for processing operations.
|
||||
|
||||
Parameters:
|
||||
PENDING: Operation is queued but not started.
|
||||
RUNNING: Operation is currently in progress.
|
||||
COMPLETED: Operation finished successfully.
|
||||
FAILED: Operation encountered an error.
|
||||
"""
|
||||
|
||||
PENDING = "pending"
|
||||
RUNNING = "running"
|
||||
COMPLETED = "completed"
|
||||
FAILED = "failed"
|
||||
return f"Property: {self.param1}"
|
||||
```
|
||||
|
||||
# Contributor Covenant Code of Conduct
|
||||
|
||||
@@ -1,4 +0,0 @@
|
||||
prune docs
|
||||
prune examples
|
||||
prune scripts
|
||||
prune tests
|
||||
33
README.md
@@ -8,8 +8,6 @@
|
||||
|
||||
**Pipecat** is an open-source Python framework for building real-time voice and multimodal conversational agents. Orchestrate audio and video, AI services, different transports, and conversation pipelines effortlessly—so you can focus on what makes your agent unique.
|
||||
|
||||
> Want to dive right in? [Install Pipecat](https://docs.pipecat.ai/getting-started/installation) then try the [quickstart](https://docs.pipecat.ai/getting-started/quickstart).
|
||||
|
||||
## 🚀 What You Can Build
|
||||
|
||||
- **Voice Assistants** – natural, streaming conversations with AI
|
||||
@@ -51,19 +49,18 @@ You can connect to Pipecat from any platform using our official SDKs:
|
||||
|
||||
## 🧩 Available services
|
||||
|
||||
| Category | Services |
|
||||
| ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [Parakeet (NVIDIA)](https://docs.pipecat.ai/server/services/stt/parakeet), [SambaNova (Whisper)](https://docs.pipecat.ai/server/services/stt/sambanova), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Ultravox](https://docs.pipecat.ai/server/services/stt/ultravox), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) |
|
||||
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova), [Together AI](https://docs.pipecat.ai/server/services/llm/together) |
|
||||
| Text-to-Speech | [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [FastPitch (NVIDIA)](https://docs.pipecat.ai/server/services/tts/fastpitch), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
|
||||
| Speech-to-Speech | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai) |
|
||||
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local |
|
||||
| Serializers | [Plivo](https://docs.pipecat.ai/server/utilities/serializers/plivo), [Twilio](https://docs.pipecat.ai/server/utilities/serializers/twilio), [Telnyx](https://docs.pipecat.ai/server/utilities/serializers/telnyx) |
|
||||
| Video | [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) |
|
||||
| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) |
|
||||
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/fal), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) |
|
||||
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [Noisereduce](https://docs.pipecat.ai/server/utilities/audio/noisereduce-filter) |
|
||||
| Analytics & Metrics | [OpenTelemetry](https://docs.pipecat.ai/server/utilities/opentelemetry), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) |
|
||||
| Category | Services |
|
||||
| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [Parakeet (NVIDIA)](https://docs.pipecat.ai/server/services/stt/parakeet), [Ultravox](https://docs.pipecat.ai/server/services/stt/ultravox), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) |
|
||||
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [Together AI](https://docs.pipecat.ai/server/services/llm/together) |
|
||||
| Text-to-Speech | [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [FastPitch (NVIDIA)](https://docs.pipecat.ai/server/services/tts/fastpitch), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
|
||||
| Speech-to-Speech | [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai) |
|
||||
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local |
|
||||
| Video | [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) |
|
||||
| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) |
|
||||
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/fal), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) |
|
||||
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [Noisereduce](https://docs.pipecat.ai/server/utilities/audio/noisereduce-filter) |
|
||||
| Analytics & Metrics | [Canonical AI](https://docs.pipecat.ai/server/services/analytics/canonical), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) |
|
||||
|
||||
📚 [View full services documentation →](https://docs.pipecat.ai/server/services/supported-services)
|
||||
|
||||
@@ -133,12 +130,6 @@ pip install "pipecat-ai[option,...]"
|
||||
|
||||
### Running tests
|
||||
|
||||
Install the test dependencies:
|
||||
|
||||
```shell
|
||||
pip install -r test-requirements.txt
|
||||
```
|
||||
|
||||
From the root directory, run:
|
||||
|
||||
```shell
|
||||
|
||||
@@ -1,20 +1,13 @@
|
||||
build~=1.2.2
|
||||
coverage~=7.9.1
|
||||
coverage~=7.6.12
|
||||
grpcio-tools~=1.67.1
|
||||
pip-tools~=7.4.1
|
||||
pre-commit~=4.2.0
|
||||
pyright~=1.1.402
|
||||
pytest~=8.4.1
|
||||
pytest-asyncio~=1.0.0
|
||||
pre-commit~=4.0.1
|
||||
pyright~=1.1.397
|
||||
pytest~=8.3.4
|
||||
pytest-asyncio~=0.25.3
|
||||
pytest-aiohttp==1.1.0
|
||||
ruff~=0.12.1
|
||||
setuptools~=78.1.1
|
||||
setuptools_scm~=8.3.1
|
||||
python-dotenv~=1.1.1
|
||||
|
||||
# For running examples
|
||||
uvicorn
|
||||
python-dotenv
|
||||
fastapi
|
||||
aiohttp
|
||||
aiortc
|
||||
ruff~=0.11.1
|
||||
setuptools~=70.0.0
|
||||
setuptools_scm~=8.1.0
|
||||
python-dotenv~=1.0.1
|
||||
|
||||
235
docs/api/conf.py
@@ -1,6 +1,5 @@
|
||||
import logging
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
# Configure logging
|
||||
@@ -14,8 +13,7 @@ sys.path.insert(0, str(project_root / "src"))
|
||||
|
||||
# Project information
|
||||
project = "pipecat-ai"
|
||||
current_year = datetime.now().year
|
||||
copyright = f"2024-{current_year}, Daily" if current_year > 2024 else "2024, Daily"
|
||||
copyright = "2024, Daily"
|
||||
author = "Daily"
|
||||
|
||||
# General configuration
|
||||
@@ -26,20 +24,19 @@ extensions = [
|
||||
"sphinx.ext.intersphinx",
|
||||
]
|
||||
|
||||
suppress_warnings = [
|
||||
"autodoc.mocked_object",
|
||||
]
|
||||
|
||||
# Napoleon settings
|
||||
napoleon_google_docstring = True
|
||||
napoleon_numpy_docstring = False
|
||||
napoleon_include_init_with_doc = True
|
||||
|
||||
# AutoDoc settings
|
||||
autodoc_default_options = {
|
||||
"members": True,
|
||||
"member-order": "bysource",
|
||||
"undoc-members": False,
|
||||
"exclude-members": "__weakref__,model_config",
|
||||
"special-members": "__init__",
|
||||
"undoc-members": True,
|
||||
"exclude-members": "__weakref__",
|
||||
"no-index": True,
|
||||
"show-inheritance": True,
|
||||
}
|
||||
|
||||
@@ -53,6 +50,7 @@ autodoc_mock_imports = [
|
||||
"pyht.protos",
|
||||
"pyht.protos.api_pb2",
|
||||
"pipecat_ai_playht", # PlayHT wrapper
|
||||
"vllm",
|
||||
"aiortc",
|
||||
"aiortc.mediastreams",
|
||||
"cv2",
|
||||
@@ -74,146 +72,82 @@ autodoc_mock_imports = [
|
||||
"langchain",
|
||||
"lmnt",
|
||||
"noisereduce",
|
||||
"openai",
|
||||
"openpipe",
|
||||
"simli",
|
||||
"soundfile",
|
||||
"soniox",
|
||||
# Existing mocks
|
||||
"pipecat_ai_krisp",
|
||||
"pyaudio",
|
||||
"_tkinter",
|
||||
"tkinter",
|
||||
"daily",
|
||||
"daily_python",
|
||||
# Moondream dependencies
|
||||
"torch",
|
||||
"transformers",
|
||||
"intel_extension_for_pytorch",
|
||||
# Ultravox dependencies
|
||||
"huggingface_hub",
|
||||
"vllm",
|
||||
"vllm.engine.arg_utils",
|
||||
"transformers.AutoTokenizer",
|
||||
# Langchain dependencies
|
||||
"langchain_core",
|
||||
"langchain_core.messages",
|
||||
"langchain_core.runnables",
|
||||
"langchain_core.messages.AIMessageChunk",
|
||||
"langchain_core.runnables.Runnable",
|
||||
# LiveKit dependencies
|
||||
"livekit",
|
||||
"livekit.rtc",
|
||||
"livekit_api",
|
||||
"livekit_protocol",
|
||||
"tenacity",
|
||||
"tenacity.retry",
|
||||
"tenacity.stop_after_attempt",
|
||||
"tenacity.wait_exponential",
|
||||
"rtc",
|
||||
"rtc.Room",
|
||||
"rtc.RoomOptions",
|
||||
"rtc.AudioSource",
|
||||
"rtc.LocalAudioTrack",
|
||||
"rtc.TrackPublishOptions",
|
||||
"rtc.TrackSource",
|
||||
"rtc.AudioStream",
|
||||
"rtc.AudioFrameEvent",
|
||||
"rtc.AudioFrame",
|
||||
"rtc.Track",
|
||||
"rtc.TrackKind",
|
||||
"rtc.RemoteParticipant",
|
||||
"rtc.RemoteTrackPublication",
|
||||
"rtc.DataPacket",
|
||||
# Riva dependencies
|
||||
"riva",
|
||||
"riva.client",
|
||||
"riva.client.Auth",
|
||||
"riva.client.ASRService",
|
||||
"riva.client.StreamingRecognitionConfig",
|
||||
"riva.client.RecognitionConfig",
|
||||
"riva.client.AudioEncoding",
|
||||
"riva.client.proto.riva_tts_pb2",
|
||||
"riva.client.SpeechSynthesisService",
|
||||
# Local CoreML Smart Turn dependencies
|
||||
"coremltools",
|
||||
"coremltools.models",
|
||||
"coremltools.models.MLModel",
|
||||
"torch",
|
||||
"torch.nn",
|
||||
"torch.nn.functional",
|
||||
"transformers",
|
||||
"transformers.AutoFeatureExtractor",
|
||||
# Also add specific classes that are imported
|
||||
"AutoFeatureExtractor",
|
||||
# Sentry dependencies
|
||||
"sentry_sdk",
|
||||
# AWS Nova Sonic dependencies
|
||||
"aws_sdk_bedrock_runtime",
|
||||
"aws_sdk_bedrock_runtime.client",
|
||||
"aws_sdk_bedrock_runtime.config",
|
||||
"aws_sdk_bedrock_runtime.models",
|
||||
"smithy_aws_core",
|
||||
"smithy_aws_core.credentials_resolvers",
|
||||
"smithy_aws_core.credentials_resolvers.static",
|
||||
"smithy_aws_core.identity",
|
||||
"smithy_core",
|
||||
"smithy_core.aio",
|
||||
"smithy_core.aio.eventstream",
|
||||
# MCP dependencies (you may already have these)
|
||||
"mcp",
|
||||
"mcp.client",
|
||||
"mcp.client.session_group",
|
||||
"mcp.client.sse",
|
||||
"mcp.client.stdio",
|
||||
"mcp.ClientSession",
|
||||
"mcp.StdioServerParameters",
|
||||
# gstreamer
|
||||
"gi",
|
||||
"gi.require_version",
|
||||
"gi.repository",
|
||||
# Protobuf mocks
|
||||
"pipecat.frames.protobufs.frames_pb2",
|
||||
"pipecat.serializers.protobuf",
|
||||
"google.protobuf",
|
||||
"google.protobuf.descriptor",
|
||||
"google.protobuf.descriptor_pool",
|
||||
"google.protobuf.runtime_version",
|
||||
"google.protobuf.symbol_database",
|
||||
"google.protobuf.internal.builder",
|
||||
"pydantic.BaseModel",
|
||||
"pydantic.Field",
|
||||
"pydantic._internal._model_construction",
|
||||
"pydantic._internal._fields",
|
||||
]
|
||||
|
||||
# HTML output settings
|
||||
html_theme = "sphinx_rtd_theme"
|
||||
html_static_path = ["_static"]
|
||||
autodoc_typehints = "signature" # Show type hints in the signature only, not in the docstring
|
||||
autodoc_typehints = "description"
|
||||
html_show_sphinx = False
|
||||
|
||||
|
||||
def import_core_modules():
|
||||
"""Import core pipecat modules for autodoc to discover."""
|
||||
core_modules = [
|
||||
"pipecat",
|
||||
"pipecat.frames",
|
||||
"pipecat.pipeline",
|
||||
"pipecat.processors",
|
||||
"pipecat.services",
|
||||
"pipecat.transports",
|
||||
"pipecat.audio",
|
||||
"pipecat.adapters",
|
||||
"pipecat.clocks",
|
||||
"pipecat.metrics",
|
||||
"pipecat.observers",
|
||||
"pipecat.serializers",
|
||||
"pipecat.sync",
|
||||
"pipecat.transcriptions",
|
||||
"pipecat.utils",
|
||||
]
|
||||
def verify_modules():
|
||||
"""Verify that required modules are available."""
|
||||
required_modules = {
|
||||
"services": [
|
||||
"assemblyai",
|
||||
"aws",
|
||||
"cartesia",
|
||||
"deepgram",
|
||||
"google",
|
||||
"lmnt",
|
||||
"riva",
|
||||
"simli",
|
||||
],
|
||||
"serializers": ["livekit"],
|
||||
"vad": ["silero", "vad_analyzer"],
|
||||
"transports": {
|
||||
"services": ["daily", "livekit"],
|
||||
"local": ["audio", "tk"],
|
||||
"network": ["fastapi_websocket", "websocket_server"],
|
||||
},
|
||||
}
|
||||
|
||||
for module_name in core_modules:
|
||||
try:
|
||||
__import__(module_name)
|
||||
logger.info(f"Successfully imported {module_name}")
|
||||
except ImportError as e:
|
||||
logger.warning(f"Failed to import {module_name}: {e}")
|
||||
missing = []
|
||||
for category, modules in required_modules.items():
|
||||
if isinstance(modules, dict):
|
||||
# Handle nested structure
|
||||
for subcategory, submodules in modules.items():
|
||||
for module in submodules:
|
||||
try:
|
||||
__import__(f"pipecat.{category}.{subcategory}.{module}")
|
||||
logger.info(
|
||||
f"Successfully imported pipecat.{category}.{subcategory}.{module}"
|
||||
)
|
||||
except (ImportError, TypeError, NameError) as e:
|
||||
missing.append(f"pipecat.{category}.{subcategory}.{module}")
|
||||
logger.warning(
|
||||
f"Optional module not available: pipecat.{category}.{subcategory}.{module} - {str(e)}"
|
||||
)
|
||||
else:
|
||||
# Handle flat structure
|
||||
for module in modules:
|
||||
try:
|
||||
__import__(f"pipecat.{category}.{module}")
|
||||
logger.info(f"Successfully imported pipecat.{category}.{module}")
|
||||
except (ImportError, TypeError, NameError) as e:
|
||||
missing.append(f"pipecat.{category}.{module}")
|
||||
logger.warning(
|
||||
f"Optional module not available: pipecat.{category}.{module} - {str(e)}"
|
||||
)
|
||||
|
||||
if missing:
|
||||
logger.warning(f"Some optional modules are not available: {missing}")
|
||||
|
||||
|
||||
def clean_title(title: str) -> str:
|
||||
@@ -225,7 +159,36 @@ def clean_title(title: str) -> str:
|
||||
parts = title.split(".")
|
||||
title = parts[-1]
|
||||
|
||||
return title
|
||||
# Special cases for service names and common acronyms
|
||||
special_cases = {
|
||||
"ai": "AI",
|
||||
"aws": "AWS",
|
||||
"api": "API",
|
||||
"vad": "VAD",
|
||||
"assemblyai": "AssemblyAI",
|
||||
"deepgram": "Deepgram",
|
||||
"elevenlabs": "ElevenLabs",
|
||||
"openai": "OpenAI",
|
||||
"openpipe": "OpenPipe",
|
||||
"playht": "PlayHT",
|
||||
"xtts": "XTTS",
|
||||
"lmnt": "LMNT",
|
||||
}
|
||||
|
||||
# Check if the entire title is a special case
|
||||
if title.lower() in special_cases:
|
||||
return special_cases[title.lower()]
|
||||
|
||||
# Otherwise, capitalize each word
|
||||
words = title.split("_")
|
||||
cleaned_words = []
|
||||
for word in words:
|
||||
if word.lower() in special_cases:
|
||||
cleaned_words.append(special_cases[word.lower()])
|
||||
else:
|
||||
cleaned_words.append(word.capitalize())
|
||||
|
||||
return " ".join(cleaned_words)
|
||||
|
||||
|
||||
def setup(app):
|
||||
@@ -250,8 +213,9 @@ def setup(app):
|
||||
|
||||
excludes = [
|
||||
str(project_root / "src/pipecat/pipeline/to_be_updated"),
|
||||
str(project_root / "src/pipecat/examples"),
|
||||
str(project_root / "src/pipecat/tests"),
|
||||
str(project_root / "src/pipecat/processors/gstreamer"),
|
||||
str(project_root / "src/pipecat/services/to_be_updated"),
|
||||
str(project_root / "src/pipecat/vad"), # deprecated
|
||||
"**/test_*.py",
|
||||
"**/tests/*.py",
|
||||
]
|
||||
@@ -292,4 +256,5 @@ def setup(app):
|
||||
logger.error(f"Error generating API documentation: {e}", exc_info=True)
|
||||
|
||||
|
||||
import_core_modules()
|
||||
# Run module verification
|
||||
verify_modules()
|
||||
|
||||
@@ -1,17 +1,57 @@
|
||||
Pipecat API Reference
|
||||
=====================
|
||||
Pipecat API Reference Docs
|
||||
==========================
|
||||
|
||||
Welcome to the Pipecat API reference.
|
||||
Welcome to Pipecat's API reference documentation!
|
||||
|
||||
Use the navigation on the left to browse modules, or search using the search box.
|
||||
|
||||
**New to Pipecat?** Check out the `main documentation <https://docs.pipecat.ai>`_ for tutorials, guides, and client SDK information.
|
||||
Pipecat is an open source framework for building voice and multimodal assistants.
|
||||
It provides a flexible pipeline architecture for connecting various AI services,
|
||||
audio processing, and transport layers.
|
||||
|
||||
Quick Links
|
||||
-----------
|
||||
|
||||
* `GitHub Repository <https://github.com/pipecat-ai/pipecat>`_
|
||||
* `Join our Community <https://discord.gg/pipecat>`_
|
||||
* `Website <https://pipecat.ai>`_
|
||||
|
||||
API Reference
|
||||
-------------
|
||||
|
||||
Core Components
|
||||
~~~~~~~~~~~~~~~
|
||||
|
||||
* :mod:`Frames <pipecat.frames>`
|
||||
* :mod:`Processors <pipecat.processors>`
|
||||
* :mod:`Pipeline <pipecat.pipeline>`
|
||||
|
||||
Audio Processing
|
||||
~~~~~~~~~~~~~~~~
|
||||
|
||||
* :mod:`Audio <pipecat.audio>`
|
||||
|
||||
Services
|
||||
~~~~~~~~
|
||||
|
||||
* :mod:`Services <pipecat.services>`
|
||||
|
||||
Transport & Serialization
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
* :mod:`Transports <pipecat.transports>`
|
||||
* :mod:`Local <pipecat.transports.local>`
|
||||
* :mod:`Network <pipecat.transports.network>`
|
||||
* :mod:`Services <pipecat.transports.services>`
|
||||
* :mod:`Serializers <pipecat.serializers>`
|
||||
|
||||
Utilities
|
||||
~~~~~~~~~
|
||||
|
||||
* :mod:`Adapters <pipecat.adapters>`
|
||||
* :mod:`Clocks <pipecat.clocks>`
|
||||
* :mod:`Metrics <pipecat.metrics>`
|
||||
* :mod:`Observers <pipecat.observers>`
|
||||
* :mod:`Sync <pipecat.sync>`
|
||||
* :mod:`Transcriptions <pipecat.transcriptions>`
|
||||
* :mod:`Utils <pipecat.utils>`
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 3
|
||||
@@ -31,4 +71,11 @@ Quick Links
|
||||
Sync <api/pipecat.sync>
|
||||
Transcriptions <api/pipecat.transcriptions>
|
||||
Transports <api/pipecat.transports>
|
||||
Utils <api/pipecat.utils>
|
||||
Utils <api/pipecat.utils>
|
||||
|
||||
Indices and tables
|
||||
==================
|
||||
|
||||
* :ref:`genindex`
|
||||
* :ref:`modindex`
|
||||
* :ref:`search`
|
||||
@@ -10,6 +10,7 @@ pipecat-ai[anthropic]
|
||||
pipecat-ai[assemblyai]
|
||||
pipecat-ai[aws]
|
||||
pipecat-ai[azure]
|
||||
pipecat-ai[canonical]
|
||||
pipecat-ai[cartesia]
|
||||
pipecat-ai[cerebras]
|
||||
pipecat-ai[deepseek]
|
||||
@@ -25,29 +26,23 @@ pipecat-ai[grok]
|
||||
pipecat-ai[groq]
|
||||
# pipecat-ai[krisp] # Mocked
|
||||
pipecat-ai[koala]
|
||||
# pipecat-ai[langchain] # Mocked
|
||||
# pipecat-ai[livekit] # Mocked
|
||||
pipecat-ai[langchain]
|
||||
pipecat-ai[livekit]
|
||||
pipecat-ai[lmnt]
|
||||
pipecat-ai[local]
|
||||
# pipecat-ai[local-smart-turn] # Mocked
|
||||
# pipecat-ai[mem0] # Mocked
|
||||
# pipecat-ai[mlx-whisper] # Mocked
|
||||
# pipecat-ai[moondream] # Mocked
|
||||
pipecat-ai[moondream]
|
||||
pipecat-ai[nim]
|
||||
# pipecat-ai[neuphonic] # Mocked
|
||||
pipecat-ai[noisereduce]
|
||||
pipecat-ai[openai]
|
||||
# pipecat-ai[openpipe]
|
||||
# pipecat-ai[playht] # Mocked due to grpcio conflict with riva
|
||||
pipecat-ai[qwen]
|
||||
pipecat-ai[remote-smart-turn]
|
||||
# pipecat-ai[riva] # Mocked
|
||||
pipecat-ai[sambanova]
|
||||
pipecat-ai[riva]
|
||||
pipecat-ai[silero]
|
||||
pipecat-ai[simli]
|
||||
pipecat-ai[soundfile]
|
||||
pipecat-ai[soniox]
|
||||
pipecat-ai[speechmatics]
|
||||
pipecat-ai[tavus]
|
||||
pipecat-ai[together]
|
||||
# pipecat-ai[ultravox] # Mocked
|
||||
|
||||
@@ -95,29 +95,5 @@ OPENROUTER_API_KEY=...
|
||||
PIPER_BASE_URL=...
|
||||
|
||||
# Smart turn
|
||||
LOCAL_SMART_TURN_MODEL_PATH=...
|
||||
FAL_SMART_TURN_API_KEY=...
|
||||
|
||||
# Twilio
|
||||
TWILIO_ACCOUNT_SID=...
|
||||
TWILIO_AUTH_TOKEN=...
|
||||
|
||||
# MiniMax
|
||||
MINIMAX_API_KEY=...
|
||||
MINIMAX_GROUP_ID=...
|
||||
|
||||
# Sarvam AI
|
||||
SARVAM_API_KEY=...
|
||||
|
||||
# Soniox
|
||||
SONIOX_API_KEY=
|
||||
|
||||
# Speechmatics
|
||||
SPEECHMATICS_API_KEY=...
|
||||
|
||||
|
||||
# SambaNova
|
||||
SAMBANOVA_API_KEY=...
|
||||
|
||||
# Sentry
|
||||
SENTRY_DSN=...
|
||||
LOCAL_SMART_TURN_MODEL_PATH=
|
||||
REMOTE_SMART_TURN_URL=
|
||||
@@ -1,60 +0,0 @@
|
||||
# AWS Strands Examples
|
||||
|
||||
This folder contains two Python examples demonstrating how to use Pipecat with the AWS Strands agent.
|
||||
|
||||
## Overview
|
||||
|
||||
These examples show how to delegate complex, multi-step tasks to a Strands agent, which can reason step-by-step and call tools to accomplish user requests.
|
||||
|
||||
These examples are intentionally simplified for demonstration and use mock API calls. They work best if you ask the bot:
|
||||
|
||||
> What's the weather where the Golden Gate Bridge is?
|
||||
|
||||
## Example Scripts
|
||||
|
||||
### `black-box.py`
|
||||
|
||||
A minimal example that demonstrates how to use the Strands agent with Pipecat. The agent can handle multi-step queries by calling tools, but does not explain its reasoning out loud.
|
||||
|
||||
### `explain-thinking.py`
|
||||
|
||||
An enhanced example where the Strands agent explains each step of its reasoning in clear, simple language as it works through a multi-step task.
|
||||
|
||||
## Quick Start
|
||||
|
||||
1. **Clone the repository and navigate to this example:**
|
||||
|
||||
```bash
|
||||
git clone https://github.com/pipecat-ai/pipecat.git
|
||||
cd pipecat/examples/aws-strands
|
||||
```
|
||||
|
||||
2. **Set up a virtual environment:**
|
||||
|
||||
```bash
|
||||
python -m venv venv
|
||||
source venv/bin/activate # On Windows: venv\Scripts\activate
|
||||
```
|
||||
|
||||
3. **Install dependencies:**
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
4. **Configure environment variables:**
|
||||
|
||||
Copy the provided `env.example` file to `.env` and fill in the necessary credentials:
|
||||
|
||||
```bash
|
||||
cp env.example .env
|
||||
# Then edit .env with your preferred editor
|
||||
```
|
||||
|
||||
5. **Run an example:**
|
||||
|
||||
```bash
|
||||
python black-box.py
|
||||
# or
|
||||
python explain-thinking.py
|
||||
```
|
||||
@@ -1,206 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from strands import Agent, tool
|
||||
from strands.models import BedrockModel
|
||||
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.llm_service import FunctionCallParams
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
"""This example demonstrates how to use the Strands agent with Pipecat.
|
||||
|
||||
You can delegate complex, multi-step tasks to the Strands agent, which can cycle through LLM-based reasoning and tool calls to accomplish the task.
|
||||
|
||||
Try asking: "What's the weather where the Golden Gate Bridge is?"
|
||||
"""
|
||||
|
||||
# Strands agent tools
|
||||
|
||||
|
||||
# NOTE(review): the docstring below is presumably what the @tool decorator
# exposes to the model as the tool description — keep its wording stable.
@tool
def get_location_name_from_landmark(landmark: str) -> str:
    """
    Get the location name from a landmark.

    Args:
        landmark (str): The name of the landmark, e.g. "Golden Gate Bridge".
    """
    # Mock lookup: the argument is ignored and every landmark resolves to the
    # same fixed city, so the example runs without a real places API.
    mocked_location = "San Francisco, CA"
    return mocked_location
|
||||
|
||||
|
||||
# NOTE(review): the docstring below is presumably what the @tool decorator
# exposes to the model as the tool description — keep its wording stable.
@tool
def get_lat_long_from_location_name(location: str) -> dict:
    """
    Get the latitude and longitude for a location name.

    Args:
        location (str): The city and state, e.g. "San Francisco, CA".
    """
    # Mock geocoder: the argument is ignored and fixed coordinates are
    # returned, standing in for a real geocoding service.
    mocked_coordinates = dict(lat=37.7749, long=-122.4194)
    return mocked_coordinates
|
||||
|
||||
|
||||
# NOTE(review): the docstring below is presumably what the @tool decorator
# exposes to the model as the tool description — keep its wording stable.
@tool
def get_current_weather_from_lat_long(lat: float, long: float) -> dict:
    """
    Get the current weather for a specific latitude and longitude.

    Args:
        lat (float): The latitude of the location.
        long (float): The longitude of the location.
    """
    # Mock weather service: both arguments are ignored and a fixed report is
    # returned (note the temperature is a string, not a number).
    mocked_report = dict(conditions="nice", temperature="75")
    return mocked_report
|
||||
|
||||
|
||||
# Each entry is a zero-argument factory rather than a ready-made params
# object, so nothing (e.g. SileroVADAnalyzer) is instantiated until the
# factory for the selected transport is actually called.
def _audio_params_factory(params_cls):
    """Return a factory that builds ``params_cls`` with audio in/out and Silero VAD."""

    def build():
        return params_cls(
            audio_in_enabled=True,
            audio_out_enabled=True,
            vad_analyzer=SileroVADAnalyzer(),
        )

    return build


transport_params = {
    "daily": _audio_params_factory(DailyParams),
    "twilio": _audio_params_factory(FastAPIWebsocketParams),
    "webrtc": _audio_params_factory(TransportParams),
}
|
||||
|
||||
|
||||
async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool):
    """Run a voice bot that hands location/weather questions to a Strands agent.

    Builds the Strands agent, the STT/LLM/TTS services and the pipeline, wires
    the transport's connect/disconnect events, and runs the pipeline task.

    Args:
        transport: Transport supplying the pipeline's input and output.
        _: Parsed command-line arguments (unused here).
        handle_sigint: Forwarded to ``PipelineRunner``.
    """
    logger.info("Starting bot")

    agent_tools = [
        get_location_name_from_landmark,
        get_lat_long_from_location_name,
        get_current_weather_from_lat_long,
    ]
    agent_system_prompt = """
You are a helpful personal assistant who can look up information about places and weather.

Your key capabilities:
1. Look up where landmarks are located.
2. Find latitude and longitude for a location.
3. Look up the current weather for a specific latitude and longitude.

Explain each step of your reasoning in clear, simple, and concise language. Your responses will be converted to audio, so avoid special characters and numbered lists.
"""
    strands_agent = Agent(
        model=BedrockModel(
            model_id="us.anthropic.claude-3-7-sonnet-20250219-v1:0", max_tokens=64000
        ),
        tools=agent_tools,
        system_prompt=agent_system_prompt,
    )

    # NOTE(review): this function is registered as a "direct function", so its
    # name and docstring are presumably used to build the tool schema shown to
    # the LLM — keep both stable.
    async def handle_location_or_weather_related_queries(params: FunctionCallParams, query: str):
        """
        Handle location or weather related queries.

        Args:
            query (str): The user's query, e.g. "What's the weather where the Golden Gate Bridge is?".
        """
        # The agent call is synchronous, so it is pushed to the default
        # executor. Running it inline would block the event loop, and the
        # "let me check on that" line would not be heard until it finished.
        event_loop = asyncio.get_running_loop()
        agent_result = await event_loop.run_in_executor(None, strands_agent, query)
        await params.result_callback(agent_result.message)

    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

    tts = CartesiaTTSService(
        api_key=os.getenv("CARTESIA_API_KEY"),
        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
    )

    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
    llm.register_direct_function(handle_location_or_weather_related_queries)

    @llm.event_handler("on_function_calls_started")
    async def on_function_calls_started(service, function_calls):
        # Speak a filler line while the agent works in the background.
        await tts.queue_frame(TTSSpeakFrame("Let me check on that."))

    tools = ToolsSchema(standard_tools=[handle_location_or_weather_related_queries])

    system_message = {
        "role": "system",
        "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way. Start by suggesting that the user ask about the weather where the Golden Gate Bridge is.",
    }
    messages = [system_message]

    context = OpenAILLMContext(messages, tools)
    context_aggregator = llm.create_context_aggregator(context)

    processors = [
        transport.input(),
        stt,
        context_aggregator.user(),
        llm,
        tts,
        transport.output(),
        context_aggregator.assistant(),
    ]
    pipeline = Pipeline(processors)

    task_params = PipelineParams(
        enable_metrics=True,
        enable_usage_metrics=True,
    )
    task = PipelineTask(pipeline, params=task_params)

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info("Client connected")
        # Kick off the conversation.
        await task.queue_frames([context_aggregator.user().get_context_frame()])

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info("Client disconnected")
        await task.cancel()

    runner = PipelineRunner(handle_sigint=handle_sigint)

    await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
@@ -1,8 +0,0 @@
|
||||
OPENAI_API_KEY=
|
||||
CARTESIA_API_KEY=
|
||||
DEEPGRAM_API_KEY=
|
||||
DAILY_API_KEY=
|
||||
DAILY_SAMPLE_ROOM_URL=
|
||||
AWS_SECRET_ACCESS_KEY=
|
||||
AWS_ACCESS_KEY_ID=
|
||||
AWS_REGION=
|
||||
@@ -1,249 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from strands import Agent, tool
|
||||
from strands.models import BedrockModel
|
||||
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.llm_service import FunctionCallParams
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
"""This example demonstrates how to use the Strands agent with Pipecat in a way where the agent explains its reasoning step-by-step.
|
||||
|
||||
You can delegate complex, multi-step tasks to the Strands agent, which can cycle through LLM-based reasoning and tool calls to accomplish the task.
|
||||
|
||||
Try asking: "What's the weather where the Golden Gate Bridge is?"
|
||||
"""
|
||||
|
||||
|
||||
# Strands agent tools
|
||||
|
||||
|
||||
# NOTE(review): the docstring below is presumably what the @tool decorator
# exposes to the model as the tool description — keep its wording stable.
@tool
def get_location_name_from_landmark(landmark: str) -> str:
    """
    Get the location name from a landmark.

    Args:
        landmark (str): The name of the landmark, e.g. "Golden Gate Bridge".
    """
    # Deliberate 3-second stall to imitate a slow lookup; the argument is
    # ignored and a fixed city is returned.
    time.sleep(3)
    mocked_location = "San Francisco, CA"
    return mocked_location
|
||||
|
||||
|
||||
# NOTE(review): the docstring below is presumably what the @tool decorator
# exposes to the model as the tool description — keep its wording stable.
@tool
def get_lat_long_from_location_name(location: str) -> dict:
    """
    Get the latitude and longitude for a location name.

    Args:
        location (str): The city and state, e.g. "San Francisco, CA".
    """
    # Deliberate 3-second stall to imitate a slow geocoding service; the
    # argument is ignored and fixed coordinates are returned.
    time.sleep(3)
    mocked_coordinates = dict(lat=37.7749, long=-122.4194)
    return mocked_coordinates
|
||||
|
||||
|
||||
# NOTE(review): the docstring below is presumably what the @tool decorator
# exposes to the model as the tool description — keep its wording stable.
@tool
def get_current_weather_from_lat_long(lat: float, long: float) -> dict:
    """
    Get the current weather for a specific latitude and longitude.

    Args:
        lat (float): The latitude of the location.
        long (float): The longitude of the location.
    """
    # Deliberate 3-second stall to imitate a slow weather service; both
    # arguments are ignored and a fixed report is returned.
    time.sleep(3)
    mocked_report = dict(conditions="nice", temperature="75")
    return mocked_report
|
||||
|
||||
|
||||
# Each entry is a zero-argument factory rather than a ready-made params
# object, so nothing (e.g. SileroVADAnalyzer) is instantiated until the
# factory for the selected transport is actually called.
def _audio_params_factory(params_cls):
    """Return a factory that builds ``params_cls`` with audio in/out and Silero VAD."""

    def build():
        return params_cls(
            audio_in_enabled=True,
            audio_out_enabled=True,
            vad_analyzer=SileroVADAnalyzer(),
        )

    return build


transport_params = {
    "daily": _audio_params_factory(DailyParams),
    "twilio": _audio_params_factory(FastAPIWebsocketParams),
    "webrtc": _audio_params_factory(TransportParams),
}
|
||||
|
||||
|
||||
async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool):
    """Run a voice bot whose Strands agent speaks its intermediate reasoning.

    Location/weather questions are delegated to a Strands agent running in a
    worker thread. Each intermediate assistant message the agent emits is
    spoken through TTS as it arrives; the final message is returned to the
    LLM as the function-call result instead of being spoken directly.

    Args:
        transport: Transport supplying the pipeline's input and output.
        _: Parsed command-line arguments (unused here).
        handle_sigint: Forwarded to ``PipelineRunner``.
    """
    logger.info("Starting bot")

    # Captured so the agent callback (which runs off the event-loop thread)
    # can hand messages back to the loop safely.
    loop = asyncio.get_running_loop()

    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

    tts = CartesiaTTSService(
        api_key=os.getenv("CARTESIA_API_KEY"),
        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
    )

    # Set once the agent reports "end_turn": the message that follows is the
    # final answer, which goes back through result_callback, not the queue.
    next_strands_message_is_last = False
    strands_messages_queue = asyncio.Queue()

    def strands_callback_handler(**kwargs):
        """
        Handle events from the Strands agent.
        """
        nonlocal next_strands_message_is_last
        if "event" in kwargs:
            event_obj = kwargs["event"]
            if event_obj and "messageStop" in event_obj:
                message_stop = event_obj["messageStop"]
                if message_stop and "stopReason" in message_stop:
                    stop_reason = message_stop["stopReason"]
                    if stop_reason == "end_turn":
                        next_strands_message_is_last = True
        elif "message" in kwargs:
            message_obj = kwargs["message"]
            if message_obj and "content" in message_obj and "role" in message_obj:
                role = message_obj["role"]
                content = message_obj["content"]
                if role == "assistant" and isinstance(content, list):
                    for content_obj in content:
                        if isinstance(content_obj, dict) and "text" in content_obj:
                            message = content_obj["text"]
                            if not next_strands_message_is_last:
                                # asyncio.Queue is not thread-safe and this
                                # handler is invoked from the executor thread,
                                # so schedule the put on the event loop.
                                loop.call_soon_threadsafe(
                                    strands_messages_queue.put_nowait, message
                                )

    async def process_strands_messages():
        # Drain the queue forever, speaking each intermediate message.
        while True:
            message = await strands_messages_queue.get()
            await tts.queue_frame(TTSSpeakFrame(message))
            strands_messages_queue.task_done()

    # Keep a reference: the event loop only holds weak references to tasks,
    # and we need the handle to cancel the consumer on shutdown.
    strands_messages_task = asyncio.create_task(process_strands_messages())

    strands_agent = Agent(
        model=BedrockModel(
            model_id="us.anthropic.claude-3-7-sonnet-20250219-v1:0", max_tokens=64000
        ),
        tools=[
            get_location_name_from_landmark,
            get_lat_long_from_location_name,
            get_current_weather_from_lat_long,
        ],
        system_prompt="""
You are a helpful personal assistant who can look up information about places and weather.

Your key capabilities:
1. Look up where landmarks are located.
2. Find latitude and longitude for a location.
3. Look up the current weather for a specific latitude and longitude.

Explain each step of your reasoning in clear, simple, and concise language. Your responses will be converted to audio, so avoid special characters and numbered lists.
""",
        callback_handler=strands_callback_handler,
    )

    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))

    # NOTE(review): this function is registered as a "direct function", so its
    # name and docstring are presumably used to build the tool schema shown to
    # the LLM — keep both stable.
    async def handle_location_or_weather_related_queries(params: FunctionCallParams, query: str):
        """
        Handle location or weather related queries.

        Args:
            query (str): The user's query, e.g. "What's the weather where the Golden Gate Bridge is?".
        """
        nonlocal next_strands_message_is_last
        # Start every query with the flag cleared; otherwise the "end_turn" of
        # a previous query would silence all thinking messages of this one.
        next_strands_message_is_last = False

        # Run in a background thread
        # (Otherwise the agent blocks the event loop; one effect of that is that we don't hear
        # the agent's "thinking" messages until the agent finishes)
        result = await loop.run_in_executor(None, strands_agent, query)
        await params.result_callback(result.message)

    llm.register_direct_function(handle_location_or_weather_related_queries)

    @llm.event_handler("on_function_calls_started")
    async def on_function_calls_started(service, function_calls):
        await tts.queue_frame(TTSSpeakFrame("Let me check on that."))

    tools = ToolsSchema(standard_tools=[handle_location_or_weather_related_queries])

    messages = [
        {
            "role": "system",
            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way. Start by suggesting that the user ask about the weather where the Golden Gate Bridge is.",
        },
    ]

    context = OpenAILLMContext(messages, tools)
    context_aggregator = llm.create_context_aggregator(context)

    pipeline = Pipeline(
        [
            transport.input(),
            stt,
            context_aggregator.user(),
            llm,
            tts,
            transport.output(),
            context_aggregator.assistant(),
        ]
    )

    task = PipelineTask(
        pipeline,
        params=PipelineParams(
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
    )

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info("Client connected")
        # Kick off the conversation.
        await task.queue_frames([context_aggregator.user().get_context_frame()])

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info("Client disconnected")
        await task.cancel()

    runner = PipelineRunner(handle_sigint=handle_sigint)

    try:
        await runner.run(task)
    finally:
        # Stop the queue consumer so it does not outlive the pipeline.
        strands_messages_task.cancel()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
@@ -1,6 +0,0 @@
|
||||
fastapi
|
||||
uvicorn
|
||||
python-dotenv
|
||||
pipecat-ai[webrtc,daily,deepgram,cartesia]
|
||||
pipecat-ai-small-webrtc-prebuilt
|
||||
strands-agents
|
||||
@@ -12,7 +12,7 @@
|
||||
"license": "ISC",
|
||||
"description": "",
|
||||
"devDependencies": {
|
||||
"vite": "^6.3.5"
|
||||
"vite": "^6.0.9"
|
||||
},
|
||||
"dependencies": {
|
||||
"@daily-co/daily-js": "0.74.0"
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
recordings/
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
@@ -1,12 +1,7 @@
|
||||
FROM python:3.10-bullseye
|
||||
|
||||
RUN mkdir /app
|
||||
RUN mkdir /app/assets
|
||||
RUN mkdir /app/utils
|
||||
COPY *.py /app/
|
||||
COPY requirements.txt /app/
|
||||
|
||||
|
||||
WORKDIR /app
|
||||
RUN pip3 install -r requirements.txt
|
||||
|
||||
66
examples/canonical-metrics/README.md
Normal file
@@ -0,0 +1,66 @@
|
||||
# Chatbot with canonical-metrics
|
||||
|
||||
This project implements a chatbot using a pipeline architecture that integrates audio processing, transcription, and a language model for conversational interactions. The chatbot runs inside a Daily call and uses separate services for text-to-speech and language model responses.
|
||||
|
||||
## Features
|
||||
|
||||
- **Audio Input and Output**: Captures microphone input and plays back audio responses.
|
||||
- **Voice Activity Detection**: Utilizes Silero VAD to manage audio input intelligently.
|
||||
- **Text-to-Speech**: Integrates ElevenLabs TTS service to convert text responses into audio.
|
||||
- **Language Model Interaction**: Uses OpenAI's GPT-4 model to generate responses based on user input.
|
||||
- **Transcription Services**: Captures and transcribes participant speech for analytics.
|
||||
- **Metrics Collection**: Sends audio data for analysis via Canonical Metrics Service.
|
||||
|
||||
## Requirements
|
||||
|
||||
- Python 3.10+
|
||||
- `python-dotenv`
|
||||
- Additional libraries from the `pipecat` package.
|
||||
|
||||
## Setup
|
||||
|
||||
1. Clone the repository.
|
||||
2. Install the required packages.
|
||||
3. Set up environment variables for API keys:
|
||||
- `OPENAI_API_KEY`
|
||||
- `ELEVENLABS_API_KEY`
|
||||
- `CANONICAL_API_KEY`
|
||||
- `CANONICAL_API_URL`
|
||||
4. Run the script.
|
||||
|
||||
## Usage
|
||||
|
||||
The chatbot introduces itself and engages in conversations, providing brief and creative responses. Designed for flexibility, it can support multiple languages with appropriate configuration.
|
||||
|
||||
## Events
|
||||
|
||||
- Participants joining or leaving the call are handled dynamically, adjusting the chatbot's behavior accordingly.
|
||||
|
||||
|
||||
ℹ️ The first run may take extra time to get started, since the VAD (Voice Activity Detection) model needs to be downloaded.
|
||||
|
||||
## Get started
|
||||
|
||||
```bash
|
||||
python3 -m venv venv
|
||||
source venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
|
||||
cp env.example .env # and add your credentials
|
||||
|
||||
```
|
||||
|
||||
## Run the server
|
||||
|
||||
```bash
|
||||
python server.py
|
||||
```
|
||||
|
||||
Then, visit `http://localhost:7860/` in your browser to start a chatbot session.
|
||||
|
||||
## Build and test the Docker image
|
||||
|
||||
```
|
||||
docker build -t chatbot .
|
||||
docker run --env-file .env -p 7860:7860 chatbot
|
||||
```
|
||||
148
examples/canonical-metrics/bot.py
Normal file
@@ -0,0 +1,148 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
import uuid
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import EndFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
|
||||
from pipecat.services.canonical.metrics import CanonicalMetricsService
|
||||
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
    """Run the Daily chatbot and record its audio for Canonical metrics.

    Opens an aiohttp session, joins the configured Daily room, builds the
    LLM -> TTS pipeline with an audio buffer feeding ``CanonicalMetricsService``,
    and runs the pipeline task.
    """
    async with aiohttp.ClientSession() as session:
        room_url, token = await configure(session)

        daily_params = DailyParams(
            audio_out_enabled=True,
            audio_in_enabled=True,
            camera_out_enabled=False,
            vad_enabled=True,
            vad_audio_passthrough=True,
            vad_analyzer=SileroVADAnalyzer(),
            transcription_enabled=True,
            #
            # Spanish
            #
            # transcription_settings=DailyTranscriptionSettings(
            #     language="es",
            #     tier="nova",
            #     model="2-general"
            # )
        )
        transport = DailyTransport(room_url, token, "Chatbot", daily_params)

        tts = ElevenLabsTTSService(
            api_key=os.getenv("ELEVENLABS_API_KEY"),
            #
            # English
            #
            voice_id="cgSgspJ2msm6clMCkdW9",
            #
            # Spanish
            #
            # model="eleven_multilingual_v2",
            # voice_id="gD1IexrzCvsXPHUuT0s3",
        )

        llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))

        system_message = {
            "role": "system",
            #
            # English
            #
            "content": "You are Chatbot, a friendly, helpful robot. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by introducing yourself. Keep all your responses to 12 words or fewer.",
            #
            # Spanish
            #
            # "content": "Eres Chatbot, un amigable y útil robot. Tu objetivo es demostrar tus capacidades de una manera breve. Tus respuestas se convertiran a audio así que nunca no debes incluir caracteres especiales. Contesta a lo que el usuario pregunte de una manera creativa, útil y breve. Empieza por presentarte a ti mismo.",
        }
        messages = [system_message]

        context = OpenAILLMContext(messages)
        context_aggregator = llm.create_context_aggregator(context)

        # CanonicalMetrics uses AudioBufferProcessor under the hood to buffer
        # the audio. On call completion, CanonicalMetrics will send the audio
        # buffer to Canonical for analysis. Visit https://voice.canonical.chat
        # to learn more.
        audio_buffer_processor = AudioBufferProcessor(num_channels=2)
        canonical = CanonicalMetricsService(
            audio_buffer_processor=audio_buffer_processor,
            aiohttp_session=session,
            api_key=os.getenv("CANONICAL_API_KEY"),
            call_id=str(uuid.uuid4()),
            assistant="pipecat-chatbot",
            assistant_speaks_first=True,
            context=context,
        )

        processors = [
            transport.input(),  # microphone
            context_aggregator.user(),
            llm,
            tts,
            transport.output(),
            canonical,  # uploads audio buffer to Canonical AI for metrics
            audio_buffer_processor,  # captures audio into a buffer
            context_aggregator.assistant(),
        ]
        pipeline = Pipeline(processors)

        task_params = PipelineParams(allow_interruptions=True)
        task = PipelineTask(pipeline, params=task_params)

        @transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            # Start recording, begin transcribing the participant, then queue
            # the context frame so the bot speaks first.
            await audio_buffer_processor.start_recording()
            await transport.capture_participant_transcription(participant["id"])
            await task.queue_frames([context_aggregator.user().get_context_frame()])

        @transport.event_handler("on_participant_left")
        async def on_participant_left(transport, participant, reason):
            print(f"Participant left: {participant}")
            await task.cancel()

        @transport.event_handler("on_call_state_updated")
        async def on_call_state_updated(transport, state):
            if state == "left":
                # Here we don't want to cancel, we just want to finish sending
                # whatever is queued, so we use an EndFrame().
                await task.queue_frame(EndFrame())

        runner = PipelineRunner()

        await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -1,5 +1,6 @@
|
||||
DAILY_SAMPLE_ROOM_URL=https://yourdomain.daily.co/yourroom # (for joining the bot to the same room repeatedly for local dev)
|
||||
DAILY_API_KEY=7df...
|
||||
OPENAI_API_KEY=sk-PL...
|
||||
DEEPGRAM_API_KEY=efb...
|
||||
CARTESIA_API_KEY=aeb...
|
||||
ELEVENLABS_API_KEY=aeb...
|
||||
CANONICAL_API_KEY=can...
|
||||
CANONICAL_API_URL=
|
||||
5
examples/canonical-metrics/requirements.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
python-dotenv
|
||||
fastapi[all]
|
||||
uvicorn
|
||||
pipecat-ai[daily,openai,silero,elevenlabs,canonical]
|
||||
|
||||
@@ -66,7 +66,9 @@ async def main():
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
audio_in_enabled=True,
|
||||
video_out_enabled=False,
|
||||
camera_out_enabled=False,
|
||||
vad_enabled=True,
|
||||
vad_audio_passthrough=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
transcription_enabled=True,
|
||||
#
|
||||
@@ -128,15 +130,7 @@ async def main():
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
audio_in_sample_rate=16000,
|
||||
audio_out_sample_rate=16000,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
)
|
||||
task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
|
||||
|
||||
@audiobuffer.event_handler("on_audio_data")
|
||||
async def on_audio_data(buffer, audio, sample_rate, num_channels):
|
||||
|
||||
@@ -53,3 +53,4 @@ async def configure(aiohttp_session: aiohttp.ClientSession):
|
||||
token = await daily_rest_helper.get_token(url, expiry_time)
|
||||
|
||||
return (url, token)
|
||||
return (url, token)
|
||||
|
||||
@@ -1,39 +0,0 @@
|
||||
# Daily Custom Tracks
|
||||
|
||||
This example shows how to send and receive Daily custom tracks. We will run a simple `daily-python` application to send an audio file with a custom track (named "pipecat") to a room. Then, the Pipecat bot will mirror that custom track into another custom track (named "pipecat-mirror") in the same room.
|
||||
|
||||
## Get started
|
||||
|
||||
```bash
|
||||
python3 -m venv venv
|
||||
source venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
## Run the bot
|
||||
|
||||
Start the bot by giving it a Daily room URL.
|
||||
|
||||
```bash
|
||||
python bot.py -u ROOM_URL
|
||||
```
|
||||
|
||||
The bot will wait for the first participant to join. Then, it will mirror a custom track named "pipecat" into a new custom track named "pipecat-mirror".
|
||||
|
||||
## Run the sender
|
||||
|
||||
Now, run the custom track sender. This is a simple `daily-python` application that opens an audio file and sends it as a custom track to the same Daily room.
|
||||
|
||||
```bash
|
||||
python custom_track_sender.py -u ROOM_URL -i office-ambience-mono-16000.mp3
|
||||
```
|
||||
|
||||
## Open client
|
||||
|
||||
Finally, open the client so you can hear both custom tracks.
|
||||
|
||||
```bash
|
||||
open index.html
|
||||
```
|
||||
|
||||
Once the client is opened, copy the URL of the Daily room and join it. You should be able to select which custom track you want to hear.
|
||||
@@ -1,89 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import Frame, InputAudioRawFrame, OutputAudioRawFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
class CustomTrackMirrorProcessor(FrameProcessor):
|
||||
def __init__(self, transport_destination: str, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
self._transport_destination = transport_destination
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, InputAudioRawFrame) and frame.transport_source:
|
||||
output_frame = OutputAudioRawFrame(
|
||||
audio=frame.audio,
|
||||
sample_rate=frame.sample_rate,
|
||||
num_channels=frame.num_channels,
|
||||
)
|
||||
output_frame.transport_destination = self._transport_destination
|
||||
await self.push_frame(output_frame)
|
||||
else:
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
None,
|
||||
"Custom tracks mirror",
|
||||
DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
microphone_out_enabled=False, # Disable since we just use custom tracks
|
||||
audio_out_destinations=["pipecat-mirror"],
|
||||
),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
CustomTrackMirrorProcessor("pipecat-mirror"),
|
||||
transport.output(), # Transport bot output
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
audio_in_sample_rate=16000,
|
||||
audio_out_sample_rate=16000,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
await transport.capture_participant_audio(participant["id"], audio_source="pipecat")
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -1,74 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import time
|
||||
|
||||
from daily import CallClient, CustomAudioSource, Daily
|
||||
from pydub import AudioSegment
|
||||
|
||||
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
|
||||
parser.add_argument("-u", "--url", type=str, required=True, help="URL of the Daily room to join")
|
||||
parser.add_argument(
|
||||
"-i", "--input", type=str, required=True, help="Input audio file (needs 16000 sample rate)"
|
||||
)
|
||||
|
||||
args, _ = parser.parse_known_args()
|
||||
|
||||
audio = AudioSegment.from_mp3(args.input)
|
||||
|
||||
raw_bytes = audio.raw_data
|
||||
sample_rate = audio.frame_rate
|
||||
channels = audio.channels
|
||||
|
||||
print(f"Length: {len(raw_bytes)} bytes")
|
||||
print(f"Sample rate: {sample_rate}, Channels: {channels}")
|
||||
|
||||
# Initialize the Daily context & create call client
|
||||
Daily.init()
|
||||
|
||||
client = CallClient()
|
||||
|
||||
# Join the room and indicate we have a custom track named "pipecat".
|
||||
client.join(
|
||||
args.url,
|
||||
client_settings={
|
||||
"publishing": {
|
||||
"camera": False,
|
||||
"microphone": False,
|
||||
"customAudio": {"pipecat": True},
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
# Just sleep for a couple of seconds. To do this well we should really use
|
||||
# completions.
|
||||
time.sleep(2)
|
||||
|
||||
# Create the custom audio source. This is where we will write our audio.
|
||||
audio_source = CustomAudioSource(sample_rate, channels)
|
||||
|
||||
# Create an audio track and assign it our audio source.
|
||||
client.add_custom_audio_track("pipecat", audio_source)
|
||||
|
||||
# Just sleep for a second. To do this well we should really use completions.
|
||||
time.sleep(1)
|
||||
|
||||
try:
|
||||
# Just write one second of audio until we have read all the file.
|
||||
chunk_size = sample_rate * channels * 2
|
||||
while len(raw_bytes) > 0:
|
||||
chunk = raw_bytes[:chunk_size]
|
||||
raw_bytes = raw_bytes[chunk_size:]
|
||||
audio_source.write_frames(chunk)
|
||||
|
||||
except KeyboardInterrupt:
|
||||
client.leave()
|
||||
|
||||
# Just sleep for a second. To do this well we should really use completions.
|
||||
time.sleep(1)
|
||||
|
||||
client.release()
|
||||
@@ -1,173 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>daily custom tracks</title>
|
||||
</head>
|
||||
<script crossorigin src="https://unpkg.com/@daily-co/daily-js"></script>
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/fomantic-ui/2.8.6/semantic.min.js"></script>
|
||||
<link
|
||||
rel="stylesheet"
|
||||
type="text/css"
|
||||
href="https://cdnjs.cloudflare.com/ajax/libs/fomantic-ui/2.8.6/semantic.min.css"
|
||||
/>
|
||||
<script>
|
||||
function enableButton(buttonId, enable) {
|
||||
const button = document.getElementById(buttonId);
|
||||
button.disabled = !enable;
|
||||
}
|
||||
|
||||
function enableJoinButton(enable) {
|
||||
enableButton("join-button", enable);
|
||||
}
|
||||
|
||||
function enableLeaveButton(enable) {
|
||||
enableButton("leave-button", enable);
|
||||
}
|
||||
|
||||
function destroyPlayers(query) {
|
||||
const items = document.querySelectorAll(query);
|
||||
if (items) {
|
||||
for (const item of items) {
|
||||
item.remove();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function destroyParticipantPlayers(participantId) {
|
||||
destroyPlayers(`audio[data-participant-id="${participantId}"]`);
|
||||
destroyPlayers(`button[data-participant-id="${participantId}"]`);
|
||||
}
|
||||
|
||||
async function startPlayer(player, track) {
|
||||
player.muted = false;
|
||||
player.autoplay = true;
|
||||
if (track != null) {
|
||||
player.srcObject = new MediaStream([track]);
|
||||
}
|
||||
}
|
||||
|
||||
async function buildAudioPlayer(track, participantId) {
|
||||
const audioContainer = document.getElementById("audio-container");
|
||||
const player = document.createElement("audio");
|
||||
player.dataset.participantId = participantId;
|
||||
|
||||
// Create a new button for controlling audio
|
||||
const audioControlButton = document.createElement("button");
|
||||
audioControlButton.className = "ui primary green button"
|
||||
audioControlButton.innerText = track._mediaTag == "cam-audio" ? "english" : track._mediaTag;
|
||||
audioControlButton.dataset.participantId = participantId;
|
||||
audioControlButton.onclick = () => {
|
||||
if (player.paused) {
|
||||
|
||||
player.play();
|
||||
audioControlButton.className = "ui primary red button"
|
||||
} else {
|
||||
player.pause();
|
||||
audioControlButton.className = "ui primary green button"
|
||||
}
|
||||
};
|
||||
|
||||
audioContainer.appendChild(player);
|
||||
audioContainer.appendChild(audioControlButton);
|
||||
|
||||
await startPlayer(player, track);
|
||||
player.pause()
|
||||
|
||||
return player;
|
||||
}
|
||||
|
||||
function subscribeToTracks(participantId) {
|
||||
console.log(`subscribing to track`);
|
||||
|
||||
if (participantId === "local") {
|
||||
return;
|
||||
}
|
||||
|
||||
callObject.updateParticipant(participantId, {
|
||||
setSubscribedTracks: {
|
||||
audio: true,
|
||||
video: false,
|
||||
custom: true,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
function startDaily() {
|
||||
enableJoinButton(true);
|
||||
enableLeaveButton(false);
|
||||
|
||||
window.callObject = window.DailyIframe.createCallObject({});
|
||||
|
||||
callObject.on("participant-joined", (e) => {
|
||||
if (!e.participant.local) {
|
||||
console.log("participant-joined", e.participant);
|
||||
subscribeToTracks(e.participant.session_id);
|
||||
}
|
||||
});
|
||||
|
||||
callObject.on("participant-left", (e) => {
|
||||
console.log("participant-left", e.participant.session_id);
|
||||
destroyParticipantPlayers(e.participant.session_id);
|
||||
});
|
||||
|
||||
callObject.on("track-started", async (e) => {
|
||||
console.log("track-started", e.track);
|
||||
if (e.track.kind === "audio") {
|
||||
await buildAudioPlayer(e.track, e.participant.session_id);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
async function joinRoom() {
|
||||
enableJoinButton(false);
|
||||
enableLeaveButton(true);
|
||||
|
||||
const meetingUrl = document.getElementById("meeting-url").value;
|
||||
|
||||
callObject.join({
|
||||
url: meetingUrl,
|
||||
startVideoOff: true,
|
||||
startAudioOff: true,
|
||||
subscribeToTracksAutomatically: false,
|
||||
receiveSettings: {
|
||||
base: { video: { layer: 0 } },
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
async function leaveRoom() {
|
||||
enableJoinButton(true);
|
||||
enableLeaveButton(false);
|
||||
|
||||
callObject.leave();
|
||||
|
||||
const audioContainer = document.getElementById("audio-container");
|
||||
audioContainer.replaceChildren();
|
||||
}
|
||||
</script>
|
||||
|
||||
<body onload="startDaily()">
|
||||
<div class="ui centered page grid" style="margin-top: 30px">
|
||||
<div class="ten wide column">
|
||||
<div class="ui form" style="margin-top: 30px">
|
||||
<div class="field">
|
||||
<label>Meeting URL</label>
|
||||
<input id="meeting-url" value="" />
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="ui centered aligned header" style="margin-top: 30px">
|
||||
<button id="join-button" class="ui primary button" onclick="joinRoom()">
|
||||
Join
|
||||
</button>
|
||||
<button id="leave-button" class="ui button" onclick="leaveRoom()">
|
||||
Leave
|
||||
</button>
|
||||
</div>
|
||||
<div id="tile" class="ui container" style="margin-top: 30px">
|
||||
<div id="tile" class="ui center aligned grid">
|
||||
<div id="audio-container"></div><br/>
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,2 +0,0 @@
|
||||
pydub
|
||||
pipecat-ai[daily]
|
||||
@@ -1,39 +0,0 @@
|
||||
# Daily Multi Translation
|
||||
|
||||
This example shows how to use Daily to stream multiple simultaneous translations using a single transport. Daily provides custom tracks and in this example we will simultaneously translate incoming audio in English to Spanish, French and German, each of them being sent to a custom track.
|
||||
|
||||
## Get started
|
||||
|
||||
```bash
|
||||
python3 -m venv venv
|
||||
source venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
|
||||
cp env.example .env # and add your credentials
|
||||
|
||||
```
|
||||
|
||||
## Run the server
|
||||
|
||||
```bash
|
||||
python server.py
|
||||
```
|
||||
|
||||
Then, visit `http://localhost:7860/` in your browser. This will open a Daily Prebuilt room where you will speak in English (make sure you are not muted).
|
||||
|
||||
## Open client
|
||||
|
||||
Next, you need to open the client that will listen to the translations.
|
||||
|
||||
```bash
|
||||
open index.html
|
||||
```
|
||||
|
||||
Once the client is opened, copy the URL of the Daily room created above and join it. You should be able to select which translation you want to hear.
|
||||
|
||||
## Build and test the Docker image
|
||||
|
||||
```
|
||||
docker build -t daily-multi-translation .
|
||||
docker run --env-file .env -p 7860:7860 daily-multi-translation
|
||||
```
|
||||
@@ -1,163 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.mixers.soundfile_mixer import SoundfileMixer
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.observers.loggers.transcription_log_observer import TranscriptionLogObserver
|
||||
from pipecat.pipeline.parallel_pipeline import ParallelPipeline
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
BACKGROUND_SOUND_FILE = "office-ambience-mono-16000.mp3"
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Multi translation bot",
|
||||
DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
audio_out_mixer={
|
||||
"spanish": SoundfileMixer(
|
||||
sound_files={"office": BACKGROUND_SOUND_FILE}, default_sound="office"
|
||||
),
|
||||
"french": SoundfileMixer(
|
||||
sound_files={"office": BACKGROUND_SOUND_FILE}, default_sound="office"
|
||||
),
|
||||
"german": SoundfileMixer(
|
||||
sound_files={"office": BACKGROUND_SOUND_FILE}, default_sound="office"
|
||||
),
|
||||
},
|
||||
audio_out_destinations=["spanish", "french", "german"],
|
||||
microphone_out_enabled=False, # Disable since we just use custom tracks
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts_spanish = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="cefcb124-080b-4655-b31f-932f3ee743de",
|
||||
transport_destination="spanish",
|
||||
)
|
||||
tts_french = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="8832a0b5-47b2-4751-bb22-6a8e2149303d",
|
||||
transport_destination="french",
|
||||
)
|
||||
tts_german = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="38aabb6a-f52b-4fb0-a3d1-988518f4dc06",
|
||||
transport_destination="german",
|
||||
)
|
||||
|
||||
messages_spanish = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You will be provided with a sentence in English, and your task is to only translate it into Spanish.",
|
||||
},
|
||||
]
|
||||
messages_french = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You will be provided with a sentence in English, and your task is to only translate it into French.",
|
||||
},
|
||||
]
|
||||
messages_german = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You will be provided with a sentence in English, and your task is to only translate it into German.",
|
||||
},
|
||||
]
|
||||
|
||||
llm_spanish = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
llm_french = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
llm_german = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
context_spanish = OpenAILLMContext(messages_spanish)
|
||||
context_aggregator_spanish = llm_spanish.create_context_aggregator(context_spanish)
|
||||
|
||||
context_french = OpenAILLMContext(messages_french)
|
||||
context_aggregator_french = llm_french.create_context_aggregator(context_french)
|
||||
|
||||
context_german = OpenAILLMContext(messages_german)
|
||||
context_aggregator_german = llm_german.create_context_aggregator(context_german)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt,
|
||||
ParallelPipeline(
|
||||
# Spanish pipeline.
|
||||
[
|
||||
context_aggregator_spanish.user(),
|
||||
llm_spanish,
|
||||
tts_spanish,
|
||||
context_aggregator_spanish.assistant(),
|
||||
],
|
||||
# French pipeline.
|
||||
[
|
||||
context_aggregator_french.user(),
|
||||
llm_french,
|
||||
tts_french,
|
||||
context_aggregator_french.assistant(),
|
||||
],
|
||||
# German pipeline.
|
||||
[
|
||||
context_aggregator_german.user(),
|
||||
llm_german,
|
||||
tts_german,
|
||||
context_aggregator_german.assistant(),
|
||||
],
|
||||
),
|
||||
transport.output(), # Transport bot output
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
audio_in_sample_rate=16000,
|
||||
audio_out_sample_rate=16000,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
observers=[TranscriptionLogObserver()],
|
||||
)
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -1,202 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>daily multi translation</title>
|
||||
</head>
|
||||
<script crossorigin src="https://unpkg.com/@daily-co/daily-js"></script>
|
||||
<script
|
||||
src="https://code.jquery.com/jquery-3.1.1.min.js"
|
||||
integrity="sha256-hVVnYaiADRTO2PzUGmuLJr8BLUSjGIZsDYGmIJLv2b8="
|
||||
crossorigin="anonymous"
|
||||
></script>
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/fomantic-ui/2.8.6/semantic.min.js"></script>
|
||||
<link
|
||||
rel="stylesheet"
|
||||
type="text/css"
|
||||
href="https://cdnjs.cloudflare.com/ajax/libs/fomantic-ui/2.8.6/semantic.min.css"
|
||||
/>
|
||||
<script>
|
||||
function enableButton(buttonId, enable) {
|
||||
const button = document.getElementById(buttonId);
|
||||
button.disabled = !enable;
|
||||
}
|
||||
|
||||
function enableJoinButton(enable) {
|
||||
enableButton("join-button", enable);
|
||||
}
|
||||
|
||||
function enableLeaveButton(enable) {
|
||||
enableButton("leave-button", enable);
|
||||
}
|
||||
|
||||
function destroyPlayers(query) {
|
||||
const items = document.querySelectorAll(query);
|
||||
if (items) {
|
||||
for (const item of items) {
|
||||
item.remove();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function destroyParticipantPlayers(participantId) {
|
||||
destroyPlayers(`video[data-participant-id="${participantId}"]`);
|
||||
destroyPlayers(`audio[data-participant-id="${participantId}"]`);
|
||||
destroyPlayers(`button[data-participant-id="${participantId}"]`);
|
||||
}
|
||||
|
||||
async function startPlayer(player, track) {
|
||||
player.muted = false;
|
||||
player.autoplay = true;
|
||||
if (track != null) {
|
||||
player.srcObject = new MediaStream([track]);
|
||||
}
|
||||
}
|
||||
|
||||
async function buildVideoPlayer(track, participantId) {
|
||||
const videoContainer = document.getElementById("video-container");
|
||||
const player = document.createElement("video");
|
||||
player.dataset.participantId = participantId;
|
||||
|
||||
videoContainer.appendChild(player);
|
||||
|
||||
await startPlayer(player, track);
|
||||
await player.play();
|
||||
|
||||
return player;
|
||||
}
|
||||
|
||||
async function buildAudioPlayer(track, participantId) {
|
||||
const audioContainer = document.getElementById("audio-container");
|
||||
const player = document.createElement("audio");
|
||||
player.dataset.participantId = participantId;
|
||||
|
||||
// Create a new button for controlling audio
|
||||
const audioControlButton = document.createElement("button");
|
||||
audioControlButton.className = "ui primary green button"
|
||||
audioControlButton.innerText = track._mediaTag == "cam-audio" ? "english" : track._mediaTag;
|
||||
audioControlButton.dataset.participantId = participantId;
|
||||
audioControlButton.onclick = () => {
|
||||
if (player.paused) {
|
||||
|
||||
player.play();
|
||||
audioControlButton.className = "ui primary red button"
|
||||
} else {
|
||||
player.pause();
|
||||
audioControlButton.className = "ui primary green button"
|
||||
}
|
||||
};
|
||||
|
||||
audioContainer.appendChild(player);
|
||||
audioContainer.appendChild(audioControlButton);
|
||||
|
||||
await startPlayer(player, track);
|
||||
player.pause()
|
||||
|
||||
return player;
|
||||
}
|
||||
|
||||
function subscribeToTracks(participantId) {
|
||||
console.log(`subscribing to track`);
|
||||
|
||||
if (participantId === "local") {
|
||||
return;
|
||||
}
|
||||
|
||||
callObject.updateParticipant(participantId, {
|
||||
setSubscribedTracks: {
|
||||
audio: true,
|
||||
video: true,
|
||||
custom: true,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
function startDaily() {
|
||||
enableJoinButton(true);
|
||||
enableLeaveButton(false);
|
||||
|
||||
window.callObject = window.DailyIframe.createCallObject({});
|
||||
|
||||
callObject.on("participant-joined", (e) => {
|
||||
if (!e.participant.local) {
|
||||
console.log("participant-joined", e.participant);
|
||||
subscribeToTracks(e.participant.session_id);
|
||||
}
|
||||
});
|
||||
|
||||
callObject.on("participant-left", (e) => {
|
||||
console.log("participant-left", e.participant.session_id);
|
||||
destroyParticipantPlayers(e.participant.session_id);
|
||||
});
|
||||
|
||||
callObject.on("track-started", async (e) => {
|
||||
console.log("track-started", e.track);
|
||||
if (e.track.kind === "video") {
|
||||
await buildVideoPlayer(e.track, e.participant.session_id);
|
||||
} else if (e.track.kind === "audio") {
|
||||
await buildAudioPlayer(e.track, e.participant.session_id);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
async function joinRoom() {
|
||||
enableJoinButton(false);
|
||||
enableLeaveButton(true);
|
||||
|
||||
const meetingUrl = document.getElementById("meeting-url").value;
|
||||
|
||||
callObject.join({
|
||||
url: meetingUrl,
|
||||
startVideoOff: true,
|
||||
startAudioOff: true,
|
||||
subscribeToTracksAutomatically: false,
|
||||
receiveSettings: {
|
||||
base: { video: { layer: 0 } },
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
async function leaveRoom() {
|
||||
enableJoinButton(true);
|
||||
enableLeaveButton(false);
|
||||
|
||||
callObject.leave();
|
||||
|
||||
const videoContainer = document.getElementById("video-container");
|
||||
videoContainer.replaceChildren();
|
||||
|
||||
const audioContainer = document.getElementById("audio-container");
|
||||
audioContainer.replaceChildren();
|
||||
}
|
||||
</script>
|
||||
|
||||
<body onload="startDaily()">
|
||||
<div class="ui centered page grid" style="margin-top: 30px">
|
||||
<div class="ten wide column">
|
||||
<div class="ui form" style="margin-top: 30px">
|
||||
<div class="field">
|
||||
<label>Meeting URL</label>
|
||||
<input id="meeting-url" value="" />
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="ui centered aligned header" style="margin-top: 30px">
|
||||
<button id="join-button" class="ui primary button" onclick="joinRoom()">
|
||||
Join
|
||||
</button>
|
||||
<button id="leave-button" class="ui button" onclick="leaveRoom()">
|
||||
Leave
|
||||
</button>
|
||||
</div>
|
||||
<div id="tile" class="ui container" style="margin-top: 30px">
|
||||
<div id="tile" class="ui center aligned grid">
|
||||
<div id="audio-container"></div><br/>
|
||||
</div>
|
||||
</div>
|
||||
<div id="tile" class="ui container" style="margin-top: 30px">
|
||||
<div id="tile" class="ui center aligned grid">
|
||||
<div id="video-container" class="ui segment"></div>
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,5 +0,0 @@
|
||||
aiofiles
|
||||
python-dotenv
|
||||
fastapi[all]
|
||||
uvicorn
|
||||
pipecat-ai[daily,deepgram,openai,silero,cartesia,soundfile]
|
||||
@@ -41,7 +41,8 @@ async def main(room_url: str, token: str):
|
||||
api_key=daily_api_key,
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
video_out_enabled=False,
|
||||
camera_out_enabled=False,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
transcription_enabled=True,
|
||||
),
|
||||
@@ -75,13 +76,7 @@ async def main(room_url: str, token: str):
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
)
|
||||
task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
|
||||
3
examples/deployment/modal-example/.gitignore
vendored
@@ -1,6 +1,3 @@
|
||||
# Modal clone
|
||||
modal-examples
|
||||
|
||||
# Python
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
|
||||
@@ -1,91 +1,37 @@
|
||||
# Deploying Pipecat to Modal.com
|
||||
|
||||
Deployment example for [modal.com](https://www.modal.com). This example demonstrates how to deploy a FastAPI webapp to Modal with an RTVI compatible `/connect` endpoint that launches a Pipecat pipeline in a separate Modal container and returns a room/token for the client to join. This example also supports providing a parameter to the `/connect` endpoint for specifying which Pipecat pipeline to launch; openai, gemini, or vllm. The vllm pipeline points to a self-hosted OpenAI compatible LLM, using a llama model (neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w4a16), deployed to Modal.
|
||||
Barebones deployment example for [modal.com](https://www.modal.com)
|
||||
|
||||

|
||||
1. Install dependencies
|
||||
|
||||
# Running this Example
|
||||
```bash
|
||||
python -m venv venv
|
||||
source venv/bin/activate # or OS equivalent
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
## Install the Modal CLI
|
||||
2. Set up .env
|
||||
|
||||
Set up a Modal account and install the Modal CLI on your machine if you have not already, following the three easy steps in their [Getting Started Guide](https://modal.com/docs/guide#getting-started).
|
||||
```bash
|
||||
cp env.example .env
|
||||
```
|
||||
|
||||
## Deploy a self-serve LLM
|
||||
Alternatively, you can configure your Modal app to use [secrets](https://modal.com/docs/guide/secrets)
|
||||
|
||||
1. Deploy Modal's OpenAI-compatible LLM service:
|
||||
3. Test the app locally
|
||||
|
||||
```bash
|
||||
git clone https://github.com/modal-labs/modal-examples
|
||||
cd modal-examples
|
||||
modal deploy 06_gpu_and_ml/llm-serving/vllm_inference.py
|
||||
```
|
||||
|
||||
Refer to Modal's guide and example for [Deploying an OpenAI-compatible LLM service with vLLM](https://modal.com/docs/examples/vllm_inference) for more details.
|
||||
|
||||
2. Take note of the endpoint URL from the previous step, which will look like:
|
||||
```
|
||||
https://{your-workspace}--example-vllm-openai-compatible-serve.modal.run
|
||||
```
|
||||
You'll need this for the `bot_vllm.py` file in the next section.
|
||||
|
||||
**Note:** The default Modal LLM example uses Llama-3.1 and will shut down after 15 minutes of inactivity. Cold starts take 5-10 minutes. To prepare the service, we recommend visiting the `/docs` endpoint (`https://<Modal workspace>--example-vllm-openai-compatible-serve.modal.run/docs`) for your deployed LLM and wait for it to fully load before connecting your client.
|
||||
|
||||
## Deploy FastAPI App and Pipecat pipeline to Modal
|
||||
|
||||
1. Set up environment variables
|
||||
|
||||
```bash
|
||||
cd server
|
||||
cp env.example .env
|
||||
# Modify .env to provide your service API Keys
|
||||
```
|
||||
|
||||
Alternatively, you can configure your Modal app to use [secrets](https://modal.com/docs/guide/secrets)
|
||||
|
||||
2. Update the `modal_url` in `server/src/bot_vllm.py` to point to the URL produced by the self-serve LLM deployment described above.
|
||||
|
||||
3. From within the `server` directory, test the app locally:
|
||||
|
||||
```bash
|
||||
modal serve app.py
|
||||
```
|
||||
```bash
|
||||
modal serve app.py
|
||||
```
|
||||
|
||||
4. Deploy to production
|
||||
|
||||
```bash
|
||||
modal deploy app.py
|
||||
```
|
||||
```bash
|
||||
modal deploy app.py
|
||||
```
|
||||
|
||||
5. Note the endpoint URL produced from this deployment. It will look like:
|
||||
## Configuration options
|
||||
|
||||
```bash
|
||||
https://{your-workspace}--pipecat-modal-fastapi-app.modal.run
|
||||
```
|
||||
This app sets some sensible defaults for reducing cold starts, such as `min_containers=1`, which will keep at least 1 warm instance ready for your bot function.
|
||||
|
||||
You'll need this URL for the client's `app.js` configuration mentioned in its README.
|
||||
|
||||
## Launch your bots on Modal
|
||||
|
||||
### Option 1: Direct Link
|
||||
|
||||
Simply click on the url displayed after running the server or deploy step to launch an agent and be redirected to a Daily room to talk with the launched bot. This will use the OpenAI pipeline.
|
||||
|
||||
### Option 2: Connect via an RTVI Client
|
||||
|
||||
Follow the instructions provided in the [client folder's README](client/javascript/README.md) for building and running a custom client that connects to your Modal endpoint. The provided client provides a dropdown for choosing which bot pipeline to run.
|
||||
|
||||
# Navigating your LLM, server, and Pipecat logs
|
||||
|
||||
In your [Modal dashboard](https://modal.com/apps), you should have two Apps listed under Live Apps:
|
||||
|
||||
1. `example-vllm-openai-compatible`: This App contains the containers and logs used to run your self-hosted LLM. There will be just one App Function listed: `serve`. Click on this function to view logs for your LLM.
|
||||
2. `pipecat-modal`: This App contains the containers and logs used to run your `connect` endpoints and Pipecat pipelines. It will list two App Functions:
|
||||
1. `fastapi_app`: This function is running the endpoints that your client will interact with and initiate starting a new pipeline (`/`, `/connect`, `/status`). Click on this function to see logs for each endpoint hit.
|
||||
2. `bot_runner`: This function handles launching and running a bot pipeline. Click on this function to get a list of all pipeline runs and access each run's logs.
|
||||
|
||||
# Modal + Pipecat Tips
|
||||
|
||||
- In most other Pipecat examples, we use `Popen` to launch the pipeline process from the `/connect` endpoint. In this example, we use a Modal function instead. This allows us to run the pipelines using a separately defined Modal image as well as run each pipeline in an isolated container.
|
||||
- For the FastAPI and most common Pipecat Pipeline containers, a default CPU-only `debian_slim` image should be all that's required to run. GPU containers are needed for self-hosted services.
|
||||
- To minimize cold starts of the pipeline and reduce latency for users, set `min_containers=1` on the Modal Function that launches the pipeline to ensure at least one warm instance of your function is always available.
|
||||
- For next steps on running a self-hosted LLM and reducing latency, check out all of [Modal's LLM examples](https://modal.com/docs/examples/vllm_inference).
|
||||
The bot function has also been configured with `max_inputs=1`, so a container is not reused across requests, as each user will require their own running function.
|
||||
83
examples/deployment/modal-example/app.py
Normal file
@@ -0,0 +1,83 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import os
|
||||
|
||||
import aiohttp
|
||||
import modal
|
||||
from bot import _voice_bot_process
|
||||
from fastapi import HTTPException
|
||||
from fastapi.responses import RedirectResponse
|
||||
from loguru import logger
|
||||
|
||||
MAX_SESSION_TIME = 15 * 60 # 15 minutes
|
||||
|
||||
image = (
|
||||
modal.Image.debian_slim(python_version="3.13")
|
||||
.apt_install("ffmpeg")
|
||||
.pip_install_from_requirements("requirements.txt")
|
||||
.pip_install("pipecat-ai[daily,silero,cartesia,openai]")
|
||||
.add_local_python_source("bot")
|
||||
)
|
||||
|
||||
app = modal.App("pipecat-modal", image=image)
|
||||
|
||||
|
||||
@app.function(
    image=image,
    secrets=[modal.Secret.from_dotenv()],
    cpu=1.0,
    retries=0,
    # Keep one container available so a new session does not start cold.
    min_containers=1,
    enable_memory_snapshot=True,
    # Do not reuse instances across requests
    max_inputs=1,
)
def launch_bot_process(room_url: str, token: str) -> None:
    """Modal function that runs one voice bot session.

    Hands the room URL and token straight to ``_voice_bot_process`` from the
    ``bot`` module.

    Args:
        room_url: URL of the Daily room the bot should join.
        token: Daily meeting token the bot uses to join that room.
    """
    _voice_bot_process(room_url, token)
|
||||
|
||||
|
||||
@app.function(
    image=image,
    secrets=[modal.Secret.from_dotenv()],
)
@modal.fastapi_endpoint(method="GET")
async def start():
    """Create a Daily room, launch a bot into it, and redirect the caller there.

    Steps:
      1. Create a new Daily room through ``DailyRESTHelper``.
      2. Request a meeting token for the bot, passing ``MAX_SESSION_TIME``
         (presumably the token lifetime in seconds -- confirm against
         ``DailyRESTHelper.get_token``).
      3. Spawn ``launch_bot_process`` with the room URL and token.
      4. Redirect the HTTP caller to the room URL.

    Returns:
        RedirectResponse: redirect to the newly created room URL.

    Raises:
        HTTPException: status 500 if the room has no URL or no token could be
            obtained.
    """
    # Imported lazily inside the function, as in the original.
    from pipecat.transports.services.helpers.daily_rest import (
        DailyRESTHelper,
        DailyRoomParams,
    )

    logger.info("Request received")

    async with aiohttp.ClientSession() as session:
        daily_rest_helper = DailyRESTHelper(
            daily_api_key=os.getenv("DAILY_API_KEY", ""),
            daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
            aiohttp_session=session,
        )

        # Create new Daily room
        room = await daily_rest_helper.create_room(DailyRoomParams())
        if not room.url:
            raise HTTPException(
                status_code=500,
                detail="Unable to create room",
            )
        logger.info(f"Created room: {room.url}")

        # Create bot token for room
        token = await daily_rest_helper.get_token(room.url, MAX_SESSION_TIME)
        if not token:
            raise HTTPException(status_code=500, detail=f"Failed to get token for room: {room.url}")

        # The token is a credential for joining the room, so never write its
        # value to the logs; identify it by the room it belongs to instead.
        logger.info(f"Bot token created for room: {room.url}")

        # Spawn a new bot process
        launch_bot_process.spawn(room_url=room.url, token=token)

        # Return room URL to the user to join
        # Note: in production, you would want to return a token to the user
        return RedirectResponse(room.url)
|
||||
95
examples/deployment/modal-example/bot.py
Normal file
@@ -0,0 +1,95 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main(room_url: str, token: str):
    """Run a voice bot in a Daily room until the pipeline finishes.

    Builds a Daily transport, a Cartesia TTS service and an OpenAI LLM service,
    wires them into a pipeline, registers the participant event handlers, and
    runs the pipeline task with a ``PipelineRunner``.

    Args:
        room_url: URL of the Daily room to join.
        token: Daily meeting token used to join the room.
    """
    daily_params = DailyParams(
        audio_out_enabled=True,
        transcription_enabled=True,
        vad_enabled=True,
        vad_analyzer=SileroVADAnalyzer(),
    )
    transport = DailyTransport(room_url, token, "bot", daily_params)

    tts = CartesiaTTSService(
        api_key=os.getenv("CARTESIA_API_KEY", ""),
        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",
    )
    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))

    system_message = {
        "role": "system",
        "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
    }
    # The handler below appends to this same list after the context is built.
    messages = [system_message]

    context = OpenAILLMContext(messages)
    context_aggregator = llm.create_context_aggregator(context)

    # Processor order: input -> user context -> LLM -> TTS -> output -> assistant context.
    processors = [
        transport.input(),
        context_aggregator.user(),
        llm,
        tts,
        transport.output(),
        context_aggregator.assistant(),
    ]
    pipeline = Pipeline(processors)

    task_params = PipelineParams(
        allow_interruptions=True,
        enable_metrics=True,
        enable_usage_metrics=True,
        report_only_initial_ttfb=True,
    )
    task = PipelineTask(pipeline, params=task_params)

    @transport.event_handler("on_first_participant_joined")
    async def on_first_participant_joined(transport, participant):
        # Start transcribing the participant, then prompt the bot to greet them.
        await transport.capture_participant_transcription(participant["id"])
        messages.append({"role": "system", "content": "Please introduce yourself to the user."})
        greeting_frame = context_aggregator.user().get_context_frame()
        await task.queue_frames([greeting_frame])

    @transport.event_handler("on_participant_left")
    async def on_participant_left(transport, participant, reason):
        # Cancel the pipeline task when a participant leaves.
        await task.cancel()

    runner = PipelineRunner()
    await runner.run(task)
|
||||
|
||||
|
||||
def _voice_bot_process(room_url: str, token: str):
    """Synchronous entry point: run the async ``main`` bot to completion.

    Args:
        room_url: URL of the Daily room, forwarded to ``main``.
        token: Daily meeting token, forwarded to ``main``.
    """
    bot_session = main(room_url, token)
    asyncio.run(bot_session)
|
||||
@@ -1 +0,0 @@
|
||||
node_modules
|
||||
@@ -1,29 +0,0 @@
|
||||
# JavaScript Implementation
|
||||
|
||||
Basic implementation using the [Pipecat JavaScript SDK](https://docs.pipecat.ai/client/js/introduction).
|
||||
|
||||
## Setup
|
||||
|
||||
1. Deploy the Modal server. See the main [README](../../README.md).
|
||||
|
||||
2. Navigate to the `client/javascript` directory:
|
||||
|
||||
```bash
|
||||
cd client/javascript
|
||||
```
|
||||
|
||||
3. Modify the baseUrl in src/app.js to point to your deployed Modal endpoint
|
||||
|
||||
4. Install dependencies:
|
||||
|
||||
```bash
|
||||
npm install
|
||||
```
|
||||
|
||||
5. Run the client app:
|
||||
|
||||
```
|
||||
npm run dev
|
||||
```
|
||||
|
||||
6. Visit http://localhost:5173 in your browser.
|
||||
@@ -1,49 +0,0 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>AI Chatbot</title>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<div class="container">
|
||||
<div class="status-bar">
|
||||
<div class="status">
|
||||
Status: <span id="connection-status">Disconnected</span>
|
||||
</div>
|
||||
<div class="controls">
|
||||
<select id="bot-selector">
|
||||
<option value="openai">OpenAI</option>
|
||||
<option value="gemini">Gemini</option>
|
||||
<option value="vllm">Llama</option>
|
||||
</select>
|
||||
<button id="connect-btn">Connect</button>
|
||||
<button id="disconnect-btn" disabled>Disconnect</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="main-content">
|
||||
<div class="bot-container">
|
||||
<div id="bot-video-container"></div>
|
||||
<audio id="bot-audio" autoplay></audio>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="device-bar">
|
||||
<div class="device-controls">
|
||||
<select id="device-selector"></select>
|
||||
<button id="mic-toggle-btn">Mute Mic</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="debug-panel">
|
||||
<h3>Debug Info</h3>
|
||||
<div id="debug-log"></div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script type="module" src="/src/app.js"></script>
|
||||
<link rel="stylesheet" href="/src/style.css" />
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,21 +0,0 @@
|
||||
{
|
||||
"name": "client",
|
||||
"version": "1.0.0",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
"dev": "vite",
|
||||
"build": "vite build",
|
||||
"preview": "vite preview"
|
||||
},
|
||||
"keywords": [],
|
||||
"author": "",
|
||||
"license": "ISC",
|
||||
"description": "",
|
||||
"devDependencies": {
|
||||
"vite": "^6.3.5"
|
||||
},
|
||||
"dependencies": {
|
||||
"@pipecat-ai/client-js": "^1.0.0",
|
||||
"@pipecat-ai/daily-transport": "^1.0.0"
|
||||
}
|
||||
}
|
||||
@@ -1,376 +0,0 @@
|
||||
/**
|
||||
* Copyright (c) 2024–2025, Daily
|
||||
*
|
||||
* SPDX-License-Identifier: BSD 2-Clause License
|
||||
*/
|
||||
|
||||
/**
|
||||
* Pipecat Client Implementation
|
||||
*
|
||||
* This client connects to an RTVI-compatible bot server using WebRTC (via Daily).
|
||||
* It handles audio/video streaming and manages the connection lifecycle.
|
||||
*
|
||||
* Requirements:
|
||||
* - A running RTVI bot server (defaults to http://localhost:7860)
|
||||
* - The server must implement the /connect endpoint that returns Daily.co room credentials
|
||||
* - Browser with WebRTC support
|
||||
*/
|
||||
|
||||
import { PipecatClient, RTVIEvent } from '@pipecat-ai/client-js';
|
||||
import { DailyTransport } from '@pipecat-ai/daily-transport';
|
||||
|
||||
/**
|
||||
* ChatbotClient handles the connection and media management for a real-time
|
||||
* voice and video interaction with an AI bot.
|
||||
*/
|
||||
class ChatbotClient {
  constructor() {
    // Initialize client state
    this.pcClient = null;
    this.setupDOMElements();

    // initializeClientAndTransport() assigns this.pcClient before its first
    // await, so setupEventListeners() below can already use the client. The
    // promise it returns covers device initialization; report a failure
    // instead of leaving an unhandled promise rejection.
    this.initializeClientAndTransport().catch((error) => {
      console.error('Initialization error:', error);
      this.log(`Error initializing client: ${error?.message ?? error}`);
    });

    this.setupEventListeners();
  }

  /**
   * Set up references to DOM elements and make sure an audio element exists
   * for the bot's voice output.
   */
  setupDOMElements() {
    // Get references to UI control elements
    this.connectBtn = document.getElementById('connect-btn');
    this.disconnectBtn = document.getElementById('disconnect-btn');
    this.statusSpan = document.getElementById('connection-status');
    this.debugLog = document.getElementById('debug-log');
    this.botVideoContainer = document.getElementById('bot-video-container');
    this.deviceSelector = document.getElementById('device-selector');

    // Reuse the page's #bot-audio element when it exists; only create (and
    // append) a new one when the page does not provide it, so the document
    // does not end up with two audio elements for the same purpose.
    this.botAudio = document.getElementById('bot-audio');
    if (!this.botAudio) {
      this.botAudio = document.createElement('audio');
      document.body.appendChild(this.botAudio);
    }
    this.botAudio.autoplay = true;
    this.botAudio.playsInline = true;
  }

  /**
   * Set up event listeners for the connect/disconnect buttons, the microphone
   * selector, and the mic mute toggle.
   */
  setupEventListeners() {
    this.connectBtn.addEventListener('click', () => this.connect());
    this.disconnectBtn.addEventListener('click', () => this.disconnect());

    // Populate device selector
    this.pcClient
      .getAllMics()
      .then((mics) => {
        console.log('Available mics:', mics);
        mics.forEach((device) => {
          const option = document.createElement('option');
          option.value = device.deviceId;
          option.textContent = device.label || `Microphone ${device.deviceId}`;
          this.deviceSelector.appendChild(option);
        });
      })
      .catch((error) => {
        // Without this the rejection would go unhandled and the selector would
        // silently stay empty.
        console.error('Unable to list microphones:', error);
      });

    this.deviceSelector.addEventListener('change', (event) => {
      const selectedDeviceId = event.target.value;
      console.log('Selected device ID:', selectedDeviceId);
      this.pcClient.updateMic(selectedDeviceId);
    });

    // Handle mic mute/unmute toggle
    const micToggleBtn = document.getElementById('mic-toggle-btn');
    micToggleBtn.addEventListener('click', () => {
      // micEnabled is the state BEFORE the toggle, so the button label shows
      // the action that will be available afterwards.
      const micEnabled = this.pcClient.isMicEnabled;
      micToggleBtn.textContent = micEnabled ? 'Unmute Mic' : 'Mute Mic';
      this.pcClient.enableMic(!micEnabled);
      console.log(micEnabled ? 'Mic muted' : 'Mic unmuted');
    });
  }

  /**
   * Set up the Pipecat client and Daily transport, register track listeners,
   * and initialize the local media devices.
   */
  async initializeClientAndTransport() {
    // Initialize the Pipecat client with a DailyTransport and our configuration
    this.pcClient = new PipecatClient({
      transport: new DailyTransport(),
      enableMic: true, // Enable microphone for user input
      enableCam: false,
      callbacks: {
        // Handle connection state changes
        onConnected: () => {
          this.updateStatus('Connected');
          this.connectBtn.disabled = true;
          this.disconnectBtn.disabled = false;
          this.log('Client connected');
        },
        onDisconnected: () => {
          this.updateStatus('Disconnected');
          this.connectBtn.disabled = false;
          this.disconnectBtn.disabled = true;
          this.log('Client disconnected');
        },
        // Handle transport state changes
        onTransportStateChanged: (state) => {
          this.updateStatus(`Transport: ${state}`);
          this.log(`Transport state changed: ${state}`);
          if (state === 'connecting') {
            // Remember when connecting began to report time-to-ready below
            window.startTime = Date.now();
          }
          if (state === 'ready') {
            this.setupMediaTracks();
            console.warn('TIME TO BOT READY:', Date.now() - window.startTime);
          }
        },
        // Handle bot connection events
        onBotConnected: (participant) => {
          this.log(`Bot connected: ${JSON.stringify(participant)}`);
        },
        onBotDisconnected: (participant) => {
          this.log(`Bot disconnected: ${JSON.stringify(participant)}`);
        },
        onBotReady: (data) => {
          this.log(`Bot ready: ${JSON.stringify(data)}`);
          this.setupMediaTracks();
        },
        // Transcript events
        onUserTranscript: (data) => {
          // Only log final transcripts
          if (data.final) {
            this.log(`User: ${data.text}`);
          }
        },
        onBotTranscript: (data) => {
          this.log(`Bot: ${data.text}`);
        },
        // Error handling
        onMessageError: (error) => {
          console.log('Message error:', error);
        },
        onMicUpdated: (data) => {
          console.log('Mic updated:', data);
          this.deviceSelector.value = data.deviceId;
        },
        onError: (error) => {
          console.log('Error:', JSON.stringify(error));
        },
      },
    });

    // Set up listeners for media track events
    this.setupTrackListeners();

    await this.pcClient.initDevices();
    // Exposed on window, presumably for debugging from the browser console
    window.client = this.pcClient;
  }

  /**
   * Add a timestamped message to the debug log (and mirror it to the console)
   */
  log(message) {
    const entry = document.createElement('div');
    entry.textContent = `${new Date().toISOString()} - ${message}`;

    // Add styling based on message type
    if (message.startsWith('User: ')) {
      entry.style.color = '#2196F3'; // blue for user
    } else if (message.startsWith('Bot: ')) {
      entry.style.color = '#4CAF50'; // green for bot
    }

    this.debugLog.appendChild(entry);
    // Keep the newest entry scrolled into view
    this.debugLog.scrollTop = this.debugLog.scrollHeight;
    console.log(message);
  }

  /**
   * Update the connection status display
   */
  updateStatus(status) {
    this.statusSpan.textContent = status;
    this.log(`Status: ${status}`);
  }

  /**
   * Check for available media tracks and set them up if present
   * This is called when the bot is ready or when the transport state changes to ready
   */
  setupMediaTracks() {
    if (!this.pcClient) return;

    // Get current tracks from the client
    const tracks = this.pcClient.tracks();

    // Set up any available bot tracks
    if (tracks.bot?.audio) {
      this.setupAudioTrack(tracks.bot.audio);
    }
    if (tracks.bot?.video) {
      this.setupVideoTrack(tracks.bot.video);
    }
  }

  /**
   * Set up listeners for track events (start/stop)
   * This handles new tracks being added during the session
   */
  setupTrackListeners() {
    if (!this.pcClient) return;

    // Listen for new tracks starting
    this.pcClient.on(RTVIEvent.TrackStarted, (track, participant) => {
      // Only handle non-local (bot) tracks
      if (!participant?.local) {
        if (track.kind === 'audio') {
          this.setupAudioTrack(track);
        } else if (track.kind === 'video') {
          this.setupVideoTrack(track);
        }
        this.log(
          `Track started event: ${track.kind} from ${
            participant?.name || 'unknown'
          }`
        );
      } else {
        this.log('Local mic unmuted');
      }
    });

    // Listen for tracks stopping
    this.pcClient.on(RTVIEvent.TrackStopped, (track, participant) => {
      // participant may be missing here (the rest of this handler and the
      // TrackStarted handler already allow for that), so guard the access
      // rather than throwing inside the event callback.
      if (participant?.local) {
        this.log('Local mic muted');
        return;
      }
      this.log(
        `Track stopped event: ${track.kind} from ${
          participant?.name || 'unknown'
        }`
      );
    });
  }

  /**
   * Set up an audio track for playback
   * Handles both initial setup and track updates
   */
  setupAudioTrack(track) {
    this.log('Setting up audio track');
    // Check if we're already playing this track
    if (this.botAudio.srcObject) {
      const oldTrack = this.botAudio.srcObject.getAudioTracks()[0];
      if (oldTrack?.id === track.id) return;
    }
    // Create a new MediaStream with the track and set it as the audio source
    this.botAudio.srcObject = new MediaStream([track]);
  }

  /**
   * Set up a video track for display
   * Handles both initial setup and track updates
   */
  setupVideoTrack(track) {
    this.log('Setting up video track');

    // Check if we're already displaying this track before building a new
    // element, so a duplicate call does no throwaway DOM work.
    const currentStream =
      this.botVideoContainer.querySelector('video')?.srcObject;
    if (currentStream) {
      const oldTrack = currentStream.getVideoTracks()[0];
      if (oldTrack?.id === track.id) return;
    }

    const videoEl = document.createElement('video');
    videoEl.autoplay = true;
    videoEl.playsInline = true;
    videoEl.muted = true;
    videoEl.style.width = '100%';
    videoEl.style.height = '100%';
    videoEl.style.objectFit = 'cover';

    // Create a new MediaStream with the track and set it as the video source
    videoEl.srcObject = new MediaStream([track]);
    this.botVideoContainer.innerHTML = '';
    this.botVideoContainer.appendChild(videoEl);
  }

  /**
   * Initialize devices and connect to the bot selected in the bot dropdown.
   * On failure the error is logged, the status is set to 'Error', and a
   * best-effort disconnect is attempted.
   */
  async connect() {
    try {
      const botSelector = document.getElementById('bot-selector');
      const selectedBot = botSelector.value;

      // Initialize audio/video devices
      this.log('Initializing devices...');
      await this.pcClient.initDevices();

      // Connect to the bot
      this.log(`Connecting to bot: ${selectedBot}`);
      await this.pcClient.connect({
        // REPLACE WITH YOUR MODAL URL ENDPOINT
        endpoint:
          'https://<your-workspace>--pipecat-modal-fastapi-app.modal.run/connect',
        requestData: {
          bot_name: selectedBot,
        },
      });

      this.log('Connection complete');
    } catch (error) {
      // Handle any errors during connection
      console.error('Connection error:', error);
      this.log(`Error connecting: ${JSON.stringify(error.message)}`);
      this.log(`Error stack: ${error.stack}`);
      this.updateStatus('Error');

      // Clean up if there's an error
      if (this.pcClient) {
        try {
          await this.pcClient.disconnect();
        } catch (disconnectError) {
          this.log(`Error during disconnect: ${disconnectError.message}`);
        }
      }
    }
  }

  /**
   * Disconnect from the bot and clean up media resources
   */
  async disconnect() {
    if (this.pcClient) {
      try {
        // Disconnect the Pipecat client
        await this.pcClient.disconnect();

        // Clean up audio
        if (this.botAudio.srcObject) {
          this.botAudio.srcObject.getTracks().forEach((track) => track.stop());
          this.botAudio.srcObject = null;
        }

        // Clean up video
        if (this.botVideoContainer.querySelector('video')?.srcObject) {
          const video = this.botVideoContainer.querySelector('video');
          video.srcObject.getTracks().forEach((track) => track.stop());
          video.srcObject = null;
        }
        this.botVideoContainer.innerHTML = '';
      } catch (error) {
        this.log(`Error disconnecting: ${error.message}`);
      }
    }
  }
}
|
||||
|
||||
// Initialize the client when the page loads
|
||||
window.addEventListener('DOMContentLoaded', () => {
|
||||
new ChatbotClient();
|
||||
});
|
||||
@@ -1,135 +0,0 @@
|
||||
body {
|
||||
margin: 0;
|
||||
padding: 20px;
|
||||
font-family: Arial, sans-serif;
|
||||
background-color: #f0f0f0;
|
||||
}
|
||||
|
||||
.container {
|
||||
max-width: 1200px;
|
||||
margin: 0 auto;
|
||||
}
|
||||
|
||||
.status-bar,
|
||||
.device-bar {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
padding: 10px;
|
||||
background-color: #fff;
|
||||
border-radius: 8px;
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
|
||||
.controls,
|
||||
.device-controls {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 10px; /* Adds spacing between elements */
|
||||
}
|
||||
|
||||
.device-controls {
|
||||
margin-left: auto;
|
||||
}
|
||||
|
||||
.controls button,
|
||||
.device-controls button {
|
||||
padding: 8px 16px;
|
||||
margin-left: 10px;
|
||||
border: none;
|
||||
border-radius: 4px;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
#bot-selector,
|
||||
#device-selector {
|
||||
padding: 8px 16px;
|
||||
padding-right: 40px;
|
||||
border: none;
|
||||
border-radius: 4px;
|
||||
background-color: #6c757d; /* Gray background */
|
||||
color: white; /* White text */
|
||||
cursor: pointer;
|
||||
appearance: none; /* Removes default browser styling for dropdowns */
|
||||
background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24' fill='white'%3E%3Cpath d='M7 10l5 5 5-5z'/%3E%3C/svg%3E"); /* Custom arrow */
|
||||
background-repeat: no-repeat;
|
||||
background-position: right 8px center; /* Position the arrow */
|
||||
}
|
||||
|
||||
#bot-selector:focus,
|
||||
#device-selector:focus {
|
||||
outline: none;
|
||||
box-shadow: 0 0 4px rgba(0, 0, 0, 0.3); /* Add a subtle focus effect */
|
||||
}
|
||||
|
||||
#connect-btn {
|
||||
background-color: #4caf50;
|
||||
color: white;
|
||||
}
|
||||
|
||||
#disconnect-btn {
|
||||
background-color: #f44336;
|
||||
color: white;
|
||||
}
|
||||
|
||||
#mic-toggle-btn {
|
||||
}
|
||||
|
||||
button:disabled {
|
||||
opacity: 0.5;
|
||||
cursor: not-allowed;
|
||||
}
|
||||
|
||||
.main-content {
|
||||
background-color: #fff;
|
||||
border-radius: 8px;
|
||||
padding: 20px;
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
|
||||
.bot-container {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
#bot-video-container {
|
||||
width: 640px;
|
||||
height: 360px;
|
||||
background-color: #e0e0e0;
|
||||
border-radius: 8px;
|
||||
margin: 20px auto;
|
||||
overflow: hidden;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
}
|
||||
|
||||
#bot-video-container video {
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
object-fit: cover;
|
||||
}
|
||||
|
||||
.debug-panel {
|
||||
background-color: #fff;
|
||||
border-radius: 8px;
|
||||
padding: 20px;
|
||||
}
|
||||
|
||||
.debug-panel h3 {
|
||||
margin: 0 0 10px 0;
|
||||
font-size: 16px;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
#debug-log {
|
||||
height: 200px;
|
||||
overflow-y: auto;
|
||||
background-color: #f8f8f8;
|
||||
padding: 10px;
|
||||
border-radius: 4px;
|
||||
font-family: monospace;
|
||||
font-size: 12px;
|
||||
line-height: 1.4;
|
||||
}
|
||||
|
Before Width: | Height: | Size: 114 KiB |
3
examples/deployment/modal-example/env.example
Normal file
@@ -0,0 +1,3 @@
|
||||
DAILY_API_KEY=
|
||||
OPENAI_API_KEY=
|
||||
CARTESIA_API_KEY=
|
||||
4
examples/deployment/modal-example/requirements.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
python-dotenv==1.0.1
|
||||
modal==0.71.3
|
||||
fastapi==0.115.6
|
||||
aiohttp==3.11.11
|
||||
@@ -1,307 +0,0 @@
|
||||
"""modal_example.
|
||||
|
||||
This module shows a simple example of how to deploy a bot using Modal and FastAPI.
|
||||
|
||||
It includes:
|
||||
- FastAPI endpoints for starting agents and checking bot statuses.
|
||||
- Dynamic loading of bot implementations.
|
||||
- Use of a Daily transport for bot communication.
|
||||
"""
|
||||
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import importlib
|
||||
import os
|
||||
from contextlib import asynccontextmanager
|
||||
from typing import Any, Dict, Literal
|
||||
|
||||
import aiohttp
|
||||
import modal
|
||||
from fastapi import APIRouter, FastAPI, HTTPException
|
||||
from fastapi.responses import JSONResponse, RedirectResponse
|
||||
from pydantic import BaseModel
|
||||
|
||||
# Image for the container that serves the FastAPI web endpoints: only the
# Daily extra of pipecat is installed on top of requirements.txt.
web_image = (
    modal.Image.debian_slim(python_version="3.13")
    .pip_install_from_requirements("requirements.txt")
    .pip_install("pipecat-ai[daily]")
    .add_local_dir("src", remote_path="/root/src")
)

# Image for the container that runs the Pipecat pipeline: adds ffmpeg and the
# pipecat extras used by the bot implementations in src/.
bot_image = (
    modal.Image.debian_slim(python_version="3.13")
    .apt_install("ffmpeg")
    .pip_install_from_requirements("requirements.txt")
    .pip_install("pipecat-ai[daily,elevenlabs,openai,silero,google]")
    .add_local_dir("src", remote_path="/root/src")
)

# Secrets are loaded from the local .env file.
app = modal.App("pipecat-modal", secrets=[modal.Secret.from_dotenv()])

router = APIRouter()

# Registry of launched bot jobs, filled in by start() and read by cleanup().
bot_jobs: Dict[Any, Any] = {}
# Holds the shared Daily REST helper under the "rest" key (set in lifespan()).
daily_helpers: Dict[Any, Any] = {}

# Names of all supported bot implementations.
# Each one maps to a bot_<name> module in the src directory.
BotName = Literal["openai", "gemini", "vllm"]
|
||||
|
||||
|
||||
def cleanup():
    """Cancel every bot job recorded in ``bot_jobs``.

    Invoked by the FastAPI lifespan handler when the server shuts down.
    """
    # Each registry value is a tuple whose first item identifies the function call.
    for call_id, *_ in bot_jobs.values():
        call = modal.FunctionCall.from_id(call_id)
        if not call:
            continue
        call.cancel()
|
||||
|
||||
|
||||
def get_bot_file(bot_name: BotName) -> str:
    """Map a bot name to the module file name that implements it.

    The name is lower-cased and stripped before validation. A missing or blank
    name (``None``, ``""`` or whitespace only) selects the default ``"openai"``
    implementation.

    Args:
        bot_name (BotName): The name of the bot (e.g., 'openai', 'gemini', 'vllm').

    Returns:
        str: The module file name, i.e. the normalized name prefixed with ``bot_``
            (e.g., 'bot_openai').

    Raises:
        ValueError: If the bot name is not one of the supported implementations.
    """
    supported = ("openai", "gemini", "vllm")

    # `or ""` guards against None so a missing name falls back to the default
    # instead of failing with AttributeError on .lower().
    bot_implementation = (bot_name or "").lower().strip() or "openai"
    if bot_implementation not in supported:
        raise ValueError(
            f"Invalid BOT_IMPLEMENTATION: {bot_implementation}. Must be 'openai' or 'gemini' or 'vllm'"
        )

    return f"bot_{bot_implementation}"
|
||||
|
||||
|
||||
def get_runner(path: str, bot_file: str) -> callable:
    """Dynamically import the ``run_bot`` function of a bot module.

    The module imported is ``f"{path}.{bot_file}"``.

    Args:
        path (str): The package that contains the bot modules (e.g., 'src').
        bot_file (str): The module name of the bot implementation
            (e.g., 'bot_openai', 'bot_gemini', 'bot_vllm').

    Returns:
        function: The ``run_bot`` attribute of the imported module.

    Raises:
        ImportError: If the module cannot be imported or has no ``run_bot``
            attribute. The underlying error is attached as ``__cause__``.
    """
    # Built outside the try block so the name is always bound in the handler.
    module_name = f"{path}.{bot_file}"
    try:
        module = importlib.import_module(module_name)
        return getattr(module, "run_bot")
    except (ImportError, AttributeError) as e:
        # Chain the original error so the real failure stays in the traceback.
        raise ImportError(f"Failed to import run_bot from {module_name}: {e}") from e
|
||||
|
||||
|
||||
async def create_room_and_token() -> tuple[str, str]:
    """Return a Daily room URL and an authentication token for it.

    The room URL is read from ``DAILY_SAMPLE_ROOM_URL`` and the token from
    ``DAILY_SAMPLE_ROOM_TOKEN``. When no room URL is configured, a new room is
    created through the Daily REST helper. A token is requested from the helper
    whenever a room was just created or no token is configured; a configured
    token is only reused together with a configured room.

    Returns:
        tuple[str, str]: A tuple containing the room URL and the authentication token.

    Raises:
        HTTPException: If room creation or token generation fails.
    """
    from pipecat.transports.services.helpers.daily_rest import DailyRoomParams

    room_url = os.getenv("DAILY_SAMPLE_ROOM_URL")
    token = os.getenv("DAILY_SAMPLE_ROOM_TOKEN")

    room_created = False
    if not room_url:
        room = await daily_helpers["rest"].create_room(DailyRoomParams())
        if not room.url:
            raise HTTPException(status_code=500, detail="Failed to create room")
        room_url = room.url
        room_created = True

    # A preconfigured token cannot belong to a room that was only just created,
    # and a preconfigured room without a token still needs one.
    if room_created or not token:
        token = await daily_helpers["rest"].get_token(room_url)
        if not token:
            raise HTTPException(status_code=500, detail=f"Failed to get token for room: {room_url}")

    return room_url, token
|
||||
|
||||
|
||||
@app.function(image=bot_image, min_containers=1)
async def bot_runner(room_url, token, bot_name: BotName = "openai"):
    """Run the selected bot implementation in the given Daily room.

    Resolves the bot module for ``bot_name``, imports its ``run_bot`` function
    from the ``src`` package and awaits it with the room URL and token.

    Args:
        room_url (str): The URL of the Daily room where the bot and client will communicate.
        token (str): The authentication token for the room.
        bot_name (BotName): The name of the bot implementation to use. Defaults to "openai".

    Raises:
        HTTPException: If resolving, importing or running the bot fails. The
            original exception is attached as ``__cause__``.
    """
    try:
        bot_file = get_bot_file(bot_name)
        run_bot = get_runner("src", bot_file)

        # The token is a room credential, so it is deliberately kept out of the logs.
        print(f"Starting bot process: {bot_file} -u {room_url}")
        await run_bot(room_url, token)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to start bot pipeline: {e}") from e
|
||||
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan manager that handles startup and shutdown tasks.

    Startup:
    - Creates an aiohttp session.
    - Stores a Daily REST helper built on that session in ``daily_helpers["rest"]``.

    Shutdown (also runs when an exception is raised into the context manager):
    - Closes the aiohttp session.
    - Cancels all recorded bot jobs via ``cleanup()``.

    Args:
        app (FastAPI): The application this lifespan is attached to (unused).
    """
    from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper

    aiohttp_session = aiohttp.ClientSession()
    try:
        daily_helpers["rest"] = DailyRESTHelper(
            daily_api_key=os.getenv("DAILY_API_KEY", ""),
            daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
            aiohttp_session=aiohttp_session,
        )
        yield
    finally:
        # try/finally keeps the shutdown work from being skipped when the
        # application body raises, and still cancels jobs if closing fails.
        try:
            await aiohttp_session.close()
        finally:
            cleanup()
|
||||
|
||||
|
||||
class ConnectData(BaseModel):
    """Request payload that selects which bot pipeline to launch.

    Attributes:
        bot_name (BotName): Name of the bot implementation; "openai" when omitted.
    """

    # Restricted by BotName to the supported implementations.
    bot_name: BotName = "openai"
|
||||
|
||||
|
||||
async def start(data: ConnectData):
    """Internal method to start a bot agent and return the room URL and token.

    Obtains a room and token, spawns the deployed ``bot_runner`` Modal function
    for them, and records the spawned call in ``bot_jobs`` so it can be
    cancelled at shutdown.

    Args:
        data (ConnectData): The data containing the bot name to use.

    Returns:
        tuple[str, str]: A tuple containing the room URL and token.
    """
    room_url, token = await create_room_and_token()
    launch_bot_func = modal.Function.from_name("pipecat-modal", "bot_runner")
    function_call = launch_bot_func.spawn(room_url, token, data.bot_name)

    # spawn() returns a FunctionCall handle; store its string id, which is what
    # modal.FunctionCall.from_id() expects in cleanup() and the status endpoint.
    call_id = function_call.object_id
    bot_jobs[call_id] = (call_id, room_url)

    return room_url, token
|
||||
|
||||
|
||||
@router.get("/")
async def start_agent():
    """Launch a bot agent and send the caller to its Daily room.

    The bot implementation is taken from the ``BOT_IMPLEMENTATION`` environment
    variable (default "openai"). After the agent is started, the user is
    redirected to the room URL to interact with the bot through a Daily
    Prebuilt Interface.

    Returns:
        RedirectResponse: A response that redirects to the room URL.
    """
    configured = os.getenv("BOT_IMPLEMENTATION", "openai")
    bot_name = configured.lower().strip()
    print(f"Starting bot: {bot_name}")

    # The token is not needed for the redirect; only the room URL is used.
    room_url, _ = await start(ConnectData(bot_name=bot_name))
    return RedirectResponse(room_url)
|
||||
|
||||
|
||||
@router.post("/connect")
async def rtvi_connect(data: ConnectData) -> Dict[Any, Any]:
    """A user endpoint for launching a bot agent and retrieving the room/token credentials.

    The bot implementation comes from the request when provided; otherwise it
    falls back to the ``BOT_IMPLEMENTATION`` environment variable (default
    "openai"). The agent is started and the room URL and token are returned so
    the client can connect to the bot using its own RTVI interface.

    Args:
        data (ConnectData): Optional. The data containing the bot name to use.

    Returns:
        Dict[Any, Any]: A dictionary containing the room URL and token.
    """
    # Resolve the fallback before touching data.bot_name, so a missing payload
    # cannot fail with AttributeError on None.
    if data is None or not data.bot_name:
        default_name = os.getenv("BOT_IMPLEMENTATION", "openai").lower().strip()
        if data is None:
            data = ConnectData(bot_name=default_name)
        else:
            data.bot_name = default_name

    print(f"Starting bot: {data.bot_name}")
    room_url, token = await start(data)

    return {"room_url": room_url, "token": token}
|
||||
|
||||
|
||||
@router.get("/status/{fid}")
def get_status(fid: str):
    """Report whether the bot job with the given function call ID has finished.

    Args:
        fid (str): The function ID of the bot process.

    Returns:
        JSONResponse: ``bot_id``, ``status`` and ``code``. A finished job reports
            its result as the code (404 if the output has expired); a job that
            has not produced a result yet reports status "running" with code 202.

    Raises:
        HTTPException: If the bot process with the given ID is not found.
    """
    call = modal.FunctionCall.from_id(fid)
    if not call:
        raise HTTPException(status_code=404, detail=f"Bot with process id: {fid} not found")

    try:
        # timeout=0 polls without waiting; TimeoutError means no result yet.
        outcome = call.get(timeout=0)
    except modal.exception.OutputExpiredError:
        body = {"bot_id": fid, "status": "finished", "code": 404}
    except TimeoutError:
        body = {"bot_id": fid, "status": "running", "code": 202}
    else:
        body = {"bot_id": fid, "status": "finished", "code": outcome}

    return JSONResponse(body)
|
||||
|
||||
|
||||
@app.function(image=web_image, min_containers=1)
@modal.concurrent(max_inputs=1)
@modal.asgi_app()
def fastapi_app():
    """Build the FastAPI application served by Modal as an ASGI app.

    The app uses ``lifespan`` for startup/shutdown handling, allows any origin,
    method and header through CORS, and exposes the routes registered on
    ``router``.

    Returns:
        FastAPI: The configured application.
    """
    from fastapi.middleware.cors import CORSMiddleware

    cors_options = {
        "allow_origins": ["*"],
        "allow_credentials": True,
        "allow_methods": ["*"],
        "allow_headers": ["*"],
    }

    web_app = FastAPI(lifespan=lifespan)
    web_app.add_middleware(CORSMiddleware, **cors_options)

    # Attach the endpoints defined in this file.
    web_app.include_router(router)
    return web_app
|
||||
@@ -1,14 +0,0 @@
|
||||
DAILY_API_KEY=
|
||||
|
||||
# determines which bot file to default to: 'openai', 'gemini', or 'vllm'
|
||||
BOT_IMPLEMENTATION=openai
|
||||
|
||||
# needed for the openai bot pipeline
|
||||
OPENAI_API_KEY=
|
||||
ELEVENLABS_API_KEY=
|
||||
|
||||
# needed for the gemini live bot pipeline
|
||||
GOOGLE_API_KEY=
|
||||
|
||||
# needed if you modified the API Key for your self-hosted LLM
|
||||
VLLM_API_KEY=
|
||||
@@ -1,3 +0,0 @@
|
||||
python-dotenv==1.0.1
|
||||
modal==1.0.5
|
||||
fastapi[all]
|
||||
|
Before Width: | Height: | Size: 759 KiB |
|
Before Width: | Height: | Size: 884 KiB |
|
Before Width: | Height: | Size: 876 KiB |
|
Before Width: | Height: | Size: 881 KiB |
|
Before Width: | Height: | Size: 866 KiB |
|
Before Width: | Height: | Size: 874 KiB |
|
Before Width: | Height: | Size: 882 KiB |
|
Before Width: | Height: | Size: 885 KiB |
|
Before Width: | Height: | Size: 888 KiB |
|
Before Width: | Height: | Size: 890 KiB |
|
Before Width: | Height: | Size: 898 KiB |
|
Before Width: | Height: | Size: 836 KiB |
|
Before Width: | Height: | Size: 903 KiB |
|
Before Width: | Height: | Size: 908 KiB |
|
Before Width: | Height: | Size: 908 KiB |
|
Before Width: | Height: | Size: 905 KiB |
|
Before Width: | Height: | Size: 903 KiB |
|
Before Width: | Height: | Size: 866 KiB |
|
Before Width: | Height: | Size: 849 KiB |
|
Before Width: | Height: | Size: 866 KiB |
|
Before Width: | Height: | Size: 866 KiB |
|
Before Width: | Height: | Size: 864 KiB |
|
Before Width: | Height: | Size: 858 KiB |
|
Before Width: | Height: | Size: 875 KiB |
|
Before Width: | Height: | Size: 881 KiB |
@@ -1,197 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Gemini Bot Implementation.
|
||||
|
||||
This module implements a chatbot using Google's Gemini Multimodal Live model.
|
||||
It includes:
|
||||
- Real-time audio/video interaction through Daily
|
||||
- Animated robot avatar
|
||||
- Speech-to-speech model
|
||||
|
||||
The bot runs as part of a pipeline that processes audio/video frames and manages
|
||||
the conversation flow using Gemini's streaming capabilities.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from PIL import Image
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import (
|
||||
BotStartedSpeakingFrame,
|
||||
BotStoppedSpeakingFrame,
|
||||
Frame,
|
||||
OutputImageRawFrame,
|
||||
SpriteFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.processors.frameworks.rtvi import RTVIConfig, RTVIObserver, RTVIProcessor
|
||||
from pipecat.services.gemini_multimodal_live.gemini import GeminiMultimodalLiveLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)

try:
    # Swap the default loguru handler (id 0) for a DEBUG-level stderr handler.
    logger.remove(0)
    logger.add(sys.stderr, level="DEBUG")
except ValueError:
    # Handler 0 is already gone, i.e. the logger was configured earlier.
    pass

script_dir = os.path.dirname(__file__)


def _load_sprite(index):
    """Open one robot animation image and wrap its bytes in an output frame."""
    sprite_path = os.path.join(script_dir, f"assets/robot0{index}.png")
    with Image.open(sprite_path) as img:
        return OutputImageRawFrame(image=img.tobytes(), size=img.size, format=img.format)


# Animation frames 1 through 25, in order.
sprites = [_load_sprite(index) for index in range(1, 26)]

# Append the frames in reverse so the animation plays forward and then back.
flipped = sprites[::-1]
sprites.extend(flipped)

# Static and animated states.
quiet_frame = sprites[0]  # Shown while the bot is listening
talking_frame = SpriteFrame(images=sprites)  # Played while the bot is talking
|
||||
|
||||
|
||||
class TalkingAnimation(FrameProcessor):
    """Frame processor that drives the bot's avatar.

    Pushes the talking animation when the bot starts speaking and the static
    listening frame when it stops.
    """

    def __init__(self):
        super().__init__()
        # True while the talking animation is the one currently pushed.
        self._is_talking = False

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Update the animation state for a frame, then forward the frame.

        Args:
            frame: The incoming frame to process
            direction: The direction of frame flow in the pipeline
        """
        await super().process_frame(frame, direction)

        started = isinstance(frame, BotStartedSpeakingFrame)
        stopped = not started and isinstance(frame, BotStoppedSpeakingFrame)

        if started and not self._is_talking:
            # Only push the animation once per speaking turn.
            await self.push_frame(talking_frame)
            self._is_talking = True
        elif stopped:
            await self.push_frame(quiet_frame)
            self._is_talking = False

        # Every frame is passed along unchanged.
        await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def run_bot(room_url: str, token: str):
    """Build and run the Gemini bot pipeline in a Daily room.

    The pipeline consists of:
    - A Daily transport with audio/camera output and Silero voice activity detection
    - The Gemini Multimodal Live service
    - The talking-animation processor
    - RTVI event handling

    Args:
        room_url: URL of the Daily room to join.
        token: Authentication token for the room.
    """
    # Daily transport with the audio/video parameters used for Gemini.
    vad = SileroVADAnalyzer(params=VADParams(stop_secs=0.5))
    daily_params = DailyParams(
        audio_out_enabled=True,
        camera_out_enabled=True,
        camera_out_width=1024,
        camera_out_height=576,
        vad_enabled=True,
        vad_audio_passthrough=True,
        vad_analyzer=vad,
    )
    transport = DailyTransport(room_url, token, "Chatbot", daily_params)

    # Gemini Multimodal Live model.
    llm = GeminiMultimodalLiveLLMService(
        api_key=os.getenv("GOOGLE_API_KEY"),
        voice_id="Puck",  # Aoede, Charon, Fenrir, Kore, Puck
        transcribe_user_audio=True,
    )

    initial_prompt = {
        "role": "user",
        "content": "You are Chatbot, a friendly, helpful robot. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by introducing yourself.",
    }

    # Conversation context; the aggregator collects it as the conversation runs.
    context = OpenAILLMContext([initial_prompt])
    context_aggregator = llm.create_context_aggregator(context)

    ta = TalkingAnimation()

    # RTVI events for the Pipecat client UI.
    rtvi = RTVIProcessor(config=RTVIConfig(config=[]))

    processors = [
        transport.input(),
        rtvi,
        context_aggregator.user(),
        llm,
        ta,
        transport.output(),
        context_aggregator.assistant(),
    ]
    pipeline = Pipeline(processors)

    task_params = PipelineParams(enable_metrics=True, enable_usage_metrics=True)
    task = PipelineTask(pipeline, params=task_params, observers=[RTVIObserver(rtvi)])

    # Show the static avatar until the bot starts talking.
    await task.queue_frame(quiet_frame)

    @rtvi.event_handler("on_client_ready")
    async def on_client_ready(rtvi):
        await rtvi.set_bot_ready()
        # Kick off the conversation
        opening = context_aggregator.user().get_context_frame()
        await task.queue_frames([opening])

    @transport.event_handler("on_first_participant_joined")
    async def on_first_participant_joined(transport, participant):
        await transport.capture_participant_transcription(participant["id"])

    @transport.event_handler("on_participant_left")
    async def on_participant_left(transport, participant, reason):
        print(f"Participant left: {participant}")
        # Cancelling the task ends the pipeline run.
        await task.cancel()

    runner = PipelineRunner()
    await runner.run(task)
|
||||
@@ -1,225 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""OpenAI Bot Implementation.
|
||||
|
||||
This module implements a chatbot using OpenAI's GPT-4 model for natural language
|
||||
processing. It includes:
|
||||
- Real-time audio/video interaction through Daily
|
||||
- Animated robot avatar
|
||||
- Text-to-speech using ElevenLabs
|
||||
- Support for both English and Spanish
|
||||
|
||||
The bot runs as part of a pipeline that processes audio/video frames and manages
|
||||
the conversation flow.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from PIL import Image
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import (
|
||||
BotStartedSpeakingFrame,
|
||||
BotStoppedSpeakingFrame,
|
||||
Frame,
|
||||
OutputImageRawFrame,
|
||||
SpriteFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.processors.frameworks.rtvi import RTVIConfig, RTVIObserver, RTVIProcessor
|
||||
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)

try:
    # Swap the default loguru handler (id 0) for a DEBUG-level stderr handler.
    logger.remove(0)
    logger.add(sys.stderr, level="DEBUG")
except ValueError:
    # Handler 0 is already gone, i.e. the logger was configured earlier.
    pass

script_dir = os.path.dirname(__file__)


def _load_sprite(index):
    """Open one robot animation image and wrap its bytes in an output frame."""
    sprite_path = os.path.join(script_dir, f"assets/robot0{index}.png")
    with Image.open(sprite_path) as img:
        return OutputImageRawFrame(image=img.tobytes(), size=img.size, format=img.format)


# Sequential animation frames 1 through 25.
sprites = [_load_sprite(index) for index in range(1, 26)]

# Append the frames in reverse so the animation plays forward and then back.
flipped = sprites[::-1]
sprites.extend(flipped)

# Static and animated states.
quiet_frame = sprites[0]  # Shown while the bot is listening
talking_frame = SpriteFrame(images=sprites)  # Played while the bot is talking
|
||||
|
||||
|
||||
class TalkingAnimation(FrameProcessor):
    """Frame processor that drives the bot's avatar.

    Pushes the talking animation when the bot starts speaking and the static
    listening frame when it stops.
    """

    def __init__(self):
        super().__init__()
        # True while the talking animation is the one currently pushed.
        self._is_talking = False

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Update the animation state for a frame, then forward the frame.

        Args:
            frame: The incoming frame to process
            direction: The direction of frame flow in the pipeline
        """
        await super().process_frame(frame, direction)

        started = isinstance(frame, BotStartedSpeakingFrame)
        stopped = not started and isinstance(frame, BotStoppedSpeakingFrame)

        if started and not self._is_talking:
            # Only push the animation once per speaking turn.
            await self.push_frame(talking_frame)
            self._is_talking = True
        elif stopped:
            await self.push_frame(quiet_frame)
            self._is_talking = False

        # Every frame is passed along unchanged.
        await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def run_bot(room_url: str, token: str):
    """Build and run the OpenAI bot pipeline in a Daily room.

    The pipeline consists of:
    - A Daily transport with transcription, audio/camera output and Silero VAD
    - The OpenAI language model service
    - ElevenLabs text-to-speech
    - The talking-animation processor
    - RTVI event handling

    Args:
        room_url: URL of the Daily room to join.
        token: Authentication token for the room.
    """
    # Daily transport with video/audio parameters.
    daily_params = DailyParams(
        audio_out_enabled=True,
        camera_out_enabled=True,
        camera_out_width=1024,
        camera_out_height=576,
        vad_enabled=True,
        vad_analyzer=SileroVADAnalyzer(),
        transcription_enabled=True,
        #
        # Spanish
        #
        # transcription_settings=DailyTranscriptionSettings(
        #     language="es",
        #     tier="nova",
        #     model="2-general"
        # )
    )
    transport = DailyTransport(room_url, token, "Chatbot", daily_params)

    # Text-to-speech service.
    tts = ElevenLabsTTSService(
        api_key=os.getenv("ELEVENLABS_API_KEY"),
        #
        # English
        #
        voice_id="SAz9YHcvj6GT2YYXdXww",
        #
        # Spanish
        #
        # model="eleven_multilingual_v2",
        # voice_id="gD1IexrzCvsXPHUuT0s3",
    )

    # LLM service.
    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))

    system_prompt = {
        "role": "system",
        #
        # English
        #
        "content": "You are an incessant one-upper. Start by asking the user how their day is going.",
        #
        # Spanish
        #
        # "content": "Eres Chatbot, un amigable y útil robot. Tu objetivo es demostrar tus capacidades de una manera breve. Tus respuestas se convertiran a audio así que nunca no debes incluir caracteres especiales. Contesta a lo que el usuario pregunte de una manera creativa, útil y breve. Empieza por presentarte a ti mismo.",
    }

    # Conversation context; the aggregator collects it as the conversation runs.
    context = OpenAILLMContext([system_prompt])
    context_aggregator = llm.create_context_aggregator(context)

    ta = TalkingAnimation()

    # RTVI events for the Pipecat client UI.
    rtvi = RTVIProcessor(config=RTVIConfig(config=[]))

    processors = [
        transport.input(),
        rtvi,
        context_aggregator.user(),
        llm,
        tts,
        ta,
        transport.output(),
        context_aggregator.assistant(),
    ]
    pipeline = Pipeline(processors)

    task_params = PipelineParams(enable_metrics=True, enable_usage_metrics=True)
    task = PipelineTask(pipeline, params=task_params, observers=[RTVIObserver(rtvi)])

    # Show the static avatar until the bot starts talking.
    await task.queue_frame(quiet_frame)

    @rtvi.event_handler("on_client_ready")
    async def on_client_ready(rtvi):
        await rtvi.set_bot_ready()
        # Kick off the conversation
        opening = context_aggregator.user().get_context_frame()
        await task.queue_frames([opening])

    @transport.event_handler("on_first_participant_joined")
    async def on_first_participant_joined(transport, participant):
        await transport.capture_participant_transcription(participant["id"])

    @transport.event_handler("on_participant_left")
    async def on_participant_left(transport, participant, reason):
        print(f"Participant left: {participant}")
        # Cancelling the task ends the pipeline run.
        await task.cancel()

    runner = PipelineRunner()
    await runner.run(task)
|
||||
@@ -1,238 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""vLLM Bot Implementation.
|
||||
|
||||
This module implements a chatbot using a self-hosted, OpenAI-compatible vLLM server for natural language
|
||||
processing. It includes:
|
||||
- Real-time audio/video interaction through Daily
|
||||
- Animated robot avatar
|
||||
- Text-to-speech using ElevenLabs
|
||||
- Support for both English and Spanish
|
||||
|
||||
The bot runs as part of a pipeline that processes audio/video frames and manages
|
||||
the conversation flow.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from typing import List
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from openai.types.chat import ChatCompletionMessageParam
|
||||
from PIL import Image
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import (
|
||||
BotStartedSpeakingFrame,
|
||||
BotStoppedSpeakingFrame,
|
||||
Frame,
|
||||
OutputImageRawFrame,
|
||||
SpriteFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.processors.frameworks.rtvi import RTVIConfig, RTVIObserver, RTVIProcessor
|
||||
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)

try:
    # Swap the default loguru handler (id 0) for a DEBUG-level stderr handler.
    logger.remove(0)
    logger.add(sys.stderr, level="DEBUG")
except ValueError:
    # Handler 0 is already gone, i.e. the logger was configured earlier.
    pass

# REPLACE WITH YOUR MODAL URL ENDPOINT
modal_url = "https://<Modal workspace>--example-vllm-openai-compatible-serve.modal.run"
# Falls back to a placeholder key when VLLM_API_KEY is not set.
api_key = os.getenv("VLLM_API_KEY", "super-secret-key")

script_dir = os.path.dirname(__file__)


def _load_sprite(index):
    """Open one robot animation image and wrap its bytes in an output frame."""
    sprite_path = os.path.join(script_dir, f"assets/robot0{index}.png")
    with Image.open(sprite_path) as img:
        return OutputImageRawFrame(image=img.tobytes(), size=img.size, format=img.format)


# Sequential animation frames 1 through 25.
sprites = [_load_sprite(index) for index in range(1, 26)]

# Append the frames in reverse so the animation plays forward and then back.
flipped = sprites[::-1]
sprites.extend(flipped)

# Static and animated states.
quiet_frame = sprites[0]  # Shown while the bot is listening
talking_frame = SpriteFrame(images=sprites)  # Played while the bot is talking
|
||||
|
||||
|
||||
class TalkingAnimation(FrameProcessor):
    """Frame processor that drives the bot's avatar.

    Pushes the talking animation when the bot starts speaking and the static
    listening frame when it stops.
    """

    def __init__(self):
        super().__init__()
        # True while the talking animation is the one currently pushed.
        self._is_talking = False

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Update the animation state for a frame, then forward the frame.

        Args:
            frame: The incoming frame to process
            direction: The direction of frame flow in the pipeline
        """
        await super().process_frame(frame, direction)

        started = isinstance(frame, BotStartedSpeakingFrame)
        stopped = not started and isinstance(frame, BotStoppedSpeakingFrame)

        if started and not self._is_talking:
            # Only push the animation once per speaking turn.
            await self.push_frame(talking_frame)
            self._is_talking = True
        elif stopped:
            await self.push_frame(quiet_frame)
            self._is_talking = False

        # Every frame is passed along unchanged.
        await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def run_bot(room_url: str, token: str):
    """Assemble the chatbot pipeline for a Daily room and run it to completion.

    The pipeline wires together, in order: Daily transport input, the RTVI
    processor, the user context aggregator, the LLM, text-to-speech, the
    talking animation, Daily transport output and the assistant context
    aggregator.

    Args:
        room_url: URL of the Daily room the bot joins.
        token: Daily room token passed to the transport.
    """
    # --- Transport: audio out, 1024x576 camera out, VAD and transcription ---
    daily_params = DailyParams(
        audio_out_enabled=True,
        camera_out_enabled=True,
        camera_out_width=1024,
        camera_out_height=576,
        vad_enabled=True,
        vad_analyzer=SileroVADAnalyzer(),
        transcription_enabled=True,
        # Spanish alternative:
        # transcription_settings=DailyTranscriptionSettings(
        #     language="es",
        #     tier="nova",
        #     model="2-general"
        # )
    )
    transport = DailyTransport(room_url, token, "Chatbot", daily_params)

    # --- Text-to-speech ---
    tts = ElevenLabsTTSService(
        api_key=os.getenv("ELEVENLABS_API_KEY"),
        # English voice
        voice_id="D38z5RcWu1voky8WS1ja",
        # Spanish alternative:
        # model="eleven_multilingual_v2",
        # voice_id="gD1IexrzCvsXPHUuT0s3",
    )

    # --- LLM: OpenAI-compatible client pointed at a custom base URL ---
    # NOTE(review): `api_key` and `modal_url` are not defined in this function;
    # presumably module-level values set elsewhere in the file - confirm.
    llm = OpenAILLMService(
        api_key=api_key,
        model="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
        base_url=f"{modal_url}/v1",
    )

    # Initial conversation: a single system prompt (English).
    # Spanish alternative: replace "content" with a Spanish system prompt,
    # e.g. a friendly robot persona that introduces itself, answers briefly
    # and avoids special characters because replies are converted to audio.
    system_message = {
        "role": "system",
        "content": "You are a salesman for Modal, the cloud-native serverless Python computing platform.",
    }
    messages = [system_message]

    # The aggregator pair collects user and assistant turns into `context`.
    context = OpenAILLMContext(messages)
    context_aggregator = llm.create_context_aggregator(context)

    ta = TalkingAnimation()

    # RTVI events for the Pipecat client UI.
    rtvi = RTVIProcessor(config=RTVIConfig(config=[]))

    processors = [
        transport.input(),
        rtvi,
        context_aggregator.user(),
        llm,
        tts,
        ta,
        transport.output(),
        context_aggregator.assistant(),
    ]
    pipeline = Pipeline(processors)

    task_params = PipelineParams(
        enable_metrics=True,
        enable_usage_metrics=True,
    )
    task = PipelineTask(
        pipeline,
        params=task_params,
        observers=[RTVIObserver(rtvi)],
    )

    # Show the static image before anything has been said.
    await task.queue_frame(quiet_frame)

    @rtvi.event_handler("on_client_ready")
    async def on_client_ready(rtvi):
        await rtvi.set_bot_ready()
        # Kick off the conversation with the current context.
        opening_frame = context_aggregator.user().get_context_frame()
        await task.queue_frames([opening_frame])

    @transport.event_handler("on_first_participant_joined")
    async def on_first_participant_joined(transport, participant):
        await transport.capture_participant_transcription(participant["id"])

    @transport.event_handler("on_participant_left")
    async def on_participant_left(transport, participant, reason):
        print(f"Participant left: {participant}")
        # A participant leaving ends the session.
        await task.cancel()

    runner = PipelineRunner()
    await runner.run(task)
|
||||
@@ -1,84 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import importlib
|
||||
import os
|
||||
|
||||
|
||||
def get_bot_file(arg_bot: str | None) -> str:
|
||||
bot_implementation = arg_bot or os.getenv("BOT_IMPLEMENTATION", "openai").lower().strip()
|
||||
if not bot_implementation:
|
||||
bot_implementation = "openai"
|
||||
if bot_implementation not in ["openai", "gemini", "vllm"]:
|
||||
raise ValueError(
|
||||
f"Invalid BOT_IMPLEMENTATION: {bot_implementation}. Must be 'openai' or 'gemini'"
|
||||
)
|
||||
return f"bot_{bot_implementation}"
|
||||
|
||||
|
||||
def get_runner(bot_file: str):
    """Dynamically import a bot module and return its ``run_bot`` function.

    Args:
        bot_file (str): Name of the module to import (e.g. 'bot_openai',
            'bot_gemini').

    Returns:
        function: The ``run_bot`` attribute of the imported module.

    Raises:
        ImportError: If the module cannot be imported or has no ``run_bot``
            attribute. The original exception is attached as the cause.
    """
    try:
        module = importlib.import_module(bot_file)
        return module.run_bot
    except (ImportError, AttributeError) as e:
        # Chain explicitly so the underlying failure stays in the traceback.
        raise ImportError(f"Failed to import run_bot from {bot_file}: {e}") from e
|
||||
|
||||
|
||||
def main():
    """Read room URL, token and bot choice, then run the selected bot.

    Each value comes from its command-line option when given; the URL and
    token otherwise fall back to the DAILY_SAMPLE_ROOM_URL and
    DAILY_SAMPLE_ROOM_TOKEN environment variables. Unrecognised command-line
    arguments are ignored.

    Raises:
        Exception: If no room URL is available from either source.
    """
    parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
    # All three options are optional strings; declare them from one table.
    option_specs = (
        ("-u", "--url", "URL of the Daily room to join"),
        ("-t", "--token", "Daily room token"),
        ("-b", "--bot", "Bot runner to use (e.g., openai, gemini)"),
    )
    for short_flag, long_flag, help_text in option_specs:
        parser.add_argument(short_flag, long_flag, type=str, required=False, help=help_text)

    # parse_known_args tolerates extra arguments; the leftovers are unused.
    args, _ = parser.parse_known_args()

    url = args.url if args.url else os.getenv("DAILY_SAMPLE_ROOM_URL")
    token = args.token if args.token else os.getenv("DAILY_SAMPLE_ROOM_TOKEN")
    bot_file = get_bot_file(args.bot)

    if url:
        run_bot = get_runner(bot_file)
        asyncio.run(run_bot(url, token))
        return

    raise Exception(
        "No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL."
    )


if __name__ == "__main__":
    main()
|
||||
@@ -100,28 +100,7 @@ phone numbers with valid values for your use case.
|
||||
|
||||
### Dialin Request
|
||||
|
||||
The server will receive a request when a call is received from Daily.
|
||||
The payload that the webhook received is as follows:
|
||||
```json
|
||||
{
|
||||
// for dial-in from webhook
|
||||
"To": "+14152251493",
|
||||
"From": "+14158483432",
|
||||
"callId": "string-contains-uuid",
|
||||
"callDomain": "string-contains-uuid",
|
||||
"sipHeaders": {
|
||||
"X-My-Custom-Header": "value",
|
||||
"x-caller": "+1234567890",
|
||||
"x-called": "+1987654321",
|
||||
},
|
||||
}
|
||||
```
|
||||
The `To`, `From`, `callId`, `callDomain` fields are converted to
|
||||
`snake_case` and mapped to `dialin_settings`. In addition, `sipHeaders`
|
||||
contains any custom SIP headers received by Daily on the SIP
|
||||
interconnect address (`sip_uri`). These are headers sent from
|
||||
Twilio or other external SIP platforms, for example, to send the
|
||||
caller's phone number.
|
||||
The server will receive a request when a call is received from Daily.
|
||||
|
||||
### Dialout Request
|
||||
|
||||
@@ -179,7 +158,6 @@ curl -X POST http://localhost:3000/api/dial \
|
||||
"From": "+1987654321",
|
||||
"callId": "call-uuid-123",
|
||||
"callDomain": "domain-uuid-456",
|
||||
"sipHeader": {},
|
||||
"dialout_settings": [
|
||||
{
|
||||
"phoneNumber": "+1234567890",
|
||||
|
||||
@@ -39,11 +39,6 @@ class RoomRequest(BaseModel):
|
||||
None, description="A flag to perform voicemail or answeing-machine detection"
|
||||
)
|
||||
call_transfer: Optional[Dict[str, Any]] = Field(None, description="to initiate a call transfer")
|
||||
sipHeaders: Optional[Dict[str, Any]] = Field(
|
||||
None,
|
||||
alias="sip_headers",
|
||||
description="Custom SIP headers received from the external SIP provider",
|
||||
)
|
||||
|
||||
class Config:
|
||||
populate_by_name = True
|
||||
@@ -62,14 +57,6 @@ class RoomRequest(BaseModel):
|
||||
"callDomain": "string-contains-uuid"
|
||||
These need to be remapped to dialin_settings
|
||||
|
||||
In addition, we may receive in the body that can be
|
||||
sent to the bot as a custom field, sip_headers
|
||||
"sipHeaders": {
|
||||
"X-My-Custom-Header": "value",
|
||||
"x-caller": "+14158483432",
|
||||
"x-called": "+14152251493",
|
||||
},
|
||||
|
||||
"dialout_settings": [
|
||||
{"phoneNumber": "+14158483432", "callerId": "+14152251493"},
|
||||
{"sipUri": "sip:username@sip.hostname"}
|
||||
@@ -170,7 +157,6 @@ async def dial(request: RoomRequest, raw_request: Request):
|
||||
"dialout_settings": request.dialout_settings,
|
||||
"voicemail_detection": request.voicemail_detection,
|
||||
"call_transfer": request.call_transfer,
|
||||
"sip_headers": request.sipHeaders, # passing the SIP headers to the bot
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
"name": "my-daily-app",
|
||||
"version": "0.1.0",
|
||||
"dependencies": {
|
||||
"axios": "^1.11.0",
|
||||
"axios": "^1.6.0",
|
||||
"next": "^14.0.0",
|
||||
"pino": "^8.15.0",
|
||||
"react": "^18.2.0",
|
||||
@@ -215,9 +215,10 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/env": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/env/-/env-14.2.30.tgz",
|
||||
"integrity": "sha512-KBiBKrDY6kxTQWGzKjQB7QirL3PiiOkV7KW98leHFjtVRKtft76Ra5qSA/SL75xT44dp6hOcqiiJ6iievLOYug=="
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/env/-/env-14.2.26.tgz",
|
||||
"integrity": "sha512-vO//GJ/YBco+H7xdQhzJxF7ub3SUwft76jwaeOyVVQFHCi5DCnkP16WHB+JBylo4vOKPoZBlR94Z8xBxNBdNJA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@next/eslint-plugin-next": {
|
||||
"version": "14.2.25",
|
||||
@@ -230,12 +231,13 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-darwin-arm64": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-14.2.30.tgz",
|
||||
"integrity": "sha512-EAqfOTb3bTGh9+ewpO/jC59uACadRHM6TSA9DdxJB/6gxOpyV+zrbqeXiFTDy9uV6bmipFDkfpAskeaDcO+7/g==",
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-14.2.26.tgz",
|
||||
"integrity": "sha512-zDJY8gsKEseGAxG+C2hTMT0w9Nk9N1Sk1qV7vXYz9MEiyRoF5ogQX2+vplyUMIfygnjn9/A04I6yrUTRTuRiyQ==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
@@ -245,12 +247,13 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-darwin-x64": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-14.2.30.tgz",
|
||||
"integrity": "sha512-TyO7Wz1IKE2kGv8dwQ0bmPL3s44EKVencOqwIY69myoS3rdpO1NPg5xPM5ymKu7nfX4oYJrpMxv8G9iqLsnL4A==",
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-14.2.26.tgz",
|
||||
"integrity": "sha512-U0adH5ryLfmTDkahLwG9sUQG2L0a9rYux8crQeC92rPhi3jGQEY47nByQHrVrt3prZigadwj/2HZ1LUUimuSbg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
@@ -260,12 +263,13 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-arm64-gnu": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-14.2.30.tgz",
|
||||
"integrity": "sha512-I5lg1fgPJ7I5dk6mr3qCH1hJYKJu1FsfKSiTKoYwcuUf53HWTrEkwmMI0t5ojFKeA6Vu+SfT2zVy5NS0QLXV4Q==",
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-14.2.26.tgz",
|
||||
"integrity": "sha512-SINMl1I7UhfHGM7SoRiw0AbwnLEMUnJ/3XXVmhyptzriHbWvPPbbm0OEVG24uUKhuS1t0nvN/DBvm5kz6ZIqpg==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
@@ -275,12 +279,13 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-arm64-musl": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-14.2.30.tgz",
|
||||
"integrity": "sha512-8GkNA+sLclQyxgzCDs2/2GSwBc92QLMrmYAmoP2xehe5MUKBLB2cgo34Yu242L1siSkwQkiV4YLdCnjwc/Micw==",
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-14.2.26.tgz",
|
||||
"integrity": "sha512-s6JaezoyJK2DxrwHWxLWtJKlqKqTdi/zaYigDXUJ/gmx/72CrzdVZfMvUc6VqnZ7YEvRijvYo+0o4Z9DencduA==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
@@ -290,12 +295,13 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-x64-gnu": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-14.2.30.tgz",
|
||||
"integrity": "sha512-8Ly7okjssLuBoe8qaRCcjGtcMsv79hwzn/63wNeIkzJVFVX06h5S737XNr7DZwlsbTBDOyI6qbL2BJB5n6TV/w==",
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-14.2.26.tgz",
|
||||
"integrity": "sha512-FEXeUQi8/pLr/XI0hKbe0tgbLmHFRhgXOUiPScz2hk0hSmbGiU8aUqVslj/6C6KA38RzXnWoJXo4FMo6aBxjzg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
@@ -305,12 +311,13 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-x64-musl": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-14.2.30.tgz",
|
||||
"integrity": "sha512-dBmV1lLNeX4mR7uI7KNVHsGQU+OgTG5RGFPi3tBJpsKPvOPtg9poyav/BYWrB3GPQL4dW5YGGgalwZ79WukbKQ==",
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-14.2.26.tgz",
|
||||
"integrity": "sha512-BUsomaO4d2DuXhXhgQCVt2jjX4B4/Thts8nDoIruEJkhE5ifeQFtvW5c9JkdOtYvE5p2G0hcwQ0UbRaQmQwaVg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
@@ -320,12 +327,13 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-win32-arm64-msvc": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-14.2.30.tgz",
|
||||
"integrity": "sha512-6MMHi2Qc1Gkq+4YLXAgbYslE1f9zMGBikKMdmQRHXjkGPot1JY3n5/Qrbg40Uvbi8//wYnydPnyvNhI1DMUW1g==",
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-14.2.26.tgz",
|
||||
"integrity": "sha512-5auwsMVzT7wbB2CZXQxDctpWbdEnEW/e66DyXO1DcgHxIyhP06awu+rHKshZE+lPLIGiwtjo7bsyeuubewwxMw==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
@@ -335,12 +343,13 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-win32-ia32-msvc": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-ia32-msvc/-/swc-win32-ia32-msvc-14.2.30.tgz",
|
||||
"integrity": "sha512-pVZMnFok5qEX4RT59mK2hEVtJX+XFfak+/rjHpyFh7juiT52r177bfFKhnlafm0UOSldhXjj32b+LZIOdswGTg==",
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-ia32-msvc/-/swc-win32-ia32-msvc-14.2.26.tgz",
|
||||
"integrity": "sha512-GQWg/Vbz9zUGi9X80lOeGsz1rMH/MtFO/XqigDznhhhTfDlDoynCM6982mPCbSlxJ/aveZcKtTlwfAjwhyxDpg==",
|
||||
"cpu": [
|
||||
"ia32"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
@@ -350,12 +359,13 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-win32-x64-msvc": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-14.2.30.tgz",
|
||||
"integrity": "sha512-4KCo8hMZXMjpTzs3HOqOGYYwAXymXIy7PEPAXNEcEOyKqkjiDlECumrWziy+JEF0Oi4ILHGxzgQ3YiMGG2t/Lg==",
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-14.2.26.tgz",
|
||||
"integrity": "sha512-2rdB3T1/Gp7bv1eQTTm9d1Y1sv9UuJ2LAwOE0Pe2prHKe32UNscj7YS13fRB37d0GAiGNR+Y7ZcW8YjDI8Ns0w==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
@@ -610,10 +620,11 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@typescript-eslint/typescript-estree/node_modules/brace-expansion": {
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz",
|
||||
"integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==",
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
|
||||
"integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0"
|
||||
}
|
||||
@@ -1165,13 +1176,13 @@
|
||||
}
|
||||
},
|
||||
"node_modules/axios": {
|
||||
"version": "1.11.0",
|
||||
"resolved": "https://registry.npmjs.org/axios/-/axios-1.11.0.tgz",
|
||||
"integrity": "sha512-1Lx3WLFQWm3ooKDYZD1eXmoGO9fxYQjrycfHFC8P0sCfQVXyROp0p9PFWBehewBOdCwHc+f/b8I0fMto5eSfwA==",
|
||||
"version": "1.8.4",
|
||||
"resolved": "https://registry.npmjs.org/axios/-/axios-1.8.4.tgz",
|
||||
"integrity": "sha512-eBSYY4Y68NNlHbHBMdeDmKNtDgXWhQsJcGqzO3iLUM0GraQFSS9cVgPX5I9b3lbdFKyYoAEGAZF1DwhTaljNAw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"follow-redirects": "^1.15.6",
|
||||
"form-data": "^4.0.4",
|
||||
"form-data": "^4.0.0",
|
||||
"proxy-from-env": "^1.1.0"
|
||||
}
|
||||
},
|
||||
@@ -1213,10 +1224,11 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/brace-expansion": {
|
||||
"version": "1.1.12",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
|
||||
"integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
|
||||
"version": "1.1.11",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
|
||||
"integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0",
|
||||
"concat-map": "0.0.1"
|
||||
@@ -2436,15 +2448,14 @@
|
||||
}
|
||||
},
|
||||
"node_modules/form-data": {
|
||||
"version": "4.0.4",
|
||||
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.4.tgz",
|
||||
"integrity": "sha512-KrGhL9Q4zjj0kiUt5OO4Mr/A/jlI2jDYs5eHBpYHPcBEVSiipAvn2Ko2HnPe20rmcuuvMHNdZFp+4IlGTMF0Ow==",
|
||||
"version": "4.0.2",
|
||||
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.2.tgz",
|
||||
"integrity": "sha512-hGfm/slu0ZabnNt4oaRZ6uREyfCj6P4fT/n6A1rGV+Z0VdGXjfOhVUpkn6qVQONHGIFwmveGXyDs75+nr6FM8w==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"asynckit": "^0.4.0",
|
||||
"combined-stream": "^1.0.8",
|
||||
"es-set-tostringtag": "^2.1.0",
|
||||
"hasown": "^2.0.2",
|
||||
"mime-types": "^2.1.12"
|
||||
},
|
||||
"engines": {
|
||||
@@ -2603,10 +2614,11 @@
|
||||
}
|
||||
},
|
||||
"node_modules/glob/node_modules/brace-expansion": {
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz",
|
||||
"integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==",
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
|
||||
"integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0"
|
||||
}
|
||||
@@ -3601,11 +3613,12 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/next": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/next/-/next-14.2.30.tgz",
|
||||
"integrity": "sha512-+COdu6HQrHHFQ1S/8BBsCag61jZacmvbuL2avHvQFbWa2Ox7bE+d8FyNgxRLjXQ5wtPyQwEmk85js/AuaG2Sbg==",
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/next/-/next-14.2.26.tgz",
|
||||
"integrity": "sha512-b81XSLihMwCfwiUVRRja3LphLo4uBBMZEzBBWMaISbKTwOmq3wPknIETy/8000tr7Gq4WmbuFYPS7jOYIf+ZJw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@next/env": "14.2.30",
|
||||
"@next/env": "14.2.26",
|
||||
"@swc/helpers": "0.5.5",
|
||||
"busboy": "1.6.0",
|
||||
"caniuse-lite": "^1.0.30001579",
|
||||
@@ -3620,15 +3633,15 @@
|
||||
"node": ">=18.17.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@next/swc-darwin-arm64": "14.2.30",
|
||||
"@next/swc-darwin-x64": "14.2.30",
|
||||
"@next/swc-linux-arm64-gnu": "14.2.30",
|
||||
"@next/swc-linux-arm64-musl": "14.2.30",
|
||||
"@next/swc-linux-x64-gnu": "14.2.30",
|
||||
"@next/swc-linux-x64-musl": "14.2.30",
|
||||
"@next/swc-win32-arm64-msvc": "14.2.30",
|
||||
"@next/swc-win32-ia32-msvc": "14.2.30",
|
||||
"@next/swc-win32-x64-msvc": "14.2.30"
|
||||
"@next/swc-darwin-arm64": "14.2.26",
|
||||
"@next/swc-darwin-x64": "14.2.26",
|
||||
"@next/swc-linux-arm64-gnu": "14.2.26",
|
||||
"@next/swc-linux-arm64-musl": "14.2.26",
|
||||
"@next/swc-linux-x64-gnu": "14.2.26",
|
||||
"@next/swc-linux-x64-musl": "14.2.26",
|
||||
"@next/swc-win32-arm64-msvc": "14.2.26",
|
||||
"@next/swc-win32-ia32-msvc": "14.2.26",
|
||||
"@next/swc-win32-x64-msvc": "14.2.26"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@opentelemetry/api": "^1.1.0",
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"lint": "next lint"
|
||||
},
|
||||
"dependencies": {
|
||||
"axios": "^1.11.0",
|
||||
"axios": "^1.6.0",
|
||||
"next": "^14.0.0",
|
||||
"pino": "^8.15.0",
|
||||
"react": "^18.2.0",
|
||||
|
||||
@@ -65,7 +65,6 @@ export default async function handler(req, res) {
|
||||
From,
|
||||
callId,
|
||||
callDomain,
|
||||
sipHeaders,
|
||||
dialout_settings,
|
||||
voicemail_detection,
|
||||
call_transfer
|
||||
@@ -103,7 +102,7 @@ export default async function handler(req, res) {
|
||||
const sip_config = {
|
||||
display_name: From,
|
||||
sip_mode: 'dial-in',
|
||||
num_endpoints: (call_transfer !== undefined && call_transfer !== null) ? 2 : 1,
|
||||
num_endpoints: call_transfer !== null ? 2 : 1,
|
||||
codecs: {"audio": ["OPUS"]},
|
||||
};
|
||||
daily_room_properties.sip = sip_config;
|
||||
@@ -118,7 +117,6 @@ export default async function handler(req, res) {
|
||||
dialout_settings,
|
||||
voicemail_detection,
|
||||
call_transfer,
|
||||
sip_headers: sipHeaders,
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
@@ -4,7 +4,6 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
import aiohttp
|
||||
@@ -22,23 +21,44 @@ from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
# Check if we're in local development mode
|
||||
LOCAL_RUN = os.getenv("LOCAL_RUN")
|
||||
if LOCAL_RUN:
|
||||
import asyncio
|
||||
import webbrowser
|
||||
|
||||
try:
|
||||
from local_runner import configure
|
||||
except ImportError:
|
||||
logger.error("Could not import local_runner module. Local development mode may not work.")
|
||||
|
||||
# Load environment variables
|
||||
load_dotenv(override=True)
|
||||
|
||||
# Check if we're in local development mode
|
||||
LOCAL_RUN = os.getenv("LOCAL_RUN")
|
||||
|
||||
|
||||
async def main(transport: DailyTransport):
|
||||
async def main(room_url: str, token: str):
|
||||
"""Main pipeline setup and execution function.
|
||||
|
||||
Args:
|
||||
transport: The DailyTransport object for the bot
|
||||
room_url: The Daily room URL
|
||||
token: The Daily room token
|
||||
"""
|
||||
logger.debug("Starting bot")
|
||||
logger.debug("Starting bot in room: {}", room_url)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"), voice_id="71a7ad14-091c-4e8e-a314-022ece01c121"
|
||||
api_key=os.getenv("CARTESIA_API_KEY"), voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22"
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
@@ -67,8 +87,10 @@ async def main(transport: DailyTransport):
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@@ -90,7 +112,7 @@ async def main(transport: DailyTransport):
|
||||
logger.info("Participant left: {}", participant)
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=False, force_gc=True)
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
@@ -104,25 +126,10 @@ async def bot(args: DailySessionArguments):
|
||||
body: The configuration object from the request body
|
||||
session_id: The session ID for logging
|
||||
"""
|
||||
from pipecat.audio.filters.krisp_filter import KrispFilter
|
||||
|
||||
logger.info(f"Bot process initialized {args.room_url} {args.token}")
|
||||
|
||||
transport = DailyTransport(
|
||||
args.room_url,
|
||||
args.token,
|
||||
"Pipecat Bot",
|
||||
DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_in_filter=None if LOCAL_RUN else KrispFilter(),
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
try:
|
||||
await main(transport)
|
||||
await main(args.room_url, args.token)
|
||||
logger.info("Bot process completed")
|
||||
except Exception as e:
|
||||
logger.exception(f"Error in bot process: {str(e)}")
|
||||
@@ -130,27 +137,18 @@ async def bot(args: DailySessionArguments):
|
||||
|
||||
|
||||
# Local development functions
|
||||
async def local_daily():
|
||||
async def local_main():
|
||||
"""Function for local development testing."""
|
||||
from local_runner import configure
|
||||
|
||||
try:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Pipecat Bot",
|
||||
DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
await main(transport)
|
||||
|
||||
logger.warning("_")
|
||||
logger.warning("_")
|
||||
logger.warning(f"Talk to your voice agent here: {room_url}")
|
||||
logger.warning("_")
|
||||
logger.warning("_")
|
||||
webbrowser.open(room_url)
|
||||
await main(room_url, token)
|
||||
except Exception as e:
|
||||
logger.exception(f"Error in local development mode: {e}")
|
||||
|
||||
@@ -158,6 +156,6 @@ async def local_daily():
|
||||
# Local development entry point
|
||||
if LOCAL_RUN and __name__ == "__main__":
|
||||
try:
|
||||
asyncio.run(local_daily())
|
||||
asyncio.run(local_main())
|
||||
except Exception as e:
|
||||
logger.exception(f"Failed to run in local mode: {e}")
|
||||
|
||||
@@ -1,4 +1,2 @@
|
||||
CARTESIA_API_KEY=
|
||||
OPENAI_API_KEY=
|
||||
# Local dev only
|
||||
DAILY_API_KEY=
|
||||
OPENAI_API_KEY=
|
||||
@@ -7,7 +7,6 @@
|
||||
import os
|
||||
|
||||
import aiohttp
|
||||
from fastapi import HTTPException
|
||||
|
||||
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper, DailyRoomParams
|
||||
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
agent_name = "my-first-agent"
|
||||
image = "your-username/my-first-agent:0.1"
|
||||
image_credentials = "your-dockerhub-creds"
|
||||
secret_set = "my-first-agent-secrets"
|
||||
enable_krisp = true
|
||||
|
||||
[scaling]
|
||||
min_instances = 0
|
||||
|
||||