Compare commits

..

1 Commits

Author SHA1 Message Date
Mark Backman
4249abc849 Fix missing await in simple chatbot example 2024-11-15 21:24:07 -05:00
566 changed files with 5707 additions and 44752 deletions

View File

@@ -1,48 +0,0 @@
name: android
on:
push:
branches:
- main
paths:
- "examples/simple-chatbot/client/android/**"
pull_request:
branches:
- "**"
paths:
- "examples/simple-chatbot/client/android/**"
workflow_dispatch:
inputs:
sdk_git_ref:
type: string
description: "Which git ref of the app to build"
concurrency:
group: build-android-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
sdk:
name: "Simple chatbot demo"
runs-on: ubuntu-latest
steps:
- name: Checkout repo
uses: actions/checkout@v4
with:
ref: ${{ github.event.inputs.sdk_git_ref || github.ref }}
- name: "Install Java"
uses: actions/setup-java@v4
with:
distribution: 'temurin'
java-version: '17'
- name: Build demo app
working-directory: examples/simple-chatbot/client/android
run: ./gradlew :simple-chatbot-client:assembleDebug
- name: Upload demo APK
uses: actions/upload-artifact@v4
with:
name: Simple Chatbot Android Client
path: examples/simple-chatbot/client/android/simple-chatbot-client/build/outputs/apk/debug/simple-chatbot-client-debug.apk

View File

@@ -1,54 +0,0 @@
name: coverage
on:
workflow_dispatch:
push:
branches:
- main
pull_request:
branches:
- "**"
paths-ignore:
- "docs/**"
jobs:
coverage:
name: "Coverage"
runs-on: ubuntu-latest
steps:
- name: Checkout repo
uses: actions/checkout@v4
- name: Set up Python
id: setup_python
uses: actions/setup-python@v4
with:
python-version: "3.10"
- name: Cache virtual environment
uses: actions/cache@v3
with:
# We are hashing dev-requirements.txt and test-requirements.txt which
# contain all dependencies needed to run the tests.
key: venv-${{ runner.os }}-${{ steps.setup_python.outputs.python-version}}-${{ hashFiles('dev-requirements.txt') }}-${{ hashFiles('test-requirements.txt') }}
path: .venv
- name: Install system packages
id: install_system_packages
run: |
sudo apt-get install -y portaudio19-dev
- name: Setup virtual environment
run: |
python -m venv .venv
- name: Install basic Python dependencies
run: |
source .venv/bin/activate
python -m pip install --upgrade pip
pip install -r dev-requirements.txt -r test-requirements.txt
- name: Run tests with coverage
run: |
source .venv/bin/activate
coverage run
coverage xml
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v5
with:
token: ${{ secrets.CODECOV_TOKEN }}
slug: pipecat-ai/pipecat

View File

@@ -35,12 +35,7 @@ jobs:
python -m pip install --upgrade pip
pip install -r dev-requirements.txt
- name: Ruff formatter
id: ruff-format
id: ruff
run: |
source .venv/bin/activate
ruff format --diff
- name: Ruff import linter
id: ruff-check
run: |
source .venv/bin/activate
ruff check --select I

View File

@@ -1,4 +1,4 @@
name: tests
name: test
on:
workflow_dispatch:
@@ -49,4 +49,4 @@ jobs:
- name: Test with pytest
run: |
source .venv/bin/activate
pytest
pytest --ignore-glob="*to_be_updated*" --ignore-glob=*pipeline_source* src tests

9
.gitignore vendored
View File

@@ -28,11 +28,4 @@ share/python-wheels/
MANIFEST
.DS_Store
.env
fly.toml
# Example files
pipecat/examples/twilio-chatbot/templates/streams.xml
# Documentation
docs/api/_build/
docs/api/api
fly.toml

View File

@@ -1,7 +0,0 @@
repos:
- repo: local
hooks:
- id: ruff-format-hook
name: Check ruff formatting
entry: sh scripts/pre-commit.sh
language: system

View File

@@ -1,36 +0,0 @@
version: 2
build:
os: ubuntu-22.04
tools:
python: '3.12'
apt_packages:
- portaudio19-dev
- python3-dev
- libasound2-dev
jobs:
pre_build:
- python -m pip install --upgrade pip
- pip install wheel setuptools
post_build:
- echo "Build completed"
sphinx:
configuration: docs/api/conf.py
fail_on_warning: false
python:
install:
- requirements: docs/api/requirements.txt
- method: pip
path: .
search:
ranking:
api/*: 5
getting-started/*: 4
guides/*: 3
submodules:
include: all
recursive: true

View File

@@ -5,768 +5,18 @@ All notable changes to **Pipecat** will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [0.0.57] - 2025-02-14
## Unreleased
### Added
- Added new `AudioContextWordTTSService`. This is a TTS base class for TTS
services that handling multiple separate audio requests.
- Added new frames `EmulateUserStartedSpeakingFrame` and
`EmulateUserStoppedSpeakingFrame` which can be used to emulated VAD behavior
without VAD being present or not being triggered.
- Added a new `audio_in_stream_on_start` field to `TransportParams`.
- Added a new method `start_audio_in_streaming` in the `BaseInputTransport`.
- This method should be used to start receiving the input audio in case the
field `audio_in_stream_on_start` is set to `false`.
- Added support for the `RTVIProcessor` to handle buffered audio in `base64`
format, converting it into InputAudioRawFrame for transport.
- Added support for the `RTVIProcessor` to trigger `start_audio_in_streaming`
only after the `client-ready` message.
- Added new `MUTE_UNTIL_FIRST_BOT_COMPLETE` strategy to `STTMuteStrategy`. This
strategy starts muted and remains muted until the first bot speech completes,
ensuring the bot's first response cannot be interrupted. This complements the
existing `FIRST_SPEECH` strategy which only mutes during the first detected
bot speech.
- Added support for Google Cloud Speech-to-Text V2 through `GoogleSTTService`.
- Added `RimeTTSService`, a new `WordTTSService`. Updated the foundational
example `07q-interruptible-rime.py` to use `RimeTTSService`.
- Added support for Groq's Whisper API through the new `GroqSTTService` and
OpenAI's Whisper API through the new `OpenAISTTService`. Introduced a new
base class `BaseWhisperSTTService` to handle common Whisper API
functionality.
- Added `PerplexityLLMService` for Perplexity NIM API integration, with an
OpenAI-compatible interface. Also, added foundational example
`14n-function-calling-perplexity.py`.
- Added `DailyTransport.update_remote_participants()`. This allows you to update
remote participant's settings, like their permissions or which of their
devices are enabled. Requires that the local participant have participant
admin permission.
### Changed
- We don't consider a colon `:` and end of sentence any more.
- Updated `DailyTransport` to respect the `audio_in_stream_on_start` field,
ensuring it only starts receiving the audio input if it is enabled.
- Updated `FastAPIWebsocketOutputTransport` to send `TransportMessageFrame` and
`TransportMessageUrgentFrame` to the serializer.
- Updated `WebsocketServerOutputTransport` to send `TransportMessageFrame` and
`TransportMessageUrgentFrame` to the serializer.
- Enhanced `STTMuteConfig` to validate strategy combinations, preventing
`MUTE_UNTIL_FIRST_BOT_COMPLETE` and `FIRST_SPEECH` from being used together
as they handle first bot speech differently.
- Updated foundational example `07n-interruptible-google.py` to use all Google
services.
- `RimeHttpTTSService` now uses the `mistv2` model by default.
- Improved error handling in `AzureTTSService` to properly detect and log
synthesis cancellation errors.
- Enhanced `WhisperSTTService` with full language support and improved model
documentation.
- Updated foundation example `14f-function-calling-groq.py` to use
`GroqSTTService` for transcription.
- Updated `GroqLLMService` to use `llama-3.3-70b-versatile` as the default
model.
- `RTVIObserver` doesn't handle `LLMSearchResponseFrame` frames anymore. For
now, to handle those frames you need to create a `GoogleRTVIObserver`
instead.
### Deprecated
- `STTMuteFilter` constructor's `stt_service` parameter is now deprecated and
will be removed in a future version. The filter now manages mute state
internally instead of querying the STT service.
- `RTVI.observer()` is now deprecated, instantiate an `RTVIObserver` directly
instead.
- All RTVI frame processors (e.g. `RTVISpeakingProcessor`,
`RTVIBotLLMProcessor`) are now deprecated, instantiate an `RTVIObserver`
instead.
### Fixed
- Fixed a `FalImageGenService` issue that was causing the event loop to be
blocked while loading the downloadded image.
- Fixed a `CartesiaTTSService` service issue that would cause audio overlapping
in some cases.
- Fixed a websocket-based service issue (e.g. `CartesiaTTSService`) that was
preventing a reconnection after the server disconnected cleanly, which was
causing an inifite loop instead.
- Fixed a `BaseOutputTransport` issue that was causing upstream frames to no be
pushed upstream.
- Fixed multiple issue where user transcriptions where not being handled
properly. It was possible for short utterances to not trigger VAD which would
cause user transcriptions to be ignored. It was also possible for one or more
transcriptions to be generated after VAD in which case they would also be
ignored.
- Fixed an issue that was causing `BotStoppedSpeakingFrame` to be generated too
late. This could then cause issues unblocking `STTMuteFilter` later than
desired.
- Fixed an issue that was causing `AudioBufferProcessor` to not record
synchronized audio.
- Fixed an `RTVI` issue that was causing `bot-tts-text` messages to be sent
before being processed by the output transport.
- Fixed an issue[#1192] in 11labs where we are trying to reconnect/disconnect
the websocket connection even when the connection is already closed.
- Fixed an issue where `has_regular_messages` condition was always true in
`GoogleLLMContext` due to `Part` having `function_call` & `function_response`
with `None` values.
### Other
- Added new `instant-voice` example. This example showcases how to enable
instant voice communication as soon as a user connects.
- Added new `local-input-select-stt` example. This examples allows you to play
with local audio inputs by slecting them through a nice text interface.
## [0.0.56] - 2025-02-06
### Changed
- Use `gemini-2.0-flash-001` as the default model for `GoogleLLMSerivce`.
- Improved foundational examples 22b, 22c, and 22d to support function calling.
With these base examples, `FunctionCallInProgressFrame` and
`FunctionCallResultFrame` will no longer be blocked by the gates.
### Fixed
- Fixed a `TkLocalTransport` and `LocalAudioTransport` issues that was causing
errors on cleanup.
- Fixed an issue that was causing `tests.utils` import to fail because of
logging setup.
- Fixed a `SentryMetrics` issue that was preventing any metrics to be sent to
Sentry and also was preventing from metrics frames to be pushed to the
pipeline.
- Fixed an issue in `BaseOutputTransport` where incoming audio would not be
resampled to the desired output sample rate.
- Fixed an issue with the `TwilioFrameSerializer` and `TelnyxFrameSerializer`
where `twilio_sample_rate` and `telnyx_sample_rate` were incorrectly
initialized to `audio_in_sample_rate`. Those values currently default to 8000
and should be set manually from the serializer constructor if a different
value is needed.
### Other
- Added a new `sentry-metrics` example.
## [0.0.55] - 2025-02-05
### Added
- Added a new `start_metadata` field to `PipelineParams`. The provided metadata
will be set to the initial `StartFrame` being pushed from the `PipelineTask`.
- Added new fields to `PipelineParams` to control audio input and output sample
rates for the whole pipeline. This allows controlling sample rates from a
single place instead of having to specify sample rates in each
service. Setting a sample rate to a service is still possible and will
override the value from `PipelineParams`.
- Introduce audio resamplers (`BaseAudioResampler`). This is just a base class
to implement audio resamplers. Currently, two implementations are provided
`SOXRAudioResampler` and `ResampyResampler`. A new
`create_default_resampler()` has been added (replacing the now deprecated
`resample_audio()`).
- It is now possible to specify the asyncio event loop that a `PipelineTask` and
all the processors should run on by passing it as a new argument to the
`PipelineRunner`. This could allow running pipelines in multiple threads each
one with its own event loop.
- Added a new `utils.TaskManager`. Instead of a global task manager we now have
a task manager per `PipelineTask`. In the previous version the task manager
was global, so running multiple simultaneous `PipelineTask`s could result in
dangling task warnings which were not actually true. In order, for all the
processors to know about the task manager, we pass it through the
`StartFrame`. This means that processors should create tasks when they receive
a `StartFrame` but not before (because they don't have a task manager yet).
- Added `TelnyxFrameSerializer` to support Telnyx calls. A full running example
has also been added to `examples/telnyx-chatbot`.
- Allow pushing silence audio frames before `TTSStoppedFrame`. This might be
useful for testing purposes, for example, passing bot audio to an STT service
which usually needs additional audio data to detect the utterance stopped.
- `TwilioSerializer` now supports transport message frames. With this we can
create Twilio emulators.
- Added a new transport: `WebsocketClientTransport`.
- Added a `metadata` field to `Frame` which makes it possible to pass custom
data to all frames.
- Added `test/utils.py` inside of pipecat package.
### Changed
- `GatedOpenAILLMContextAggregator` now require keyword arguments. Also, a new
`start_open` argument has been added to set the initial state of the gate.
- Added `organization` and `project` level authentication to
`OpenAILLMService`.
- Improved the language checking logic in `ElevenLabsTTSService` and
`ElevenLabsHttpTTSService` to properly handle language codes based on model
compatibility, with appropriate warnings when language codes cannot be
applied.
- Updated `GoogleLLMContext` to support pushing `LLMMessagesUpdateFrame`s that
contain a combination of function calls, function call responses, system
messages, or just messages.
- `InputDTMFFrame` is now based on `DTMFFrame`. There's also a new
`OutputDTMFFrame` frame.
### Deprecated
- `resample_audio()` is now deprecated, use `create_default_resampler()`
instead.
### Removed
- `AudioBufferProcessor.reset_audio_buffers()` has been removed, use
`AudioBufferProcessor.start_recording()` and
`AudioBufferProcessor.stop_recording()` instead.
### Fixed
- Fixed a `AudioBufferProcessor` that would cause crackling in some recordings.
- Fixed an issue in `AudioBufferProcessor` where user callback would not be
called on task cancellation.
- Fixed an issue in `AudioBufferProcessor` that would cause wrong silence
padding in some cases.
- Fixed an issue where `ElevenLabsTTSService` messages would return a 1009
websocket error by increasing the max message size limit to 16MB.
- Fixed a `DailyTransport` issue that would cause events to be triggered before
join finished.
- Fixed a `PipelineTask` issue that was preventing processors to be cleaned up
after cancelling the task.
- Fixed an issue where queuing a `CancelFrame` to a pipeline task would not
cause the task to finish. However, using `PipelineTask.cancel()` is still the
recommended way to cancel a task.
### Other
- Improved Unit Test `run_test()` to use `PipelineTask` and
`PipelineRunner`. There's now also some control around `StartFrame` and
`EndFrame`. The `EndTaskFrame` has been removed since it doesn't seem
necessary with this new approach.
- Updated `twilio-chatbot` with a few new features: use 8000 sample rate and
avoid resampling, a new client useful for stress testing and testing locally
without the need to make phone calls. Also, added audio recording on both the
client and the server to make sure the audio sounds good.
- Updated examples to use `task.cancel()` to immediately exit the example when a
participant leaves or disconnects, instead of pushing an `EndFrame`. Pushing
an `EndFrame` causes the bot to run through everything that is internally
queued (which could take some seconds). Note that using `task.cancel()` might
not always be the best option and pushing an `EndFrame` could still be
desirable to make sure all the pipeline is flushed.
## [0.0.54] - 2025-01-27
### Added
- In order to create tasks in Pipecat frame processors it is now recommended to
use `FrameProcessor.create_task()` (which uses the new
`utils.asyncio.create_task()`). It takes care of uncaught exceptions, task
cancellation handling and task management. To cancel or wait for a task there
is `FrameProcessor.cancel_task()` and `FrameProcessor.wait_for_task()`. All of
Pipecat processors have been updated accordingly. Also, when a pipeline runner
finishes, a warning about dangling tasks might appear, which indicates if any
of the created tasks was never cancelled or awaited for (using these new
functions).
- It is now possible to specify the period of the `PipelineTask` heartbeat
frames with `heartbeats_period_secs`.
- Added `DailyMeetingTokenProperties` and `DailyMeetingTokenParams` Pydantic models
for meeting token creation in `get_token` method of `DailyRESTHelper`.
- Added `enable_recording` and `geo` parameters to `DailyRoomProperties`.
- Added `RecordingsBucketConfig` to `DailyRoomProperties` to upload recordings
to a custom AWS bucket.
### Changed
- Enhanced `UserIdleProcessor` with retry functionality and control over idle
monitoring via new callback signature `(processor, retry_count) -> bool`.
Updated the `17-detect-user-idle.py` to show how to use the `retry_count`.
- Add defensive error handling for `OpenAIRealtimeBetaLLMService`'s audio
truncation. Audio truncation errors during interruptions now log a warning
and allow the session to continue instead of throwing an exception.
- Modified `TranscriptProcessor` to use TTS text frames for more accurate assistant
transcripts. Assistant messages are now aggregated based on bot speaking boundaries
rather than LLM context, providing better handling of interruptions and partial
utterances.
- Updated foundational examples `28a-transcription-processor-openai.py`,
`28b-transcript-processor-anthropic.py`, and
`28c-transcription-processor-gemini.py` to use the updated
`TranscriptProcessor`.
### Fixed
- Fixed an `GeminiMultimodalLiveLLMService` issue that was preventing the user
to push initial LLM assistant messages (using `LLMMessagesAppendFrame`).
- Added missing `FrameProcessor.cleanup()` calls to `Pipeline`,
`ParallelPipeline` and `UserIdleProcessor`.
- Fixed a type error when using `voice_settings` in `ElevenLabsHttpTTSService`.
- Fixed an issue where `OpenAIRealtimeBetaLLMService` function calling resulted
in an error.
- Fixed an issue in `AudioBufferProcessor` where the last audio buffer was not
being processed, in cases where the `_user_audio_buffer` was smaller than the
buffer size.
### Performance
- Replaced audio resampling library `resampy` with `soxr`. Resampling a 2:21s
audio file from 24KHz to 16KHz took 1.41s with `resampy` and 0.031s with
`soxr` with similar audio quality.
### Other
- Added initial unit test infrastructure.
## [0.0.53] - 2025-01-18
### Added
- Added `ElevenLabsHttpTTSService` which uses EleveLabs' HTTP API instead of the
websocket one.
- Introduced pipeline frame observers. Observers can view all the frames that go
through the pipeline without the need to inject processors in the
pipeline. This can be useful, for example, to implement frame loggers or
debuggers among other things. The example
`examples/foundational/30-observer.py` shows how to add an observer to a
pipeline for debugging.
- Introduced heartbeat frames. The pipeline task can now push periodic
heartbeats down the pipeline when `enable_heartbeats=True`. Heartbeats are
system frames that are supposed to make it all the way to the end of the
pipeline. When a heartbeat frame is received the traversing time (i.e. the
time it took to go through the whole pipeline) will be displayed (with TRACE
logging) otherwise a warning will be shown. The example
`examples/foundational/31-heartbeats.py` shows how to enable heartbeats and
forces warnings to be displayed.
- Added `LLMTextFrame` and `TTSTextFrame` which should be pushed by LLM and TTS
services respectively instead of `TextFrame`s.
- Added `OpenRouter` for OpenRouter integration with an OpenAI-compatible
interface. Added foundational example `14m-function-calling-openrouter.py`.
- Added a new `WebsocketService` based class for TTS services, containing
base functions and retry logic.
- Added `DeepSeekLLMService` for DeepSeek integration with an OpenAI-compatible
interface. Added foundational example `14l-function-calling-deepseek.py`.
- Added `FunctionCallResultProperties` dataclass to provide a structured way to
control function call behavior, including:
- `run_llm`: Controls whether to trigger LLM completion
- `on_context_updated`: Optional callback triggered after context update
- Added a new foundational example `07e-interruptible-playht-http.py` for easy
testing of `PlayHTHttpTTSService`.
- Added support for Google TTS Journey voices in `GoogleTTSService`.
- Added `29-livekit-audio-chat.py`, as a new foundational examples for
`LiveKitTransportLayer`.
- Added `enable_prejoin_ui`, `max_participants` and `start_video_off` params
to `DailyRoomProperties`.
- Added `session_timeout` to `FastAPIWebsocketTransport` and
`WebsocketServerTransport` for configuring session timeouts (in
seconds). Triggers `on_session_timeout` for custom timeout handling.
See [examples/websocket-server/bot.py](https://github.com/pipecat-ai/pipecat/blob/main/examples/websocket-server/bot.py).
- Added the new modalities option and helper function to set Gemini output
modalities.
- Added `examples/foundational/26d-gemini-multimodal-live-text.py` which is
using Gemini as TEXT modality and using another TTS provider for TTS process.
### Changed
- Modified `UserIdleProcessor` to start monitoring only after first
conversation activity (`UserStartedSpeakingFrame` or
`BotStartedSpeakingFrame`) instead of immediately.
- Modified `OpenAIAssistantContextAggregator` to support controlled completions
and to emit context update callbacks via `FunctionCallResultProperties`.
- Added `aws_session_token` to the `PollyTTSService`.
- Changed the default model for `PlayHTHttpTTSService` to `Play3.0-mini-http`.
- `api_key`, `aws_access_key_id` and `region` are no longer required parameters
for the PollyTTSService (AWSTTSService)
- Added `session_timeout` example in `examples/websocket-server/bot.py` to
handle session timeout event.
- Changed `InputParams` in
`src/pipecat/services/gemini_multimodal_live/gemini.py` to support different
modalities.
- Changed `DeepgramSTTService` to send `finalize` event whenever VAD detects
`UserStoppedSpeakingFrame`. This helps in faster transcriptions and clearing
the `Deepgram` audio buffer.
### Fixed
- Fixed an issue where `DeepgramSTTService` was not generating metrics using
pipeline's VAD.
- Fixed `UserIdleProcessor` not properly propagating `EndFrame`s through the
pipeline.
- Fixed an issue where websocket based TTS services could incorrectly terminate
their connection due to a retry counter not resetting.
- Fixed a `PipelineTask` issue that would cause a dangling task after stopping
the pipeline with an `EndFrame`.
- Fixed an import issue for `PlayHTHttpTTSService`.
- Fixed an issue where languages couldn't be used with the `PlayHTHttpTTSService`.
- Fixed an issue where `OpenAIRealtimeBetaLLMService` audio chunks were hitting
an error when truncating audio content.
- Fixed an issue where setting the voice and model for `RimeHttpTTSService`
wasn't working.
- Fixed an issue where `IdleFrameProcessor` and `UserIdleProcessor` were getting
initialized before the start of the pipeline.
## [0.0.52] - 2024-12-24
### Added
- Constructor arguments for GoogleLLMService to directly set tools and tool_config.
- Smart turn detection example (`22d-natural-conversation-gemini-audio.py`) that
leverages Gemini 2.0 capabilities ().
(see https://x.com/kwindla/status/1870974144831275410)
- Added `DailyTransport.send_dtmf()` to send dial-out DTMF tones.
- Added `DailyTransport.sip_call_transfer()` to forward SIP and PSTN calls to
another address or number. For example, transfer a SIP call to a different
SIP address or transfer a PSTN phone number to a different PSTN phone number.
- Added `DailyTransport.sip_refer()` to transfer incoming SIP/PSTN calls from
outside Daily to another SIP/PSTN address.
- Added an `auto_mode` input parameter to `ElevenLabsTTSService`. `auto_mode`
is set to `True` by default. Enabling this setting disables the chunk
schedule and all buffers, which reduces latency.
- Added `KoalaFilter` which implement on device noise reduction using Koala
Noise Suppression.
(see https://picovoice.ai/platform/koala/)
- Added `CerebrasLLMService` for Cerebras integration with an OpenAI-compatible
interface. Added foundational example `14k-function-calling-cerebras.py`.
- Pipecat now supports Python 3.13. We had a dependency on the `audioop` package
which was deprecated and now removed on Python 3.13. We are now using
`audioop-lts` (https://github.com/AbstractUmbra/audioop) to provide the same
functionality.
- Added timestamped conversation transcript support:
- New `TranscriptProcessor` factory provides access to user and assistant
transcript processors.
- `UserTranscriptProcessor` processes user speech with timestamps from
transcription.
- `AssistantTranscriptProcessor` processes assistant responses with LLM
context timestamps.
- Messages emitted with ISO 8601 timestamps indicating when they were spoken.
- Supports all LLM formats (OpenAI, Anthropic, Google) via standard message
format.
- New examples: `28a-transcription-processor-openai.py`,
`28b-transcription-processor-anthropic.py`, and
`28c-transcription-processor-gemini.py`.
- Add support for more languages to ElevenLabs (Arabic, Croatian, Filipino,
Tamil) and PlayHT (Afrikans, Albanian, Amharic, Arabic, Bengali, Croatian,
Galician, Hebrew, Mandarin, Serbian, Tagalog, Urdu, Xhosa).
### Changed
- `PlayHTTTSService` uses the new v4 websocket API, which also fixes an issue
where text inputted to the TTS didn't return audio.
- The default model for `ElevenLabsTTSService` is now `eleven_flash_v2_5`.
- `OpenAIRealtimeBetaLLMService` now takes a `model` parameter in the
constructor.
- Updated the default model for the `OpenAIRealtimeBetaLLMService`.
- Room expiration (`exp`) in `DailyRoomProperties` is now optional (`None`) by
default instead of automatically setting a 5-minute expiration time. You must
explicitly set expiration time if desired.
### Deprecated
- `AWSTTSService` is now deprecated, use `PollyTTSService` instead.
### Fixed
- Fixed token counting in `GoogleLLMService`. Tokens were summed incorrectly
(double-counted in many cases).
- Fixed an issue that could cause the bot to stop talking if there was a user
interruption before getting any audio from the TTS service.
- Fixed an issue that would cause `ParallelPipeline` to handle `EndFrame`
incorrectly causing the main pipeline to not terminate or terminate too early.
- Fixed an audio stuttering issue in `FastPitchTTSService`.
- Fixed a `BaseOutputTransport` issue that was causing non-audio frames being
processed before the previous audio frames were played. This will allow, for
example, sending a frame `A` after a `TTSSpeakFrame` and the frame `A` will
only be pushed downstream after the audio generated from `TTSSpeakFrame` has
been spoken.
- Fixed a `DeepgramSTTService` issue that was causing language to be passed as
an object instead of a string resulting in the connection to fail.
## [0.0.51] - 2024-12-16
### Fixed
- Fixed an issue in websocket-based TTS services that was causing infinite
reconnections (Cartesia, ElevenLabs, PlayHT and LMNT).
## [0.0.50] - 2024-12-11
### Added
- Added `GeminiMultimodalLiveLLMService`. This is an integration for Google's
Gemini Multimodal Live API, supporting:
- Real-time audio and video input processing
- Streaming text responses with TTS
- Audio transcription for both user and bot speech
- Function calling
- System instructions and context management
- Dynamic parameter updates (temperature, top_p, etc.)
- Added `AudioTranscriber` utility class for handling audio transcription with
Gemini models.
- Added new context classes for Gemini:
- `GeminiMultimodalLiveContext`
- `GeminiMultimodalLiveUserContextAggregator`
- `GeminiMultimodalLiveAssistantContextAggregator`
- `GeminiMultimodalLiveContextAggregatorPair`
- Added new foundational examples for `GeminiMultimodalLiveLLMService`:
- `26-gemini-multimodal-live.py`
- `26a-gemini-multimodal-live-transcription.py`
- `26b-gemini-multimodal-live-video.py`
- `26c-gemini-multimodal-live-video.py`
- Added `SimliVideoService`. This is an integration for Simli AI avatars.
(see https://www.simli.com)
- Added NVIDIA Riva's `FastPitchTTSService` and `ParakeetSTTService`.
(see https://www.nvidia.com/en-us/ai-data-science/products/riva/)
- Added `IdentityFilter`. This is the simplest frame filter that lets through
all incoming frames.
- New `STTMuteStrategy` called `FUNCTION_CALL` which mutes the STT service
during LLM function calls.
- `DeepgramSTTService` now exposes two event handlers `on_speech_started` and
`on_utterance_end` that could be used to implement interruptions. See new
example `examples/foundational/07c-interruptible-deepgram-vad.py`.
- Added `GroqLLMService`, `GrokLLMService`, and `NimLLMService` for Groq, Grok,
and NVIDIA NIM API integration, with an OpenAI-compatible interface.
- New examples demonstrating function calling with Groq, Grok, Azure OpenAI,
Fireworks, and NVIDIA NIM: `14f-function-calling-groq.py`,
`14g-function-calling-grok.py`, `14h-function-calling-azure.py`,
`14i-function-calling-fireworks.py`, and `14j-function-calling-nvidia.py`.
- In order to obtain the audio stored by the `AudioBufferProcessor` you can now
also register an `on_audio_data` event handler. The `on_audio_data` handler
will be called every time `buffer_size` (a new constructor argument) is
reached. If `buffer_size` is 0 (default) you need to manually get the audio as
before using `AudioBufferProcessor.merge_audio_buffers()`.
```
@audiobuffer.event_handler("on_audio_data")
async def on_audio_data(processor, audio, sample_rate, num_channels):
await save_audio(audio, sample_rate, num_channels)
```
- Added a new RTVI message called `disconnect-bot`, which when handled pushes
an `EndFrame` to trigger the pipeline to stop.
### Changed
- `STTMuteFilter` now supports multiple simultaneous muting strategies.
- `XTTSService` language now defaults to `Language.EN`.
- `SoundfileMixer` doesn't resample input files anymore to avoid startup
delays. The sample rate of the provided sound files now need to match the
sample rate of the output transport.
- Input frames (audio, image and transport messages) are now system frames. This
means they are processed immediately by all processors instead of being queued
internally.
- Expanded the transcriptions.language module to support a superset of
languages.
- Updated STT and TTS services with language options that match the supported
languages for each service.
- Updated the `AzureLLMService` to use the `OpenAILLMService`. Updated the
`api_version` to `2024-09-01-preview`.
- Updated the `FireworksLLMService` to use the `OpenAILLMService`. Updated the
default model to `accounts/fireworks/models/firefunction-v2`.
- Updated the `simple-chatbot` example to include a Javascript and React client
example, using RTVI JS and React.
### Removed
- Removed `AppFrame`. This was used as a special user custom frame, but there's
actually no use case for that.
### Fixed
- Fixed a `ParallelPipeline` issue that would cause system frames to be queued.
- Fixed `FastAPIWebsocketTransport` so it can work with binary data (e.g. using
the protobuf serializer).
- Fixed an issue in `CartesiaTTSService` that could cause previous audio to be
received after an interruption.
- Fixed Cartesia, ElevenLabs, LMNT and PlayHT TTS websocket
reconnection. Before, if an error occurred no reconnection was happening.
- Fixed a `BaseOutputTransport` issue that was causing audio to be discarded
after an `EndFrame` was received.
- Fixed an issue in `WebsocketServerTransport` and `FastAPIWebsocketTransport`
that would cause a busy loop when using audio mixer.
- Fixed a `DailyTransport` and `LiveKitTransport` issue where connections were
being closed in the input transport prematurely. This was causing frames
queued inside the pipeline being discarded.
- Fixed an issue in `DailyTransport` that would cause some internal callbacks to
not be executed.
- Fixed an issue where other frames were being processed while a `CancelFrame`
was being pushed down the pipeline.
- `AudioBufferProcessor` now handles interruptions properly.
- Fixed a `WebsocketServerTransport` issue that would prevent interruptions with
`TwilioSerializer` from working.
- `DailyTransport.capture_participant_video` now allows capturing user's screen
share by simply passing `video_source="screenVideo"`.
- Fixed Google Gemini message handling to properly convert appended messages to
Gemini's required format.
- Fixed an issue with `FireworksLLMService` where chat completions were failing
by removing the `stream_options` from the chat completion options.
## [0.0.49] - 2024-11-17
### Added
- Added RTVI `on_bot_started` event which is useful in a single turn
interaction.
- Added `DailyTransport` events `dialin-connected`, `dialin-stopped`,
`dialin-error` and `dialin-warning`. Needs daily-python >= 0.13.0.
- Added `RimeHttpTTSService` and the `07q-interruptible-rime.py` foundational
example.
- Added `STTMuteFilter`, a general-purpose processor that combines STT
muting and interruption control. When active, it prevents both transcription
and interruptions during bot speech. The processor supports multiple
strategies: `FIRST_SPEECH` (mute only during bot's first
speech), `ALWAYS` (mute during all bot speech), or `CUSTOM` (using provided
callback).
- Added `STTMuteFrame`, a control frame that enables/disables speech
transcription in STT services.
@@ -1756,9 +1006,6 @@ async def on_connected(processor):
### Changed
- `FrameSerializer.serialize()` and `FrameSerializer.deserialize()` are now
`async`.
- `Filter` has been renamed to `FrameFilter` and it's now under
`processors/filters`.

View File

@@ -1,6 +1,6 @@
BSD 2-Clause License
Copyright (c) 20242025, Daily
Copyright (c) 2024, Daily
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

View File

@@ -2,7 +2,7 @@
 <img alt="pipecat" width="300px" height="auto" src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/pipecat.png">
</div></h1>
[![PyPI](https://img.shields.io/pypi/v/pipecat-ai)](https://pypi.org/project/pipecat-ai) ![Tests](https://github.com/pipecat-ai/pipecat/actions/workflows/tests.yaml/badge.svg) [![codecov](https://codecov.io/gh/pipecat-ai/pipecat/graph/badge.svg?token=LNVUIVO4Y9)](https://codecov.io/gh/pipecat-ai/pipecat) [![Docs](https://img.shields.io/badge/Documentation-blue)](https://docs.pipecat.ai) [![Discord](https://img.shields.io/discord/1239284677165056021)](https://discord.gg/pipecat)
[![PyPI](https://img.shields.io/pypi/v/pipecat-ai)](https://pypi.org/project/pipecat-ai) [![Discord](https://img.shields.io/discord/1239284677165056021)](https://discord.gg/pipecat) <a href="https://app.commanddash.io/agent/github_pipecat-ai_pipecat"><img src="https://img.shields.io/badge/AI-Code%20Agent-EB9FDA"></a>
Pipecat is an open source Python framework for building voice and multimodal conversational agents. It handles the complex orchestration of AI services, network transport, audio processing, and multimodal interactions, letting you focus on creating engaging experiences.
@@ -13,7 +13,6 @@ Pipecat is an open source Python framework for building voice and multimodal con
- **Multimodal Apps**: Combine voice, video, images, and text
- **Creative Tools**: [Story-telling experiences](https://storytelling-chatbot.fly.dev/) and social companions
- **Business Solutions**: [Customer intake flows](https://www.youtube.com/watch?v=lDevgsp9vn0) and support bots
- **Complex conversational flows**: [Refer to Pipecat Flows](https://github.com/pipecat-ai/pipecat-flows) to learn more
## See it in action
@@ -33,8 +32,6 @@ Pipecat is an open source Python framework for building voice and multimodal con
- **Real-time Processing**: Frame-based pipeline architecture for fluid interactions
- **Production Ready**: Enterprise-grade WebRTC and Websocket support
💡 Looking to build structured conversations? Check out [Pipecat Flows](https://github.com/pipecat-ai/pipecat-flows) for managing complex conversational states and transitions.
## Getting started
You can get started with Pipecat running on your local machine, then move your agent processes to the cloud when youre ready. You can also add a 📞 telephone number, 🖼️ image output, 📺 video input, use different LLMs, and more.
@@ -53,21 +50,21 @@ To keep things lightweight, only the core framework is included by default. If y
pip install "pipecat-ai[option,...]"
```
### Available services
Available options include:
| Category | Services | Install Command Example |
| ------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------- |
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) | `pip install "pipecat-ai[deepgram]"` |
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Together AI](https://docs.pipecat.ai/server/services/llm/together) | `pip install "pipecat-ai[openai]"` |
| Text-to-Speech | [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) | `pip install "pipecat-ai[cartesia]"` |
| Speech-to-Speech | [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai) | `pip install "pipecat-ai[google]"` |
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local | `pip install "pipecat-ai[daily]"` |
| Video | [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) | `pip install "pipecat-ai[tavus,simli]"` |
| Vision & Image | [Moondream](https://docs.pipecat.ai/server/services/vision/moondream), [fal](https://docs.pipecat.ai/server/services/image-generation/fal) | `pip install "pipecat-ai[moondream]"` |
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [Noisereduce](https://docs.pipecat.ai/server/utilities/audio/noisereduce-filter) | `pip install "pipecat-ai[silero]"` |
| Analytics & Metrics | [Canonical AI](https://docs.pipecat.ai/server/services/analytics/canonical), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) | `pip install "pipecat-ai[canonical]"` |
| Category | Services | Install Command Example |
| ------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------- |
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/api-reference/services/stt/assemblyai), [Azure](https://docs.pipecat.ai/api-reference/services/stt/azure), [Deepgram](https://docs.pipecat.ai/api-reference/services/stt/deepgram), [Gladia](https://docs.pipecat.ai/api-reference/services/stt/gladia), [Whisper](https://docs.pipecat.ai/api-reference/services/stt/whisper) | `pip install "pipecat-ai[deepgram]"` |
| LLMs | [Anthropic](https://docs.pipecat.ai/api-reference/services/llm/anthropic), [Azure](https://docs.pipecat.ai/api-reference/services/llm/azure), [Fireworks AI](https://docs.pipecat.ai/api-reference/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/api-reference/services/llm/gemini), [Ollama](https://docs.pipecat.ai/api-reference/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/api-reference/services/llm/openai), [Together AI](https://docs.pipecat.ai/api-reference/services/llm/together) | `pip install "pipecat-ai[openai]"` |
| Text-to-Speech | [AWS](https://docs.pipecat.ai/api-reference/services/tts/aws), [Azure](https://docs.pipecat.ai/api-reference/services/tts/azure), [Cartesia](https://docs.pipecat.ai/api-reference/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/api-reference/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/api-reference/services/tts/elevenlabs), [Google](https://docs.pipecat.ai/api-reference/services/tts/google), [LMNT](https://docs.pipecat.ai/api-reference/services/tts/lmnt), [OpenAI](https://docs.pipecat.ai/api-reference/services/tts/openai), [PlayHT](https://docs.pipecat.ai/api-reference/services/tts/playht), [Rime](https://docs.pipecat.ai/api-reference/services/tts/rime), [XTTS](https://docs.pipecat.ai/api-reference/services/tts/xtts) | `pip install "pipecat-ai[cartesia]"` |
| Speech-to-Speech | [OpenAI Realtime](https://docs.pipecat.ai/api-reference/services/s2s/openai) | `pip install "pipecat-ai[openai]"` |
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/api-reference/services/transport/daily), WebSocket, Local | `pip install "pipecat-ai[daily]"` |
| Video | [Tavus](https://docs.pipecat.ai/api-reference/services/video/tavus) | `pip install "pipecat-ai[tavus]"` |
| Vision & Image | [Moondream](https://docs.pipecat.ai/api-reference/services/vision/moondream), [fal](https://docs.pipecat.ai/api-reference/services/image-generation/fal) | `pip install "pipecat-ai[moondream]"` |
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/api-reference/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/api-reference/utilities/audio/krisp-filter), [Noisereduce](https://docs.pipecat.ai/api-reference/utilities/audio/noisereduce-filter) | `pip install "pipecat-ai[silero]"` |
| Analytics & Metrics | [Canonical AI](https://docs.pipecat.ai/api-reference/services/analytics/canonical), [Sentry](https://docs.pipecat.ai/api-reference/services/analytics/sentry) | `pip install "pipecat-ai[canonical]"` |
📚 [View full services documentation →](https://docs.pipecat.ai/server/services/supported-services)
📚 [View full services documentation →](https://docs.pipecat.ai/api-reference/services/supported-services)
## Code examples
@@ -81,7 +78,7 @@ Here is a very basic Pipecat bot that greets a user when they join a real-time s
```python
import asyncio
from pipecat.frames.frames import TextFrame
from pipecat.frames.frames import EndFrame, TextFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.task import PipelineTask
from pipecat.pipeline.runner import PipelineRunner
@@ -122,7 +119,7 @@ async def main():
# Register an event handler to exit the application when the user leaves.
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.cancel()
await task.queue_frame(EndFrame())
# Run the pipeline task
await runner.run(task)
@@ -149,40 +146,27 @@ Sign up [here](https://dashboard.daily.co/u/signup) and [create a room](https://
## Hacking on the framework itself
_Note: You may need to set up a virtual environment before following these instructions. From the root of the repo:_
_Note that you may need to set up a virtual environment before following the instructions below. For instance, you might need to run the following from the root of the repo:_
```shell
python3 -m venv venv
source venv/bin/activate
```
Install the development dependencies:
From the root of this repo, run the following:
```shell
pip install -r dev-requirements.txt
python -m build
```
Install the git pre-commit hooks (these help ensure your code follows project rules):
This builds the package. To use the package locally (e.g. to run sample files), run
```shell
pre-commit install
pip install --editable ".[option,...]"
```
Install the `pipecat-ai` package locally in editable mode:
```shell
pip install -e .
```
The `-e` or `--editable` option allows you to modify the code without reinstalling.
To include optional dependencies, add them to the install command. For example:
```shell
pip install -e ".[daily,deepgram,cartesia,openai,silero]" # Updated for the services you're using
```
If you want to use this package from another directory:
If you want to use this package from another directory, you can run:
```shell
pip install "path_to_this_repo[option,...]"
@@ -193,7 +177,7 @@ pip install "path_to_this_repo[option,...]"
From the root directory, run:
```shell
pytest
pytest --doctest-modules --ignore-glob="*to_be_updated*" --ignore-glob=*pipeline_source* src tests
```
## Setting up your editor
@@ -210,7 +194,9 @@ You can use [use-package](https://github.com/jwiegley/use-package) to install [e
:hook ((python-mode . lazy-ruff-mode))
:config
(setq lazy-ruff-format-command "ruff format")
(setq lazy-ruff-check-command "ruff check --select I"))
(setq lazy-ruff-only-format-block t)
(setq lazy-ruff-only-format-region t)
(setq lazy-ruff-only-format-buffer t))
```
`ruff` was installed in the `venv` environment described before, so you should be able to use [pyvenv-auto](https://github.com/ryotaro612/pyvenv-auto) to automatically load that environment inside Emacs.
@@ -220,6 +206,7 @@ You can use [use-package](https://github.com/jwiegley/use-package) to install [e
:ensure t
:defer t
:hook ((python-mode . pyvenv-auto-run)))
```
### Visual Studio Code
@@ -234,16 +221,6 @@ Install the
}
```
### PyCharm
`ruff` was installed in the `venv` environment described before, now to enable autoformatting on save, go to `File` -> `Settings` -> `Tools` -> `File Watchers` and add a new watcher with the following settings:
1. **Name**: `Ruff formatter`
2. **File type**: `Python`
3. **Working directory**: `$ContentRoot$`
4. **Arguments**: `format $FilePath$`
5. **Program**: `$PyInterpreterDirectory$/ruff`
## Contributing
We welcome contributions from the community! Whether you're fixing bugs, improving documentation, or adding new features, here's how you can help:

View File

@@ -1,11 +0,0 @@
coverage:
range: 50..90 # coverage lower than 50 is red, higher than 90 green, between color code
status:
project:
default:
target: auto # auto % coverage target
threshold: 5% # allow for 5% reduction of coverage without failing
# do not run coverage on patch nor changes
patch: false

View File

@@ -1,12 +1,8 @@
build~=1.2.2
coverage~=7.6.12
grpcio-tools~=1.67.1
build~=1.2.1
grpcio-tools~=1.62.2
pip-tools~=7.4.1
pre-commit~=4.0.1
pyright~=1.1.393
pytest~=8.3.4
pytest-asyncio~=0.25.2
ruff~=0.9.5
setuptools~=70.0.0
pyright~=1.1.376
pytest~=8.3.2
ruff~=0.6.7
setuptools~=72.2.0
setuptools_scm~=8.1.0
python-dotenv~=1.0.1

View File

@@ -1,20 +0,0 @@
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = .
BUILDDIR = _build
# Put it first so that "make" without argument is like "make help".
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

View File

@@ -1,109 +0,0 @@
# Pipecat Documentation
This directory contains the source files for auto-generating Pipecat's server API reference documentation.
## Setup
1. Install documentation dependencies:
```bash
pip install -r requirements.txt
```
2. Make the build scripts executable:
```bash
chmod +x build-docs.sh rtd-test.py
```
## Building Documentation
From this directory, you can build the documentation in several ways:
### Local Build
```bash
# Using the build script (automatically opens docs when done)
./build-docs.sh
# Or directly with sphinx-build
sphinx-build -b html . _build/html -W --keep-going
```
### ReadTheDocs Test Build
To test the documentation build process exactly as it would run on ReadTheDocs:
```bash
./rtd-test.py
```
This script:
- Creates a fresh virtual environment
- Installs all dependencies as specified in requirements files
- Handles conflicting dependencies (like grpcio versions for Riva and PlayHT)
- Builds the documentation in an isolated environment
- Provides detailed logging of the build process
Use this script to verify your documentation will build correctly on ReadTheDocs before pushing changes.
## Viewing Documentation
The built documentation will be available at `_build/html/index.html`. To open:
```bash
# On MacOS
open _build/html/index.html
# On Linux
xdg-open _build/html/index.html
# On Windows
start _build/html/index.html
```
## Directory Structure
```
.
├── api/ # Auto-generated API documentation
├── _build/ # Built documentation
├── _static/ # Static files (images, css, etc.)
├── conf.py # Sphinx configuration
├── index.rst # Main documentation entry point
├── requirements-base.txt # Base documentation dependencies
├── requirements-riva.txt # Riva-specific dependencies
├── requirements-playht.txt # PlayHT-specific dependencies
├── build-docs.sh # Local build script
└── rtd-test.py # ReadTheDocs test build script
```
## Notes
- Documentation is auto-generated from Python docstrings
- Service modules are automatically detected and included
- The build process matches our ReadTheDocs configuration
- Warnings are treated as errors (-W flag) to maintain consistency
- The --keep-going flag ensures all errors are reported
- Dependencies are split into multiple requirements files to handle version conflicts
## Troubleshooting
If you encounter missing service modules:
1. Verify the service is installed with its extras: `pip install pipecat-ai[service-name]`
2. Check the build logs for import errors
3. Ensure the service module is properly initialized in the package
4. Run `./rtd-test.py` to test in an isolated environment matching ReadTheDocs
For dependency conflicts:
1. Check the requirements files for version specifications
2. Use `rtd-test.py` to verify dependency resolution
3. Consider adding service-specific requirements files if needed
For more information:
- [ReadTheDocs Configuration](.readthedocs.yaml)
- [Sphinx Documentation](https://www.sphinx-doc.org/)

View File

@@ -1,10 +0,0 @@
#!/bin/bash
# Clean previous build
rm -rf _build
# Build docs matching ReadTheDocs configuration
sphinx-build -b html -d _build/doctrees . _build/html -W --keep-going
# Open docs (MacOS)
open _build/html/index.html

View File

@@ -1,252 +0,0 @@
import logging
import sys
from pathlib import Path
# Configure logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger("sphinx-build")
# Add source directory to path
docs_dir = Path(__file__).parent
project_root = docs_dir.parent.parent
sys.path.insert(0, str(project_root / "src"))
# Project information
project = "pipecat-ai"
copyright = "2024, Daily"
author = "Daily"
# General configuration
extensions = [
"sphinx.ext.autodoc",
"sphinx.ext.napoleon",
"sphinx.ext.viewcode",
"sphinx.ext.intersphinx",
]
# Napoleon settings
napoleon_google_docstring = True
napoleon_numpy_docstring = False
napoleon_include_init_with_doc = True
# AutoDoc settings
autodoc_default_options = {
"members": True,
"member-order": "bysource",
"special-members": "__init__",
"undoc-members": True,
"exclude-members": "__weakref__",
"no-index": True,
"show-inheritance": True,
}
# Mock imports for optional dependencies
autodoc_mock_imports = [
"riva",
"livekit",
"pyht", # Base PlayHT package
"pyht.async_client", # PlayHT specific imports
"pyht.client",
"pyht.protos",
"pyht.protos.api_pb2",
"pipecat_ai_playht", # PlayHT wrapper
"anthropic",
"assemblyai",
"boto3",
"azure",
"cartesia",
"deepgram",
"elevenlabs",
"fal",
"gladia",
"google",
"krisp",
"langchain",
"lmnt",
"noisereduce",
"openai",
"openpipe",
"simli",
"soundfile",
# Existing mocks
"pipecat_ai_krisp",
"pyaudio",
"_tkinter",
"tkinter",
"daily",
"daily_python",
"pydantic.BaseModel",
"pydantic.Field",
"pydantic._internal._model_construction",
"pydantic._internal._fields",
]
# HTML output settings
html_theme = "sphinx_rtd_theme"
html_static_path = ["_static"]
autodoc_typehints = "description"
html_show_sphinx = False
def verify_modules():
"""Verify that required modules are available."""
required_modules = {
"services": [
"assemblyai",
"aws",
"cartesia",
"deepgram",
"google",
"lmnt",
"riva",
"simli",
],
"serializers": ["livekit"],
"vad": ["silero", "vad_analyzer"],
"transports": {
"services": ["daily", "livekit"],
"local": ["audio", "tk"],
"network": ["fastapi_websocket", "websocket_server"],
},
}
missing = []
for category, modules in required_modules.items():
if isinstance(modules, dict):
# Handle nested structure
for subcategory, submodules in modules.items():
for module in submodules:
try:
__import__(f"pipecat.{category}.{subcategory}.{module}")
logger.info(
f"Successfully imported pipecat.{category}.{subcategory}.{module}"
)
except (ImportError, TypeError, NameError) as e:
missing.append(f"pipecat.{category}.{subcategory}.{module}")
logger.warning(
f"Optional module not available: pipecat.{category}.{subcategory}.{module} - {str(e)}"
)
else:
# Handle flat structure
for module in modules:
try:
__import__(f"pipecat.{category}.{module}")
logger.info(f"Successfully imported pipecat.{category}.{module}")
except (ImportError, TypeError, NameError) as e:
missing.append(f"pipecat.{category}.{module}")
logger.warning(
f"Optional module not available: pipecat.{category}.{module} - {str(e)}"
)
if missing:
logger.warning(f"Some optional modules are not available: {missing}")
def clean_title(title: str) -> str:
"""Automatically clean module titles."""
# Remove everything after space (like 'module', 'processor', etc.)
title = title.split(" ")[0]
# Get the last part of the dot-separated path
parts = title.split(".")
title = parts[-1]
# Special cases for service names and common acronyms
special_cases = {
"ai": "AI",
"aws": "AWS",
"api": "API",
"vad": "VAD",
"assemblyai": "AssemblyAI",
"deepgram": "Deepgram",
"elevenlabs": "ElevenLabs",
"openai": "OpenAI",
"openpipe": "OpenPipe",
"playht": "PlayHT",
"xtts": "XTTS",
"lmnt": "LMNT",
}
# Check if the entire title is a special case
if title.lower() in special_cases:
return special_cases[title.lower()]
# Otherwise, capitalize each word
words = title.split("_")
cleaned_words = []
for word in words:
if word.lower() in special_cases:
cleaned_words.append(special_cases[word.lower()])
else:
cleaned_words.append(word.capitalize())
return " ".join(cleaned_words)
def setup(app):
"""Generate API documentation during Sphinx build."""
from sphinx.ext.apidoc import main
docs_dir = Path(__file__).parent
project_root = docs_dir.parent.parent
output_dir = str(docs_dir / "api")
source_dir = str(project_root / "src" / "pipecat")
# Clean existing files
if Path(output_dir).exists():
import shutil
shutil.rmtree(output_dir)
logger.info(f"Cleaned existing documentation in {output_dir}")
logger.info(f"Generating API documentation...")
logger.info(f"Output directory: {output_dir}")
logger.info(f"Source directory: {source_dir}")
excludes = [
str(project_root / "src/pipecat/pipeline/to_be_updated"),
str(project_root / "src/pipecat/processors/gstreamer"),
str(project_root / "src/pipecat/services/to_be_updated"),
str(project_root / "src/pipecat/vad"), # deprecated
"**/test_*.py",
"**/tests/*.py",
]
try:
main(
[
"-f", # Force overwriting
"-e", # Don't generate empty files
"-M", # Put module documentation before submodule documentation
"--no-toc", # Don't create a table of contents file
"--separate", # Put documentation for each module in its own page
"--module-first", # Module documentation before submodule documentation
"--implicit-namespaces", # Added: Handle implicit namespace packages
"-o",
output_dir,
source_dir,
]
+ excludes
)
logger.info("API documentation generated successfully!")
# Process generated RST files to update titles
for rst_file in Path(output_dir).glob("**/*.rst"): # Changed to recursive glob
content = rst_file.read_text()
lines = content.split("\n")
# Find and clean up the title
if lines and "=" in lines[1]: # Title is typically the first line
old_title = lines[0]
new_title = clean_title(old_title)
content = content.replace(old_title, new_title)
rst_file.write_text(content)
logger.info(f"Updated title: {old_title} -> {new_title}")
except Exception as e:
logger.error(f"Error generating API documentation: {e}", exc_info=True)
# Run module verification
verify_modules()

View File

@@ -1,77 +0,0 @@
Pipecat API Reference Docs
==========================
Welcome to Pipecat's API reference documentation!
Pipecat is an open source framework for building voice and multimodal assistants.
It provides a flexible pipeline architecture for connecting various AI services,
audio processing, and transport layers.
Quick Links
-----------
* `GitHub Repository <https://github.com/pipecat-ai/pipecat>`_
* `Website <https://pipecat.ai>`_
API Reference
-------------
Core Components
~~~~~~~~~~~~~~~
* :mod:`Frames <pipecat.frames>`
* :mod:`Processors <pipecat.processors>`
* :mod:`Pipeline <pipecat.pipeline>`
Audio Processing
~~~~~~~~~~~~~~~~
* :mod:`Audio <pipecat.audio>`
Services
~~~~~~~~
* :mod:`Services <pipecat.services>`
Transport & Serialization
~~~~~~~~~~~~~~~~~~~~~~~~~
* :mod:`Transports <pipecat.transports>`
* :mod:`Local <pipecat.transports.local>`
* :mod:`Network <pipecat.transports.network>`
* :mod:`Services <pipecat.transports.services>`
* :mod:`Serializers <pipecat.serializers>`
Utilities
~~~~~~~~~
* :mod:`Clocks <pipecat.clocks>`
* :mod:`Metrics <pipecat.metrics>`
* :mod:`Sync <pipecat.sync>`
* :mod:`Transcriptions <pipecat.transcriptions>`
* :mod:`Utils <pipecat.utils>`
.. toctree::
:maxdepth: 3
:caption: API Reference
:hidden:
Audio <api/pipecat.audio>
Clocks <api/pipecat.clocks>
Frames <api/pipecat.frames>
Metrics <api/pipecat.metrics>
Pipeline <api/pipecat.pipeline>
Processors <api/pipecat.processors>
Serializers <api/pipecat.serializers>
Services <api/pipecat.services>
Sync <api/pipecat.sync>
Transcriptions <api/pipecat.transcriptions>
Transports <api/pipecat.transports>
Utils <api/pipecat.utils>
Indices and tables
==================
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

View File

@@ -1,35 +0,0 @@
@ECHO OFF
pushd %~dp0
REM Command file for Sphinx documentation
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=.
set BUILDDIR=_build
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.https://www.sphinx-doc.org/
exit /b 1
)
if "%1" == "" goto help
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end
:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
:end
popd

View File

@@ -1,40 +0,0 @@
# Sphinx dependencies
sphinx>=8.1.3
sphinx-rtd-theme
sphinx-markdown-builder
sphinx-autodoc-typehints
toml
# Install all extras individually to ensure they're properly resolved
pipecat-ai[anthropic]
pipecat-ai[assemblyai]
pipecat-ai[aws]
pipecat-ai[azure]
pipecat-ai[canonical]
pipecat-ai[cartesia]
pipecat-ai[daily]
pipecat-ai[deepgram]
pipecat-ai[elevenlabs]
pipecat-ai[fal]
pipecat-ai[fireworks]
pipecat-ai[gladia]
pipecat-ai[google]
pipecat-ai[grok]
pipecat-ai[groq]
# pipecat-ai[krisp] # Mocked instead
pipecat-ai[langchain]
pipecat-ai[livekit]
pipecat-ai[lmnt]
pipecat-ai[local]
pipecat-ai[moondream]
pipecat-ai[nim]
pipecat-ai[noisereduce]
pipecat-ai[openai]
# pipecat-ai[openpipe]
# pipecat-ai[playht] # Mocked due to grpcio conflict with riva
pipecat-ai[riva]
pipecat-ai[silero]
pipecat-ai[simli]
pipecat-ai[soundfile]
pipecat-ai[websocket]
pipecat-ai[whisper]

View File

@@ -1,38 +0,0 @@
#!/bin/bash
set -e
# Configuration
DOCS_DIR=$(pwd)
PROJECT_ROOT=$(cd ../../ && pwd)
TEST_DIR="/tmp/rtd-test-$(date +%Y%m%d_%H%M%S)"
echo "Creating test directory: $TEST_DIR"
mkdir -p "$TEST_DIR"
cd "$TEST_DIR"
# Create virtual environment
python -m venv venv
source venv/bin/activate
echo "Installing build dependencies..."
pip install --upgrade pip wheel setuptools
echo "Installing documentation dependencies..."
pip install -r "$DOCS_DIR/requirements.txt"
echo "Building documentation..."
cd "$DOCS_DIR"
sphinx-build -b html . "_build/html"
echo "Build complete. Check _build/html directory for output."
# Print summary
echo -e "\n=== Build Summary ==="
echo "Documentation: $DOCS_DIR/_build/html"
echo "Test environment: $TEST_DIR"
echo -e "\nTo view the documentation:"
echo "open $DOCS_DIR/_build/html/index.html"
# Print installed packages for verification
echo -e "\n=== Installed Packages ==="
pip freeze | grep -E "sphinx|pipecat"

View File

@@ -96,6 +96,9 @@ Notable control frames:
## 7. Special Purpose Frames
### AppFrame
Base class for application-specific custom frames.
### MetricsFrame
Contains performance metrics data.

View File

@@ -54,33 +54,5 @@ TAVUS_API_KEY=...
TAVUS_REPLICA_ID=...
TAVUS_PERSONA_ID=...
# Simli
SIMLI_API_KEY=...
SIMLI_FACE_ID=...
# Krisp
KRISP_MODEL_PATH=...
# DeepSeek
DEEPSEEK_API_KEY=...
# Groq
GROQ_API_KEY=...
# Grok
GROK_API_KEY=...
# Together.ai
TOGETHER_API_KEY=...
# Cerebras
CEREBRAS_API_KEY=...
# Fish Audio
FISH_API_KEY=...
# Assembly AI
ASSEMBLYAI_API_KEY=...
# OpenRouter
OPENROUTER_API_KEY=...
#Krisp
KRISP_MODEL_PATH=...

View File

@@ -39,10 +39,9 @@ Next, follow the steps in the README for each demo.
| [Translation Chatbot](translation-chatbot) | Listens for user speech, then translates that speech to Spanish and speaks the translation back. Demonstrates multi-participant use-cases. | Deepgram, Azure, OpenAI, Daily, Daily Prebuilt UI |
| [Moondream Chatbot](moondream-chatbot) | Demonstrates how to add vision capabilities to GPT4. **Note: works best with a GPU** | Deepgram, ElevenLabs, OpenAI, Moondream, Daily, Daily Prebuilt UI |
| [Patient intake](patient-intake) | A chatbot that can call functions in response to user input. | Deepgram, ElevenLabs, OpenAI, Daily, Daily Prebuilt UI |
| [Phone Chatbot](phone-chatbot) | A chatbot that connects to PSTN/SIP phone calls, powered by Daily or Twilio. | Deepgram, ElevenLabs, OpenAI, Daily, Twilio |
| [Dialin Chatbot](dialin-chatbot) | A chatbot that connects to an incoming phone call from Daily or Twilio. | Deepgram, ElevenLabs, OpenAI, Daily, Twilio |
| [Twilio Chatbot](twilio-chatbot) | A chatbot that connects to an incoming phone call from Twilio. | Deepgram, ElevenLabs, OpenAI, Daily, Twilio |
| [studypal](studypal) | A chatbot to have a conversation about any article on the web | |
| [WebSocket Chatbot Server](websocket-server) | A real-time websocket server that handles audio streaming and bot interactions with speech-to-text and text-to-speech capabilities. | Cartesia, Deepgram, OpenAI, Websockets |
> [!IMPORTANT]
> These example projects use Daily as a WebRTC transport and can be joined using their hosted Prebuilt UI.

View File

@@ -1,45 +0,0 @@
# Bot ready signaling
A simple Pipecat example demonstrating how to handle signaling between the client and the bot,
ensuring that the bot starts sending audio only when the client is available,
thereby avoiding the risk of cutting off the beginning of the audio.
## Quick Start
### First, start the bot server:
1. Navigate to the server directory:
```bash
cd server
```
2. Create and activate a virtual environment:
```bash
python3 -m venv venv
source venv/bin/activate # On Windows: venv\Scripts\activate
```
3. Install requirements:
```bash
pip install -r requirements.txt
```
4. Copy env.example to .env and configure:
- Add your API keys
5. Start the server:
```bash
python server.py
```
### Next, connect using the client app:
For client-side setup, refer to the [JavaScript Guide](client/javascript/README.md).
## Important Note
Ensure the bot server is running before using any client implementations.
## Requirements
- Python 3.10+
- Node.js 16+ (for JavaScript)
- Daily API key
- Cartesia API key
- Modern web browser with WebRTC support

View File

@@ -1,27 +0,0 @@
# JavaScript Implementation
Basic implementation using the [Pipecat JavaScript SDK](https://docs.pipecat.ai/client/js/introduction).
## Setup
1. Run the bot server. See the [server README](../../README).
2. Navigate to the `client/javascript` directory:
```bash
cd client/javascript
```
3. Install dependencies:
```bash
npm install
```
4. Run the client app:
```
npm run dev
```
5. Visit http://localhost:5173 in your browser.

View File

@@ -1,34 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>AI Chatbot</title>
</head>
<body>
<div class="container">
<div class="status-bar">
<div class="status">
Status: <span id="connection-status">Disconnected</span>
</div>
<div class="controls">
<button id="connect-btn">Connect</button>
<button id="disconnect-btn" disabled>Disconnect</button>
</div>
</div>
<audio id="bot-audio" autoplay></audio>
<div class="debug-panel">
<h3>Debug Info</h3>
<div id="debug-log"></div>
</div>
</div>
<script type="module" src="/src/app.js"></script>
<link rel="stylesheet" href="/src/style.css">
</body>
</html>

File diff suppressed because it is too large Load Diff

View File

@@ -1,20 +0,0 @@
{
"name": "client",
"version": "1.0.0",
"main": "index.js",
"scripts": {
"dev": "vite",
"build": "vite build",
"preview": "vite preview"
},
"keywords": [],
"author": "",
"license": "ISC",
"description": "",
"devDependencies": {
"vite": "^6.0.9"
},
"dependencies": {
"@daily-co/daily-js": "0.74.0"
}
}

View File

@@ -1,216 +0,0 @@
/**
* Copyright (c) 20242025, Daily
*
* SPDX-License-Identifier: BSD 2-Clause License
*/
import Daily from "@daily-co/daily-js";
/**
* ChatbotClient handles the connection and media management for a real-time
* voice interaction with an AI bot.
*/
class ChatbotClient {
constructor() {
// Initialize client state
this.dailyCallObject = null;
this.setupDOMElements();
this.setupEventListeners();
}
/**
* Set up references to DOM elements and create necessary media elements
*/
setupDOMElements() {
// Get references to UI control elements
this.connectBtn = document.getElementById('connect-btn');
this.disconnectBtn = document.getElementById('disconnect-btn');
this.statusSpan = document.getElementById('connection-status');
this.debugLog = document.getElementById('debug-log');
// Create an audio element for bot's voice output
this.botAudio = document.createElement('audio');
this.botAudio.autoplay = true;
this.botAudio.playsInline = true;
document.body.appendChild(this.botAudio);
}
/**
* Set up event listeners for connect/disconnect buttons
*/
setupEventListeners() {
this.connectBtn.addEventListener('click', () => this.connect());
this.disconnectBtn.addEventListener('click', () => this.disconnect());
}
/**
* Add a timestamped message to the debug log
*/
log(message) {
const entry = document.createElement('div');
entry.textContent = `${new Date().toISOString()} - ${message}`;
// Add styling based on message type
if (message.startsWith('User: ')) {
entry.style.color = '#2196F3'; // blue for user
} else if (message.startsWith('Bot: ')) {
entry.style.color = '#4CAF50'; // green for bot
}
this.debugLog.appendChild(entry);
this.debugLog.scrollTop = this.debugLog.scrollHeight;
console.log(message);
}
/**
* Update the connection status display
*/
updateStatus(status) {
this.statusSpan.textContent = status;
this.log(`Status: ${status}`);
}
handleEventToConsole (evt) {
this.log(`Received event: ${evt.action}`);
};
/**
* Set up listeners for track events (start/stop)
* This handles new tracks being added during the session
*/
setupTrackListeners() {
if (!this.dailyCallObject) return;
this.dailyCallObject.on("joined-meeting", () => {
this.updateStatus('Connected');
this.connectBtn.disabled = true;
this.disconnectBtn.disabled = false;
this.log('Client connected');
});
this.dailyCallObject.on("track-started", (evt) => {
if (evt.track.kind === "audio" && evt.participant.local === false) {
this.log("Audio track started.")
this.setupAudioTrack(evt.track);
}
});
this.dailyCallObject.on("track-stopped", this.handleEventToConsole.bind(this));
this.dailyCallObject.on("participant-joined", this.handleEventToConsole.bind(this));
this.dailyCallObject.on("participant-updated", this.handleEventToConsole.bind(this));
this.dailyCallObject.on("participant-left", () => {
// When the bot leaves, we are also disconnecting from the call
this.disconnect()
});
this.dailyCallObject.on("left-meeting", () => {
this.updateStatus('Disconnected');
this.connectBtn.disabled = false;
this.disconnectBtn.disabled = true;
this.log('Client disconnected');
});
this.dailyCallObject.on("error", this.handleEventToConsole.bind(this));
}
/**
* Set up an audio track for playback
* Handles both initial setup and track updates
*/
setupAudioTrack(track) {
this.log(`Setting up audio track, track state: ${track.readyState}, muted: ${track.muted}`);
// Check if we're already playing this track
if (this.botAudio.srcObject) {
const oldTrack = this.botAudio.srcObject.getAudioTracks()[0];
if (oldTrack?.id === track.id) return;
}
// Create a new MediaStream with the track and set it as the audio source
this.botAudio.srcObject = new MediaStream([track]);
this.botAudio.onplaying = async (event) => {
this.log("onplaying")
this.log("Will send the audio message to play the audio at the next tick")
this.dailyCallObject.sendAppMessage("playable")
}
}
async fetchRoomInfo() {
let connectUrl = '/connect'
let res = await fetch(connectUrl, {
method: "POST",
mode: "cors",
headers: new Headers({
"Content-Type": "application/json"
}),
})
if (res.ok) {
return res.json();
}
}
/**
* Initialize and connect to the bot
* This sets up the RTVI client, initializes devices, and establishes the connection
*/
async connect() {
try {
// Initialize the client
this.dailyCallObject = Daily.createCallObject({
subscribeToTracksAutomatically: true,
});
// Set up listeners for media track events
this.setupTrackListeners();
this.log('Creating the bot...');
let roomInfo = await this.fetchRoomInfo()
// Connect to the bot
this.log('Connecting to bot...');
// Only for making debugger easier
window.callObject = this.dailyCallObject;
await this.dailyCallObject.join({
url: roomInfo.room_url,
});
this.log('Connection complete');
} catch (error) {
// Handle any errors during connection
this.log(`Error connecting: ${error.message}`);
this.log(`Error stack: ${error.stack}`);
this.updateStatus('Error');
// Clean up if there's an error
if (this.dailyCallObject) {
try {
await this.dailyCallObject.leave();
} catch (disconnectError) {
this.log(`Error during disconnect: ${disconnectError.message}`);
}
}
}
}
/**
* Disconnect from the bot and clean up media resources
*/
async disconnect() {
if (this.dailyCallObject) {
try {
// Disconnect the RTVI client
await this.dailyCallObject.leave();
await this.dailyCallObject.destroy();
this.dailyCallObject = null;
// Clean up audio
if (this.botAudio.srcObject) {
this.botAudio.srcObject.getTracks().forEach((track) => track.stop());
this.botAudio.srcObject = null;
}
} catch (error) {
this.log(`Error disconnecting: ${error.message}`);
}
}
}
}
// Initialize the client when the page loads
window.addEventListener('DOMContentLoaded', () => {
new ChatbotClient();
});

View File

@@ -1,98 +0,0 @@
body {
margin: 0;
padding: 20px;
font-family: Arial, sans-serif;
background-color: #f0f0f0;
}
.container {
max-width: 1200px;
margin: 0 auto;
}
.status-bar {
display: flex;
justify-content: space-between;
align-items: center;
padding: 10px;
background-color: #fff;
border-radius: 8px;
margin-bottom: 20px;
}
.controls button {
padding: 8px 16px;
margin-left: 10px;
border: none;
border-radius: 4px;
cursor: pointer;
}
#connect-btn {
background-color: #4caf50;
color: white;
}
#disconnect-btn {
background-color: #f44336;
color: white;
}
button:disabled {
opacity: 0.5;
cursor: not-allowed;
}
.main-content {
background-color: #fff;
border-radius: 8px;
padding: 20px;
margin-bottom: 20px;
}
.bot-container {
display: flex;
flex-direction: column;
align-items: center;
}
#bot-video-container {
width: 640px;
height: 360px;
background-color: #e0e0e0;
border-radius: 8px;
margin: 20px auto;
overflow: hidden;
display: flex;
align-items: center;
justify-content: center;
}
#bot-video-container video {
width: 100%;
height: 100%;
object-fit: cover;
}
.debug-panel {
background-color: #fff;
border-radius: 8px;
padding: 20px;
}
.debug-panel h3 {
margin: 0 0 10px 0;
font-size: 16px;
font-weight: bold;
}
#debug-log {
height: 200px;
overflow-y: auto;
background-color: #f8f8f8;
padding: 10px;
border-radius: 4px;
font-family: monospace;
font-size: 12px;
line-height: 1.4;
}

View File

@@ -1,13 +0,0 @@
import { defineConfig } from 'vite';
export default defineConfig({
server: {
proxy: {
// Proxy /api requests to the backend server
'/connect': {
target: 'http://0.0.0.0:7860', // Replace with your backend URL
changeOrigin: true,
},
},
},
});

View File

@@ -1,50 +0,0 @@
# Bot ready signaling Server
A FastAPI server that manages bot instances and provide endpoint for Pipecat client connections.
## Endpoints
- `POST /connect` - Pipecat client connection endpoint
## Environment Variables
Copy `env.example` to `.env` and configure:
```ini
# Required API Keys
DAILY_API_KEY= # Your Daily API key
CARTESIA_API_KEY= # Your Cartesia API key
# Optional Configuration
DAILY_API_URL= # Optional: Daily API URL (defaults to https://api.daily.co/v1)
DAILY_SAMPLE_ROOM_URL= # Optional: Fixed room URL for development
HOST= # Optional: Host address (defaults to 0.0.0.0)
FAST_API_PORT= # Optional: Port number (defaults to 7860)
```
## Running the Server
Set up and activate your virtual environment:
```bash
python3 -m venv venv
source venv/bin/activate # On Windows: venv\Scripts\activate
```
Install dependencies:
```bash
pip install -r requirements.txt
```
If you want to use the local version of `pipecat` in this repo rather than the last published version, also run:
```bash
pip install --editable "../../../[daily,cartesia,openai]"
```
Run the server:
```bash
python server.py
```

View File

@@ -1,3 +0,0 @@
DAILY_SAMPLE_ROOM_URL=https://yourdomain.daily.co/yourroom # (for joining the bot to the same room repeatedly for local dev)
DAILY_API_KEY=
CARTESIA_API_KEY=

View File

@@ -1,4 +0,0 @@
python-dotenv
fastapi[all]
uvicorn
pipecat-ai[daily,cartesia,openai]

View File

@@ -1,64 +0,0 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import argparse
import os
from typing import Optional
import aiohttp
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
async def configure(aiohttp_session: aiohttp.ClientSession):
(url, token, _) = await configure_with_args(aiohttp_session)
return (url, token)
async def configure_with_args(
aiohttp_session: aiohttp.ClientSession, parser: Optional[argparse.ArgumentParser] = None
):
if not parser:
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
parser.add_argument(
"-u", "--url", type=str, required=False, help="URL of the Daily room to join"
)
parser.add_argument(
"-k",
"--apikey",
type=str,
required=False,
help="Daily API Key (needed to create an owner token for the room)",
)
args, unknown = parser.parse_known_args()
url = args.url or os.getenv("DAILY_SAMPLE_ROOM_URL")
key = args.apikey or os.getenv("DAILY_API_KEY")
if not url:
raise Exception(
"No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL."
)
if not key:
raise Exception(
"No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers."
)
daily_rest_helper = DailyRESTHelper(
daily_api_key=key,
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
aiohttp_session=aiohttp_session,
)
# Create a meeting token for the given room with an expiration 1 hour in
# the future.
expiry_time: float = 60 * 60
token = await daily_rest_helper.get_token(url, expiry_time)
return (url, token, args)

View File

@@ -1,147 +0,0 @@
#
# Copyright (c) 2025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import argparse
import os
import subprocess
from contextlib import asynccontextmanager
from typing import Any, Dict
import aiohttp
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper, DailyRoomParams
# Load environment variables from .env file
load_dotenv(override=True)
# Dictionary to track bot processes: {pid: (process, room_url)}
bot_procs = {}
# Store Daily API helpers
daily_helpers = {}
def cleanup():
"""Cleanup function to terminate all bot processes.
Called during server shutdown.
"""
for entry in bot_procs.values():
proc = entry[0]
proc.terminate()
proc.wait()
@asynccontextmanager
async def lifespan(app: FastAPI):
"""FastAPI lifespan manager that handles startup and shutdown tasks.
- Creates aiohttp session
- Initializes Daily API helper
- Cleans up resources on shutdown
"""
aiohttp_session = aiohttp.ClientSession()
daily_helpers["rest"] = DailyRESTHelper(
daily_api_key=os.getenv("DAILY_API_KEY", ""),
daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
aiohttp_session=aiohttp_session,
)
yield
await aiohttp_session.close()
cleanup()
# Initialize FastAPI app with lifespan manager
app = FastAPI(lifespan=lifespan)
# Configure CORS to allow requests from any origin
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
async def create_room_and_token() -> tuple[str, str]:
"""Helper function to create a Daily room and generate an access token.
Returns:
tuple[str, str]: A tuple containing (room_url, token)
Raises:
HTTPException: If room creation or token generation fails
"""
room = await daily_helpers["rest"].create_room(DailyRoomParams())
if not room.url:
raise HTTPException(status_code=500, detail="Failed to create room")
token = await daily_helpers["rest"].get_token(room.url)
if not token:
raise HTTPException(status_code=500, detail=f"Failed to get token for room: {room.url}")
return room.url, token
@app.post("/connect")
async def bot_connect(request: Request) -> Dict[Any, Any]:
"""Connect endpoint that creates a room and returns connection credentials.
This endpoint is called by client to establish a connection.
Returns:
Dict[Any, Any]: Authentication bundle containing room_url and token
Raises:
HTTPException: If room creation, token generation, or bot startup fails
"""
print("Creating room for RTVI connection")
room_url, token = await create_room_and_token()
print(f"Room URL: {room_url}")
# Start the bot process
try:
bot_file = "signalling_bot"
proc = subprocess.Popen(
[f"python3 -m {bot_file} -u {room_url} -t {token}"],
shell=True,
bufsize=1,
cwd=os.path.dirname(os.path.abspath(__file__)),
)
bot_procs[proc.pid] = (proc, room_url)
except Exception as e:
raise HTTPException(status_code=500, detail=f"Failed to start subprocess: {e}")
# Return the authentication bundle in format expected by DailyTransport
return {"room_url": room_url, "token": token}
if __name__ == "__main__":
import uvicorn
# Parse command line arguments for server configuration
default_host = os.getenv("HOST", "0.0.0.0")
default_port = int(os.getenv("FAST_API_PORT", "7860"))
parser = argparse.ArgumentParser(description="Daily Travel Companion FastAPI server")
parser.add_argument("--host", type=str, default=default_host, help="Host address")
parser.add_argument("--port", type=int, default=default_port, help="Port number")
parser.add_argument("--reload", action="store_true", help="Reload code on change")
config = parser.parse_args()
# Start the FastAPI server
uvicorn.run(
"server:app",
host=config.host,
port=config.port,
reload=config.reload,
)

View File

@@ -1,95 +0,0 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import os
import sys
from dataclasses import dataclass
import aiohttp
from dotenv import load_dotenv
from loguru import logger
from runner import configure
from pipecat.frames.frames import AudioRawFrame, EndFrame, OutputAudioRawFrame, TTSSpeakFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.services.cartesia import CartesiaTTSService
from pipecat.transports.services.daily import DailyParams, DailyTransport
load_dotenv(override=True)
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
@dataclass
class SilenceFrame(OutputAudioRawFrame):
def __init__(
self,
*,
sample_rate: int,
duration: float,
):
# Initialize the parent class with the silent frame's data
super().__init__(
audio=self.create_silent_audio_frame(sample_rate, 1, duration).audio,
sample_rate=sample_rate,
num_channels=1,
)
@staticmethod
def create_silent_audio_frame(
sample_rate: int, num_channels: int, duration: float
) -> AudioRawFrame:
"""Create an AudioRawFrame containing silence."""
frame_size = num_channels * 2 # 2 bytes per sample for 16-bit audio
total_frames = int(sample_rate * duration)
total_bytes = total_frames * frame_size
silent_audio = bytes(total_bytes) # Create a byte array filled with zeros
return AudioRawFrame(audio=silent_audio, sample_rate=sample_rate, num_channels=num_channels)
async def main():
async with aiohttp.ClientSession() as session:
(room_url, _) = await configure(session)
transport = DailyTransport(
room_url, None, "Say One Thing", DailyParams(audio_out_enabled=True)
)
tts = CartesiaTTSService(
api_key=os.getenv("CARTESIA_API_KEY"),
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
)
runner = PipelineRunner()
task = PipelineTask(Pipeline([tts, transport.output()]))
# Register an event handler so we can play the audio when we receive a specific message
@transport.event_handler("on_app_message")
async def on_app_message(transport, message, sender):
logger.debug(f"Received app message: {message} - {sender}")
if "playable" not in message:
return
await task.queue_frames(
[
SilenceFrame(
sample_rate=task.params.audio_out_sample_rate,
duration=0.5,
),
TTSSpeakFrame(f"Hello there, how are you doing today ?"),
EndFrame(),
]
)
await runner.run(task)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -1,5 +1,5 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
@@ -15,7 +15,7 @@ from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame
from pipecat.frames.frames import EndFrame, LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -65,6 +65,7 @@ async def main():
# English
#
voice_id="cgSgspJ2msm6clMCkdW9",
aiohttp_session=session,
#
# Spanish
#
@@ -96,15 +97,15 @@ async def main():
call completion, CanonicalMetrics will send the audio buffer to Canonical for
analysis. Visit https://voice.canonical.chat to learn more.
"""
audio_buffer_processor = AudioBufferProcessor(num_channels=2)
audio_buffer_processor = AudioBufferProcessor()
canonical = CanonicalMetricsService(
audio_buffer_processor=audio_buffer_processor,
aiohttp_session=session,
api_key=os.getenv("CANONICAL_API_KEY"),
api_url=os.getenv("CANONICAL_API_URL"),
call_id=str(uuid.uuid4()),
assistant="pipecat-chatbot",
assistant_speaks_first=True,
context=context,
)
pipeline = Pipeline(
[
@@ -123,20 +124,17 @@ async def main():
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
await audio_buffer_processor.start_recording()
await transport.capture_participant_transcription(participant["id"])
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMMessagesFrame(messages)])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
print(f"Participant left: {participant}")
await task.cancel()
await task.queue_frame(EndFrame())
@transport.event_handler("on_call_state_updated")
async def on_call_state_updated(transport, state):
if state == "left":
# Here we don't want to cancel, we just want to finish sending
# whatever is queued, so we use an EndFrame().
await task.queue_frame(EndFrame())
runner = PipelineRunner()

View File

@@ -1,5 +1,5 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
@@ -53,3 +53,4 @@ async def configure(aiohttp_session: aiohttp.ClientSession):
token = await daily_rest_helper.get_token(url, expiry_time)
return (url, token)
return (url, token)

View File

@@ -1,5 +1,5 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

View File

@@ -1,23 +1,22 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import datetime
import io
import os
import sys
import wave
import aiofiles
import aiohttp
import datetime
import wave
from dotenv import load_dotenv
from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame, LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -33,17 +32,15 @@ logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
async def save_audio(audio: bytes, sample_rate: int, num_channels: int):
if len(audio) > 0:
async def save_audio(audiobuffer):
if audiobuffer.has_audio():
merged_audio = audiobuffer.merge_audio_buffers()
filename = f"conversation_recording{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.wav"
with io.BytesIO() as buffer:
with wave.open(buffer, "wb") as wf:
wf.setsampwidth(2)
wf.setnchannels(num_channels)
wf.setframerate(sample_rate)
wf.writeframes(audio)
async with aiofiles.open(filename, "wb") as file:
await file.write(buffer.getvalue())
with wave.open(filename, "wb") as wf:
wf.setnchannels(2)
wf.setsampwidth(2)
wf.setframerate(audiobuffer._sample_rate)
wf.writeframes(merged_audio)
print(f"Merged audio saved to {filename}")
else:
print("No audio data to save")
@@ -82,6 +79,7 @@ async def main():
# English
#
voice_id="cgSgspJ2msm6clMCkdW9",
aiohttp_session=session,
#
# Spanish
#
@@ -108,10 +106,7 @@ async def main():
context = OpenAILLMContext(messages)
context_aggregator = llm.create_context_aggregator(context)
# NOTE: Watch out! This will save all the conversation in memory. You
# can pass `buffer_size` to get periodic callbacks.
audiobuffer = AudioBufferProcessor()
pipeline = Pipeline(
[
transport.input(), # microphone
@@ -126,20 +121,16 @@ async def main():
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
@audiobuffer.event_handler("on_audio_data")
async def on_audio_data(buffer, audio, sample_rate, num_channels):
await save_audio(audio, sample_rate, num_channels)
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
await audiobuffer.start_recording()
await transport.capture_participant_transcription(participant["id"])
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMMessagesFrame(messages)])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
print(f"Participant left: {participant}")
await task.cancel()
await task.queue_frame(EndFrame())
await save_audio(audiobuffer)
runner = PipelineRunner()

View File

@@ -1,4 +1,3 @@
aiofiles
python-dotenv
fastapi[all]
uvicorn

View File

@@ -1,5 +1,5 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

View File

@@ -1,5 +1,5 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

View File

@@ -1,21 +1,22 @@
import argparse
import asyncio
import os
import sys
from dotenv import load_dotenv
from loguru import logger
import argparse
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.frames.frames import LLMMessagesFrame, EndFrame
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.elevenlabs import ElevenLabsTTSService
from pipecat.services.openai import OpenAILLMService
from pipecat.services.elevenlabs import ElevenLabsTTSService
from pipecat.transports.services.daily import DailyParams, DailyTransport
from loguru import logger
from dotenv import load_dotenv
load_dotenv(override=True)
logger.remove(0)
@@ -75,17 +76,15 @@ async def main(room_url: str, token: str):
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
await transport.capture_participant_transcription(participant["id"])
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMMessagesFrame(messages)])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.cancel()
await task.queue_frame(EndFrame())
@transport.event_handler("on_call_state_updated")
async def on_call_state_updated(transport, state):
if state == "left":
# Here we don't want to cancel, we just want to finish sending
# whatever is queued, so we use an EndFrame().
await task.queue_frame(EndFrame())
runner = PipelineRunner()

View File

@@ -1,27 +1,29 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import aiohttp
import argparse
import os
import subprocess
import os
from contextlib import asynccontextmanager
import aiohttp
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException, Request
from fastapi import FastAPI, Request, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from pipecat.transports.services.helpers.daily_rest import (
DailyRESTHelper,
DailyRoomObject,
DailyRoomParams,
DailyRoomProperties,
DailyRoomParams,
)
from dotenv import load_dotenv
load_dotenv(override=True)

View File

@@ -2,11 +2,12 @@ import os
import aiohttp
import modal
from bot import _voice_bot_process
from fastapi import HTTPException
from fastapi.responses import JSONResponse
from loguru import logger
from bot import _voice_bot_process
MAX_SESSION_TIME = 15 * 60 # 15 minutes
app = modal.App("pipecat-modal")

View File

@@ -5,15 +5,6 @@ import sys
from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.cartesia import CartesiaTTSService
from pipecat.services.openai import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
load_dotenv(override=True)
logger.remove(0)
@@ -21,6 +12,16 @@ logger.add(sys.stderr, level="DEBUG")
async def main(room_url: str, token: str):
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame, LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.cartesia import CartesiaTTSService
from pipecat.services.openai import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
transport = DailyTransport(
room_url,
token,
@@ -74,11 +75,11 @@ async def main(room_url: str, token: str):
async def on_first_participant_joined(transport, participant):
await transport.capture_participant_transcription(participant["id"])
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMMessagesFrame(messages)])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.cancel()
await task.queue_frame(EndFrame())
runner = PipelineRunner()

View File

@@ -1,5 +1,5 @@
python-dotenv==1.0.1
modal==0.71.3
pipecat-ai[daily,silero,cartesia,openai]==0.0.52
fastapi==0.115.6
aiohttp==3.11.11
modal==0.65.48
pipecat-ai[daily,silero,cartesia,openai]==0.0.48
fastapi==0.115.4
aiohttp==3.10.10

View File

@@ -0,0 +1,85 @@
<div align="center">
 <img alt="pipecat" width="300px" height="auto" src="image.png">
</div>
# Dialin example
Example project that demonstrates how to add phone number dialin to your Pipecat bots. We include examples for both Daily (`bot_daily.py`) and Twilio (`bot_twilio.py`), depending on who you want to use as a phone vendor.
- 🔁 Transport: Daily WebRTC
- 💬 Speech-to-Text: Deepgram via Daily transport
- 🤖 LLM: GPT4-o / OpenAI
- 🔉 Text-to-Speech: ElevenLabs
#### Should I use Daily or Twilio as a vendor?
If you're starting from scratch, using Daily to provision phone numbers alongside Daily as a transport offers some convenience (such as automatic call forwarding.)
If you already have Twilio numbers and workflows that you want to connect to your Pipecat bots, there is some additional configuration required (you'll need to create a `on_dialin_ready` and use the Twilio client to trigger the forward.)
You can read more about this, as well as see respective walkthroughs in our docs.
## Setup
```shell
# Install the requirements
pip install -r requirements.txt
# Setup your env
mv env.example .env
```
## Using Daily numbers
Run `bot_runner.py` to handle incoming HTTP requests:
`python bot_runner.py --host localhost`
Then target the following URL:
`POST /daily_start_bot`
For more configuration options, please consult Daily's API documentation.
## Using Twilio numbers
As above, but target the following URL:
`POST /twilio_start_bot`
For more configuration options, please consult Twilio's API documentation.
## Deployment example
A Dockerfile is included in this demo for convenience. Here is an example of how to build and deploy your bot to [fly.io](https://fly.io).
*Please note: This demo spawns agents as subprocesses for convenience / demonstration purposes. You would likely not want to do this in production as it would limit concurrency to available system resources. For more information on how to deploy your bots using VMs, refer to the Pipecat documentation.*
### Build the docker image
`docker build -t tag:project .`
### Launch the fly project
`mv fly.example.toml fly.toml`
`fly launch` (using the included fly.toml)
### Setup your secrets on Fly
Set the necessary secrets (found in `env.example`)
`fly secrets set DAILY_API_KEY=... OPENAI_API_KEY=... ELEVENLABS_API_KEY=... ELEVENLABS_VOICE_ID=...`
If you're using Twilio as a number vendor:
`fly secrets set TWILIO_ACCOUNT_SID=... TWILIO_AUTH_TOKEN=...`
### Deploy!
`fly deploy`
## Need to do something more advanced?
This demo covers the basics of bot telephony. If you want to know more about working with PSTN / SIP, please ping us on [Discord](https://discord.gg/pipecat).

View File

@@ -0,0 +1,104 @@
import asyncio
import os
import sys
import argparse
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.frames.frames import LLMMessagesFrame, EndFrame
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.elevenlabs import ElevenLabsTTSService
from pipecat.services.openai import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport, DailyDialinSettings
from loguru import logger
from dotenv import load_dotenv
load_dotenv(override=True)
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
daily_api_key = os.getenv("DAILY_API_KEY", "")
daily_api_url = os.getenv("DAILY_API_URL", "https://api.daily.co/v1")
async def main(room_url: str, token: str, callId: str, callDomain: str):
# diallin_settings are only needed if Daily's SIP URI is used
# If you are handling this via Twilio, Telnyx, set this to None
# and handle call-forwarding when on_dialin_ready fires.
diallin_settings = DailyDialinSettings(call_id=callId, call_domain=callDomain)
transport = DailyTransport(
room_url,
token,
"Chatbot",
DailyParams(
api_url=daily_api_url,
api_key=daily_api_key,
dialin_settings=diallin_settings,
audio_in_enabled=True,
audio_out_enabled=True,
camera_out_enabled=False,
vad_enabled=True,
vad_analyzer=SileroVADAnalyzer(),
transcription_enabled=True,
),
)
tts = ElevenLabsTTSService(
api_key=os.getenv("ELEVENLABS_API_KEY", ""),
voice_id=os.getenv("ELEVENLABS_VOICE_ID", ""),
)
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
messages = [
{
"role": "system",
"content": "You are Chatbot, a friendly, helpful robot. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by saying 'Oh, hello! Who dares dial me at this hour?!'.",
},
]
context = OpenAILLMContext(messages)
context_aggregator = llm.create_context_aggregator(context)
pipeline = Pipeline(
[
transport.input(),
context_aggregator.user(),
llm,
tts,
transport.output(),
context_aggregator.assistant(),
]
)
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
await transport.capture_participant_transcription(participant["id"])
await task.queue_frames([LLMMessagesFrame(messages)])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
runner = PipelineRunner()
await runner.run(task)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Pipecat Simple ChatBot")
parser.add_argument("-u", type=str, help="Room URL")
parser.add_argument("-t", type=str, help="Token")
parser.add_argument("-i", type=str, help="Call ID")
parser.add_argument("-d", type=str, help="Call Domain")
config = parser.parse_args()
asyncio.run(main(config.u, config.t, config.i, config.d))

View File

@@ -7,14 +7,14 @@ provisioning a room and starting a Pipecat bot in response.
Refer to README for more information.
"""
import argparse
import aiohttp
import os
import argparse
import subprocess
from contextlib import asynccontextmanager
import aiohttp
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException, Request
from fastapi import FastAPI, Request, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, PlainTextResponse
from twilio.twiml.voice_response import VoiceResponse
@@ -22,11 +22,13 @@ from twilio.twiml.voice_response import VoiceResponse
from pipecat.transports.services.helpers.daily_rest import (
DailyRESTHelper,
DailyRoomObject,
DailyRoomParams,
DailyRoomProperties,
DailyRoomSipParams,
DailyRoomParams,
)
from dotenv import load_dotenv
load_dotenv(override=True)
@@ -73,29 +75,24 @@ action using the Twilio Client library.
"""
async def _create_daily_room(
room_url, callId, callDomain=None, dialoutNumber=None, vendor="daily", detect_voicemail=False
):
async def _create_daily_room(room_url, callId, callDomain=None, vendor="daily"):
if not room_url:
# Create base properties with SIP settings
properties = DailyRoomProperties(
sip=DailyRoomSipParams(
display_name="dialin-user", video=False, sip_mode="dial-in", num_endpoints=1
params = DailyRoomParams(
properties=DailyRoomProperties(
# Note: these are the default values, except for the display name
sip=DailyRoomSipParams(
display_name="dialin-user", video=False, sip_mode="dial-in", num_endpoints=1
)
)
)
# Only enable dialout if dialoutNumber is provided
if dialoutNumber:
properties.enable_dialout = True
params = DailyRoomParams(properties=properties)
print(f"Creating new room...")
room: DailyRoomObject = await daily_helpers["rest"].create_room(params=params)
else:
# Check passed room URL exist (we assume that it already has a sip set up!)
try:
print(f"Joining existing room: {room_url}")
room: DailyRoomObject = await daily_helpers["rest"].get_room_from_url(room_url)
except Exception:
raise HTTPException(status_code=500, detail=f"Room not found: {room_url}")
@@ -111,9 +108,7 @@ async def _create_daily_room(
# Spawn a new agent, and join the user session
# Note: this is mostly for demonstration purposes (refer to 'deployment' in docs)
if vendor == "daily":
bot_proc = f"python3 -m bot_daily -u {room.url} -t {token} -i {callId} -d {callDomain}{' -v' if detect_voicemail else ''}"
if dialoutNumber:
bot_proc += f" -o {dialoutNumber}"
bot_proc = f"python3 -m bot_daily -u {room.url} -t {token} -i {callId} -d {callDomain}"
else:
bot_proc = f"python3 -m bot_twilio -u {room.url} -t {token} -i {callId} -s {room.config.sip_endpoint}"
@@ -184,18 +179,13 @@ async def daily_start_bot(request: Request) -> JSONResponse:
if "test" in data:
# Pass through any webhook checks
return JSONResponse({"test": True})
detect_voicemail = data.get("detectVoicemail", False)
callId = data.get("callId", None)
callDomain = data.get("callDomain", None)
dialoutNumber = data.get("dialoutNumber", None)
except Exception:
raise HTTPException(
status_code=500, detail="Missing properties 'callId', 'callDomain', or 'dialoutNumber'"
)
raise HTTPException(status_code=500, detail="Missing properties 'callId' or 'callDomain'")
room: DailyRoomObject = await _create_daily_room(
room_url, callId, callDomain, dialoutNumber, "daily", detect_voicemail
)
print(f"CallId: {callId}, CallDomain: {callDomain}")
room: DailyRoomObject = await _create_daily_room(room_url, callId, callDomain, "daily")
# Grab a token for the user to join with
return JSONResponse({"room_url": room.url, "sipUri": room.config.sip_endpoint})

View File

@@ -1,21 +1,24 @@
import argparse
import asyncio
import os
import sys
from dotenv import load_dotenv
from loguru import logger
from twilio.rest import Client
import argparse
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.frames.frames import LLMMessagesFrame, EndFrame
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.elevenlabs import ElevenLabsTTSService
from pipecat.services.openai import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
from twilio.rest import Client
from loguru import logger
from dotenv import load_dotenv
load_dotenv(override=True)
logger.remove(0)
@@ -82,11 +85,11 @@ async def main(room_url: str, token: str, callId: str, sipUri: str):
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
await transport.capture_participant_transcription(participant["id"])
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMMessagesFrame(messages)])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.cancel()
await task.queue_frame(EndFrame())
@transport.event_handler("on_dialin_ready")
async def on_dialin_ready(transport, cdata):

View File

Before

Width:  |  Height:  |  Size: 19 KiB

After

Width:  |  Height:  |  Size: 19 KiB

View File

@@ -1,25 +1,27 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import aiohttp
import os
import sys
import aiohttp
from dotenv import load_dotenv
from loguru import logger
from runner import configure
from pipecat.frames.frames import EndFrame, TTSSpeakFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineTask
from pipecat.pipeline.runner import PipelineRunner
from pipecat.services.cartesia import CartesiaTTSService
from pipecat.transports.services.daily import DailyParams, DailyTransport
from runner import configure
from loguru import logger
from dotenv import load_dotenv
load_dotenv(override=True)
logger.remove(0)

View File

@@ -1,22 +1,25 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import aiohttp
import os
import sys
from dotenv import load_dotenv
from loguru import logger
from pipecat.frames.frames import EndFrame, TTSSpeakFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineTask
from pipecat.services.cartesia import CartesiaTTSService
from pipecat.transports.local.audio import LocalAudioTransport, LocalAudioTransportParams
from pipecat.transports.base_transport import TransportParams
from pipecat.transports.local.audio import LocalAudioTransport
from loguru import logger
from dotenv import load_dotenv
load_dotenv(override=True)
@@ -25,7 +28,7 @@ logger.add(sys.stderr, level="DEBUG")
async def main():
transport = LocalAudioTransport(LocalAudioTransportParams(audio_out_enabled=True))
transport = LocalAudioTransport(TransportParams(audio_out_enabled=True))
tts = CartesiaTTSService(
api_key=os.getenv("CARTESIA_API_KEY"),
@@ -40,7 +43,7 @@ async def main():
await asyncio.sleep(1)
await task.queue_frames([TTSSpeakFrame("Hello there, how is it going!"), EndFrame()])
runner = PipelineRunner(handle_sigint=False if sys.platform == "win32" else True)
runner = PipelineRunner()
await asyncio.gather(runner.run(task), say_something())

View File

@@ -4,9 +4,6 @@ import os
import sys
import aiohttp
from dotenv import load_dotenv
from livekit import api
from loguru import logger
from pipecat.frames.frames import TextFrame
from pipecat.pipeline.pipeline import Pipeline
@@ -15,6 +12,12 @@ from pipecat.pipeline.task import PipelineTask
from pipecat.services.cartesia import CartesiaTTSService
from pipecat.transports.services.livekit import LiveKitParams, LiveKitTransport
from livekit import api
from loguru import logger
from dotenv import load_dotenv
load_dotenv(override=True)
logger.remove(0)

View File

@@ -1,54 +0,0 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import os
import sys
import aiohttp
from dotenv import load_dotenv
from loguru import logger
from runner import configure
from pipecat.frames.frames import EndFrame, TTSSpeakFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineTask
from pipecat.services.riva import FastPitchTTSService
from pipecat.transports.services.daily import DailyParams, DailyTransport
load_dotenv(override=True)
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
async def main():
async with aiohttp.ClientSession() as session:
(room_url, _) = await configure(session)
transport = DailyTransport(
room_url, None, "Say One Thing", DailyParams(audio_out_enabled=True)
)
tts = FastPitchTTSService(api_key=os.getenv("NVIDIA_API_KEY"))
runner = PipelineRunner()
task = PipelineTask(Pipeline([tts, transport.output()]))
# Register an event handler so we can play the audio when the
# participant joins.
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
participant_name = participant.get("info", {}).get("userName", "")
await task.queue_frames([TTSSpeakFrame(f"Aloha, {participant_name}!"), EndFrame()])
await runner.run(task)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -1,18 +1,14 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import aiohttp
import os
import sys
import aiohttp
from dotenv import load_dotenv
from loguru import logger
from runner import configure
from pipecat.frames.frames import EndFrame, LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
@@ -21,6 +17,12 @@ from pipecat.services.cartesia import CartesiaTTSService
from pipecat.services.openai import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
from runner import configure
from loguru import logger
from dotenv import load_dotenv
load_dotenv(override=True)
logger.remove(0)

View File

@@ -1,25 +1,27 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import aiohttp
import os
import sys
import aiohttp
from dotenv import load_dotenv
from loguru import logger
from runner import configure
from pipecat.frames.frames import TextFrame
from pipecat.frames.frames import EndFrame, TextFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineTask
from pipecat.services.fal import FalImageGenService
from pipecat.transports.services.daily import DailyParams, DailyTransport
from runner import configure
from loguru import logger
from dotenv import load_dotenv
load_dotenv(override=True)
logger.remove(0)
@@ -53,7 +55,7 @@ async def main():
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.cancel()
await task.queue_frame(EndFrame())
await runner.run(task)

View File

@@ -1,17 +1,15 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import aiohttp
import os
import sys
import tkinter as tk
import aiohttp
from dotenv import load_dotenv
from loguru import logger
import tkinter as tk
from pipecat.frames.frames import TextFrame
from pipecat.pipeline.pipeline import Pipeline
@@ -21,6 +19,10 @@ from pipecat.services.fal import FalImageGenService
from pipecat.transports.base_transport import TransportParams
from pipecat.transports.local.tk import TkLocalTransport
from loguru import logger
from dotenv import load_dotenv
load_dotenv(override=True)
logger.remove(0)

View File

@@ -1,64 +0,0 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import os
import sys
import aiohttp
from dotenv import load_dotenv
from loguru import logger
from runner import configure
from pipecat.frames.frames import EndFrame, TextFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.services.google import GoogleImageGenService
from pipecat.transports.services.daily import DailyParams, DailyTransport
load_dotenv(override=True)
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
async def main():
async with aiohttp.ClientSession() as session:
(room_url, _) = await configure(session)
transport = DailyTransport(
room_url,
None,
"Show a still frame image",
DailyParams(camera_out_enabled=True, camera_out_width=1024, camera_out_height=1024),
)
imagegen = GoogleImageGenService(
api_key=os.getenv("GOOGLE_API_KEY"),
)
runner = PipelineRunner()
task = PipelineTask(
Pipeline([imagegen, transport.output()]), PipelineParams(enable_metrics=True)
)
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
await task.queue_frame(TextFrame("a cat in the style of picasso"))
await task.queue_frame(TextFrame("a dog in the style of picasso"))
await task.queue_frame(TextFrame("a fish in the style of picasso"))
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
await runner.run(task)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -1,5 +1,5 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
@@ -8,24 +8,27 @@
# This example broken on latest pipecat and needs updating.
#
import aiohttp
import asyncio
import os
import sys
import aiohttp
from dotenv import load_dotenv
from loguru import logger
from runner import configure
from pipecat.frames.frames import EndPipeFrame, LLMMessagesFrame, TextFrame
from pipecat.pipeline.merge_pipeline import SequentialMergePipeline
from pipecat.pipeline.pipeline import Pipeline
from pipecat.frames.frames import EndPipeFrame, LLMMessagesFrame, TextFrame
from pipecat.pipeline.task import PipelineTask
from pipecat.services.azure import AzureLLMService, AzureTTSService
from pipecat.services.elevenlabs import ElevenLabsTTSService
from pipecat.services.transport_services import TransportServiceOutput
from pipecat.services.transports.daily_transport import DailyTransport
from runner import configure
from loguru import logger
from dotenv import load_dotenv
load_dotenv(override=True)
logger.remove(0)
@@ -51,6 +54,7 @@ async def main():
)
elevenlabs_tts = ElevenLabsTTSService(
aiohttp_session=session,
api_key=os.getenv("ELEVENLABS_API_KEY"),
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
)

View File

@@ -1,21 +1,18 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import aiohttp
import os
import sys
from dataclasses import dataclass
import aiohttp
from dotenv import load_dotenv
from loguru import logger
from runner import configure
from pipecat.frames.frames import (
DataFrame,
AppFrame,
Frame,
LLMFullResponseStartFrame,
LLMMessagesFrame,
@@ -25,13 +22,19 @@ from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.sync_parallel_pipeline import SyncParallelPipeline
from pipecat.pipeline.task import PipelineTask
from pipecat.processors.aggregators.sentence import SentenceAggregator
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.processors.aggregators.sentence import SentenceAggregator
from pipecat.services.cartesia import CartesiaHttpTTSService
from pipecat.services.fal import FalImageGenService
from pipecat.services.openai import OpenAILLMService
from pipecat.services.fal import FalImageGenService
from pipecat.transports.services.daily import DailyParams, DailyTransport
from runner import configure
from loguru import logger
from dotenv import load_dotenv
load_dotenv(override=True)
logger.remove(0)
@@ -39,7 +42,7 @@ logger.add(sys.stderr, level="DEBUG")
@dataclass
class MonthFrame(DataFrame):
class MonthFrame(AppFrame):
month: str
def __str__(self):

View File

@@ -1,25 +1,23 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import aiohttp
import asyncio
import os
import sys
import tkinter as tk
import aiohttp
from dotenv import load_dotenv
from loguru import logger
import tkinter as tk
from pipecat.frames.frames import (
Frame,
LLMMessagesFrame,
OutputAudioRawFrame,
TextFrame,
TTSAudioRawFrame,
URLImageRawFrame,
LLMMessagesFrame,
TextFrame,
)
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
@@ -28,11 +26,15 @@ from pipecat.pipeline.task import PipelineTask
from pipecat.processors.aggregators.sentence import SentenceAggregator
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.services.cartesia import CartesiaHttpTTSService
from pipecat.services.fal import FalImageGenService
from pipecat.services.openai import OpenAILLMService
from pipecat.services.fal import FalImageGenService
from pipecat.transports.base_transport import TransportParams
from pipecat.transports.local.tk import TkLocalTransport, TkOutputTransport
from loguru import logger
from dotenv import load_dotenv
load_dotenv(override=True)
logger.remove(0)

View File

@@ -1,5 +1,5 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
@@ -14,7 +14,7 @@ from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import Frame, MetricsFrame
from pipecat.frames.frames import Frame, LLMMessagesFrame, MetricsFrame
from pipecat.metrics.metrics import (
LLMUsageMetricsData,
ProcessingMetricsData,
@@ -38,8 +38,6 @@ logger.add(sys.stderr, level="DEBUG")
class MetricsLogger(FrameProcessor):
async def process_frame(self, frame: Frame, direction: FrameDirection):
await super().process_frame(frame, direction)
if isinstance(frame, MetricsFrame):
for d in frame.data:
if isinstance(d, TTFBMetricsData):
@@ -49,7 +47,9 @@ class MetricsLogger(FrameProcessor):
elif isinstance(d, LLMUsageMetricsData):
tokens = d.value
print(
f"!!! MetricsFrame: {frame}, tokens: {tokens.prompt_tokens}, characters: {tokens.completion_tokens}"
f"!!! MetricsFrame: {frame}, tokens: {
tokens.prompt_tokens}, characters: {
tokens.completion_tokens}"
)
elif isinstance(d, TTSUsageMetricsData):
print(f"!!! MetricsFrame: {frame}, characters: {d.value}")
@@ -113,11 +113,7 @@ async def main():
await transport.capture_participant_transcription(participant["id"])
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.cancel()
await task.queue_frames([LLMMessagesFrame(messages)])
runner = PipelineRunner()

View File

@@ -1,35 +1,33 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import aiohttp
import os
import sys
import aiohttp
from dotenv import load_dotenv
from loguru import logger
from PIL import Image
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import (
BotStartedSpeakingFrame,
BotStoppedSpeakingFrame,
Frame,
OutputImageRawFrame,
TextFrame,
)
from pipecat.frames.frames import Frame, OutputImageRawFrame, SystemFrame, TextFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.pipeline.task import PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.services.cartesia import CartesiaTTSService
from pipecat.services.cartesia import CartesiaHttpTTSService
from pipecat.services.openai import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
from pipecat.transports.services.daily import DailyTransport
from pipecat.transports.services.daily import DailyParams
from runner import configure
from loguru import logger
from dotenv import load_dotenv
load_dotenv(override=True)
@@ -51,7 +49,7 @@ class ImageSyncAggregator(FrameProcessor):
async def process_frame(self, frame: Frame, direction: FrameDirection):
await super().process_frame(frame, direction)
if isinstance(frame, BotStartedSpeakingFrame):
if not isinstance(frame, SystemFrame) and direction == FrameDirection.DOWNSTREAM:
await self.push_frame(
OutputImageRawFrame(
image=self._speaking_image_bytes,
@@ -59,8 +57,7 @@ class ImageSyncAggregator(FrameProcessor):
format=self._speaking_image_format,
)
)
elif isinstance(frame, BotStoppedSpeakingFrame):
await self.push_frame(frame)
await self.push_frame(
OutputImageRawFrame(
image=self._waiting_image_bytes,
@@ -68,8 +65,8 @@ class ImageSyncAggregator(FrameProcessor):
format=self._waiting_image_format,
)
)
await self.push_frame(frame)
else:
await self.push_frame(frame)
async def main():
@@ -91,7 +88,7 @@ async def main():
),
)
tts = CartesiaTTSService(
tts = CartesiaHttpTTSService(
api_key=os.getenv("CARTESIA_API_KEY"),
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
)
@@ -116,24 +113,16 @@ async def main():
pipeline = Pipeline(
[
transport.input(),
image_sync_aggregator,
context_aggregator.user(),
llm,
tts,
image_sync_aggregator,
transport.output(),
context_aggregator.assistant(),
]
)
task = PipelineTask(
pipeline,
PipelineParams(
allow_interruptions=True,
enable_metrics=True,
enable_usage_metrics=True,
report_only_initial_ttfb=True,
),
)
task = PipelineTask(pipeline)
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
@@ -141,10 +130,6 @@ async def main():
await transport.capture_participant_transcription(participant["id"])
await task.queue_frames([TextFrame(f"Hi there {participant_name}!")])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.cancel()
runner = PipelineRunner()
await runner.run(task)

View File

@@ -1,27 +1,30 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import aiohttp
import os
import sys
import aiohttp
from dotenv import load_dotenv
from loguru import logger
from runner import configure
from pipecat.frames.frames import LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.processors.audio.vad.silero import SileroVAD
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.cartesia import CartesiaTTSService
from pipecat.services.openai import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
from runner import configure
from loguru import logger
from dotenv import load_dotenv
load_dotenv(override=True)
logger.remove(0)
@@ -89,11 +92,7 @@ async def main():
await transport.capture_participant_transcription(participant["id"])
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.cancel()
await task.queue_frames([LLMMessagesFrame(messages)])
runner = PipelineRunner()

View File

@@ -1,19 +1,16 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import aiohttp
import os
import sys
import aiohttp
from dotenv import load_dotenv
from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -22,6 +19,12 @@ from pipecat.services.cartesia import CartesiaTTSService
from pipecat.services.openai import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
from runner import configure
from loguru import logger
from dotenv import load_dotenv
load_dotenv(override=True)
logger.remove(0)
@@ -87,11 +90,7 @@ async def main():
await transport.capture_participant_transcription(participant["id"])
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.cancel()
await task.queue_frames([LLMMessagesFrame(messages)])
runner = PipelineRunner()

View File

@@ -1,5 +1,5 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
@@ -14,6 +14,7 @@ from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -77,25 +78,13 @@ async def main():
]
)
task = PipelineTask(
pipeline,
PipelineParams(
allow_interruptions=True,
enable_metrics=True,
enable_usage_metrics=True,
report_only_initial_ttfb=True,
),
)
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
await transport.capture_participant_transcription(participant["id"])
# Kick off the conversation.
await task.queue_frames([context_aggregator.user().get_context_frame()])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.cancel()
await task.queue_frames([LLMMessagesFrame(messages)])
runner = PipelineRunner()

View File

@@ -1,5 +1,5 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
@@ -9,14 +9,6 @@ import os
import sys
import aiohttp
from dotenv import load_dotenv
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_openai import ChatOpenAI
from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMMessagesFrame
@@ -31,6 +23,18 @@ from pipecat.processors.frameworks.langchain import LangchainProcessor
from pipecat.services.cartesia import CartesiaTTSService
from pipecat.transports.services.daily import DailyParams, DailyTransport
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_openai import ChatOpenAI
from loguru import logger
from runner import configure
from dotenv import load_dotenv
load_dotenv(override=True)
@@ -101,15 +105,7 @@ async def main():
]
)
task = PipelineTask(
pipeline,
PipelineParams(
allow_interruptions=True,
enable_metrics=True,
enable_usage_metrics=True,
report_only_initial_ttfb=True,
),
)
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
@@ -122,10 +118,6 @@ async def main():
messages = [({"content": "Please briefly introduce yourself to the user."})]
await task.queue_frames([LLMMessagesFrame(messages)])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.cancel()
runner = PipelineRunner()
await runner.run(task)

View File

@@ -1,116 +0,0 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import os
import sys
import aiohttp
from deepgram import LiveOptions
from dotenv import load_dotenv
from loguru import logger
from runner import configure
from pipecat.frames.frames import (
BotInterruptionFrame,
StopInterruptionFrame,
UserStartedSpeakingFrame,
UserStoppedSpeakingFrame,
)
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.deepgram import DeepgramSTTService, DeepgramTTSService
from pipecat.services.openai import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
load_dotenv(override=True)
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
async def main():
async with aiohttp.ClientSession() as session:
(room_url, _) = await configure(session)
transport = DailyTransport(
room_url,
None,
"Respond bot",
DailyParams(
audio_in_enabled=True,
audio_out_enabled=True,
),
)
stt = DeepgramSTTService(
api_key=os.getenv("DEEPGRAM_API_KEY"),
live_options=LiveOptions(vad_events=True, utterance_end_ms="1000"),
)
tts = DeepgramTTSService(api_key=os.getenv("DEEPGRAM_API_KEY"), voice="aura-helios-en")
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
messages = [
{
"role": "system",
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
},
]
context = OpenAILLMContext(messages)
context_aggregator = llm.create_context_aggregator(context)
pipeline = Pipeline(
[
transport.input(), # Transport user input
stt, # STT
context_aggregator.user(), # User responses
llm, # LLM
tts, # TTS
transport.output(), # Transport bot output
context_aggregator.assistant(), # Assistant spoken responses
]
)
task = PipelineTask(
pipeline,
PipelineParams(
allow_interruptions=True,
enable_metrics=True,
enable_usage_metrics=True,
report_only_initial_ttfb=True,
),
)
@stt.event_handler("on_speech_started")
async def on_speech_started(stt, *args, **kwargs):
await task.queue_frames([BotInterruptionFrame(), UserStartedSpeakingFrame()])
@stt.event_handler("on_utterance_end")
async def on_utterance_end(stt, *args, **kwargs):
await task.queue_frames([StopInterruptionFrame(), UserStoppedSpeakingFrame()])
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.cancel()
runner = PipelineRunner()
await runner.run(task)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -1,5 +1,5 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
@@ -9,12 +9,12 @@ import os
import sys
import aiohttp
from deepgram import LiveOptions
from dotenv import load_dotenv
from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -45,23 +45,7 @@ async def main():
),
)
# stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
stt = DeepgramSTTService(
api_key=os.getenv("DEEPGRAM_API_KEY"),
# url=deepgram_url,
live_options=LiveOptions(
encoding="linear16",
language="en-US",
model="nova-3",
channels=1,
interim_results=True,
# smart_format=smart_format,
# endpointing=endpointing,
vad_events=True,
diarize=True,
filler_words=True,
),
)
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
tts = DeepgramTTSService(api_key=os.getenv("DEEPGRAM_API_KEY"), voice="aura-helios-en")
@@ -89,25 +73,13 @@ async def main():
]
)
task = PipelineTask(
pipeline,
PipelineParams(
allow_interruptions=True,
enable_metrics=True,
enable_usage_metrics=True,
report_only_initial_ttfb=True,
),
)
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.cancel()
await task.queue_frames([LLMMessagesFrame(messages)])
runner = PipelineRunner()

View File

@@ -1,5 +1,5 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
@@ -11,13 +11,14 @@ import sys
import aiohttp
from dotenv import load_dotenv
from loguru import logger
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.elevenlabs import ElevenLabsTTSService
from pipecat.services.openai import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
@@ -87,11 +88,7 @@ async def main():
await transport.capture_participant_transcription(participant["id"])
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.cancel()
await task.queue_frames([LLMMessagesFrame(messages)])
runner = PipelineRunner()

View File

@@ -1,103 +0,0 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import os
import sys
import aiohttp
from dotenv import load_dotenv
from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.openai import OpenAILLMService
from pipecat.services.playht import PlayHTHttpTTSService
from pipecat.transports.services.daily import DailyParams, DailyTransport
load_dotenv(override=True)
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
async def main():
async with aiohttp.ClientSession() as session:
(room_url, token) = await configure(session)
transport = DailyTransport(
room_url,
token,
"Respond bot",
DailyParams(
audio_out_enabled=True,
transcription_enabled=True,
vad_enabled=True,
vad_analyzer=SileroVADAnalyzer(),
),
)
tts = PlayHTHttpTTSService(
user_id=os.getenv("PLAYHT_USER_ID"),
api_key=os.getenv("PLAYHT_API_KEY"),
voice_url="s3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json",
)
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
messages = [
{
"role": "system",
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
},
]
context = OpenAILLMContext(messages)
context_aggregator = llm.create_context_aggregator(context)
pipeline = Pipeline(
[
transport.input(), # Transport user input
context_aggregator.user(), # User responses
llm, # LLM
tts, # TTS
transport.output(), # Transport bot output
context_aggregator.assistant(), # Assistant spoken responses
]
)
task = PipelineTask(
pipeline,
PipelineParams(
allow_interruptions=True,
enable_metrics=True,
enable_usage_metrics=True,
report_only_initial_ttfb=True,
),
)
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
await transport.capture_participant_transcription(participant["id"])
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.cancel()
runner = PipelineRunner()
await runner.run(task)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -1,5 +1,5 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
@@ -14,6 +14,7 @@ from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -90,11 +91,7 @@ async def main():
await transport.capture_participant_transcription(participant["id"])
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.cancel()
await task.queue_frames([LLMMessagesFrame(messages)])
runner = PipelineRunner()

View File

@@ -1,19 +1,16 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import aiohttp
import asyncio
import os
import sys
import aiohttp
from dotenv import load_dotenv
from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -21,6 +18,13 @@ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.azure import AzureLLMService, AzureSTTService, AzureTTSService
from pipecat.transports.services.daily import DailyParams, DailyTransport
from runner import configure
from loguru import logger
from dotenv import load_dotenv
load_dotenv(override=True)
logger.remove(0)
@@ -81,26 +85,14 @@ async def main():
]
)
task = PipelineTask(
pipeline,
PipelineParams(
allow_interruptions=True,
enable_metrics=True,
enable_usage_metrics=True,
report_only_initial_ttfb=True,
),
)
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
await transport.capture_participant_transcription(participant["id"])
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.cancel()
await task.queue_frames([LLMMessagesFrame(messages)])
runner = PipelineRunner()

View File

@@ -1,5 +1,5 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
@@ -11,15 +11,15 @@ import sys
import aiohttp
from dotenv import load_dotenv
from loguru import logger
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.fish import FishAudioTTSService
from pipecat.services.openai import OpenAILLMService
from pipecat.services.openai import OpenAILLMService, OpenAITTSService
from pipecat.transports.services.daily import DailyParams, DailyTransport
load_dotenv(override=True)
@@ -38,16 +38,14 @@ async def main():
"Respond bot",
DailyParams(
audio_out_enabled=True,
audio_out_sample_rate=24000,
transcription_enabled=True,
vad_enabled=True,
vad_analyzer=SileroVADAnalyzer(),
),
)
tts = FishAudioTTSService(
api_key=os.getenv("FISH_API_KEY"),
model="4ce7e917cedd4bc2bb2e6ff3a46acaa1", # Barack Obama
)
tts = OpenAITTSService(api_key=os.getenv("OPENAI_API_KEY"), voice="alloy")
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
@@ -72,26 +70,14 @@ async def main():
]
)
task = PipelineTask(
pipeline,
PipelineParams(
allow_interruptions=True,
enable_metrics=True,
enable_usage_metrics=True,
report_only_initial_ttfb=True,
),
)
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
await transport.capture_participant_transcription(participant["id"])
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.cancel()
await task.queue_frames([LLMMessagesFrame(messages)])
runner = PipelineRunner()

View File

@@ -1,109 +0,0 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import os
import sys
import aiohttp
from dotenv import load_dotenv
from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.openai import OpenAILLMService, OpenAISTTService, OpenAITTSService
from pipecat.transports.services.daily import DailyParams, DailyTransport
load_dotenv(override=True)
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
async def main():
async with aiohttp.ClientSession() as session:
(room_url, token) = await configure(session)
transport = DailyTransport(
room_url,
token,
"Respond bot",
DailyParams(
audio_out_enabled=True,
audio_out_sample_rate=24000,
transcription_enabled=False,
vad_enabled=True,
vad_analyzer=SileroVADAnalyzer(),
vad_audio_passthrough=True,
),
)
# You can use the OpenAI compatible API like Groq.
# stt = OpenAISTTService(
# base_url="https://api.groq.com/openai/v1",
# api_key="gsk_***",
# model="whisper-large-v3",
# )
stt = OpenAISTTService(api_key=os.getenv("OPENAI_API_KEY"), model="whisper-1")
tts = OpenAITTSService(api_key=os.getenv("OPENAI_API_KEY"), voice="alloy")
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
messages = [
{
"role": "system",
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
},
]
context = OpenAILLMContext(messages)
context_aggregator = llm.create_context_aggregator(context)
pipeline = Pipeline(
[
transport.input(), # Transport user input
stt, # STT
context_aggregator.user(), # User responses
llm, # LLM
tts, # TTS
transport.output(), # Transport bot output
context_aggregator.assistant(), # Assistant spoken responses
]
)
task = PipelineTask(
pipeline,
PipelineParams(
allow_interruptions=True,
enable_metrics=True,
enable_usage_metrics=True,
report_only_initial_ttfb=True,
),
)
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
await transport.capture_participant_transcription(participant["id"])
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.cancel()
runner = PipelineRunner()
await runner.run(task)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -1,20 +1,16 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import aiohttp
import os
import sys
import time
import aiohttp
from dotenv import load_dotenv
from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -23,6 +19,13 @@ from pipecat.services.cartesia import CartesiaTTSService
from pipecat.services.openpipe import OpenPipeLLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
from runner import configure
from loguru import logger
import time
from dotenv import load_dotenv
load_dotenv(override=True)
logger.remove(0)
@@ -79,26 +82,14 @@ async def main():
]
)
task = PipelineTask(
pipeline,
PipelineParams(
allow_interruptions=True,
enable_metrics=True,
enable_usage_metrics=True,
report_only_initial_ttfb=True,
),
)
task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
await transport.capture_participant_transcription(participant["id"])
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.cancel()
await task.queue_frames([LLMMessagesFrame(messages)])
runner = PipelineRunner()

View File

@@ -1,19 +1,16 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import aiohttp
import os
import sys
import aiohttp
from dotenv import load_dotenv
from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -22,6 +19,12 @@ from pipecat.services.openai import OpenAILLMService
from pipecat.services.xtts import XTTSService
from pipecat.transports.services.daily import DailyParams, DailyTransport
from runner import configure
from loguru import logger
from dotenv import load_dotenv
load_dotenv(override=True)
logger.remove(0)
@@ -47,6 +50,7 @@ async def main():
tts = XTTSService(
aiohttp_session=session,
voice_id="Claribel Dervla",
language="en",
base_url="http://localhost:8000",
)
@@ -73,26 +77,14 @@ async def main():
]
)
task = PipelineTask(
pipeline,
PipelineParams(
allow_interruptions=True,
enable_metrics=True,
enable_usage_metrics=True,
report_only_initial_ttfb=True,
),
)
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
await transport.capture_participant_transcription(participant["id"])
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.cancel()
await task.queue_frames([LLMMessagesFrame(messages)])
runner = PipelineRunner()

View File

@@ -1,5 +1,5 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
@@ -14,6 +14,7 @@ from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame, LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -78,27 +79,19 @@ async def main():
]
)
task = PipelineTask(
pipeline,
PipelineParams(
allow_interruptions=True,
enable_metrics=True,
enable_usage_metrics=True,
report_only_initial_ttfb=True,
),
)
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
await transport.capture_participant_transcription(participant["id"])
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
await task.queue_frames([LLMMessagesFrame(messages)])
# Register an event handler to exit the application when the user leaves.
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.cancel()
await task.queue_frame(EndFrame())
runner = PipelineRunner()

View File

@@ -1,19 +1,16 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import aiohttp
import asyncio
import os
import sys
import aiohttp
from dotenv import load_dotenv
from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -22,6 +19,12 @@ from pipecat.services.lmnt import LmntTTSService
from pipecat.services.openai import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
from runner import configure
from loguru import logger
from dotenv import load_dotenv
load_dotenv(override=True)
logger.remove(0)
@@ -38,6 +41,7 @@ async def main():
"Respond bot",
DailyParams(
audio_out_enabled=True,
audio_out_sample_rate=24000,
transcription_enabled=True,
vad_enabled=True,
vad_analyzer=SileroVADAnalyzer(),
@@ -69,26 +73,14 @@ async def main():
]
)
task = PipelineTask(
pipeline,
PipelineParams(
allow_interruptions=True,
enable_metrics=True,
enable_usage_metrics=True,
report_only_initial_ttfb=True,
),
)
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
await transport.capture_participant_transcription(participant["id"])
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.cancel()
await task.queue_frames([LLMMessagesFrame(messages)])
runner = PipelineRunner()

View File

@@ -1,5 +1,5 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
@@ -14,6 +14,7 @@ from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -89,10 +90,7 @@ async def main():
task = PipelineTask(
pipeline,
PipelineParams(
allow_interruptions=True,
enable_metrics=True,
enable_usage_metrics=True,
report_only_initial_ttfb=True,
allow_interruptions=True, enable_metrics=True, enable_usage_metrics=True
),
)
@@ -100,11 +98,7 @@ async def main():
async def on_first_participant_joined(transport, participant):
await transport.capture_participant_transcription(participant["id"])
# Kick off the conversation.
await task.queue_frames([context_aggregator.user().get_context_frame()])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.cancel()
await task.queue_frames([LLMMessagesFrame(messages)])
runner = PipelineRunner()

View File

@@ -1,5 +1,5 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
@@ -14,11 +14,12 @@ from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.aws import PollyTTSService
from pipecat.services.aws import AWSTTSService
from pipecat.services.deepgram import DeepgramSTTService
from pipecat.services.openai import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
@@ -47,12 +48,12 @@ async def main():
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
tts = PollyTTSService(
tts = AWSTTSService(
api_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
region=os.getenv("AWS_REGION"),
voice_id="Amy",
params=PollyTTSService.InputParams(engine="neural", language="en-GB", rate="1.05"),
params=AWSTTSService.InputParams(engine="neural", language="en-GB", rate="1.05"),
)
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
@@ -79,26 +80,14 @@ async def main():
]
)
task = PipelineTask(
pipeline,
PipelineParams(
allow_interruptions=True,
enable_metrics=True,
enable_usage_metrics=True,
report_only_initial_ttfb=True,
),
)
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
await transport.capture_participant_transcription(participant["id"])
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.cancel()
await task.queue_frames([LLMMessagesFrame(messages)])
runner = PipelineRunner()

View File

@@ -1,5 +1,5 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
@@ -14,12 +14,14 @@ from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.google import GoogleLLMService, GoogleSTTService, GoogleTTSService
from pipecat.transcriptions.language import Language
from pipecat.services.deepgram import DeepgramSTTService
from pipecat.services.google import GoogleTTSService
from pipecat.services.openai import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
load_dotenv(override=True)
@@ -38,22 +40,21 @@ async def main():
"Respond bot",
DailyParams(
audio_out_enabled=True,
audio_out_sample_rate=24000,
vad_enabled=True,
vad_analyzer=SileroVADAnalyzer(),
vad_audio_passthrough=True,
),
)
stt = GoogleSTTService(
params=GoogleSTTService.InputParams(languages=Language.EN_US),
)
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
tts = GoogleTTSService(
voice_id="en-US-Journey-F",
params=GoogleTTSService.InputParams(language=Language.EN_US),
voice_id="en-US-Neural2-J",
params=GoogleTTSService.InputParams(language="en-US", rate="1.05"),
)
llm = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY"))
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
messages = [
{
@@ -77,26 +78,14 @@ async def main():
]
)
task = PipelineTask(
pipeline,
PipelineParams(
allow_interruptions=True,
enable_metrics=True,
enable_usage_metrics=True,
report_only_initial_ttfb=True,
),
)
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
await transport.capture_participant_transcription(participant["id"])
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.cancel()
await task.queue_frames([LLMMessagesFrame(messages)])
runner = PipelineRunner()

View File

@@ -1,5 +1,5 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
@@ -14,6 +14,7 @@ from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -78,26 +79,14 @@ async def main():
]
)
task = PipelineTask(
pipeline,
PipelineParams(
allow_interruptions=True,
enable_metrics=True,
enable_usage_metrics=True,
report_only_initial_ttfb=True,
),
)
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
await transport.capture_participant_transcription(participant["id"])
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.cancel()
await task.queue_frames([LLMMessagesFrame(messages)])
runner = PipelineRunner()

View File

@@ -1,40 +1,41 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import aiohttp
import asyncio
import os
import sys
from dataclasses import dataclass
import aiohttp
import google.ai.generativelanguage as glm
from dataclasses import dataclass
from dotenv import load_dotenv
from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.cartesia import CartesiaTTSService
from pipecat.services.google import GoogleLLMService
from pipecat.processors.frame_processor import FrameProcessor
from pipecat.transports.services.daily import DailyParams, DailyTransport
from pipecat.frames.frames import (
Frame,
InputAudioRawFrame,
LLMFullResponseEndFrame,
LLMFullResponseStartFrame,
LLMFullResponseEndFrame,
InputAudioRawFrame,
Frame,
StartInterruptionFrame,
TextFrame,
TranscriptionFrame,
UserStartedSpeakingFrame,
UserStoppedSpeakingFrame,
)
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.processors.frame_processor import FrameProcessor
from pipecat.services.cartesia import CartesiaTTSService
from pipecat.services.google import GoogleLLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
load_dotenv(override=True)
@@ -216,7 +217,7 @@ async def main():
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
)
llm = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY"), model="gemini-2.0-flash-001")
llm = GoogleLLMService(model="gemini-1.5-flash-latest", api_key=os.getenv("GOOGLE_API_KEY"))
messages = [
{
@@ -264,10 +265,6 @@ async def main():
# Kick off the conversation.
await task.queue_frames([context_aggregator.user().get_context_frame()])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.cancel()
runner = PipelineRunner()
await runner.run(task)

View File

@@ -1,5 +1,5 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
@@ -13,15 +13,19 @@ from dotenv import load_dotenv
from loguru import logger
from runner import configure
from pipecat.audio.filters.krisp_filter import KrispFilter
from pipecat.frames.frames import LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.processors.aggregators.llm_response import (
LLMAssistantResponseAggregator,
LLMUserResponseAggregator,
)
from pipecat.services.deepgram import DeepgramSTTService, DeepgramTTSService
from pipecat.services.openai import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
from pipecat.vad.silero import SileroVADAnalyzer
from pipecat.audio.filters.krisp_filter import KrispFilter
load_dotenv(override=True)
@@ -59,40 +63,28 @@ async def main():
},
]
context = OpenAILLMContext(messages)
context_aggregator = llm.create_context_aggregator(context)
tma_in = LLMUserResponseAggregator(messages)
tma_out = LLMAssistantResponseAggregator(messages)
pipeline = Pipeline(
[
transport.input(), # Transport user input
stt, # STT
context_aggregator.user(), # User responses
tma_in, # User responses
llm, # LLM
tts, # TTS
transport.output(), # Transport bot output
context_aggregator.assistant(), # Assistant spoken responses
tma_out, # Assistant spoken responses
]
)
task = PipelineTask(
pipeline,
PipelineParams(
allow_interruptions=True,
enable_metrics=True,
enable_usage_metrics=True,
report_only_initial_ttfb=True,
),
)
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.cancel()
await task.queue_frames([LLMMessagesFrame(messages)])
runner = PipelineRunner()

View File

@@ -1,5 +1,5 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
@@ -14,12 +14,13 @@ from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.openai import OpenAILLMService
from pipecat.services.rime import RimeTTSService
from pipecat.services.rime import RimeHttpTTSService
from pipecat.transports.services.daily import DailyParams, DailyTransport
load_dotenv(override=True)
@@ -44,9 +45,10 @@ async def main():
),
)
tts = RimeTTSService(
tts = RimeHttpTTSService(
api_key=os.getenv("RIME_API_KEY", ""),
voice_id="rex",
params=RimeHttpTTSService.InputParams(reduce_latency=True),
)
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
@@ -87,11 +89,7 @@ async def main():
await transport.capture_participant_transcription(participant["id"])
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.cancel()
await task.queue_frames([LLMMessagesFrame(messages)])
runner = PipelineRunner()

View File

@@ -1,95 +0,0 @@
#
# Copyright (c) 20242025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import os
import sys
import aiohttp
from dotenv import load_dotenv
from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.nim import NimLLMService
from pipecat.services.riva import FastPitchTTSService, ParakeetSTTService
from pipecat.transports.services.daily import DailyParams, DailyTransport
load_dotenv(override=True)
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
async def main():
async with aiohttp.ClientSession() as session:
(room_url, _) = await configure(session)
transport = DailyTransport(
room_url,
None,
"Respond bot",
DailyParams(
audio_out_enabled=True,
vad_enabled=True,
vad_analyzer=SileroVADAnalyzer(),
vad_audio_passthrough=True,
),
)
stt = ParakeetSTTService(api_key=os.getenv("NVIDIA_API_KEY"))
llm = NimLLMService(
api_key=os.getenv("NVIDIA_API_KEY"), model="meta/llama-3.1-405b-instruct"
)
tts = FastPitchTTSService(api_key=os.getenv("NVIDIA_API_KEY"))
messages = [
{
"role": "system",
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
},
]
context = OpenAILLMContext(messages)
context_aggregator = llm.create_context_aggregator(context)
pipeline = Pipeline(
[
transport.input(), # Transport user input
stt, # STT
context_aggregator.user(), # User responses
llm, # LLM
tts, # TTS
transport.output(), # Transport bot output
context_aggregator.assistant(), # Assistant spoken responses
]
)
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([context_aggregator.user().get_context_frame()])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.cancel()
runner = PipelineRunner()
await runner.run(task)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -1,19 +1,20 @@
from typing import Tuple
import aiohttp
import asyncio
import logging
import os
from typing import Tuple
import aiohttp
from dotenv import load_dotenv
from runner import configure
from pipecat.frames.frames import AudioFrame, EndFrame, ImageFrame, LLMMessagesFrame, TextFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.processors.aggregators import SentenceAggregator
from pipecat.pipeline.pipeline import Pipeline
from pipecat.transports.services.daily import DailyTransport
from pipecat.services.azure import AzureLLMService, AzureTTSService
from pipecat.services.elevenlabs import ElevenLabsTTSService
from pipecat.services.fal import FalImageGenService
from pipecat.transports.services.daily import DailyTransport
from pipecat.frames.frames import AudioFrame, EndFrame, ImageFrame, LLMMessagesFrame, TextFrame
from runner import configure
from dotenv import load_dotenv
load_dotenv(override=True)
@@ -48,6 +49,7 @@ async def main():
region=os.getenv("AZURE_SPEECH_REGION"),
)
tts2 = ElevenLabsTTSService(
aiohttp_session=session,
api_key=os.getenv("ELEVENLABS_API_KEY"),
voice_id="jBpfuIE2acCO8z3wKNLl",
)

View File

@@ -1,17 +1,13 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import aiohttp
import asyncio
import sys
import aiohttp
from dotenv import load_dotenv
from loguru import logger
from runner import configure
from pipecat.frames.frames import (
Frame,
InputAudioRawFrame,
@@ -21,9 +17,15 @@ from pipecat.frames.frames import (
)
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.pipeline.task import PipelineTask
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.transports.services.daily import DailyParams, DailyTransport
from pipecat.transports.services.daily import DailyTransport, DailyParams
from runner import configure
from loguru import logger
from dotenv import load_dotenv
load_dotenv(override=True)
@@ -61,6 +63,7 @@ async def main():
"Test",
DailyParams(
audio_in_enabled=True,
audio_in_sample_rate=24000,
audio_out_enabled=True,
camera_out_enabled=True,
camera_out_is_live=True,
@@ -77,9 +80,7 @@ async def main():
runner = PipelineRunner()
task = PipelineTask(
pipeline, PipelineParams(audio_in_sample_rate=24000, audio_out_sample_rate=24000)
)
task = PipelineTask(pipeline)
await runner.run(task)

View File

@@ -1,17 +1,14 @@
#
# Copyright (c) 20242025, Daily
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import aiohttp
import asyncio
import sys
import tkinter as tk
import aiohttp
from dotenv import load_dotenv
from loguru import logger
from runner import configure
import tkinter as tk
from pipecat.frames.frames import (
Frame,
@@ -22,12 +19,18 @@ from pipecat.frames.frames import (
)
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.pipeline.task import PipelineTask
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.transports.base_transport import TransportParams
from pipecat.transports.local.tk import TkLocalTransport
from pipecat.transports.services.daily import DailyParams, DailyTransport
from runner import configure
from loguru import logger
from dotenv import load_dotenv
load_dotenv(override=True)
logger.remove(0)
@@ -62,7 +65,7 @@ async def main():
tk_root.title("Local Mirror")
daily_transport = DailyTransport(
room_url, token, "Test", DailyParams(audio_in_enabled=True)
room_url, token, "Test", DailyParams(audio_in_enabled=True, audio_in_sample_rate=24000)
)
tk_transport = TkLocalTransport(
@@ -82,9 +85,7 @@ async def main():
pipeline = Pipeline([daily_transport.input(), MirrorProcessor(), tk_transport.output()])
task = PipelineTask(
pipeline, PipelineParams(audio_in_sample_rate=24000, audio_out_sample_rate=24000)
)
task = PipelineTask(pipeline)
async def run_tk():
while not task.has_finished():

Some files were not shown because too many files have changed in this diff Show More