Compare commits
1 Commits
jpt/runner
...
mb/fix-ote
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c522a1ad1a |
18
.github/workflows/android.yaml
vendored
18
.github/workflows/android.yaml
vendored
@@ -6,13 +6,11 @@ on:
|
||||
- main
|
||||
paths:
|
||||
- "examples/simple-chatbot/client/android/**"
|
||||
- "examples/p2p-webrtc/video-transform/client/android/**"
|
||||
pull_request:
|
||||
branches:
|
||||
- "**"
|
||||
paths:
|
||||
- "examples/simple-chatbot/client/android/**"
|
||||
- "examples/p2p-webrtc/video-transform/client/android/**"
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
sdk_git_ref:
|
||||
@@ -25,7 +23,7 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
sdk:
|
||||
name: "Demo apps"
|
||||
name: "Simple chatbot demo"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repo
|
||||
@@ -39,22 +37,12 @@ jobs:
|
||||
distribution: 'temurin'
|
||||
java-version: '17'
|
||||
|
||||
- name: "Example app: Simple Chatbot"
|
||||
- name: Build demo app
|
||||
working-directory: examples/simple-chatbot/client/android
|
||||
run: ./gradlew :simple-chatbot-client:assembleDebug
|
||||
|
||||
- name: Upload Simple Chatbot APK
|
||||
- name: Upload demo APK
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: Simple Chatbot Android Client
|
||||
path: examples/simple-chatbot/client/android/simple-chatbot-client/build/outputs/apk/debug/simple-chatbot-client-debug.apk
|
||||
|
||||
- name: "Example app: Small WebRTC Client"
|
||||
working-directory: examples/p2p-webrtc/video-transform/client/android
|
||||
run: ./gradlew :small-webrtc-client:assembleDebug
|
||||
|
||||
- name: Upload Small WebRTC APK
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: Small WebRTC Android Client
|
||||
path: examples/p2p-webrtc/video-transform/client/android/small-webrtc-client/build/outputs/apk/debug/small-webrtc-client-debug.apk
|
||||
|
||||
6
.github/workflows/format.yaml
vendored
6
.github/workflows/format.yaml
vendored
@@ -17,7 +17,7 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
ruff-format:
|
||||
name: "Code quality checks"
|
||||
name: "Formatting checker"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repo
|
||||
@@ -39,8 +39,8 @@ jobs:
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
ruff format --diff
|
||||
- name: Ruff linter (all rules)
|
||||
- name: Ruff import linter
|
||||
id: ruff-check
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
ruff check
|
||||
ruff check --select I
|
||||
|
||||
2
.github/workflows/publish.yaml
vendored
2
.github/workflows/publish.yaml
vendored
@@ -5,7 +5,7 @@ on:
|
||||
inputs:
|
||||
gitref:
|
||||
type: string
|
||||
description: "what git tag to build (e.g. v0.0.74)"
|
||||
description: "what git ref to build"
|
||||
required: true
|
||||
|
||||
jobs:
|
||||
|
||||
@@ -4,5 +4,5 @@ repos:
|
||||
hooks:
|
||||
- id: ruff
|
||||
language_version: python3
|
||||
args: [--fix]
|
||||
args: [ --select, I, ]
|
||||
- id: ruff-format
|
||||
|
||||
579
CHANGELOG.md
579
CHANGELOG.md
@@ -9,589 +9,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
### Added
|
||||
|
||||
- Added a new field `handle_sigterm` to `PipelineRunner`. It defaults to `False`.
|
||||
This field handles SIGTERM signals. The `handle_sigint` field still defaults
|
||||
to `True`, but now it handles only SIGINT signals.
|
||||
|
||||
- Added foundational example `14u-function-calling-ollama.py` for Ollama
|
||||
function calling.
|
||||
|
||||
- Added `LocalSmartTurnAnalyzerV2`, which supports local on-device inference
|
||||
with the new `smart-turn-v2` turn detection model.
|
||||
|
||||
- Added `set_log_level` to `DailyTransport`, allowing setting the logging level
|
||||
for Daily's internal logging system.
|
||||
|
||||
### Changed
|
||||
|
||||
- Play delayed messages from `ElevenLabsTTSService` if they still belong to the
|
||||
current context.
|
||||
|
||||
- Dependency compatibility improvements: Relaxed version constraints for core
|
||||
dependencies to support broader version ranges while maintaining stability:
|
||||
|
||||
- `aiohttp`, `Markdown`, `nltk`, `numpy`, `Pillow`, `pydantic`, `openai`,
|
||||
`numba`: Now support up to the next major version (e.g. `numpy>=1.26.4,<3`)
|
||||
- `pyht`: Relaxed to `>=0.1.6` to resolve `grpcio` conflicts with
|
||||
`nvidia-riva-client`
|
||||
- `fastapi`: Updated to support versions `>=0.115.6,<0.117.0`
|
||||
- `torch`/`torchaudio`: Changed from exact pinning (`==2.5.0`) to compatible
|
||||
range (`~=2.5.0`)
|
||||
- `aws_sdk_bedrock_runtime`: Added Python 3.12+ constraint via environment
|
||||
marker
|
||||
- `numba`: Reduced minimum version to `0.60.0` for better compatibility
|
||||
|
||||
- Changed `NeuphonicHttpTTSService` to use a POST based request instead of the
|
||||
`pyneuphonic` package. This removes a package requirement, allowing Neuphonic
|
||||
to work with more services.
|
||||
|
||||
- Updated the `deepgram` optional dependency to 4.7.0, which downgrades the
|
||||
`tasks cancelled error` to a debug log. This removes the log from appearing
|
||||
in Pipecat logs upon leaving.
|
||||
|
||||
- Upgraded the `websockets` implementation to the new asyncio implementation.
|
||||
Along with this change, we're updating support for versions >=13.1.0 and
|
||||
<15.0.0. All services have been updated to use the asyncio implementation.
|
||||
|
||||
- Updated `MiniMaxHttpTTSService` with a `base_url` arg where you can specify
|
||||
the Global endpoint (default) or Mainland China.
|
||||
|
||||
- Replaced regex-based sentence detection in `match_endofsentence` with NLTK's
|
||||
punkt_tab tokenizer for more reliable sentence boundary detection.
|
||||
|
||||
- Changed the `livekit` optional dependency for `tenacity` to
|
||||
`tenacity>=8.2.3,<10.0.0` in order to support the `google-genai` package.
|
||||
|
||||
- For `LmntTTSService`, changed the default `model` to `blizzard`, LMNT's
|
||||
recommended model.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed a dependency issue for uv users where an `llvmlite` version required python 3.9.
|
||||
|
||||
- Fixed an issue in `MiniMaxHttpTTSService` where the `pitch` param was the
|
||||
incorrect type.
|
||||
|
||||
- Fixed an issue with OpenTelemetry tracing where the `enable_tracing` flag did
|
||||
not disable the internal tracing decorator functions.
|
||||
|
||||
- Fixed an issue in `OLLamaLLMService` where kwargs were not passed correctly
|
||||
to the parent class.
|
||||
|
||||
- Fixed an issue in `ElevenLabsTTSService` where the word/timestamp pairs were
|
||||
calculating word boundaries incorrectly.
|
||||
|
||||
- Fixed an issue where, in some edge cases, the `EmulateUserStartedSpeakingFrame`
|
||||
could be created even if we didn't have a transcription.
|
||||
|
||||
- Fixed an issue in `GoogleLLMContext` where it would inject the
|
||||
`system_message` as a "user" message into cases where it was not meant to;
|
||||
it was only meant to do that when there were no "regular" (non-function-call)
|
||||
messages in the context, to ensure that inference would run properly.
|
||||
|
||||
- Fixed an issue in `LiveKitTransport` where the `on_audio_track_subscribed` was never emitted.
|
||||
|
||||
## [0.0.76] - 2025-07-11
|
||||
|
||||
### Added
|
||||
|
||||
- Added `SpeechControlParamsFrame`, a new `SystemFrame` that notifies
|
||||
downstream processors of the VAD and Turn analyzer params. This frame is
|
||||
pushed by the `BaseInputTransport` at Start and any time a
|
||||
`VADParamsUpdateFrame` is received.
|
||||
|
||||
### Changed
|
||||
|
||||
- Two package dependencies have been updated:
|
||||
- `numpy` now supports 1.26.0 and newer
|
||||
- `transformers` now supports 4.48.0 and newer
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue with RTVI's handling of `append-to-context`.
|
||||
|
||||
- Fixed an issue where using audio input with a sample rate requiring resampling
|
||||
could result in empty audio being passed to STT services, causing errors.
|
||||
|
||||
- Fixed the VAD analyzer to process the full audio buffer as long as it contains
|
||||
more than the minimum required bytes per iteration, instead of only analyzing
|
||||
the first chunk.
|
||||
|
||||
- Fixed an issue in ParallelPipeline that caused errors when attempting to drain
|
||||
the queues.
|
||||
|
||||
- Fixed an issue with emulated VAD timeout inconsistency in
|
||||
`LLMUserContextAggregator`. Previously, emulated VAD scenarios (where
|
||||
transcription is received without VAD detection) used a hardcoded
|
||||
`aggregation_timeout` (default 0.5s) instead of matching the VAD's
|
||||
`stop_secs` parameter (default 0.8s). This created different user experiences
|
||||
between real VAD and emulated VAD scenarios. Now, emulated VAD timeouts
|
||||
automatically synchronize with the VAD's `stop_secs` parameter.
|
||||
|
||||
- Fix a pipeline freeze when using AWS Nova Sonic, which would occur if the
|
||||
user started early, while the bot was still working through
|
||||
`trigger_assistant_response()`.
|
||||
|
||||
## [0.0.75] - 2025-07-08
|
||||
|
||||
### Added
|
||||
|
||||
- Added an `aggregate_sentences` arg in `CartesiaTTSService`,
|
||||
`ElevenLabsTTSService`, `NeuphonicTTSService` and `RimeTTSService`, where the
|
||||
default value is True. When `aggregate_sentences` is True, the `TTSService`
|
||||
aggregates the LLM streamed tokens into sentences by default. Note: setting
|
||||
the value to False requires a custom processor before the `TTSService` to
|
||||
aggregate LLM tokens.
|
||||
|
||||
- Added `kwargs` to the `OLLamaLLMService` to allow for configuration args to
|
||||
be passed to Ollama.
|
||||
|
||||
- Added call hang-up error handling in `TwilioFrameSerializer`, which handles
|
||||
the case where the user has hung up before the `TwilioFrameSerializer` hangs
|
||||
up the call.
|
||||
|
||||
### Changed
|
||||
|
||||
- Updated `RTVIObserver` and `RTVIProcessor` to match the new RTVI 1.0.0 protocol.
|
||||
This includes:
|
||||
|
||||
- Deprecating support for all messages related to service configuration and
|
||||
actions.
|
||||
- Adding support for obtaining and logging data about client, including its
|
||||
RTVI version and optionally included system information (OS/browser/etc.)
|
||||
- Adding support for handling the new `client-message` RTVI message through
|
||||
either a `on_client_message` event handler or listening for a new
|
||||
`RTVIClientMessageFrame`
|
||||
- Adding support for responding to a `client-message` with a `server-response`
|
||||
via either a direct call on the `RTVIProcessor` or via pushing a new
|
||||
`RTVIServerResponseFrame`
|
||||
- Adding built-in support for handling the new `append-to-context` RTVI message
|
||||
which allows a client to add to the user or assistant llm context. No extra
|
||||
code is required for supporting this behavior.
|
||||
- Updating all JavaScript and React client RTVI examples to use versions 1.0.0
|
||||
of the clients.
|
||||
|
||||
Get started migrating to RTVI protocol 1.0.0 by following the migration guide:
|
||||
https://docs.pipecat.ai/client/migration-guide
|
||||
|
||||
- Refactored `AWSBedrockLLMService` and `AWSPollyTTSService` to work
|
||||
asynchronously using `aioboto3` instead of the `boto3` library.
|
||||
|
||||
- The `UserIdleProcessor` now handles the scenario where function calls take
|
||||
longer than the idle timeout duration. This allows you to use the
|
||||
`UserIdleProcessor` in conjunction with function calls that take a while to
|
||||
return a result.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Updated the `NeuphonicTTSService` to work with the updated websocket API.
|
||||
|
||||
- Fixed an issue with `RivaSTTService` where the watchdog feature was causing
|
||||
an error on initialization.
|
||||
|
||||
### Performance
|
||||
|
||||
- Remove unnecessary push task in each `FrameProcessor`.
|
||||
|
||||
## [0.0.74] - 2025-07-03
|
||||
|
||||
### Added
|
||||
|
||||
- Added a new STT service, `SpeechmaticsSTTService`. This service provides
|
||||
real-time speech-to-text transcription using the Speechmatics API. It supports
|
||||
partial and final transcriptions, multiple languages, various audio formats,
|
||||
and speaker diarization.
|
||||
|
||||
- Added `normalize` and `model_id` to `FishAudioTTSService`.
|
||||
|
||||
- Added `http_options` argument to `GoogleLLMService`.
|
||||
|
||||
- Added `run_llm` field to `LLMMessagesAppendFrame` and `LLMMessagesUpdateFrame`
|
||||
frames. If true, a context frame will be pushed triggering the LLM to respond.
|
||||
|
||||
- Added a new `SOXRStreamAudioResampler` for processing audio in chunks or
|
||||
streams. If you write your own processor and need to use an audio resampler,
|
||||
use the new `create_stream_resampler()`.
|
||||
|
||||
- Added new `DailyParams.audio_in_user_tracks` to allow receiving one track per
|
||||
user (default) or a single track from the room (all participants mixed).
|
||||
|
||||
- Added support for providing "direct" functions, which don't need an
|
||||
accompanying `FunctionSchema` or function definition dict. Instead, metadata
|
||||
(i.e. `name`, `description`, `properties`, and `required`) are automatically
|
||||
extracted from a combination of the function signature and docstring.
|
||||
|
||||
Usage:
|
||||
|
||||
```python
|
||||
# "Direct" function
|
||||
# `params` must be the first parameter
|
||||
async def do_something(params: FunctionCallParams, foo: int, bar: str = ""):
|
||||
"""
|
||||
Do something interesting.
|
||||
|
||||
Args:
|
||||
foo (int): The foo to do something interesting with.
|
||||
bar (string): The bar to do something interesting with.
|
||||
"""
|
||||
|
||||
result = await process(foo, bar)
|
||||
await params.result_callback({"result": result})
|
||||
|
||||
# ...
|
||||
|
||||
llm.register_direct_function(do_something)
|
||||
|
||||
# ...
|
||||
|
||||
tools = ToolsSchema(standard_tools=[do_something])
|
||||
```
|
||||
|
||||
- `user_id` is now populated in the `TranscriptionFrame` and
|
||||
`InterimTranscriptionFrame` when using a transport that provides a `user_id`,
|
||||
like `DailyTransport` or `LiveKitTransport`.
|
||||
|
||||
- Added `watchdog_coroutine()`. This is a watchdog helper for coroutines. So,
|
||||
if you have a coroutine that is waiting for a result and that takes a long
|
||||
time, you will need to wrap it with `watchdog_coroutine()` so the watchdog
|
||||
timers are reset regularly.
|
||||
|
||||
- Added `session_token` parameter to `AWSNovaSonicLLMService`.
|
||||
|
||||
- Added Gemini Multimodal Live File API for uploading, fetching, listing, and
|
||||
deleting files. See `26f-gemini-multimodal-live-files-api.py` for example usage.
|
||||
|
||||
### Changed
|
||||
|
||||
- Updated all the services to use the new `SOXRStreamAudioResampler`, ensuring smooth
|
||||
transitions and eliminating clicks.
|
||||
|
||||
- Upgraded `daily-python` to 0.19.4.
|
||||
|
||||
- Updated `google` optional dependency to use `google-genai` version `1.24.0`.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue where audio would get stuck in the queue when an interrupt occurs
|
||||
during Azure TTS synthesis.
|
||||
|
||||
- Fixed a race condition that occurs in Python 3.10+ where the task could miss
|
||||
the `CancelledError` and continue running indefinitely, freezing the pipeline.
|
||||
|
||||
- Fixed an `AWSNovaSonicLLMService` issue introduced in 0.0.72.
|
||||
|
||||
### Deprecated
|
||||
|
||||
- In `FishAudioTTSService`, deprecated `model` and replaced with
|
||||
`reference_id`. This change is to better align with Fish Audio's variable
|
||||
naming and to reduce confusion about what functionality the variable
|
||||
controls.
|
||||
|
||||
## [0.0.73] - 2025-06-26
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue introduced in 0.0.72 that would cause `ElevenLabsTTSService`,
|
||||
`GladiaSTTService`, `NeuphonicTTSService` and `OpenAIRealtimeBetaLLMService`
|
||||
to throw an error.
|
||||
|
||||
## [0.0.72] - 2025-06-26
|
||||
|
||||
### Added
|
||||
|
||||
- Added logging and improved error handling to help diagnose and prevent potential
|
||||
Pipeline freezes.
|
||||
|
||||
- Added `WatchdogQueue`, `WatchdogPriorityQueue`, `WatchdogEvent` and
|
||||
`WatchdogAsyncIterator`. These helper utilities reset watchdog timers
|
||||
appropriately before they expire. When watchdog timers are disabled, the
|
||||
utilities behave as standard counterparts without side effects.
|
||||
|
||||
- Introduce task watchdog timers. Watchdog timers are used to detect if a
|
||||
Pipecat task is taking longer than expected (by default 5 seconds). Watchdog
|
||||
timers are disabled by default and can be enabled globally by passing
|
||||
`enable_watchdog_timers` argument to `PipelineTask` constructor. It is
|
||||
possible to change the default watchdog timer timeout by using the
|
||||
`watchdog_timeout` argument. You can also log how long it takes to reset the
|
||||
watchdog timers which is done with the `enable_watchdog_logging`. You can
|
||||
control all these settings per each frame processor or even per task. That is,
|
||||
you can set `enable_watchdog_timers`, `enable_watchdog_logging` and
|
||||
`watchdog_timeout` when creating any frame processor through their constructor
|
||||
arguments or when you create a task with `FrameProcessor.create_task()`. Note
|
||||
that watchdog timers only work with Pipecat tasks and will not work if you use
|
||||
`asyncio.create_task()` or similar.
|
||||
|
||||
- Added `lexicon_names` parameter to `AWSPollyTTSService.InputParams`.
|
||||
|
||||
- Added reconnection logic and audio buffer management to `GladiaSTTService`.
|
||||
|
||||
- The `TurnTrackingObserver` now ends a turn upon observing an `EndFrame` or
|
||||
`CancelFrame`.
|
||||
|
||||
- Added Polish support to `AWSTranscribeSTTService`.
|
||||
|
||||
- Added new frames `FrameProcessorPauseFrame` and `FrameProcessorResumeFrame`
|
||||
which allow pausing and resuming frame processing for a given frame
|
||||
processor. These are control frames, so they are ordered. Pausing frame
|
||||
processor will keep old frames in the internal queues until resume takes
|
||||
place. Frames being pushed while a frame processor is paused will be pushed to
|
||||
the queues. When frame processing is resumed all queued frames will be
|
||||
processed in order. Also added `FrameProcessorPauseUrgentFrame` and
|
||||
`FrameProcessorResumeUrgentFrame` which are system frames and therefore they
|
||||
have high priority.
|
||||
|
||||
- Added a property called `has_function_calls_in_progress` in
|
||||
`LLMAssistantContextAggregator` that exposes whether a function call is in
|
||||
progress.
|
||||
|
||||
- Added `SambaNovaLLMService` which provides llm api integration with an
|
||||
OpenAI-compatible interface.
|
||||
|
||||
- Added `SambaNovaSTTService` which provides speech-to-text functionality using
|
||||
SambaNova's (whisper) API.
|
||||
|
||||
- Add foundational examples for function calling and transcription
|
||||
`14s-function-calling-sambanova.py`, `13g-sambanova-transcription.py`
|
||||
|
||||
### Changed
|
||||
|
||||
- `HeartbeatFrame`s are now control frames. This will make it easier to detect
|
||||
pipeline freezes. Previously, heartbeat frames were system frames which meant
|
||||
they were not queued with other frames, making it difficult to detect
|
||||
pipeline stalls.
|
||||
|
||||
- Updated `OpenAIRealtimeBetaLLMService` to accept `language` in the
|
||||
`InputAudioTranscription` class for all models.
|
||||
|
||||
- Updated the default model for `OpenAIRealtimeBetaLLMService` to
|
||||
`gpt-4o-realtime-preview-2025-06-03`.
|
||||
|
||||
- The `PipelineParams` arg `allow_interruptions` now defaults to `True`.
|
||||
|
||||
- `TavusTransport` and `TavusVideoService` now send audio to Tavus using WebRTC
|
||||
audio tracks instead of `app-messages` over WebSocket. This should improve the
|
||||
overall audio quality.
|
||||
|
||||
- Upgraded `daily-python` to 0.19.3.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue that would cause heartbeat frames to be sent before processors
|
||||
were started.
|
||||
|
||||
- Fixed an event loop blocking issue when using `SentryMetrics`.
|
||||
|
||||
- Fixed an issue in `FastAPIWebsocketClient` to ensure proper disconnection
|
||||
when the websocket is already closed.
|
||||
|
||||
- Fixed an issue where the `UserStoppedSpeakingFrame` was not received if the
|
||||
transport was not receiving new audio frames.
|
||||
|
||||
- Fixed an edge case where if the user interrupted the bot but no new aggregation
|
||||
was received, the bot would not resume speaking.
|
||||
|
||||
- Fixed an issue with `TelnyxFrameSerializer` where it would throw an exception
|
||||
when the user hung up the call.
|
||||
|
||||
- Fixed an issue with `ElevenLabsTTSService` where the context was not being
|
||||
closed.
|
||||
|
||||
- Fixed function calling in `AWSNovaSonicLLMService`.
|
||||
|
||||
- Fixed an issue that would cause multiple `PipelineTask.on_idle_timeout`
|
||||
events to be triggered repeatedly.
|
||||
|
||||
- Fixed an issue that was causing user and bot speech to not be synchronized
|
||||
during recordings.
|
||||
|
||||
- Fixed an issue where voice settings weren't applied to ElevenLabsTTSService.
|
||||
|
||||
- Fixed an issue with `GroqTTSService` where it was not properly parsing the
|
||||
WAV file header.
|
||||
|
||||
- Fixed an issue with `GoogleSTTService` where it was constantly reconnecting
|
||||
before starting to receive audio from the user.
|
||||
|
||||
- Fixed an issue where `GoogleLLMService`'s TTFB value was incorrect.
|
||||
|
||||
### Deprecated
|
||||
|
||||
- `AudioBufferProcessor` parameter `user_continuos_stream` is deprecated.
|
||||
|
||||
### Other
|
||||
|
||||
- Rename `14e-function-calling-gemini.py` to `14e-function-calling-google.py`.
|
||||
|
||||
## [0.0.71] - 2025-06-10
|
||||
|
||||
### Added
|
||||
|
||||
- Adds a parameter called `additional_span_attributes` to PipelineTask that
|
||||
lets you add any additional attributes you'd like to the conversation span.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue with `CartesiaSTTService` initialization.
|
||||
|
||||
## [0.0.70] - 2025-06-10
|
||||
|
||||
### Added
|
||||
|
||||
- Added `ExotelFrameSerializer` to handle telephony calls via Exotel.
|
||||
|
||||
- Added the option `informal` to `TranslationConfig` on Gladia config.
|
||||
This allows forcing informal language forms when available.
|
||||
|
||||
- Added `CartesiaSTTService` which is a websocket based implementation to
|
||||
transcribe audio. Added a foundational example in
|
||||
`13f-cartesia-transcription.py`
|
||||
|
||||
- Added a `websocket` example, showing how to use the new Pipecat client
|
||||
`WebsocketTransport` to connect with Pipecat `FastAPIWebsocketTransport` or
|
||||
`WebsocketServerTransport`.
|
||||
|
||||
- Added language support to `RimeHttpTTSService`. Extended languages to include
|
||||
German and French for both `RimeTTSService` and `RimeHttpTTSService`.
|
||||
|
||||
### Changed
|
||||
|
||||
- Upgraded `daily-python` to 0.19.2.
|
||||
|
||||
- Make `PipelineTask.add_observer()` synchronous. This allows callers to call it
|
||||
before doing the work of running the `PipelineTask` (i.e. without invoking
|
||||
`PipelineTask.set_event_loop()` first).
|
||||
|
||||
- Pipecat 0.0.69 forced `uvloop` event loop on Linux and macOS. Unfortunately,
|
||||
this is causing issues on some systems. So, `uvloop` is not enabled by default
|
||||
anymore. If you want to use `uvloop` you can just set the `asyncio` event
|
||||
policy before starting your agent with:
|
||||
|
||||
```python
|
||||
asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
|
||||
```
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue with various TTS services that would cause audio glitches at
|
||||
the start of every bot turn.
|
||||
|
||||
- Fixed an `ElevenLabsTTSService` issue where a context warning was printed
|
||||
when pushing a `TTSSpeakFrame`.
|
||||
|
||||
- Fixed an `AssemblyAISTTService` issue that could cause unexpected behavior
|
||||
when yielding empty `Frame()`s.
|
||||
|
||||
- Fixed an issue where `OutputAudioRawFrame.transport_destination` was being
|
||||
reset to `None` instead of retaining its intended value before sending the
|
||||
audio frame to `write_audio_frame`.
|
||||
|
||||
- Fixed a typo in Livekit transport that prevented initialization.
|
||||
|
||||
## [0.0.69] - 2025-06-02 "AI Engineer World's Fair release" ✨
|
||||
|
||||
### Added
|
||||
|
||||
- Added a new frame `FunctionCallsStartedFrame`. This frame is pushed both
|
||||
upstream and downstream from the LLM service to indicate that one or more
|
||||
function calls are going to be executed.
|
||||
|
||||
- Added LLM services `on_function_calls_started` event. This event will be
|
||||
triggered when the LLM service receives function calls from the model and is
|
||||
going to start executing them.
|
||||
|
||||
- Function calls can now be executed sequentially (in the order received in the
|
||||
completion) by passing `run_in_parallel=False` when creating your LLM
|
||||
service. By default, if the LLM completion returns 2 or more function calls
|
||||
they run concurrently. In both cases, concurrently and sequentially, a new LLM
|
||||
completion will run when the last function call finishes.
|
||||
|
||||
- Added OpenTelemetry tracing for `GeminiMultimodalLiveLLMService` and
|
||||
`OpenAIRealtimeBetaLLMService`.
|
||||
|
||||
- Added initial support for interruption strategies, which determine if the user
|
||||
should interrupt the bot while the bot is speaking. Interruption strategies
|
||||
can be based on factors such as audio volume or the number of words spoken by
|
||||
the user. These can be specified via the new `interruption_strategies` field
|
||||
in `PipelineParams`. A new `MinWordsInterruptionStrategy` strategy has been
|
||||
introduced which triggers an interruption if the user has spoken a minimum
|
||||
number of words. If no interruption strategies are specified, the normal
|
||||
interruption behavior applies. If multiple strategies are provided, the first
|
||||
one that evaluates to true will trigger the interruption.
|
||||
|
||||
- `BaseInputTransport` now handles `StopFrame`. When a `StopFrame` is received
|
||||
the transport will pause sending frames downstream until a new `StartFrame` is
|
||||
received. This allows the transport to be reused (keeping the same connection)
|
||||
in a different pipeline.
|
||||
|
||||
- Updated AssemblyAI STT service to support their latest streaming
|
||||
speech-to-text model with improved transcription latency and endpointing.
|
||||
|
||||
- You can now access STT service results through the new
|
||||
`TranscriptionFrame.result` and `InterimTranscriptionFrame.result` field. This
|
||||
is useful in case you use some specific settings for the STT and you want to
|
||||
access the STT results.
|
||||
|
||||
- The examples runner is now public from the `pipecat.examples` package. This
|
||||
allows everyone to build their own examples and run them easily.
|
||||
|
||||
- It is now possible to push `OutputDTMFFrame` or `OutputDTMFUrgentFrame` with
|
||||
`DailyTransport`. This will be sent properly if a Daily dial-out connection
|
||||
has been established.
|
||||
|
||||
- Added `OutputDTMFUrgentFrame` to send a DTMF keypress quickly. The previous
|
||||
`OutputDTMFFrame` queues the keypress with the rest of data frames.
|
||||
|
||||
- Added `DTMFAggregator`, which aggregates keypad presses into
|
||||
`TranscriptionFrame`s. Aggregation occurs after a timeout, termination key
|
||||
press, or user interruption. You can specify the prefix of the
|
||||
`TranscriptionFrame`.
|
||||
|
||||
- Added new functions `DailyTransport.start_transcription()` and
|
||||
`DailyTransport.stop_transcription()` to be able to start and stop Daily
|
||||
transcription dynamically (maybe with different settings).
|
||||
|
||||
### Changed
|
||||
|
||||
- Reverted the default model for `GeminiMultimodalLiveLLMService` back to
|
||||
`models/gemini-2.0-flash-live-001`.
|
||||
`gemini-2.5-flash-preview-native-audio-dialog` has inconsistent performance.
|
||||
You can opt in to using this model by setting the `model` arg.
|
||||
|
||||
- Function calls are now cancelled by default if there's an interruption. To
|
||||
disable this behavior you can set `cancel_on_interruption=False` when
|
||||
registering the function call. Since function calls are executed as tasks you
|
||||
can tell if a function call has been cancelled by catching the
|
||||
`asyncio.CancelledError` exception (and don't forget to raise it again!).
|
||||
|
||||
- Updated OpenTelemetry tracing attribute `metrics.ttfb_ms` to `metrics.ttfb`.
|
||||
The attribute reports TTFB in seconds.
|
||||
|
||||
### Deprecated
|
||||
|
||||
- `DailyTransport.send_dtmf()` is deprecated, push an `OutputDTMFFrame` or an
|
||||
`OutputDTMFUrgentFrame` instead.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue with `ElevenLabsTTSService` where long responses would
|
||||
continue generating output even after an interruption.
|
||||
|
||||
- Fixed an issue with the `OpenAILLMContext` where non-Roman characters were
|
||||
being incorrectly encoded as Unicode escape sequences. This was a logging
|
||||
issue and did not impact the actual conversation.
|
||||
|
||||
- In `AWSBedrockLLMService`, worked around a possible bug in AWS Bedrock where
|
||||
a `toolConfig` is required if there has been previous tool use in the
|
||||
messages array. This workaround includes a no_op factory function call that is
|
||||
used to satisfy the requirement.
|
||||
|
||||
- Fixed `WebsocketClientTransport` to use `FrameProcessorSetup.task_manager`
|
||||
instead of `StartFrame.task_manager`.
|
||||
|
||||
### Performance
|
||||
|
||||
- Use `uvloop` as the new event loop on Linux and macOS systems.
|
||||
|
||||
## [0.0.68] - 2025-05-28
|
||||
|
||||
### Added
|
||||
|
||||
150
CONTRIBUTING.md
150
CONTRIBUTING.md
@@ -41,150 +41,36 @@ We use Ruff for code linting and formatting. Please ensure your code passes all
|
||||
|
||||
We follow Google-style docstrings with these specific conventions:
|
||||
|
||||
**Regular Classes:**
|
||||
- Class docstrings should fully document all parameters used in `__init__`
|
||||
- We don't require separate docstrings for `__init__` methods when parameters are documented in the class docstring
|
||||
- Property methods should have docstrings explaining their purpose and return value
|
||||
|
||||
- Class docstring describes the class purpose and key functionality
|
||||
- `__init__` method has its own docstring with complete `Args:` section documenting all parameters
|
||||
- All public methods must have docstrings with `Args:` and `Returns:` sections as appropriate
|
||||
|
||||
**Dataclasses:**
|
||||
|
||||
- Class docstring describes the purpose and documents all fields in a `Parameters:` section
|
||||
- No `__init__` docstring (auto-generated)
|
||||
|
||||
**Properties:**
|
||||
|
||||
- Must have docstrings with `Returns:` section
|
||||
|
||||
**Abstract Methods:**
|
||||
|
||||
- Must have docstrings explaining what subclasses should implement
|
||||
|
||||
**`__init__.py` Files:**
|
||||
|
||||
- **Skip docstrings** for pure import/re-export modules
|
||||
- **Add brief docstrings** for top-level packages or those with initialization logic
|
||||
|
||||
**Enums:**
|
||||
|
||||
- Class docstring describes the enumeration purpose
|
||||
- Use `Parameters:` section to document each enum value and its meaning
|
||||
- No `__init__` docstring (Enums don't have custom constructors)
|
||||
|
||||
**Code Examples in Docstrings:**
|
||||
|
||||
- Use `Examples:` as a section header for multiple examples
|
||||
- Use descriptive text followed by double colons (`::`) for each example
|
||||
- **Always include a blank line after the `::`**
|
||||
- Indent all code consistently within each block
|
||||
- Separate multiple examples with blank lines for readability
|
||||
|
||||
**Lists and Bullets in Docstrings:**
|
||||
|
||||
- Use dashes (`-`) for bullet points, not asterisks (`*`)
|
||||
- **Add a blank line before bullet lists** when they follow a colon
|
||||
- Use section headers like "Supported features:" or "Behavior:" before lists
|
||||
- For complex nested information, consider using paragraph format instead
|
||||
|
||||
**Deprecations:**
|
||||
|
||||
- Use `warnings.warn()` in code for runtime deprecation warnings
|
||||
- Add `.. deprecated::` directive in docstrings for documentation visibility
|
||||
- Include version information and describe current status
|
||||
- Describe parameters in present tense, use directive to indicate deprecation status
|
||||
|
||||
#### Examples:
|
||||
Example of correctly documented class:
|
||||
|
||||
```python
|
||||
# Regular class
|
||||
class MyService(BaseService):
|
||||
"""Description of what the service does.
|
||||
class MyClass:
|
||||
"""Class description.
|
||||
|
||||
Provides detailed explanation of the service's functionality,
|
||||
key features, and usage patterns.
|
||||
Additional details about the class.
|
||||
|
||||
Supported features:
|
||||
|
||||
- Feature one with detailed explanation
|
||||
- Feature two with additional context
|
||||
- Feature three for advanced use cases
|
||||
Args:
|
||||
param1: Description of first parameter.
|
||||
param2: Description of second parameter.
|
||||
"""
|
||||
|
||||
def __init__(self, param1: str, old_param: str = None, **kwargs):
|
||||
"""Initialize the service.
|
||||
|
||||
Args:
|
||||
param1: Description of param1.
|
||||
old_param: Controls legacy behavior.
|
||||
|
||||
.. deprecated:: 1.2.0
|
||||
This parameter no longer has any effect and will be removed in version 2.0.
|
||||
|
||||
**kwargs: Additional arguments passed to parent.
|
||||
"""
|
||||
if old_param is not None:
|
||||
import warnings
|
||||
warnings.warn(
|
||||
"Parameter 'old_param' is deprecated and will be removed in version 2.0.",
|
||||
DeprecationWarning,
|
||||
)
|
||||
super().__init__(**kwargs)
|
||||
def __init__(self, param1, param2):
|
||||
# No docstring required here as parameters are documented above
|
||||
self.param1 = param1
|
||||
self.param2 = param2
|
||||
|
||||
@property
|
||||
def sample_rate(self) -> int:
|
||||
"""Get the current sample rate.
|
||||
def some_property(self) -> str:
|
||||
"""Get the formatted property value.
|
||||
|
||||
Returns:
|
||||
The sample rate in Hz.
|
||||
A string representation of the property.
|
||||
"""
|
||||
return self._sample_rate
|
||||
|
||||
async def process_data(self, data: str) -> bool:
|
||||
"""Process the provided data.
|
||||
|
||||
Args:
|
||||
data: The data to process.
|
||||
|
||||
Returns:
|
||||
True if processing succeeded.
|
||||
"""
|
||||
pass
|
||||
|
||||
# Dataclass with code examples
|
||||
@dataclass
|
||||
class MessageFrame:
|
||||
"""Frame containing messages in OpenAI format.
|
||||
|
||||
Supports both simple and content list message formats.
|
||||
|
||||
Example::
|
||||
|
||||
[
|
||||
{"role": "user", "content": "Hello"},
|
||||
{"role": "assistant", "content": "Hi there!"}
|
||||
]
|
||||
|
||||
Parameters:
|
||||
messages: List of messages in OpenAI format.
|
||||
"""
|
||||
|
||||
messages: List[dict]
|
||||
|
||||
# Enum class
|
||||
class Status(Enum):
|
||||
"""Status codes for processing operations.
|
||||
|
||||
Parameters:
|
||||
PENDING: Operation is queued but not started.
|
||||
RUNNING: Operation is currently in progress.
|
||||
COMPLETED: Operation finished successfully.
|
||||
FAILED: Operation encountered an error.
|
||||
"""
|
||||
|
||||
PENDING = "pending"
|
||||
RUNNING = "running"
|
||||
COMPLETED = "completed"
|
||||
FAILED = "failed"
|
||||
return f"Property: {self.param1}"
|
||||
```
|
||||
|
||||
# Contributor Covenant Code of Conduct
|
||||
|
||||
26
README.md
26
README.md
@@ -51,19 +51,19 @@ You can connect to Pipecat from any platform using our official SDKs:
|
||||
|
||||
## 🧩 Available services
|
||||
|
||||
| Category | Services |
|
||||
| ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [Parakeet (NVIDIA)](https://docs.pipecat.ai/server/services/stt/parakeet), [SambaNova (Whisper)](https://docs.pipecat.ai/server/services/stt/sambanova), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Ultravox](https://docs.pipecat.ai/server/services/stt/ultravox), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) |
|
||||
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova), [Together AI](https://docs.pipecat.ai/server/services/llm/together) |
|
||||
| Text-to-Speech | [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [FastPitch (NVIDIA)](https://docs.pipecat.ai/server/services/tts/fastpitch), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
|
||||
| Speech-to-Speech | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai) |
|
||||
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local |
|
||||
| Serializers | [Plivo](https://docs.pipecat.ai/server/utilities/serializers/plivo), [Twilio](https://docs.pipecat.ai/server/utilities/serializers/twilio), [Telnyx](https://docs.pipecat.ai/server/utilities/serializers/telnyx) |
|
||||
| Video | [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) |
|
||||
| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) |
|
||||
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/fal), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) |
|
||||
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [Noisereduce](https://docs.pipecat.ai/server/utilities/audio/noisereduce-filter) |
|
||||
| Analytics & Metrics | [OpenTelemetry](https://docs.pipecat.ai/server/utilities/opentelemetry), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) |
|
||||
| Category | Services |
|
||||
| ------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [Parakeet (NVIDIA)](https://docs.pipecat.ai/server/services/stt/parakeet), [Ultravox](https://docs.pipecat.ai/server/services/stt/ultravox), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) |
|
||||
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [Together AI](https://docs.pipecat.ai/server/services/llm/together) |
|
||||
| Text-to-Speech | [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [FastPitch (NVIDIA)](https://docs.pipecat.ai/server/services/tts/fastpitch), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
|
||||
| Speech-to-Speech | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai) |
|
||||
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local |
|
||||
| Serializers | [Plivo](https://docs.pipecat.ai/server/utilities/serializers/plivo), [Twilio](https://docs.pipecat.ai/server/utilities/serializers/twilio), [Telnyx](https://docs.pipecat.ai/server/utilities/serializers/telnyx) |
|
||||
| Video | [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) |
|
||||
| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) |
|
||||
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/fal), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) |
|
||||
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [Noisereduce](https://docs.pipecat.ai/server/utilities/audio/noisereduce-filter) |
|
||||
| Analytics & Metrics | [OpenTelemetry](https://docs.pipecat.ai/server/utilities/opentelemetry), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) |
|
||||
|
||||
📚 [View full services documentation →](https://docs.pipecat.ai/server/services/supported-services)
|
||||
|
||||
|
||||
@@ -1,20 +1,13 @@
|
||||
build~=1.2.2
|
||||
coverage~=7.9.1
|
||||
coverage~=7.6.12
|
||||
grpcio-tools~=1.67.1
|
||||
pip-tools~=7.4.1
|
||||
pre-commit~=4.2.0
|
||||
pyright~=1.1.402
|
||||
pytest~=8.4.1
|
||||
pytest-asyncio~=1.0.0
|
||||
pre-commit~=4.0.1
|
||||
pyright~=1.1.397
|
||||
pytest~=8.3.4
|
||||
pytest-asyncio~=0.25.3
|
||||
pytest-aiohttp==1.1.0
|
||||
ruff~=0.12.1
|
||||
setuptools~=78.1.1
|
||||
setuptools_scm~=8.3.1
|
||||
python-dotenv~=1.1.1
|
||||
|
||||
# For running examples
|
||||
uvicorn
|
||||
python-dotenv
|
||||
fastapi
|
||||
aiohttp
|
||||
aiortc
|
||||
ruff~=0.11.1
|
||||
setuptools~=70.0.0
|
||||
setuptools_scm~=8.1.0
|
||||
python-dotenv~=1.0.1
|
||||
|
||||
191
docs/api/conf.py
191
docs/api/conf.py
@@ -1,6 +1,5 @@
|
||||
import logging
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
# Configure logging
|
||||
@@ -14,8 +13,7 @@ sys.path.insert(0, str(project_root / "src"))
|
||||
|
||||
# Project information
|
||||
project = "pipecat-ai"
|
||||
current_year = datetime.now().year
|
||||
copyright = f"2024-{current_year}, Daily" if current_year > 2024 else "2024, Daily"
|
||||
copyright = "2024, Daily"
|
||||
author = "Daily"
|
||||
|
||||
# General configuration
|
||||
@@ -26,20 +24,19 @@ extensions = [
|
||||
"sphinx.ext.intersphinx",
|
||||
]
|
||||
|
||||
suppress_warnings = [
|
||||
"autodoc.mocked_object",
|
||||
]
|
||||
|
||||
# Napoleon settings
|
||||
napoleon_google_docstring = True
|
||||
napoleon_numpy_docstring = False
|
||||
napoleon_include_init_with_doc = True
|
||||
|
||||
# AutoDoc settings
|
||||
autodoc_default_options = {
|
||||
"members": True,
|
||||
"member-order": "bysource",
|
||||
"undoc-members": False,
|
||||
"exclude-members": "__weakref__,model_config",
|
||||
"special-members": "__init__",
|
||||
"undoc-members": True,
|
||||
"exclude-members": "__weakref__",
|
||||
"no-index": True,
|
||||
"show-inheritance": True,
|
||||
}
|
||||
|
||||
@@ -74,16 +71,20 @@ autodoc_mock_imports = [
|
||||
"langchain",
|
||||
"lmnt",
|
||||
"noisereduce",
|
||||
"openai",
|
||||
"openpipe",
|
||||
"simli",
|
||||
"soundfile",
|
||||
"soniox",
|
||||
"pipecat_ai_krisp",
|
||||
"pyaudio",
|
||||
"_tkinter",
|
||||
"tkinter",
|
||||
"daily",
|
||||
"daily_python",
|
||||
"pydantic.BaseModel",
|
||||
"pydantic.Field",
|
||||
"pydantic._internal._model_construction",
|
||||
"pydantic._internal._fields",
|
||||
# Moondream dependencies
|
||||
"torch",
|
||||
"transformers",
|
||||
@@ -144,76 +145,85 @@ autodoc_mock_imports = [
|
||||
"transformers.AutoFeatureExtractor",
|
||||
# Also add specific classes that are imported
|
||||
"AutoFeatureExtractor",
|
||||
# Sentry dependencies
|
||||
"sentry_sdk",
|
||||
# AWS Nova Sonic dependencies
|
||||
"aws_sdk_bedrock_runtime",
|
||||
"aws_sdk_bedrock_runtime.client",
|
||||
"aws_sdk_bedrock_runtime.config",
|
||||
"aws_sdk_bedrock_runtime.models",
|
||||
"smithy_aws_core",
|
||||
"smithy_aws_core.credentials_resolvers",
|
||||
"smithy_aws_core.credentials_resolvers.static",
|
||||
"smithy_aws_core.identity",
|
||||
"smithy_core",
|
||||
"smithy_core.aio",
|
||||
"smithy_core.aio.eventstream",
|
||||
# MCP dependencies (you may already have these)
|
||||
"mcp",
|
||||
"mcp.client",
|
||||
"mcp.client.session_group",
|
||||
"mcp.client.sse",
|
||||
"mcp.client.stdio",
|
||||
"mcp.ClientSession",
|
||||
"mcp.StdioServerParameters",
|
||||
# gstreamer
|
||||
"gi",
|
||||
"gi.require_version",
|
||||
"gi.repository",
|
||||
# Protobuf mocks
|
||||
"pipecat.frames.protobufs.frames_pb2",
|
||||
"pipecat.serializers.protobuf",
|
||||
"google.protobuf",
|
||||
"google.protobuf.descriptor",
|
||||
"google.protobuf.descriptor_pool",
|
||||
"google.protobuf.runtime_version",
|
||||
"google.protobuf.symbol_database",
|
||||
"google.protobuf.internal.builder",
|
||||
]
|
||||
|
||||
# HTML output settings
|
||||
html_theme = "sphinx_rtd_theme"
|
||||
html_static_path = ["_static"]
|
||||
autodoc_typehints = "signature" # Show type hints in the signature only, not in the docstring
|
||||
autodoc_typehints = "description"
|
||||
html_show_sphinx = False
|
||||
|
||||
|
||||
def import_core_modules():
|
||||
"""Import core pipecat modules for autodoc to discover."""
|
||||
core_modules = [
|
||||
"pipecat",
|
||||
"pipecat.frames",
|
||||
"pipecat.pipeline",
|
||||
"pipecat.processors",
|
||||
"pipecat.services",
|
||||
"pipecat.transports",
|
||||
"pipecat.audio",
|
||||
"pipecat.adapters",
|
||||
"pipecat.clocks",
|
||||
"pipecat.metrics",
|
||||
"pipecat.observers",
|
||||
"pipecat.serializers",
|
||||
"pipecat.sync",
|
||||
"pipecat.transcriptions",
|
||||
"pipecat.utils",
|
||||
]
|
||||
def verify_modules():
|
||||
"""Verify that required modules are available."""
|
||||
required_modules = {
|
||||
"services": [
|
||||
"assemblyai",
|
||||
"aws",
|
||||
"cartesia",
|
||||
"deepgram",
|
||||
"google",
|
||||
"lmnt",
|
||||
"riva",
|
||||
"simli",
|
||||
],
|
||||
"serializers": ["livekit"],
|
||||
"vad": ["silero", "vad_analyzer"],
|
||||
"transports": {
|
||||
"services": ["daily", "livekit"],
|
||||
"local": ["audio", "tk"],
|
||||
"network": ["fastapi_websocket", "websocket_server"],
|
||||
},
|
||||
}
|
||||
|
||||
for module_name in core_modules:
|
||||
try:
|
||||
__import__(module_name)
|
||||
logger.info(f"Successfully imported {module_name}")
|
||||
except ImportError as e:
|
||||
logger.warning(f"Failed to import {module_name}: {e}")
|
||||
# Skip importing modules that are in autodoc_mock_imports
|
||||
skipped_modules = set(autodoc_mock_imports)
|
||||
|
||||
missing = []
|
||||
for category, modules in required_modules.items():
|
||||
if isinstance(modules, dict):
|
||||
# Handle nested structure
|
||||
for subcategory, submodules in modules.items():
|
||||
for module in submodules:
|
||||
# Check if module is in autodoc_mock_imports
|
||||
if (
|
||||
f"pipecat.{category}.{subcategory}.{module}" in skipped_modules
|
||||
or module in skipped_modules
|
||||
):
|
||||
logger.info(
|
||||
f"Skipping import of mocked module: pipecat.{category}.{subcategory}.{module}"
|
||||
)
|
||||
continue
|
||||
|
||||
try:
|
||||
__import__(f"pipecat.{category}.{subcategory}.{module}")
|
||||
logger.info(
|
||||
f"Successfully imported pipecat.{category}.{subcategory}.{module}"
|
||||
)
|
||||
except (ImportError, TypeError, NameError) as e:
|
||||
missing.append(f"pipecat.{category}.{subcategory}.{module}")
|
||||
logger.warning(
|
||||
f"Optional module not available: pipecat.{category}.{subcategory}.{module} - {str(e)}"
|
||||
)
|
||||
else:
|
||||
# Handle flat structure
|
||||
for module in modules:
|
||||
# Check if module is in autodoc_mock_imports
|
||||
if f"pipecat.{category}.{module}" in skipped_modules or module in skipped_modules:
|
||||
logger.info(f"Skipping import of mocked module: pipecat.{category}.{module}")
|
||||
continue
|
||||
|
||||
try:
|
||||
__import__(f"pipecat.{category}.{module}")
|
||||
logger.info(f"Successfully imported pipecat.{category}.{module}")
|
||||
except (ImportError, TypeError, NameError) as e:
|
||||
missing.append(f"pipecat.{category}.{module}")
|
||||
logger.warning(
|
||||
f"Optional module not available: pipecat.{category}.{module} - {str(e)}"
|
||||
)
|
||||
|
||||
if missing:
|
||||
logger.warning(f"Some optional modules are not available: {missing}")
|
||||
|
||||
|
||||
def clean_title(title: str) -> str:
|
||||
@@ -225,7 +235,36 @@ def clean_title(title: str) -> str:
|
||||
parts = title.split(".")
|
||||
title = parts[-1]
|
||||
|
||||
return title
|
||||
# Special cases for service names and common acronyms
|
||||
special_cases = {
|
||||
"ai": "AI",
|
||||
"aws": "AWS",
|
||||
"api": "API",
|
||||
"vad": "VAD",
|
||||
"assemblyai": "AssemblyAI",
|
||||
"deepgram": "Deepgram",
|
||||
"elevenlabs": "ElevenLabs",
|
||||
"openai": "OpenAI",
|
||||
"openpipe": "OpenPipe",
|
||||
"playht": "PlayHT",
|
||||
"xtts": "XTTS",
|
||||
"lmnt": "LMNT",
|
||||
}
|
||||
|
||||
# Check if the entire title is a special case
|
||||
if title.lower() in special_cases:
|
||||
return special_cases[title.lower()]
|
||||
|
||||
# Otherwise, capitalize each word
|
||||
words = title.split("_")
|
||||
cleaned_words = []
|
||||
for word in words:
|
||||
if word.lower() in special_cases:
|
||||
cleaned_words.append(special_cases[word.lower()])
|
||||
else:
|
||||
cleaned_words.append(word.capitalize())
|
||||
|
||||
return " ".join(cleaned_words)
|
||||
|
||||
|
||||
def setup(app):
|
||||
@@ -250,8 +289,9 @@ def setup(app):
|
||||
|
||||
excludes = [
|
||||
str(project_root / "src/pipecat/pipeline/to_be_updated"),
|
||||
str(project_root / "src/pipecat/examples"),
|
||||
str(project_root / "src/pipecat/tests"),
|
||||
str(project_root / "src/pipecat/processors/gstreamer"),
|
||||
str(project_root / "src/pipecat/services/to_be_updated"),
|
||||
str(project_root / "src/pipecat/vad"), # deprecated
|
||||
"**/test_*.py",
|
||||
"**/tests/*.py",
|
||||
]
|
||||
@@ -292,4 +332,5 @@ def setup(app):
|
||||
logger.error(f"Error generating API documentation: {e}", exc_info=True)
|
||||
|
||||
|
||||
import_core_modules()
|
||||
# Run module verification
|
||||
verify_modules()
|
||||
|
||||
@@ -1,17 +1,57 @@
|
||||
Pipecat API Reference
|
||||
=====================
|
||||
Pipecat API Reference Docs
|
||||
==========================
|
||||
|
||||
Welcome to the Pipecat API reference.
|
||||
Welcome to Pipecat's API reference documentation!
|
||||
|
||||
Use the navigation on the left to browse modules, or search using the search box.
|
||||
|
||||
**New to Pipecat?** Check out the `main documentation <https://docs.pipecat.ai>`_ for tutorials, guides, and client SDK information.
|
||||
Pipecat is an open source framework for building voice and multimodal assistants.
|
||||
It provides a flexible pipeline architecture for connecting various AI services,
|
||||
audio processing, and transport layers.
|
||||
|
||||
Quick Links
|
||||
-----------
|
||||
|
||||
* `GitHub Repository <https://github.com/pipecat-ai/pipecat>`_
|
||||
* `Join our Community <https://discord.gg/pipecat>`_
|
||||
* `Website <https://pipecat.ai>`_
|
||||
|
||||
API Reference
|
||||
-------------
|
||||
|
||||
Core Components
|
||||
~~~~~~~~~~~~~~~
|
||||
|
||||
* :mod:`Frames <pipecat.frames>`
|
||||
* :mod:`Processors <pipecat.processors>`
|
||||
* :mod:`Pipeline <pipecat.pipeline>`
|
||||
|
||||
Audio Processing
|
||||
~~~~~~~~~~~~~~~~
|
||||
|
||||
* :mod:`Audio <pipecat.audio>`
|
||||
|
||||
Services
|
||||
~~~~~~~~
|
||||
|
||||
* :mod:`Services <pipecat.services>`
|
||||
|
||||
Transport & Serialization
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
* :mod:`Transports <pipecat.transports>`
|
||||
* :mod:`Local <pipecat.transports.local>`
|
||||
* :mod:`Network <pipecat.transports.network>`
|
||||
* :mod:`Services <pipecat.transports.services>`
|
||||
* :mod:`Serializers <pipecat.serializers>`
|
||||
|
||||
Utilities
|
||||
~~~~~~~~~
|
||||
|
||||
* :mod:`Adapters <pipecat.adapters>`
|
||||
* :mod:`Clocks <pipecat.clocks>`
|
||||
* :mod:`Metrics <pipecat.metrics>`
|
||||
* :mod:`Observers <pipecat.observers>`
|
||||
* :mod:`Sync <pipecat.sync>`
|
||||
* :mod:`Transcriptions <pipecat.transcriptions>`
|
||||
* :mod:`Utils <pipecat.utils>`
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 3
|
||||
@@ -31,4 +71,11 @@ Quick Links
|
||||
Sync <api/pipecat.sync>
|
||||
Transcriptions <api/pipecat.transcriptions>
|
||||
Transports <api/pipecat.transports>
|
||||
Utils <api/pipecat.utils>
|
||||
Utils <api/pipecat.utils>
|
||||
|
||||
Indices and tables
|
||||
==================
|
||||
|
||||
* :ref:`genindex`
|
||||
* :ref:`modindex`
|
||||
* :ref:`search`
|
||||
@@ -42,12 +42,9 @@ pipecat-ai[openai]
|
||||
pipecat-ai[qwen]
|
||||
pipecat-ai[remote-smart-turn]
|
||||
# pipecat-ai[riva] # Mocked
|
||||
pipecat-ai[sambanova]
|
||||
pipecat-ai[silero]
|
||||
pipecat-ai[simli]
|
||||
pipecat-ai[soundfile]
|
||||
pipecat-ai[soniox]
|
||||
pipecat-ai[speechmatics]
|
||||
pipecat-ai[tavus]
|
||||
pipecat-ai[together]
|
||||
# pipecat-ai[ultravox] # Mocked
|
||||
|
||||
@@ -107,17 +107,4 @@ MINIMAX_API_KEY=...
|
||||
MINIMAX_GROUP_ID=...
|
||||
|
||||
# Sarvam AI
|
||||
SARVAM_API_KEY=...
|
||||
|
||||
# Soniox
|
||||
SONIOX_API_KEY=
|
||||
|
||||
# Speechmatics
|
||||
SPEECHMATICS_API_KEY=...
|
||||
|
||||
|
||||
# SambaNova
|
||||
SAMBANOVA_API_KEY=...
|
||||
|
||||
# Sentry
|
||||
SENTRY_DSN=...
|
||||
SARVAM_API_KEY=...
|
||||
@@ -1,60 +0,0 @@
|
||||
# AWS Strands Examples
|
||||
|
||||
This folder contains two Python examples demonstrating how to use Pipecat with the AWS Strands agent.
|
||||
|
||||
## Overview
|
||||
|
||||
These examples show how to delegate complex, multi-step tasks to a Strands agent, which can reason step-by-step and call tools to accomplish user requests.
|
||||
|
||||
These examples are intentionally simplified for demonstration, using mock API calls. They work best if you ask the agent:
|
||||
|
||||
> What's the weather where the Golden Gate Bridge is?
|
||||
|
||||
## Example Scripts
|
||||
|
||||
### `black-box.py`
|
||||
|
||||
A minimal example that demonstrates how to use the Strands agent with Pipecat. The agent can handle multi-step queries by calling tools, but does not explain its reasoning out loud.
|
||||
|
||||
### `explain-thinking.py`
|
||||
|
||||
An enhanced example where the Strands agent explains each step of its reasoning in clear, simple language as it works through a multi-step task.
|
||||
|
||||
## Quick Start
|
||||
|
||||
1. **Clone the repository and navigate to this example:**
|
||||
|
||||
```bash
|
||||
git clone https://github.com/pipecat-ai/pipecat.git
|
||||
cd pipecat/examples/aws-strands
|
||||
```
|
||||
|
||||
2. **Set up a virtual environment:**
|
||||
|
||||
```bash
|
||||
python -m venv venv
|
||||
source venv/bin/activate # On Windows: venv\Scripts\activate
|
||||
```
|
||||
|
||||
3. **Install dependencies:**
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
4. **Configure environment variables:**
|
||||
|
||||
Copy the provided `env.example` file to `.env` and fill in the necessary credentials:
|
||||
|
||||
```bash
|
||||
cp env.example .env
|
||||
# Then edit .env with your preferred editor
|
||||
```
|
||||
|
||||
5. **Run an example:**
|
||||
|
||||
```bash
|
||||
python black-box.py
|
||||
# or
|
||||
python explain-thinking.py
|
||||
```
|
||||
@@ -1,206 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from strands import Agent, tool
|
||||
from strands.models import BedrockModel
|
||||
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.llm_service import FunctionCallParams
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
"""This example demonstrates how to use the Strands agent with Pipecat.
|
||||
|
||||
You can delegate complex, multi-step tasks to the Strands agent, which can cycle through LLM-based reasoning and tool calls to accomplish the task.
|
||||
|
||||
Try asking: "What's the weather where the Golden Gate Bridge is?"
|
||||
"""
|
||||
|
||||
# Strands agent tools
|
||||
|
||||
|
||||
@tool
|
||||
def get_location_name_from_landmark(landmark: str) -> str:
|
||||
"""
|
||||
Get the location name from a landmark.
|
||||
|
||||
Args:
|
||||
landmark (str): The name of the landmark, e.g. "Golden Gate Bridge".
|
||||
"""
|
||||
# Simulate fetching location
|
||||
return "San Francisco, CA"
|
||||
|
||||
|
||||
@tool
def get_lat_long_from_location_name(location: str) -> dict:
    """
    Get the latitude and longitude for a location name.

    Args:
        location (str): The city and state, e.g. "San Francisco, CA".
    """
    # NOTE(review): docstring wording kept as-is; presumably `@tool` exposes it
    # to the agent's model -- confirm before rewording.
    # Demo stub standing in for a geocoding service: always the same coordinates.
    simulated_coordinates = dict(lat=37.7749, long=-122.4194)
    return simulated_coordinates
|
||||
|
||||
|
||||
@tool
def get_current_weather_from_lat_long(lat: float, long: float) -> dict:
    """
    Get the current weather for a specific latitude and longitude.

    Args:
        lat (float): The latitude of the location.
        long (float): The longitude of the location.
    """
    # NOTE(review): docstring wording kept as-is; presumably `@tool` exposes it
    # to the agent's model -- confirm before rewording.
    # Demo stub standing in for a weather service: both arguments are ignored.
    simulated_weather = dict(conditions="nice", temperature="75")
    return simulated_weather
|
||||
|
||||
|
||||
# Every entry is a zero-argument factory rather than a ready-made params object,
# so members such as SileroVADAnalyzer are only instantiated for the transport
# that actually gets selected.
transport_params = dict(
    daily=lambda: DailyParams(
        audio_in_enabled=True,
        audio_out_enabled=True,
        vad_analyzer=SileroVADAnalyzer(),
    ),
    twilio=lambda: FastAPIWebsocketParams(
        audio_in_enabled=True,
        audio_out_enabled=True,
        vad_analyzer=SileroVADAnalyzer(),
    ),
    webrtc=lambda: TransportParams(
        audio_in_enabled=True,
        audio_out_enabled=True,
        vad_analyzer=SileroVADAnalyzer(),
    ),
)
|
||||
|
||||
|
||||
async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool):
    """Assemble and run the voice pipeline that hands location/weather questions to a Strands agent.

    The pipeline is: transport input -> STT -> user context aggregator -> OpenAI
    LLM -> TTS -> transport output -> assistant context aggregator. The OpenAI
    LLM is given one function, which forwards the query to the Strands agent.

    Args:
        transport: Transport providing the pipeline's input and output processors.
        _: Parsed command-line arguments (unused here).
        handle_sigint: Passed through to ``PipelineRunner``.
    """
    logger.info("Starting bot")

    # Speech services and the conversational LLM.
    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
    tts = CartesiaTTSService(
        api_key=os.getenv("CARTESIA_API_KEY"),
        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
    )
    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))

    # The agent that does the multi-step tool work on behalf of the LLM.
    agent_tools = [
        get_location_name_from_landmark,
        get_lat_long_from_location_name,
        get_current_weather_from_lat_long,
    ]
    strands_agent = Agent(
        model=BedrockModel(
            model_id="us.anthropic.claude-3-7-sonnet-20250219-v1:0", max_tokens=64000
        ),
        tools=agent_tools,
        system_prompt="""
        You are a helpful personal assistant who can look up information about places and weather.

        Your key capabilities:
        1. Look up where landmarks are located.
        2. Find latitude and longitude for a location.
        3. Look up the current weather for a specific latitude and longitude.

        Explain each step of your reasoning in clear, simple, and concise language. Your responses will be converted to audio, so avoid special characters and numbered lists.
        """,
    )

    async def handle_location_or_weather_related_queries(params: FunctionCallParams, query: str):
        """
        Handle location or weather related queries.

        Args:
            query (str): The user's query, e.g. "What's the weather where the Golden Gate Bridge is?".
        """
        # The agent call is synchronous, so it is pushed onto the default executor;
        # running it inline would block the event loop and, among other things,
        # delay the "let me check on that" announcement until the agent is done.
        event_loop = asyncio.get_running_loop()
        agent_result = await event_loop.run_in_executor(None, strands_agent, query)
        await params.result_callback(agent_result.message)

    llm.register_direct_function(handle_location_or_weather_related_queries)

    @llm.event_handler("on_function_calls_started")
    async def on_function_calls_started(service, function_calls):
        # Give the user audible feedback while the function call runs.
        await tts.queue_frame(TTSSpeakFrame("Let me check on that."))

    # Conversation context: one system message plus the single delegating tool.
    messages = [
        {
            "role": "system",
            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way. Start by suggesting that the user ask about the weather where the Golden Gate Bridge is.",
        },
    ]
    tools = ToolsSchema(standard_tools=[handle_location_or_weather_related_queries])
    context = OpenAILLMContext(messages, tools)
    context_aggregator = llm.create_context_aggregator(context)

    processors = [
        transport.input(),
        stt,
        context_aggregator.user(),
        llm,
        tts,
        transport.output(),
        context_aggregator.assistant(),
    ]
    pipeline = Pipeline(processors)

    task_params = PipelineParams(
        enable_metrics=True,
        enable_usage_metrics=True,
    )
    task = PipelineTask(pipeline, params=task_params)

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info("Client connected")
        # Kick off the conversation.
        await task.queue_frames([context_aggregator.user().get_context_frame()])

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info("Client disconnected")
        await task.cancel()

    runner = PipelineRunner(handle_sigint=handle_sigint)
    await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # The example runner is only imported when this file is executed as a script.
    from pipecat.examples.run import main as run_main

    run_main(run_example, transport_params=transport_params)
|
||||
@@ -1,8 +0,0 @@
|
||||
OPENAI_API_KEY=
|
||||
CARTESIA_API_KEY=
|
||||
DEEPGRAM_API_KEY=
|
||||
DAILY_API_KEY=
|
||||
DAILY_SAMPLE_ROOM_URL=
|
||||
AWS_SECRET_ACCESS_KEY=
|
||||
AWS_ACCESS_KEY_ID=
|
||||
AWS_REGION=
|
||||
@@ -1,249 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from strands import Agent, tool
|
||||
from strands.models import BedrockModel
|
||||
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.llm_service import FunctionCallParams
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
"""This example demonstrates how to use the Strands agent with Pipecat in a way where the agent explains its reasoning step-by-step.
|
||||
|
||||
You can delegate complex, multi-step tasks to the Strands agent, which can cycle through LLM-based reasoning and tool calls to accomplish the task.
|
||||
|
||||
Try asking: "What's the weather where the Golden Gate Bridge is?"
|
||||
"""
|
||||
|
||||
|
||||
# Strands agent tools
|
||||
|
||||
|
||||
@tool
def get_location_name_from_landmark(landmark: str) -> str:
    """
    Get the location name from a landmark.

    Args:
        landmark (str): The name of the landmark, e.g. "Golden Gate Bridge".
    """
    # NOTE(review): docstring wording kept as-is; presumably `@tool` exposes it
    # to the agent's model -- confirm before rewording.
    # Demo stub: block for a few seconds to imitate a slow lookup, then return
    # a canned answer regardless of the landmark passed in.
    simulated_latency_seconds = 3
    time.sleep(simulated_latency_seconds)
    simulated_location = "San Francisco, CA"
    return simulated_location
|
||||
|
||||
|
||||
@tool
def get_lat_long_from_location_name(location: str) -> dict:
    """
    Get the latitude and longitude for a location name.

    Args:
        location (str): The city and state, e.g. "San Francisco, CA".
    """
    # NOTE(review): docstring wording kept as-is; presumably `@tool` exposes it
    # to the agent's model -- confirm before rewording.
    # Demo stub standing in for a slow geocoding service: wait, then return
    # fixed coordinates regardless of the location passed in.
    simulated_latency_seconds = 3
    time.sleep(simulated_latency_seconds)
    simulated_coordinates = dict(lat=37.7749, long=-122.4194)
    return simulated_coordinates
|
||||
|
||||
|
||||
@tool
def get_current_weather_from_lat_long(lat: float, long: float) -> dict:
    """
    Get the current weather for a specific latitude and longitude.

    Args:
        lat (float): The latitude of the location.
        long (float): The longitude of the location.
    """
    # NOTE(review): docstring wording kept as-is; presumably `@tool` exposes it
    # to the agent's model -- confirm before rewording.
    # Demo stub standing in for a slow weather service: wait, then return a
    # fixed report regardless of the coordinates passed in.
    simulated_latency_seconds = 3
    time.sleep(simulated_latency_seconds)
    simulated_weather = dict(conditions="nice", temperature="75")
    return simulated_weather
|
||||
|
||||
|
||||
# Every entry is a zero-argument factory rather than a ready-made params object,
# so members such as SileroVADAnalyzer are only instantiated for the transport
# that actually gets selected.
transport_params = dict(
    daily=lambda: DailyParams(
        audio_in_enabled=True,
        audio_out_enabled=True,
        vad_analyzer=SileroVADAnalyzer(),
    ),
    twilio=lambda: FastAPIWebsocketParams(
        audio_in_enabled=True,
        audio_out_enabled=True,
        vad_analyzer=SileroVADAnalyzer(),
    ),
    webrtc=lambda: TransportParams(
        audio_in_enabled=True,
        audio_out_enabled=True,
        vad_analyzer=SileroVADAnalyzer(),
    ),
)
|
||||
|
||||
|
||||
async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool):
    """Run the voice pipeline with a Strands agent that narrates its intermediate steps.

    The pipeline is: transport input -> STT -> user context aggregator -> OpenAI
    LLM -> TTS -> transport output -> assistant context aggregator. The OpenAI
    LLM is given one function, which forwards the query to the Strands agent on
    an executor thread. While the agent works, its intermediate assistant
    messages are queued and spoken through the TTS service; the final message is
    not queued because it is returned to the LLM as the function result.

    Args:
        transport: Transport providing the pipeline's input and output processors.
        _: Parsed command-line arguments (unused here).
        handle_sigint: Passed through to ``PipelineRunner``.
    """
    logger.info("Starting bot")

    # The agent (and therefore its callback handler) runs on an executor thread,
    # so the callback needs this loop to hand messages back safely.
    loop = asyncio.get_running_loop()

    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

    tts = CartesiaTTSService(
        api_key=os.getenv("CARTESIA_API_KEY"),
        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
    )

    # True once the agent has signalled "end_turn": the next complete message is
    # the final answer and must not be narrated.
    next_strands_message_is_last = False
    strands_messages_queue = asyncio.Queue()

    def strands_callback_handler(**kwargs):
        """
        Handle events from the Strands agent.

        Called on the executor thread that runs the agent. Intermediate
        assistant text is forwarded to the event loop for narration.
        """
        nonlocal next_strands_message_is_last
        if "event" in kwargs:
            event_obj = kwargs["event"]
            if event_obj and "messageStop" in event_obj:
                message_stop = event_obj["messageStop"]
                if message_stop and "stopReason" in message_stop:
                    if message_stop["stopReason"] == "end_turn":
                        next_strands_message_is_last = True
        elif "message" in kwargs:
            message_obj = kwargs["message"]
            if message_obj and "content" in message_obj and "role" in message_obj:
                role = message_obj["role"]
                content = message_obj["content"]
                if role == "assistant" and isinstance(content, list):
                    for content_obj in content:
                        if isinstance(content_obj, dict) and "text" in content_obj:
                            message = content_obj["text"]
                            if not next_strands_message_is_last:
                                # asyncio.Queue is not thread-safe, so the put is
                                # scheduled on the loop instead of done here.
                                loop.call_soon_threadsafe(
                                    strands_messages_queue.put_nowait, message
                                )

    async def process_strands_messages():
        """Speak queued agent messages, one at a time, for as long as the bot runs."""
        while True:
            message = await strands_messages_queue.get()
            await tts.queue_frame(TTSSpeakFrame(message))
            strands_messages_queue.task_done()

    # Keep a reference: the loop only holds weak references to tasks, and the
    # task has to be cancelled once the pipeline is done.
    strands_messages_task = asyncio.create_task(process_strands_messages())

    strands_agent = Agent(
        model=BedrockModel(
            model_id="us.anthropic.claude-3-7-sonnet-20250219-v1:0", max_tokens=64000
        ),
        tools=[
            get_location_name_from_landmark,
            get_lat_long_from_location_name,
            get_current_weather_from_lat_long,
        ],
        system_prompt="""
        You are a helpful personal assistant who can look up information about places and weather.

        Your key capabilities:
        1. Look up where landmarks are located.
        2. Find latitude and longitude for a location.
        3. Look up the current weather for a specific latitude and longitude.

        Explain each step of your reasoning in clear, simple, and concise language. Your responses will be converted to audio, so avoid special characters and numbered lists.
        """,
        callback_handler=strands_callback_handler,
    )

    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))

    async def handle_location_or_weather_related_queries(params: FunctionCallParams, query: str):
        """
        Handle location or weather related queries.

        Args:
            query (str): The user's query, e.g. "What's the weather where the Golden Gate Bridge is?".
        """
        nonlocal next_strands_message_is_last
        # Start every query with narration enabled; without this reset the flag
        # stays True after the first answer and later queries are never narrated.
        next_strands_message_is_last = False
        # Run in a background thread
        # (Otherwise the agent blocks the event loop; one effect of that is that we don't hear
        # the agent's "thinking" messages until the agent finishes)
        result = await loop.run_in_executor(None, strands_agent, query)
        await params.result_callback(result.message)

    llm.register_direct_function(handle_location_or_weather_related_queries)

    @llm.event_handler("on_function_calls_started")
    async def on_function_calls_started(service, function_calls):
        await tts.queue_frame(TTSSpeakFrame("Let me check on that."))

    tools = ToolsSchema(standard_tools=[handle_location_or_weather_related_queries])

    messages = [
        {
            "role": "system",
            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way. Start by suggesting that the user ask about the weather where the Golden Gate Bridge is.",
        },
    ]

    context = OpenAILLMContext(messages, tools)
    context_aggregator = llm.create_context_aggregator(context)

    pipeline = Pipeline(
        [
            transport.input(),
            stt,
            context_aggregator.user(),
            llm,
            tts,
            transport.output(),
            context_aggregator.assistant(),
        ]
    )

    task = PipelineTask(
        pipeline,
        params=PipelineParams(
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
    )

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info("Client connected")
        # Kick off the conversation.
        await task.queue_frames([context_aggregator.user().get_context_frame()])

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info("Client disconnected")
        await task.cancel()

    runner = PipelineRunner(handle_sigint=handle_sigint)

    try:
        await runner.run(task)
    finally:
        # Stop the narration pump so it does not outlive the pipeline.
        strands_messages_task.cancel()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # The example runner is only imported when this file is executed as a script.
    from pipecat.examples.run import main as run_main

    run_main(run_example, transport_params=transport_params)
|
||||
@@ -1,6 +0,0 @@
|
||||
fastapi
|
||||
uvicorn
|
||||
python-dotenv
|
||||
pipecat-ai[webrtc,daily,deepgram,cartesia]
|
||||
pipecat-ai-small-webrtc-prebuilt
|
||||
strands-agents
|
||||
@@ -4364,9 +4364,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/brace-expansion": {
|
||||
"version": "1.1.12",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
|
||||
"integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
|
||||
"version": "1.1.11",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
|
||||
"integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0",
|
||||
"concat-map": "0.0.1"
|
||||
@@ -6081,9 +6081,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/glob/node_modules/brace-expansion": {
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz",
|
||||
"integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==",
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
|
||||
"integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0"
|
||||
}
|
||||
|
||||
@@ -133,8 +133,7 @@ async def main():
|
||||
params=PipelineParams(
|
||||
audio_in_sample_rate=16000,
|
||||
audio_out_sample_rate=16000,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
allow_interruptions=True,
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -71,8 +71,6 @@ async def main():
|
||||
params=PipelineParams(
|
||||
audio_in_sample_rate=16000,
|
||||
audio_out_sample_rate=16000,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -148,8 +148,10 @@ async def main():
|
||||
params=PipelineParams(
|
||||
audio_in_sample_rate=16000,
|
||||
audio_out_sample_rate=16000,
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
observers=[TranscriptionLogObserver()],
|
||||
)
|
||||
|
||||
@@ -2,4 +2,4 @@ aiofiles
|
||||
python-dotenv
|
||||
fastapi[all]
|
||||
uvicorn
|
||||
pipecat-ai[daily,deepgram,openai,silero,cartesia,soundfile]
|
||||
pipecat-ai[daily,deepgram,openai,silero,cartesia]
|
||||
|
||||
@@ -75,13 +75,7 @@ async def main(room_url: str, token: str):
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
)
|
||||
task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -15,7 +15,7 @@
|
||||
"vite": "^6.3.5"
|
||||
},
|
||||
"dependencies": {
|
||||
"@pipecat-ai/client-js": "^1.0.0",
|
||||
"@pipecat-ai/daily-transport": "^1.0.0"
|
||||
"@pipecat-ai/client-js": "^0.3.5",
|
||||
"@pipecat-ai/daily-transport": "^0.3.10"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
*/
|
||||
|
||||
/**
|
||||
* Pipecat Client Implementation
|
||||
* RTVI Client Implementation
|
||||
*
|
||||
* This client connects to an RTVI-compatible bot server using WebRTC (via Daily).
|
||||
* It handles audio/video streaming and manages the connection lifecycle.
|
||||
@@ -16,7 +16,7 @@
|
||||
* - Browser with WebRTC support
|
||||
*/
|
||||
|
||||
import { PipecatClient, RTVIEvent } from '@pipecat-ai/client-js';
|
||||
import { RTVIClient, RTVIEvent } from '@pipecat-ai/client-js';
|
||||
import { DailyTransport } from '@pipecat-ai/daily-transport';
|
||||
|
||||
/**
|
||||
@@ -26,7 +26,7 @@ import { DailyTransport } from '@pipecat-ai/daily-transport';
|
||||
class ChatbotClient {
|
||||
constructor() {
|
||||
// Initialize client state
|
||||
this.pcClient = null;
|
||||
this.rtviClient = null;
|
||||
this.setupDOMElements();
|
||||
this.initializeClientAndTransport();
|
||||
this.setupEventListeners();
|
||||
@@ -59,7 +59,7 @@ class ChatbotClient {
|
||||
this.disconnectBtn.addEventListener('click', () => this.disconnect());
|
||||
|
||||
// Populate device selector
|
||||
this.pcClient.getAllMics().then((mics) => {
|
||||
this.rtviClient.getAllMics().then((mics) => {
|
||||
console.log('Available mics:', mics);
|
||||
mics.forEach((device) => {
|
||||
const option = document.createElement('option');
|
||||
@@ -71,16 +71,16 @@ class ChatbotClient {
|
||||
this.deviceSelector.addEventListener('change', (event) => {
|
||||
const selectedDeviceId = event.target.value;
|
||||
console.log('Selected device ID:', selectedDeviceId);
|
||||
this.pcClient.updateMic(selectedDeviceId);
|
||||
this.rtviClient.updateMic(selectedDeviceId);
|
||||
});
|
||||
|
||||
// Handle mic mute/unmute toggle
|
||||
const micToggleBtn = document.getElementById('mic-toggle-btn');
|
||||
|
||||
micToggleBtn.addEventListener('click', () => {
|
||||
let micEnabled = this.pcClient.isMicEnabled;
|
||||
let micEnabled = this.rtviClient.isMicEnabled;
|
||||
micToggleBtn.textContent = micEnabled ? 'Unmute Mic' : 'Mute Mic';
|
||||
this.pcClient.enableMic(!micEnabled);
|
||||
this.rtviClient.enableMic(!micEnabled);
|
||||
// Add logic to mute/unmute the mic
|
||||
if (micEnabled) {
|
||||
console.log('Mic muted');
|
||||
@@ -93,12 +93,23 @@ class ChatbotClient {
|
||||
}
|
||||
|
||||
/**
|
||||
* Set up the Pipecat client and Daily transport
|
||||
* Set up the RTVI client and Daily transport
|
||||
*/
|
||||
async initializeClientAndTransport() {
|
||||
// Initialize the Pipecat client with a DailyTransport and our configuration
|
||||
this.pcClient = new PipecatClient({
|
||||
// Initialize the RTVI client with a DailyTransport and our configuration
|
||||
this.rtviClient = new RTVIClient({
|
||||
transport: new DailyTransport(),
|
||||
params: {
|
||||
// REPLACE WITH YOUR MODAL URL ENDPOINT
|
||||
baseUrl:
|
||||
'https://<Modal workspace>--pipecat-modal-bot-launcher.modal.run',
|
||||
endpoints: {
|
||||
connect: '/connect',
|
||||
},
|
||||
requestData: {
|
||||
bot_name: 'openai',
|
||||
},
|
||||
},
|
||||
enableMic: true, // Enable microphone for user input
|
||||
enableCam: false,
|
||||
callbacks: {
|
||||
@@ -165,8 +176,8 @@ class ChatbotClient {
|
||||
// Set up listeners for media track events
|
||||
this.setupTrackListeners();
|
||||
|
||||
await this.pcClient.initDevices();
|
||||
window.client = this.pcClient;
|
||||
await this.rtviClient.initDevices();
|
||||
window.client = this.rtviClient;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -201,10 +212,10 @@ class ChatbotClient {
|
||||
* This is called when the bot is ready or when the transport state changes to ready
|
||||
*/
|
||||
setupMediaTracks() {
|
||||
if (!this.pcClient) return;
|
||||
if (!this.rtviClient) return;
|
||||
|
||||
// Get current tracks from the client
|
||||
const tracks = this.pcClient.tracks();
|
||||
const tracks = this.rtviClient.tracks();
|
||||
|
||||
// Set up any available bot tracks
|
||||
if (tracks.bot?.audio) {
|
||||
@@ -220,10 +231,10 @@ class ChatbotClient {
|
||||
* This handles new tracks being added during the session
|
||||
*/
|
||||
setupTrackListeners() {
|
||||
if (!this.pcClient) return;
|
||||
if (!this.rtviClient) return;
|
||||
|
||||
// Listen for new tracks starting
|
||||
this.pcClient.on(RTVIEvent.TrackStarted, (track, participant) => {
|
||||
this.rtviClient.on(RTVIEvent.TrackStarted, (track, participant) => {
|
||||
// Only handle non-local (bot) tracks
|
||||
if (!participant?.local) {
|
||||
if (track.kind === 'audio') {
|
||||
@@ -242,7 +253,7 @@ class ChatbotClient {
|
||||
});
|
||||
|
||||
// Listen for tracks stopping
|
||||
this.pcClient.on(RTVIEvent.TrackStopped, (track, participant) => {
|
||||
this.rtviClient.on(RTVIEvent.TrackStopped, (track, participant) => {
|
||||
if (participant.local) {
|
||||
this.log('Local mic muted');
|
||||
return;
|
||||
@@ -300,27 +311,21 @@ class ChatbotClient {
|
||||
|
||||
/**
|
||||
* Initialize and connect to the bot
|
||||
* This sets up the Pipecat client, initializes devices, and establishes the connection
|
||||
* This sets up the RTVI client, initializes devices, and establishes the connection
|
||||
*/
|
||||
async connect() {
|
||||
try {
|
||||
const botSelector = document.getElementById('bot-selector');
|
||||
const selectedBot = botSelector.value;
|
||||
this.rtviClient.params.requestData.bot_name = selectedBot;
|
||||
|
||||
// Initialize audio/video devices
|
||||
this.log('Initializing devices...');
|
||||
await this.pcClient.initDevices();
|
||||
await this.rtviClient.initDevices();
|
||||
|
||||
// Connect to the bot
|
||||
this.log(`Connecting to bot: ${selectedBot}`);
|
||||
await this.pcClient.connect({
|
||||
// REPLACE WITH YOUR MODAL URL ENDPOINT
|
||||
endpoint:
|
||||
'https://<your-workspace>--pipecat-modal-fastapi-app.modal.run/connect',
|
||||
requestData: {
|
||||
bot_name: selectedBot,
|
||||
},
|
||||
});
|
||||
await this.rtviClient.connect();
|
||||
|
||||
this.log('Connection complete');
|
||||
} catch (error) {
|
||||
@@ -331,9 +336,9 @@ class ChatbotClient {
|
||||
this.updateStatus('Error');
|
||||
|
||||
// Clean up if there's an error
|
||||
if (this.pcClient) {
|
||||
if (this.rtviClient) {
|
||||
try {
|
||||
await this.pcClient.disconnect();
|
||||
await this.rtviClient.disconnect();
|
||||
} catch (disconnectError) {
|
||||
this.log(`Error during disconnect: ${disconnectError.message}`);
|
||||
}
|
||||
@@ -345,10 +350,10 @@ class ChatbotClient {
|
||||
* Disconnect from the bot and clean up media resources
|
||||
*/
|
||||
async disconnect() {
|
||||
if (this.pcClient) {
|
||||
if (this.rtviClient) {
|
||||
try {
|
||||
// Disconnect the Pipecat client
|
||||
await this.pcClient.disconnect();
|
||||
// Disconnect the RTVI client
|
||||
await this.rtviClient.disconnect();
|
||||
|
||||
// Clean up audio
|
||||
if (this.botAudio.srcObject) {
|
||||
|
||||
@@ -301,7 +301,7 @@ def fastapi_app():
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
# Include the endpoints from this file
|
||||
# Include the endpoints from endpoints.py
|
||||
web_app.include_router(router)
|
||||
|
||||
return web_app
|
||||
|
||||
@@ -1,3 +1,2 @@
|
||||
python-dotenv==1.0.1
|
||||
modal==1.0.5
|
||||
fastapi[all]
|
||||
modal==0.71.3
|
||||
|
||||
@@ -170,6 +170,7 @@ async def run_bot(room_url: str, token: str):
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
|
||||
@@ -198,6 +198,7 @@ async def run_bot(room_url: str, token: str):
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
|
||||
@@ -211,6 +211,7 @@ async def run_bot(room_url: str, token: str):
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
"name": "my-daily-app",
|
||||
"version": "0.1.0",
|
||||
"dependencies": {
|
||||
"axios": "^1.11.0",
|
||||
"axios": "^1.6.0",
|
||||
"next": "^14.0.0",
|
||||
"pino": "^8.15.0",
|
||||
"react": "^18.2.0",
|
||||
@@ -215,9 +215,10 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/env": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/env/-/env-14.2.30.tgz",
|
||||
"integrity": "sha512-KBiBKrDY6kxTQWGzKjQB7QirL3PiiOkV7KW98leHFjtVRKtft76Ra5qSA/SL75xT44dp6hOcqiiJ6iievLOYug=="
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/env/-/env-14.2.26.tgz",
|
||||
"integrity": "sha512-vO//GJ/YBco+H7xdQhzJxF7ub3SUwft76jwaeOyVVQFHCi5DCnkP16WHB+JBylo4vOKPoZBlR94Z8xBxNBdNJA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@next/eslint-plugin-next": {
|
||||
"version": "14.2.25",
|
||||
@@ -230,12 +231,13 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-darwin-arm64": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-14.2.30.tgz",
|
||||
"integrity": "sha512-EAqfOTb3bTGh9+ewpO/jC59uACadRHM6TSA9DdxJB/6gxOpyV+zrbqeXiFTDy9uV6bmipFDkfpAskeaDcO+7/g==",
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-14.2.26.tgz",
|
||||
"integrity": "sha512-zDJY8gsKEseGAxG+C2hTMT0w9Nk9N1Sk1qV7vXYz9MEiyRoF5ogQX2+vplyUMIfygnjn9/A04I6yrUTRTuRiyQ==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
@@ -245,12 +247,13 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-darwin-x64": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-14.2.30.tgz",
|
||||
"integrity": "sha512-TyO7Wz1IKE2kGv8dwQ0bmPL3s44EKVencOqwIY69myoS3rdpO1NPg5xPM5ymKu7nfX4oYJrpMxv8G9iqLsnL4A==",
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-14.2.26.tgz",
|
||||
"integrity": "sha512-U0adH5ryLfmTDkahLwG9sUQG2L0a9rYux8crQeC92rPhi3jGQEY47nByQHrVrt3prZigadwj/2HZ1LUUimuSbg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
@@ -260,12 +263,13 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-arm64-gnu": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-14.2.30.tgz",
|
||||
"integrity": "sha512-I5lg1fgPJ7I5dk6mr3qCH1hJYKJu1FsfKSiTKoYwcuUf53HWTrEkwmMI0t5ojFKeA6Vu+SfT2zVy5NS0QLXV4Q==",
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-14.2.26.tgz",
|
||||
"integrity": "sha512-SINMl1I7UhfHGM7SoRiw0AbwnLEMUnJ/3XXVmhyptzriHbWvPPbbm0OEVG24uUKhuS1t0nvN/DBvm5kz6ZIqpg==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
@@ -275,12 +279,13 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-arm64-musl": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-14.2.30.tgz",
|
||||
"integrity": "sha512-8GkNA+sLclQyxgzCDs2/2GSwBc92QLMrmYAmoP2xehe5MUKBLB2cgo34Yu242L1siSkwQkiV4YLdCnjwc/Micw==",
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-14.2.26.tgz",
|
||||
"integrity": "sha512-s6JaezoyJK2DxrwHWxLWtJKlqKqTdi/zaYigDXUJ/gmx/72CrzdVZfMvUc6VqnZ7YEvRijvYo+0o4Z9DencduA==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
@@ -290,12 +295,13 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-x64-gnu": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-14.2.30.tgz",
|
||||
"integrity": "sha512-8Ly7okjssLuBoe8qaRCcjGtcMsv79hwzn/63wNeIkzJVFVX06h5S737XNr7DZwlsbTBDOyI6qbL2BJB5n6TV/w==",
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-14.2.26.tgz",
|
||||
"integrity": "sha512-FEXeUQi8/pLr/XI0hKbe0tgbLmHFRhgXOUiPScz2hk0hSmbGiU8aUqVslj/6C6KA38RzXnWoJXo4FMo6aBxjzg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
@@ -305,12 +311,13 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-x64-musl": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-14.2.30.tgz",
|
||||
"integrity": "sha512-dBmV1lLNeX4mR7uI7KNVHsGQU+OgTG5RGFPi3tBJpsKPvOPtg9poyav/BYWrB3GPQL4dW5YGGgalwZ79WukbKQ==",
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-14.2.26.tgz",
|
||||
"integrity": "sha512-BUsomaO4d2DuXhXhgQCVt2jjX4B4/Thts8nDoIruEJkhE5ifeQFtvW5c9JkdOtYvE5p2G0hcwQ0UbRaQmQwaVg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
@@ -320,12 +327,13 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-win32-arm64-msvc": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-14.2.30.tgz",
|
||||
"integrity": "sha512-6MMHi2Qc1Gkq+4YLXAgbYslE1f9zMGBikKMdmQRHXjkGPot1JY3n5/Qrbg40Uvbi8//wYnydPnyvNhI1DMUW1g==",
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-14.2.26.tgz",
|
||||
"integrity": "sha512-5auwsMVzT7wbB2CZXQxDctpWbdEnEW/e66DyXO1DcgHxIyhP06awu+rHKshZE+lPLIGiwtjo7bsyeuubewwxMw==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
@@ -335,12 +343,13 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-win32-ia32-msvc": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-ia32-msvc/-/swc-win32-ia32-msvc-14.2.30.tgz",
|
||||
"integrity": "sha512-pVZMnFok5qEX4RT59mK2hEVtJX+XFfak+/rjHpyFh7juiT52r177bfFKhnlafm0UOSldhXjj32b+LZIOdswGTg==",
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-ia32-msvc/-/swc-win32-ia32-msvc-14.2.26.tgz",
|
||||
"integrity": "sha512-GQWg/Vbz9zUGi9X80lOeGsz1rMH/MtFO/XqigDznhhhTfDlDoynCM6982mPCbSlxJ/aveZcKtTlwfAjwhyxDpg==",
|
||||
"cpu": [
|
||||
"ia32"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
@@ -350,12 +359,13 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-win32-x64-msvc": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-14.2.30.tgz",
|
||||
"integrity": "sha512-4KCo8hMZXMjpTzs3HOqOGYYwAXymXIy7PEPAXNEcEOyKqkjiDlECumrWziy+JEF0Oi4ILHGxzgQ3YiMGG2t/Lg==",
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-14.2.26.tgz",
|
||||
"integrity": "sha512-2rdB3T1/Gp7bv1eQTTm9d1Y1sv9UuJ2LAwOE0Pe2prHKe32UNscj7YS13fRB37d0GAiGNR+Y7ZcW8YjDI8Ns0w==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
@@ -610,10 +620,11 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@typescript-eslint/typescript-estree/node_modules/brace-expansion": {
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz",
|
||||
"integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==",
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
|
||||
"integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0"
|
||||
}
|
||||
@@ -1165,13 +1176,13 @@
|
||||
}
|
||||
},
|
||||
"node_modules/axios": {
|
||||
"version": "1.11.0",
|
||||
"resolved": "https://registry.npmjs.org/axios/-/axios-1.11.0.tgz",
|
||||
"integrity": "sha512-1Lx3WLFQWm3ooKDYZD1eXmoGO9fxYQjrycfHFC8P0sCfQVXyROp0p9PFWBehewBOdCwHc+f/b8I0fMto5eSfwA==",
|
||||
"version": "1.8.4",
|
||||
"resolved": "https://registry.npmjs.org/axios/-/axios-1.8.4.tgz",
|
||||
"integrity": "sha512-eBSYY4Y68NNlHbHBMdeDmKNtDgXWhQsJcGqzO3iLUM0GraQFSS9cVgPX5I9b3lbdFKyYoAEGAZF1DwhTaljNAw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"follow-redirects": "^1.15.6",
|
||||
"form-data": "^4.0.4",
|
||||
"form-data": "^4.0.0",
|
||||
"proxy-from-env": "^1.1.0"
|
||||
}
|
||||
},
|
||||
@@ -1213,10 +1224,11 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/brace-expansion": {
|
||||
"version": "1.1.12",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
|
||||
"integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
|
||||
"version": "1.1.11",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
|
||||
"integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0",
|
||||
"concat-map": "0.0.1"
|
||||
@@ -2436,15 +2448,14 @@
|
||||
}
|
||||
},
|
||||
"node_modules/form-data": {
|
||||
"version": "4.0.4",
|
||||
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.4.tgz",
|
||||
"integrity": "sha512-KrGhL9Q4zjj0kiUt5OO4Mr/A/jlI2jDYs5eHBpYHPcBEVSiipAvn2Ko2HnPe20rmcuuvMHNdZFp+4IlGTMF0Ow==",
|
||||
"version": "4.0.2",
|
||||
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.2.tgz",
|
||||
"integrity": "sha512-hGfm/slu0ZabnNt4oaRZ6uREyfCj6P4fT/n6A1rGV+Z0VdGXjfOhVUpkn6qVQONHGIFwmveGXyDs75+nr6FM8w==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"asynckit": "^0.4.0",
|
||||
"combined-stream": "^1.0.8",
|
||||
"es-set-tostringtag": "^2.1.0",
|
||||
"hasown": "^2.0.2",
|
||||
"mime-types": "^2.1.12"
|
||||
},
|
||||
"engines": {
|
||||
@@ -2603,10 +2614,11 @@
|
||||
}
|
||||
},
|
||||
"node_modules/glob/node_modules/brace-expansion": {
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz",
|
||||
"integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==",
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
|
||||
"integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0"
|
||||
}
|
||||
@@ -3601,11 +3613,12 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/next": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/next/-/next-14.2.30.tgz",
|
||||
"integrity": "sha512-+COdu6HQrHHFQ1S/8BBsCag61jZacmvbuL2avHvQFbWa2Ox7bE+d8FyNgxRLjXQ5wtPyQwEmk85js/AuaG2Sbg==",
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/next/-/next-14.2.26.tgz",
|
||||
"integrity": "sha512-b81XSLihMwCfwiUVRRja3LphLo4uBBMZEzBBWMaISbKTwOmq3wPknIETy/8000tr7Gq4WmbuFYPS7jOYIf+ZJw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@next/env": "14.2.30",
|
||||
"@next/env": "14.2.26",
|
||||
"@swc/helpers": "0.5.5",
|
||||
"busboy": "1.6.0",
|
||||
"caniuse-lite": "^1.0.30001579",
|
||||
@@ -3620,15 +3633,15 @@
|
||||
"node": ">=18.17.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@next/swc-darwin-arm64": "14.2.30",
|
||||
"@next/swc-darwin-x64": "14.2.30",
|
||||
"@next/swc-linux-arm64-gnu": "14.2.30",
|
||||
"@next/swc-linux-arm64-musl": "14.2.30",
|
||||
"@next/swc-linux-x64-gnu": "14.2.30",
|
||||
"@next/swc-linux-x64-musl": "14.2.30",
|
||||
"@next/swc-win32-arm64-msvc": "14.2.30",
|
||||
"@next/swc-win32-ia32-msvc": "14.2.30",
|
||||
"@next/swc-win32-x64-msvc": "14.2.30"
|
||||
"@next/swc-darwin-arm64": "14.2.26",
|
||||
"@next/swc-darwin-x64": "14.2.26",
|
||||
"@next/swc-linux-arm64-gnu": "14.2.26",
|
||||
"@next/swc-linux-arm64-musl": "14.2.26",
|
||||
"@next/swc-linux-x64-gnu": "14.2.26",
|
||||
"@next/swc-linux-x64-musl": "14.2.26",
|
||||
"@next/swc-win32-arm64-msvc": "14.2.26",
|
||||
"@next/swc-win32-ia32-msvc": "14.2.26",
|
||||
"@next/swc-win32-x64-msvc": "14.2.26"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@opentelemetry/api": "^1.1.0",
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"lint": "next lint"
|
||||
},
|
||||
"dependencies": {
|
||||
"axios": "^1.11.0",
|
||||
"axios": "^1.6.0",
|
||||
"next": "^14.0.0",
|
||||
"pino": "^8.15.0",
|
||||
"react": "^18.2.0",
|
||||
|
||||
@@ -103,7 +103,7 @@ export default async function handler(req, res) {
|
||||
const sip_config = {
|
||||
display_name: From,
|
||||
sip_mode: 'dial-in',
|
||||
num_endpoints: (call_transfer !== undefined && call_transfer !== null) ? 2 : 1,
|
||||
num_endpoints: call_transfer !== null ? 2 : 1,
|
||||
codecs: {"audio": ["OPUS"]},
|
||||
};
|
||||
daily_room_properties.sip = sip_config;
|
||||
|
||||
@@ -67,8 +67,10 @@ async def main(transport: DailyTransport):
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@@ -90,7 +92,7 @@ async def main(transport: DailyTransport):
|
||||
logger.info("Participant left: {}", participant)
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=False, force_gc=True)
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
@@ -44,7 +44,7 @@ Try the hosted version of the demo here: https://pcc-smart-turn.vercel.app/.
|
||||
4. Run the server:
|
||||
|
||||
```bash
|
||||
LOCAL_RUN=1 python server.py
|
||||
LOCAL=1 python server.py
|
||||
```
|
||||
|
||||
### Run the client
|
||||
|
||||
1281
examples/fal-smart-turn/client/package-lock.json
generated
1281
examples/fal-smart-turn/client/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -9,9 +9,9 @@
|
||||
"lint": "next lint"
|
||||
},
|
||||
"dependencies": {
|
||||
"@pipecat-ai/client-js": "^1.0.0",
|
||||
"@pipecat-ai/client-react": "^1.0.0",
|
||||
"@pipecat-ai/daily-transport": "^1.0.0",
|
||||
"@pipecat-ai/client-js": "^0.3.5",
|
||||
"@pipecat-ai/client-react": "^0.3.5",
|
||||
"@pipecat-ai/daily-transport": "^0.3.10",
|
||||
"next": "15.3.1",
|
||||
"react": "^19.0.0",
|
||||
"react-dom": "^19.0.0"
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import './globals.css';
|
||||
import { PipecatProvider } from '@/providers/PipecatProvider';
|
||||
import { RTVIProvider } from '@/providers/RTVIProvider';
|
||||
|
||||
export const metadata = {
|
||||
title: 'Pipecat React Client',
|
||||
@@ -20,7 +20,7 @@ export default function RootLayout({
|
||||
<link rel="icon" href="/favicon.svg" type="image/svg+xml" />
|
||||
</head>
|
||||
<body>
|
||||
<PipecatProvider>{children}</PipecatProvider>
|
||||
<RTVIProvider>{children}</RTVIProvider>
|
||||
</body>
|
||||
</html>
|
||||
);
|
||||
|
||||
@@ -1,22 +1,22 @@
|
||||
'use client';
|
||||
|
||||
import {
|
||||
PipecatClientAudio,
|
||||
PipecatClientVideo,
|
||||
usePipecatClientTransportState,
|
||||
RTVIClientAudio,
|
||||
RTVIClientVideo,
|
||||
useRTVIClientTransportState,
|
||||
} from '@pipecat-ai/client-react';
|
||||
import { ConnectButton } from '../components/ConnectButton';
|
||||
import { StatusDisplay } from '../components/StatusDisplay';
|
||||
import { DebugDisplay } from '../components/DebugDisplay';
|
||||
|
||||
function BotVideo() {
|
||||
const transportState = usePipecatClientTransportState();
|
||||
const transportState = useRTVIClientTransportState();
|
||||
const isConnected = transportState !== 'disconnected';
|
||||
|
||||
return (
|
||||
<div className="bot-container">
|
||||
<div className="video-container">
|
||||
{isConnected && <PipecatClientVideo participant="bot" fit="cover" />}
|
||||
{isConnected && <RTVIClientVideo participant="bot" fit="cover" />}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
@@ -35,7 +35,7 @@ export default function Home() {
|
||||
</div>
|
||||
|
||||
<DebugDisplay />
|
||||
<PipecatClientAudio />
|
||||
<RTVIClientAudio />
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1,17 +1,11 @@
|
||||
import {
|
||||
usePipecatClient,
|
||||
usePipecatClientTransportState,
|
||||
useRTVIClient,
|
||||
useRTVIClientTransportState,
|
||||
} from '@pipecat-ai/client-react';
|
||||
|
||||
// Get the API base URL from environment variables
|
||||
// Default to "/api" if not specified
|
||||
// "/api" is the default for Next.js API routes and used
|
||||
// for the Pipecat Cloud deployed agent
|
||||
const API_BASE_URL = process.env.NEXT_PUBLIC_API_BASE_URL || '/api';
|
||||
|
||||
export function ConnectButton() {
|
||||
const client = usePipecatClient();
|
||||
const transportState = usePipecatClientTransportState();
|
||||
const client = useRTVIClient();
|
||||
const transportState = useRTVIClientTransportState();
|
||||
const isConnected = ['connected', 'ready'].includes(transportState);
|
||||
|
||||
const handleClick = async () => {
|
||||
@@ -24,10 +18,7 @@ export function ConnectButton() {
|
||||
if (isConnected) {
|
||||
await client.disconnect();
|
||||
} else {
|
||||
await client.connect({
|
||||
endpoint: `${API_BASE_URL}/connect`,
|
||||
requestData: { foo: 'bar' },
|
||||
});
|
||||
await client.connect();
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Connection error:', error);
|
||||
|
||||
@@ -6,7 +6,7 @@ import {
|
||||
TranscriptData,
|
||||
BotLLMTextData,
|
||||
} from '@pipecat-ai/client-js';
|
||||
import { usePipecatClient, useRTVIClientEvent } from '@pipecat-ai/client-react';
|
||||
import { useRTVIClient, useRTVIClientEvent } from '@pipecat-ai/client-react';
|
||||
import './DebugDisplay.css';
|
||||
|
||||
interface SmartTurnResultData {
|
||||
@@ -20,7 +20,7 @@ interface SmartTurnResultData {
|
||||
|
||||
export function DebugDisplay() {
|
||||
const debugLogRef = useRef<HTMLDivElement>(null);
|
||||
const client = usePipecatClient();
|
||||
const client = useRTVIClient();
|
||||
|
||||
const log = useCallback((message: string) => {
|
||||
if (!debugLogRef.current) return;
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { usePipecatClientTransportState } from '@pipecat-ai/client-react';
|
||||
import { useRTVIClientTransportState } from '@pipecat-ai/client-react';
|
||||
|
||||
export function StatusDisplay() {
|
||||
const transportState = usePipecatClientTransportState();
|
||||
const transportState = useRTVIClientTransportState();
|
||||
|
||||
return (
|
||||
<div className="status">
|
||||
|
||||
@@ -1,28 +0,0 @@
|
||||
'use client';
|
||||
|
||||
import { PipecatClient } from '@pipecat-ai/client-js';
|
||||
import { DailyTransport } from '@pipecat-ai/daily-transport';
|
||||
import { PipecatClientProvider } from '@pipecat-ai/client-react';
|
||||
import { PropsWithChildren, useEffect, useState } from 'react';
|
||||
|
||||
export function PipecatProvider({ children }: PropsWithChildren) {
|
||||
const [client, setClient] = useState<PipecatClient | null>(null);
|
||||
|
||||
useEffect(() => {
|
||||
const pcClient = new PipecatClient({
|
||||
transport: new DailyTransport(),
|
||||
enableMic: true,
|
||||
enableCam: false,
|
||||
});
|
||||
|
||||
setClient(pcClient);
|
||||
}, []);
|
||||
|
||||
if (!client) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return (
|
||||
<PipecatClientProvider client={client}>{children}</PipecatClientProvider>
|
||||
);
|
||||
}
|
||||
@@ -0,0 +1,43 @@
|
||||
'use client';
|
||||
|
||||
import { RTVIClient } from '@pipecat-ai/client-js';
|
||||
import { DailyTransport } from '@pipecat-ai/daily-transport';
|
||||
import { RTVIClientProvider } from '@pipecat-ai/client-react';
|
||||
import { PropsWithChildren, useEffect, useState } from 'react';
|
||||
|
||||
// Get the API base URL from environment variables
|
||||
// Default to "/api" if not specified
|
||||
// "/api" is the default for Next.js API routes and used
|
||||
// for the Pipecat Cloud deployed agent
|
||||
const API_BASE_URL = process.env.NEXT_PUBLIC_API_BASE_URL || '/api';
|
||||
|
||||
console.log('Using API base URL:', API_BASE_URL);
|
||||
|
||||
export function RTVIProvider({ children }: PropsWithChildren) {
|
||||
const [client, setClient] = useState<RTVIClient | null>(null);
|
||||
|
||||
useEffect(() => {
|
||||
const transport = new DailyTransport();
|
||||
|
||||
const rtviClient = new RTVIClient({
|
||||
transport,
|
||||
params: {
|
||||
baseUrl: API_BASE_URL,
|
||||
endpoints: {
|
||||
connect: '/connect',
|
||||
},
|
||||
requestData: { foo: 'bar' },
|
||||
},
|
||||
enableMic: true,
|
||||
enableCam: false,
|
||||
});
|
||||
|
||||
setClient(rtviClient);
|
||||
}, []);
|
||||
|
||||
if (!client) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return <RTVIClientProvider client={client}>{children}</RTVIClientProvider>;
|
||||
}
|
||||
@@ -45,7 +45,7 @@ from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
load_dotenv(override=True)
|
||||
|
||||
# Check if we're in local development mode
|
||||
LOCAL = os.getenv("LOCAL_RUN")
|
||||
LOCAL = os.getenv("LOCAL")
|
||||
|
||||
logger.remove()
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
@@ -192,6 +192,7 @@ async def main(transport: DailyTransport):
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
|
||||
@@ -55,6 +55,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -56,6 +56,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -53,6 +53,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -77,36 +77,37 @@ async def configure_livekit():
|
||||
|
||||
|
||||
async def main():
|
||||
(url, token, room_name) = await configure_livekit()
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(url, token, room_name) = await configure_livekit()
|
||||
|
||||
transport = LiveKitTransport(
|
||||
url=url,
|
||||
token=token,
|
||||
room_name=room_name,
|
||||
params=LiveKitParams(audio_out_enabled=True),
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
task = PipelineTask(Pipeline([tts, transport.output()]))
|
||||
|
||||
# Register an event handler so we can play the audio when the
|
||||
# participant joins.
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant_id):
|
||||
await asyncio.sleep(1)
|
||||
await task.queue_frame(
|
||||
TextFrame(
|
||||
"Hello there! How are you doing today? Would you like to talk about the weather?"
|
||||
)
|
||||
transport = LiveKitTransport(
|
||||
url=url,
|
||||
token=token,
|
||||
room_name=room_name,
|
||||
params=LiveKitParams(audio_out_enabled=True),
|
||||
)
|
||||
|
||||
await runner.run(task)
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
task = PipelineTask(Pipeline([tts, transport.output()]))
|
||||
|
||||
# Register an event handler so we can play the audio when the
|
||||
# participant joins.
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant_id):
|
||||
await asyncio.sleep(1)
|
||||
await task.queue_frame(
|
||||
TextFrame(
|
||||
"Hello there! How are you doing today? Would you like to talk about the weather?"
|
||||
)
|
||||
)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -50,6 +50,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -63,6 +63,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -68,6 +68,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -47,10 +47,7 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
task = PipelineTask(
|
||||
Pipeline([imagegen, transport.output()]),
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
params=PipelineParams(enable_metrics=True),
|
||||
)
|
||||
|
||||
# Register an event handler so we can play the audio when the client joins
|
||||
@@ -71,6 +68,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -93,8 +93,10 @@ async def run_example(webrtc_connection: SmallWebRTCConnection):
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@@ -154,7 +156,7 @@ async def offer(request: dict, background_tasks: BackgroundTasks):
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
yield # Run app
|
||||
coros = [pc.disconnect() for pc in pcs_map.values()]
|
||||
coros = [pc.close() for pc in pcs_map.values()]
|
||||
await asyncio.gather(*coros)
|
||||
pcs_map.clear()
|
||||
|
||||
|
||||
@@ -9,18 +9,18 @@ import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from daily_runner import configure
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.examples.daily_runner import configure
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyLogLevel, DailyParams, DailyTransport
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -43,7 +43,6 @@ async def main():
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
transport.set_log_level(DailyLogLevel.Info)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
@@ -76,8 +75,10 @@ async def main():
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -158,8 +158,7 @@ async def main():
|
||||
],
|
||||
),
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
allow_interruptions=True, enable_metrics=True, enable_usage_metrics=True
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -174,6 +174,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -137,6 +137,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -133,8 +133,10 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@@ -154,6 +156,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -84,8 +84,10 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@@ -107,6 +109,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -83,8 +83,10 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@@ -106,6 +108,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -1,153 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
|
||||
from pipecat.services.openai.base_llm import BaseOpenAILLMService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.services.speechmatics.stt import SpeechmaticsSTTService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool):
|
||||
"""Run example using Speechmatics STT.
|
||||
|
||||
This example will use diarization within our STT service and output the words spoken by
|
||||
each individual speaker and wrap them with XML tags for the LLM to process. Note the
|
||||
instructions in the system context for the LLM. This greatly improves the conversation
|
||||
experience by allowing the LLM to understand who is speaking in a multi-party call.
|
||||
|
||||
If you do not wish to use diarization, then set the `enable_speaker_diarization` parameter
|
||||
to `False` or omit it altogether. The `text_format` will only be used if diarization is enabled.
|
||||
|
||||
By default, this example will use our ENHANCED operating point, which is optimized for
|
||||
high accuracy. You can change this by setting the `operating_point` parameter to a different
|
||||
value.
|
||||
|
||||
For more information on operating points, see the Speechmatics documentation:
|
||||
https://docs.speechmatics.com/rt-api-ref
|
||||
"""
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = SpeechmaticsSTTService(
|
||||
api_key=os.getenv("SPEECHMATICS_API_KEY"),
|
||||
language=Language.EN,
|
||||
enable_speaker_diarization=True,
|
||||
text_format="<{speaker_id}>{text}</{speaker_id}>",
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY", ""),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID", ""),
|
||||
model="eleven_turbo_v2_5",
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
params=BaseOpenAILLMService.InputParams(temperature=0.75),
|
||||
)
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": (
|
||||
"You are a helpful British assistant called Alfred. "
|
||||
"Your goal is to demonstrate your capabilities in a succinct way. "
|
||||
"Your output will be converted to audio so don't include special characters in your answers. "
|
||||
"Always include punctuation in your responses. "
|
||||
"Give very short replies - do not give longer replies unless strictly necessary. "
|
||||
"Respond to what the user said in a concise, funny, creative and helpful way. "
|
||||
"Use `<Sn/>` tags to identify different speakers - do not use tags in your replies."
|
||||
),
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(aggregation_timeout=0.005),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Say a short hello to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
@@ -1,109 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.services.soniox.stt import SonioxSTTService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = SonioxSTTService(
|
||||
api_key=os.getenv("SONIOX_API_KEY"),
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt,
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
@@ -113,8 +113,10 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@@ -139,6 +141,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -87,8 +87,10 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@@ -118,6 +120,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -81,8 +81,10 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@@ -104,6 +106,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -35,7 +35,7 @@ transport_params = {
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
"twilio": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
@@ -88,8 +88,10 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@@ -111,6 +113,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -84,8 +84,10 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@@ -107,6 +109,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -84,8 +84,10 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@@ -107,6 +109,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -86,8 +86,10 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@@ -109,6 +111,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -90,8 +90,10 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@@ -113,6 +115,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -84,9 +84,11 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
audio_out_sample_rate=24000,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@@ -108,6 +110,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -89,8 +89,10 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@@ -112,6 +114,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -87,8 +87,10 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@@ -110,6 +112,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -92,8 +92,10 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@@ -114,6 +116,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -80,8 +80,10 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@@ -103,6 +105,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -85,6 +85,7 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
@@ -108,6 +109,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -87,8 +87,10 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@@ -110,6 +112,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -61,12 +61,7 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
credentials=os.getenv("GOOGLE_TEST_CREDENTIALS"),
|
||||
)
|
||||
|
||||
llm = GoogleLLMService(
|
||||
api_key=os.getenv("GOOGLE_API_KEY"),
|
||||
model="gemini-2.5-flash",
|
||||
# turn on thinking if you want it
|
||||
# params=GoogleLLMService.InputParams(extra={"thinking_config": {"thinking_budget": 4096}}),)
|
||||
)
|
||||
llm = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
@@ -93,8 +88,10 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@@ -116,6 +113,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -86,8 +86,10 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@@ -109,6 +111,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -84,8 +84,10 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@@ -107,6 +109,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -89,8 +89,10 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@@ -112,6 +114,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -83,8 +83,10 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@@ -106,6 +108,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -80,8 +80,10 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@@ -103,6 +105,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -8,8 +8,8 @@ import argparse
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
|
||||
import google.ai.generativelanguage as glm
|
||||
from dotenv import load_dotenv
|
||||
from google.genai.types import Content, Part
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
@@ -164,7 +164,9 @@ class TanscriptionContextFixup(FrameProcessor):
|
||||
and last_part.inline_data
|
||||
and last_part.inline_data.mime_type == "audio/wav"
|
||||
):
|
||||
self._context.messages[-2] = Content(role="user", parts=[Part(text=self._transcript)])
|
||||
self._context.messages[-2] = glm.Content(
|
||||
role="user", parts=[glm.Part(text=self._transcript)]
|
||||
)
|
||||
|
||||
def add_transcript_back_to_inference_output(self):
|
||||
if not self._transcript:
|
||||
@@ -214,12 +216,7 @@ transport_params = {
|
||||
async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
llm = GoogleLLMService(
|
||||
api_key=os.getenv("GOOGLE_API_KEY"),
|
||||
model="gemini-2.5-flash",
|
||||
# turn on thinking if you want it
|
||||
# params=GoogleLLMService.InputParams(extra={"thinking_config": {"thinking_budget": 4096}}),
|
||||
)
|
||||
llm = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY"), model="gemini-2.0-flash-001")
|
||||
|
||||
tts = GoogleTTSService(
|
||||
voice_id="en-US-Chirp3-HD-Charon",
|
||||
@@ -261,6 +258,7 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
@@ -284,6 +282,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -84,8 +84,10 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@@ -107,6 +109,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -77,8 +77,8 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
)
|
||||
|
||||
@@ -97,6 +97,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -7,7 +7,6 @@
|
||||
import argparse
|
||||
import os
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
@@ -51,66 +50,65 @@ transport_params = {
|
||||
async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
# Create an HTTP session
|
||||
async with aiohttp.ClientSession() as session:
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = NeuphonicHttpTTSService(
|
||||
api_key=os.getenv("NEUPHONIC_API_KEY"),
|
||||
voice_id="fc854436-2dac-4d21-aa69-ae17b54e98eb", # Emily
|
||||
aiohttp_session=session,
|
||||
)
|
||||
tts = NeuphonicHttpTTSService(
|
||||
api_key=os.getenv("NEUPHONIC_API_KEY"),
|
||||
voice_id="fc854436-2dac-4d21-aa69-ae17b54e98eb", # Emily
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt,
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt,
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
)
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
runner = PipelineRunner(handle_sigint=handle_sigint)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -83,8 +83,10 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@@ -106,6 +108,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -86,8 +86,10 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@@ -109,6 +111,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -70,8 +70,10 @@ async def main():
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -90,8 +90,10 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@@ -113,6 +115,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -89,8 +89,10 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@@ -112,6 +114,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -96,6 +96,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -11,7 +11,6 @@ import tkinter as tk
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.examples.run import maybe_capture_participant_camera
|
||||
from pipecat.frames.frames import (
|
||||
Frame,
|
||||
InputAudioRawFrame,
|
||||
@@ -108,7 +107,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
await maybe_capture_participant_camera(transport, client, framerate=30)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
@@ -121,6 +119,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -85,13 +85,7 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
)
|
||||
task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
@@ -110,6 +104,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -162,6 +162,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -10,9 +10,9 @@ from typing import Optional
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from run import get_transport_client_id, maybe_capture_participant_video
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.examples.run import get_transport_client_id, maybe_capture_participant_camera
|
||||
from pipecat.frames.frames import Frame, TextFrame, UserImageRequestFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -105,7 +105,7 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected: {client}")
|
||||
|
||||
await maybe_capture_participant_camera(transport, client)
|
||||
await maybe_capture_participant_video(transport, client)
|
||||
|
||||
# Set the participant ID in the image requester
|
||||
client_id = get_transport_client_id(transport, client)
|
||||
@@ -125,6 +125,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
@@ -10,9 +10,9 @@ from typing import Optional
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from run import get_transport_client_id, maybe_capture_participant_video
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.examples.run import get_transport_client_id, maybe_capture_participant_camera
|
||||
from pipecat.frames.frames import Frame, TextFrame, UserImageRequestFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -101,17 +101,14 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
params=PipelineParams(allow_interruptions=True),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected: {client}")
|
||||
|
||||
await maybe_capture_participant_camera(transport, client)
|
||||
await maybe_capture_participant_video(transport, client)
|
||||
|
||||
# Set the participant ID in the image requester
|
||||
client_id = get_transport_client_id(transport, client)
|
||||
@@ -131,6 +128,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
from run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user