Compare commits

..

8 Commits

Author SHA1 Message Date
mattie ruth backman
7742d1a83b Add error handling for unsupported files 2026-03-18 15:49:48 -04:00
mattie ruth backman
d9cebe602f Add new FileSourceType for 'id' and use that for local uploads, prefixed with 'pipecat:' 2026-03-18 15:49:48 -04:00
mattie ruth backman
96e06d2401 Update /files/ upload response to match RTVI format, rather than inventing a new one 2026-03-18 15:49:48 -04:00
mattie ruth backman
267c86e596 support RTVI files uploads larger than the transport can handle
This PR introduces:
1. a new /files/ POST endpoint in the local runner that supports
   uploading a file to a folder that must be provided at runtime
2. By default, the runner will allow a maximum of 10 files to be
   saved
3. Added logic to the send-file handler in RTVI to read a file
   from disk if the file provided is a URL starting with '/files/'
2026-03-18 15:49:48 -04:00
mattie ruth backman
9fb06c3e4b Update File upload RTVI messages and frames to use mime-type as the format 2026-03-18 15:49:48 -04:00
mattie ruth backman
71197fbc2c Support files provided via url 2026-03-18 15:49:48 -04:00
mattie ruth backman
9cd4e5faca Support generic files (openai so far) 2026-03-18 15:49:48 -04:00
mattie ruth backman
4f290be834 Initial commit: Introducing RTVI support for files
This commit introduces the types for all RTVI file messaging and full
support for sending images as byte strings
2026-03-18 15:49:48 -04:00
640 changed files with 20438 additions and 18285 deletions

View File

@@ -144,7 +144,7 @@ class InputParams(BaseModel):
#### Examples
Validated against `examples/07-interruptible.py`:
Validated against `examples/foundational/07-interruptible.py`:
- Proper `create_transport()` usage
- Correct pipeline structure

View File

@@ -42,7 +42,7 @@ jobs:
- name: Test uv sync with all extras
run: |
uv sync --group dev --all-extras
uv sync --group dev --all-extras --no-extra krisp
- name: Verify installation
run: |

51
.github/workflows/sync-quickstart.yaml vendored Normal file
View File

@@ -0,0 +1,51 @@
name: Sync Quickstart to pipecat-quickstart repo
on:
push:
branches: [main]
paths:
- 'examples/quickstart/**'
workflow_dispatch: # Manual trigger
jobs:
sync-quickstart:
runs-on: ubuntu-latest
steps:
- name: Checkout main repo
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Checkout quickstart repo
uses: actions/checkout@v4
with:
repository: pipecat-ai/pipecat-quickstart
token: ${{ secrets.QUICKSTART_SYNC_TOKEN }}
path: quickstart-repo
- name: Sync files (excluding uv.lock and README.md)
run: |
# Copy all files except uv.lock and README.md
find examples/quickstart -type f \
-not -name "README.md" \
-not -name "uv.lock" \
-exec cp {} quickstart-repo/ \;
- name: Commit and push changes
run: |
cd quickstart-repo
git config user.name "GitHub Action"
git config user.email "action@github.com"
git add .
# Only commit if there are changes
if ! git diff --staged --quiet; then
git commit -m "Sync from pipecat main repo
Updated files from examples/quickstart/
Commit: ${{ github.sha }}
"
git push
else
echo "No changes to sync"
fi

View File

@@ -1,13 +1,8 @@
repos:
- repo: local
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.12.1
hooks:
- id: ruff
name: ruff
entry: uv run ruff check --fix
language: system
types: [python]
language_version: python3
args: [--fix]
- id: ruff-format
name: ruff-format
entry: uv run ruff format
language: system
types: [python]

View File

@@ -11,7 +11,7 @@ build:
jobs:
post_install:
- pip install uv
- UV_PROJECT_ENVIRONMENT=$READTHEDOCS_VIRTUALENV_PATH uv sync --group docs --all-extras --no-extra gstreamer --no-extra local_smart_turn --no-extra moondream --no-extra riva --no-extra mlx-whisper
- UV_PROJECT_ENVIRONMENT=$READTHEDOCS_VIRTUALENV_PATH uv sync --group docs --all-extras --no-extra krisp --no-extra gstreamer --no-extra local_smart_turn --no-extra moondream --no-extra riva --no-extra mlx-whisper
sphinx:
configuration: docs/api/conf.py

View File

@@ -7,618 +7,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
<!-- towncrier release notes start -->
## [0.0.108] - 2026-03-27
### Added
- Added `SarvamLLMService` with support for `sarvam-30b`, `sarvam-30b-16k`,
`sarvam-105b` and `sarvam-105b-32k`.
(PR [#3978](https://github.com/pipecat-ai/pipecat/pull/3978))
- Added `on_turn_context_created(context_id)` hook to `TTSService`. Override
this to perform provider-specific setup (e.g. eagerly opening a server-side
context) before text starts flowing. Called each time a new turn context ID
is created.
(PR [#4013](https://github.com/pipecat-ai/pipecat/pull/4013))
- Added `XAIHttpTTSService` for text-to-speech using xAI's HTTP TTS API.
(PR [#4031](https://github.com/pipecat-ai/pipecat/pull/4031))
- Added support for "developer" role messages in conversation context across
all LLM adapters. For non-OpenAI services (Anthropic, Google, AWS Bedrock),
"developer" messages are converted to "user" messages (use
`system_instruction` to set the system instruction). For OpenAI services,
"developer" messages pass through in conversation history. For the Responses
API, they are kept as "developer" role (matching the existing "system" →
"developer" conversion).
(PR [#4089](https://github.com/pipecat-ai/pipecat/pull/4089))
- Added `SmallestTTSService`, a WebSocket-based TTS service integration with
Smallest AI's Waves API. Supports the Lightning v2 and v3.1 models with
configurable voice, language, speed, consistency, similarity, and enhancement
settings.
(PR [#4092](https://github.com/pipecat-ai/pipecat/pull/4092))
- Added warnings in turn stop strategies when `VADParams.stop_secs` differs
from the recommended default (0.2s) or when `stop_secs >= STT p99 latency`,
which collapses the STT wait timeout to 0s and may cause delayed turn
detection. The warnings guide developers to re-run the
[stt-benchmark](https://github.com/pipecat-ai/stt-benchmark) with their VAD
settings.
(PR [#4115](https://github.com/pipecat-ai/pipecat/pull/4115))
- Added `domain` parameter to `AssemblyAISTTSettings` for specialized
recognition modes such as Medical Mode (`domain="medical-v1"`).
(PR [#4117](https://github.com/pipecat-ai/pipecat/pull/4117))
- Added `NovitaLLMService` for using Novita AI's LLM models via their
OpenAI-compatible API.
(PR [#4119](https://github.com/pipecat-ai/pipecat/pull/4119))
- Added `cleanup()` method to `VADAnalyzer` and `VADController` so VAD analyzer
resources are properly released when no longer needed. Custom `VADAnalyzer`
subclasses can override `cleanup()` to free any held resources.
(PR [#4120](https://github.com/pipecat-ai/pipecat/pull/4120))
- Added `on_end_of_turn` event handler to `AssemblyAISTTService`. This fires
after the final transcript is pushed, providing a reliable hook for
end-of-turn logic that doesn't race with `TranscriptionFrame`. Works in both
Pipecat and AssemblyAI turn detection modes.
(PR [#4128](https://github.com/pipecat-ai/pipecat/pull/4128))
- Added `DeepgramFluxSageMakerSTTService` for running Deepgram Flux
speech-to-text on AWS SageMaker endpoints. Use with
`ExternalUserTurnStrategies` to take advantage of Flux's turn detection.
(PR [#4143](https://github.com/pipecat-ai/pipecat/pull/4143))
- Added `Mem0MemoryService.get_memories()` convenience method for retrieving
all stored memories outside the pipeline (e.g. to build a personalized
greeting at connection time). This avoids the need to manually handle client
type branching, filter construction, and async wrapping.
(PR [#4156](https://github.com/pipecat-ai/pipecat/pull/4156))
### Changed
- Added context prewarming path for `InworldTTSService` to improve first audio
latency.
(PR [#4013](https://github.com/pipecat-ai/pipecat/pull/4013))
- Added `KrispVivaVadAnalyzer` for Voice Activity Detection using the Krisp
VIVA SDK (requires `krisp_audio`).
(PR [#4022](https://github.com/pipecat-ai/pipecat/pull/4022))
- Modified `InworldTTSService` to close context at end of turn instead of
relying on idle timeout.
(PR [#4028](https://github.com/pipecat-ai/pipecat/pull/4028))
- Added Gemini 3 support to the Gemini Live service.
(PR [#4078](https://github.com/pipecat-ai/pipecat/pull/4078))
- `TTSService`: the default `stop_frame_timeout_s` (idle time before an
automatic `TTSStoppedFrame` is pushed when `push_stop_frames=True`) has
changed from `2.0` to `3.0` seconds.
(PR [#4084](https://github.com/pipecat-ai/pipecat/pull/4084))
- ⚠️ `GeminiLLMAdapter` now only treats `messages[0]` as the initial system
message, matching all other adapters. Previously it searched for the first
"system" message anywhere in the conversation history. A "system" message
appearing later in the list will now be converted to "user" instead of being
extracted as the system instruction.
(PR [#4089](https://github.com/pipecat-ai/pipecat/pull/4089))
- Fixed `InworldTTSService` to fall back to full text when TTS timestamps are
not received.
(PR [#4113](https://github.com/pipecat-ai/pipecat/pull/4113))
- ⚠️ Realtime services (Gemini Live, OpenAI Realtime, Grok Realtime, Nova
Sonic) now prefer `system_instruction` from service settings over an initial
system message in the LLM context, matching the behavior of non-realtime
services. Previously, context-provided system instructions took precedence. A
warning is now logged when both are set.
(PR [#4130](https://github.com/pipecat-ai/pipecat/pull/4130))
- Bumped `nvidia-riva-client` minimum version to `>=2.25.1`.
(PR [#4136](https://github.com/pipecat-ai/pipecat/pull/4136))
- Upgraded `protobuf` from 5.x to 6.x (`>=6.31.1,<7`).
(PR [#4136](https://github.com/pipecat-ai/pipecat/pull/4136))
- Unrecognized language strings (e.g. Deepgram's `"multi"`) no longer produce a
warning at startup. The log message has been downgraded to debug level since
these are valid service-specific values that are passed through correctly.
(PR [#4137](https://github.com/pipecat-ai/pipecat/pull/4137))
- `GrokLLMService` and `GrokRealtimeLLMService` now live in the
`pipecat.services.xai` module alongside `XAIHttpTTSService`, since all three
use the same xAI API. Update imports from `pipecat.services.grok.*` to
`pipecat.services.xai.*` (e.g. `from pipecat.services.xai.llm import
GrokLLMService`).
(PR [#4142](https://github.com/pipecat-ai/pipecat/pull/4142))
- ⚠️ Bumped `mem0ai` dependency from `~=0.1.94` to `>=1.0.8,<2`. Users of the
`mem0` extra will need to update their mem0ai package.
(PR [#4156](https://github.com/pipecat-ai/pipecat/pull/4156))
### Deprecated
- `pipecat.services.grok.llm`, `pipecat.services.grok.realtime.llm`, and
`pipecat.services.grok.realtime.events` are deprecated. The old import paths
still work but emit a `DeprecationWarning`; use `pipecat.services.xai.llm`,
`pipecat.services.xai.realtime.llm`, and
`pipecat.services.xai.realtime.events` instead.
(PR [#4142](https://github.com/pipecat-ai/pipecat/pull/4142))
### Removed
- ⚠️ `TTSService.add_word_timestamps()` no longer supports the `"Reset"` and
`"TTSStoppedFrame"` sentinel strings. If you have a custom TTS service that
called `await self.add_word_timestamps([("Reset", 0)])` or `await
self.add_word_timestamps([("TTSStoppedFrame", 0), ("Reset", 0)], ctx_id)`,
replace them with `await self.append_to_audio_context(ctx_id,
TTSStoppedFrame(context_id=ctx_id))` and let `_handle_audio_context` manage
the word-timestamp reset automatically.
(PR [#4145](https://github.com/pipecat-ai/pipecat/pull/4145))
- Removed `SambaNovaSTTService`. SambaNova no longer offers speech-to-text
audio models. Use another STT provider instead.
(PR [#4154](https://github.com/pipecat-ai/pipecat/pull/4154))
### Fixed
- Fixed Gemini Live (`GoogleGeminiLiveLLMService`) not honoring
`settings.system_instruction`. The system instruction was being read from a
deprecated constructor parameter instead of the settings object, causing it
to be silently ignored.
(PR [#4089](https://github.com/pipecat-ai/pipecat/pull/4089))
- Fixed `AWSBedrockLLMAdapter` sending an empty message list to the API when
the only message in context was a system message. The lone system message is
now converted to "user" role instead of being extracted, matching the
existing Anthropic adapter behavior.
(PR [#4089](https://github.com/pipecat-ai/pipecat/pull/4089))
- Fixed Gemini Live pipeline hanging indefinitely when an `EndFrame` was
deferred while waiting for the bot to finish responding and `turn_complete`
never arrived. As a possible root-cause fix, `turn_complete` messages are now
handled even if they lack `usage_metadata`. As a fallback, the deferred
`EndFrame` now has a 30-second safety timeout.
(PR [#4125](https://github.com/pipecat-ai/pipecat/pull/4125))
- Fixed ElevenLabs WebSocket disconnections (1008 "Maximum simultaneous
contexts exceeded") caused by rapid user interruptions. When interruptions
arrived before any TTS text was generated, phantom contexts were created on
the ElevenLabs server that were never closed, eventually exceeding the
5-context limit.
(PR [#4126](https://github.com/pipecat-ai/pipecat/pull/4126))
- Fixed the final sentence being dropped from the conversation context when
using RTVI text input with non-word-timestamp TTS services. The
`LLMFullResponseEndFrame` was racing ahead of the last `TTSTextFrame`,
causing the `LLMAssistantAggregator` to finalize the context before the final
sentence arrived.
(PR [#4127](https://github.com/pipecat-ai/pipecat/pull/4127))
- Fixed audio crackling and popping in recordings when both user and bot are
speaking. `AudioBufferProcessor` no longer injects silence into a track's
buffer while that track is actively producing audio, preventing mid-utterance
interruptions in the recorded output.
(PR [#4135](https://github.com/pipecat-ai/pipecat/pull/4135))
- Fixed websocket TTS word timestamps so interrupted contexts cannot leak stale
words or backward PTS values into later turns.
(PR [#4145](https://github.com/pipecat-ai/pipecat/pull/4145))
- Fixed a race condition in `InterruptibleTTSService` where, if `run_tts` had
been invoked but `BotStartedSpeakingFrame` had not yet been received, a user
interruption could allow stale audio to leak through.
(PR [#4145](https://github.com/pipecat-ai/pipecat/pull/4145))
- Fixed Gemini Live local VAD mode (`GeminiVADParams(disabled=True)` with
external VAD) not working. The bot now correctly detects user speech and
signals turn boundaries to the Gemini API.
(PR [#4146](https://github.com/pipecat-ai/pipecat/pull/4146))
- Fixed Gemini Live message handling to process all `server_content` fields
independently. Gemini 3.x can bundle multiple fields (e.g. `model_turn` and
`output_transcription`) on the same message, but the previous `elif` chain
only processed the first match, silently dropping the rest.
(PR [#4147](https://github.com/pipecat-ai/pipecat/pull/4147))
- Fixed `ServiceSwitcher` with `ServiceSwitcherStrategyFailover` incorrectly
triggering failover when `ErrorFrame`s from other pipeline stages (e.g. TTS)
propagated upstream through the switcher. Previously, any non-fatal error
passing through would be misattributed to the active service and trigger an
unwanted service switch. Now only errors originating from the switcher's own
managed services trigger failover.
(PR [#4149](https://github.com/pipecat-ai/pipecat/pull/4149))
- Fixed `LiveKitOutputTransport` not clearing the `rtc.AudioSource` internal
buffer on interruption, causing the bot to continue speaking for several
seconds after being interrupted.
(PR [#4151](https://github.com/pipecat-ai/pipecat/pull/4151))
- Fixed a crash in OpenAI LLM processing when the provider returns
`chunk.choices[0].delta.audio = None`, which caused `'NoneType' object has no
attribute 'get'` errors during audio transcript handling.
(PR [#4152](https://github.com/pipecat-ai/pipecat/pull/4152))
- Fixed error floods in `DeepgramSTTService` when the WebSocket connection
drops. With Deepgram SDK 6.x, `send_media()` raises exceptions on a dead
connection instead of silently failing, causing every queued audio frame to
log an error. Now `send_media()` failures are caught gracefully — a single
warning is logged and audio frames are skipped until the existing
reconnection logic restores the connection.
(PR [#4153](https://github.com/pipecat-ai/pipecat/pull/4153))
- `Mem0MemoryService` no longer blocks the event loop during memory storage and
retrieval. All Mem0 API calls now run in a background thread, and message
storage is fire-and-forget so it doesn't delay downstream processing.
(PR [#4156](https://github.com/pipecat-ai/pipecat/pull/4156))
- Fixed `Mem0MemoryService` failing to store messages when the context
contained system or developer role messages. The Mem0 API only accepts user
and assistant roles, so other roles are now filtered out before storing.
(PR [#4156](https://github.com/pipecat-ai/pipecat/pull/4156))
- Added missing `on_dtmf_event` callback to `LemonSliceTransportClient.setup()`
`DailyCallbacks` construction, fixing a `ValidationError` at pipeline setup
time.
(PR [#4161](https://github.com/pipecat-ai/pipecat/pull/4161))
- Fixed an issue in `InworldTTSService` where, in cases of fast interruption,
we would continue receiving audio from the previous context.
(PR [#4167](https://github.com/pipecat-ai/pipecat/pull/4167))
- Fixed a word timestamp interleaving issue in `InworldTTSService` when
processing multiple sentences.
(PR [#4167](https://github.com/pipecat-ai/pipecat/pull/4167))
- Fixed duplicate `TTSStoppedFrame` being pushed in TTS services using
`push_stop_frames=True`. When the stop-frame timeout fired, a second
`TTSStoppedFrame` could be pushed after the normal one at context completion.
(PR [#4172](https://github.com/pipecat-ai/pipecat/pull/4172))
- ⚠️ Fixed `DeepgramSTTService` compatibility with deepgram-sdk 6.1.0. The SDK
now requires explicit message objects for `send_keep_alive()`,
`send_close_stream()`, and `send_finalize()`. The minimum deepgram-sdk
version is now 6.1.0.
(PR [#4174](https://github.com/pipecat-ai/pipecat/pull/4174))
- Fixed RTVI events not being delivered to clients when using WebSocket
transports. `ProtobufFrameSerializer` now sets `ignore_rtvi_messages=False`
by default.
(PR [#4176](https://github.com/pipecat-ai/pipecat/pull/4176))
- Fixed a timing issue where turn detection timer tasks (idle controller,
speech timeout, turn analyzer, and turn completion) could miss their first
tick because the newly created asyncio task was not yet scheduled when the
caller continued.
(PR [#4183](https://github.com/pipecat-ai/pipecat/pull/4183))
- Fixed `FastAPIWebsocketTransport` intermittently hanging on shutdown when the
remote side (e.g. Twilio) disconnects while audio is being sent. A race
condition between the send and receive paths could cause the
`on_client_disconnected` callback to be skipped, leaving the pipeline waiting
for a disconnect signal that never came.
(PR [#4186](https://github.com/pipecat-ai/pipecat/pull/4186))
### Performance
- `RimeTTSService` now handles Rime's `done` WebSocket message to complete
audio contexts immediately, eliminating the 3-second idle timeout that
previously added latency at the end of each utterance.
(PR [#4172](https://github.com/pipecat-ai/pipecat/pull/4172))
## [0.0.107] - 2026-03-23
### Added
- Added `frame_order` parameter to `SyncParallelPipeline`. Set
`frame_order=FrameOrder.PIPELINE` to push synchronized output frames in
pipeline definition order (all frames from the first pipeline, then the
second, etc.) instead of the default arrival order.
(PR [#4029](https://github.com/pipecat-ai/pipecat/pull/4029))
- Added `sync_with_audio` field to `OutputImageRawFrame`. When set to `True`,
the output transport queues image frames with audio so they are displayed
only after all preceding audio has been sent, enabling synchronized
audio/image playback.
(PR [#4029](https://github.com/pipecat-ai/pipecat/pull/4029))
- Added `OpenAIResponsesLLMService`, a new LLM service that uses the OpenAI
Responses API. Supports streaming text, function calling, usage metrics, and
out-of-band inference. Works with the universal `LLMContext` and
`LLMContextAggregatorPair`. See
`examples/foundational/07-interruptible-openai-responses.py` and
`14-function-calling-openai-responses.py`.
(PR [#4074](https://github.com/pipecat-ai/pipecat/pull/4074))
- Added `audio_out_auto_silence` parameter to `TransportParams` (defaults to
`True`). When set to `False`, the transport waits for audio data instead of
inserting silence when the output queue is empty, which is useful for
scenarios that require uninterrupted audio playback without artificial gaps.
(PR [#4104](https://github.com/pipecat-ai/pipecat/pull/4104))
### Changed
- Renamed tracing span attributes to align with OpenTelemetry GenAI semantic
conventions: `gen_ai.system` to `gen_ai.provider.name`, `system` to
`gen_ai.system_instructions`, `gen_ai.usage.cache_read_input_tokens` to
`gen_ai.usage.cache_read.input_tokens`, and
`gen_ai.usage.cache_creation_input_tokens` to
`gen_ai.usage.cache_creation.input_tokens`.
(PR [#3449](https://github.com/pipecat-ai/pipecat/pull/3449))
- `DeepgramSageMakerTTSService` now correctly routes audio through the base
`TTSService` audio context queue. Audio frames are delivered via
`append_to_audio_context()` instead of being pushed directly, enabling proper
ordering, interruption handling, and start/stop frame lifecycle management.
Interruptions now trigger a `Clear` message to Deepgram (flushing its text
buffer) at the right time via `on_audio_context_interrupted`.
(PR [#4083](https://github.com/pipecat-ai/pipecat/pull/4083))
- `GradiumTTSService` now sends a per-context `setup` message with
`client_req_id` before the first text message for each TTS context, following
Gradium's multiplexing protocol. Previously, a single setup message was sent
at connection time without a `client_req_id`, which prevented Gradium from
associating requests with their sessions when using `close_ws_on_eos=False`.
(PR [#4091](https://github.com/pipecat-ai/pipecat/pull/4091))
### Fixed
- Fixed stale `system_instruction` in LLM tracing spans by reading from
`_settings.system_instruction` instead of the removed `_system_instruction`
attribute.
(PR [#3449](https://github.com/pipecat-ai/pipecat/pull/3449))
- Fixed `SyncParallelPipeline` breaking the Whisker debugger.
(PR [#4029](https://github.com/pipecat-ai/pipecat/pull/4029))
- Fixed `SyncParallelPipeline` race condition where concurrent SystemFrame
processing (e.g. from RTVI) could corrupt sink queues and cause deadlocks.
SystemFrames now take a fast path that passes them through without draining
queued output.
(PR [#4029](https://github.com/pipecat-ai/pipecat/pull/4029))
- Fixed TTS frame ordering so that non-system frames always arrive in correct
order relative to the `TTSStartedFrame`/`TTSAudioRawFrame`/`TTSStoppedFrame`
sequence. Previously these frames could race ahead of or behind audio context
frames, producing out-of-order output downstream.
(PR [#4075](https://github.com/pipecat-ai/pipecat/pull/4075))
- Fixed `SarvamTTSService` audio and error frames to route through
`append_to_audio_context()` instead of `push_frame()`, ensuring correct
behavior with audio contexts and interruptions.
(PR [#4082](https://github.com/pipecat-ai/pipecat/pull/4082))
- Fixed audio frame ordering and interruption handling in Fish Audio, LMNT,
Neuphonic, and Rime NonJson TTS services. These services were bypassing the
base `TTSService` audio context serialization queue by pushing audio frames
directly, which could cause out-of-order frames and broken interruptions
during speech.
(PR [#4090](https://github.com/pipecat-ai/pipecat/pull/4090))
- Fixed Genesys AudioHook serializer to always include the `parameters` field in
protocol messages. The AudioHook protocol requires every message to carry a
`parameters` object (even if empty), but `_create_message` omitted it when no
parameters were provided. This caused clients that validate message structure
(including the Genesys reference implementation) to reject `pong` and
parameter-less `closed` responses, breaking server sequence tracking and
preventing `outputVariables` from reaching the Architect flow.
(PR [#4093](https://github.com/pipecat-ai/pipecat/pull/4093))
## [0.0.106] - 2026-03-18
### Added
- Added optional `service` field to `ServiceUpdateSettingsFrame` (and its
subclasses `LLMUpdateSettingsFrame`, `TTSUpdateSettingsFrame`,
`STTUpdateSettingsFrame`) to target a specific service instance. When
`service` is set, only the matching service applies the settings; others
forward the frame unchanged. This enables updating a single service when
multiple services of the same type exist in the pipeline.
(PR [#4004](https://github.com/pipecat-ai/pipecat/pull/4004))
- Added `sip_provider` and `room_geo` parameters to `configure()` in the Daily
runner. These convenience parameters let callers specify a SIP provider name
and geographic region directly without manually constructing
`DailyRoomProperties` and `DailyRoomSipParams`.
(PR [#4005](https://github.com/pipecat-ai/pipecat/pull/4005))
- Added `PerplexityLLMAdapter` that automatically transforms conversation
messages to satisfy Perplexity's stricter API constraints (strict role
alternation, no non-initial system messages, last message must be user/tool).
Previously, certain conversation histories could cause Perplexity API errors
that didn't occur with OpenAI (`PerplexityLLMService` subclasses
`OpenAILLMService` since Perplexity uses an OpenAI-compatible API).
(PR [#4009](https://github.com/pipecat-ai/pipecat/pull/4009))
- Added DTMF input event support to the Daily transport. Incoming DTMF tones
are now received via Daily's `on_dtmf_event` callback and pushed into the
pipeline as `InputDTMFFrame`, enabling bots to react to keypad presses from
phone callers.
(PR [#4047](https://github.com/pipecat-ai/pipecat/pull/4047))
- Added `WakePhraseUserTurnStartStrategy` for triggering user turns based on
wake phrases, with support for `single_activation` mode. Deprecates
`WakeCheckFilter`.
(PR [#4064](https://github.com/pipecat-ai/pipecat/pull/4064))
- Added `default_user_turn_start_strategies()` and
`default_user_turn_stop_strategies()` helper functions for composing custom
strategy lists.
(PR [#4064](https://github.com/pipecat-ai/pipecat/pull/4064))
### Changed
- Changed tool result JSON serialization to use `ensure_ascii=False`,
preserving UTF-8 characters instead of escaping them. This reduces context
size and token usage for non-English languages.
(PR [#3457](https://github.com/pipecat-ai/pipecat/pull/3457))
- `OpenAIRealtimeSTTService`'s `noise_reduction` parameter is now part of
`OpenAIRealtimeSTTSettings`, making it runtime-updatable via
`STTUpdateSettingsFrame`. The direct `noise_reduction` init argument is
deprecated as of 0.0.106.
(PR [#3991](https://github.com/pipecat-ai/pipecat/pull/3991))
- Updated `sarvamai` dependency from `0.1.26a2` (alpha) to `0.1.26` (stable
release).
(PR [#3997](https://github.com/pipecat-ai/pipecat/pull/3997))
- `SimliVideoService` now extends `AIService` instead of `FrameProcessor`,
aligning it with the HeyGen and Tavus video services. It supports
`SimliVideoService.Settings(...)` for configuration and uses
`start()`/`stop()`/`cancel()` lifecycle methods. Existing constructor usage
(`api_key`, `face_id`, etc.) remains unchanged.
(PR [#4001](https://github.com/pipecat-ai/pipecat/pull/4001))
- Update `pipecat-ai-small-webrtc-prebuilt` to `2.4.0`.
(PR [#4023](https://github.com/pipecat-ai/pipecat/pull/4023))
- Nova Sonic assistant text transcripts are now delivered in real-time using
speculative text events instead of delayed final text events. Previously,
assistant text only arrived after all audio had finished playing, causing
laggy transcripts in client UIs. Speculative text arrives before each audio
chunk, providing text synchronized with what the bot is saying. This also
simplifies the internal text handling by removing the interruption re-push
hack and assistant text buffer.
(PR [#4042](https://github.com/pipecat-ai/pipecat/pull/4042))
- Updated `daily-python` dependency to 0.25.0.
(PR [#4047](https://github.com/pipecat-ai/pipecat/pull/4047))
- Added `enable_dialout` parameter to `configure()` in `pipecat.runner.daily`
to support dial-out rooms. Also narrowed misleading `Optional` type hints and
deduplicated token expiry calculation.
(PR [#4048](https://github.com/pipecat-ai/pipecat/pull/4048))
- Extended `ProcessFrameResult` to stop strategies, allowing a stop strategy to
short-circuit evaluation of subsequent strategies by returning `STOP`.
(PR [#4064](https://github.com/pipecat-ai/pipecat/pull/4064))
- `GradiumSTTService` now takes both `encoding` and `sample_rate`
constructor arguments, which are assembled in the class to form the
`input_format`. PCM accepts `8000`, `16000`, and `24000` Hz sample rates.
(PR [#4066](https://github.com/pipecat-ai/pipecat/pull/4066))
- Improved `GradiumSTTService` transcription accuracy by reworking how text
fragments are accumulated and finalized. Previously, trailing words could be
dropped when the server's `flushed` response arrived before all text tokens
were delivered. The service now uses a short aggregation delay after flush to
capture trailing tokens, producing complete utterances.
(PR [#4066](https://github.com/pipecat-ai/pipecat/pull/4066))
### Deprecated
- `SimliVideoService.InputParams` is deprecated. Use the direct constructor
parameters `max_session_length`, `max_idle_time`, and `enable_logging`
instead.
(PR [#4001](https://github.com/pipecat-ai/pipecat/pull/4001))
- Deprecated `LocalSmartTurnAnalyzerV2` and `LocalCoreMLSmartTurnAnalyzer`. Use
`LocalSmartTurnAnalyzerV3` instead. Instantiating these analyzers will now
emit a `DeprecationWarning`.
(PR [#4012](https://github.com/pipecat-ai/pipecat/pull/4012))
- Deprecated `WakeCheckFilter` in favor of `WakePhraseUserTurnStartStrategy`.
(PR [#4064](https://github.com/pipecat-ai/pipecat/pull/4064))
### Fixed
- Fixed an issue where the default model for `OpenAILLMService` and
`AzureLLMService` was mistakenly reverted to `gpt-4o`. The defaults are now
restored to `gpt-4.1`.
(PR [#4000](https://github.com/pipecat-ai/pipecat/pull/4000))
- Fixed a race condition where `EndTaskFrame` could cause the pipeline to shut
down before in-flight frames (e.g. LLM function call responses) finished
processing. `EndTaskFrame` and `StopTaskFrame` now flow through the pipeline
as `ControlFrame`s, ensuring all pending work is flushed before shutdown
begins. `CancelTaskFrame` and `InterruptionTaskFrame` remain immediate
(`SystemFrame`).
(PR [#4006](https://github.com/pipecat-ai/pipecat/pull/4006))
- Fixed `ParallelPipeline` dropping or misordering frames during lifecycle
synchronization. Buffered frames are now flushed in the correct order
relative to synchronization frames (`StartFrame` goes first,
`EndFrame`/`CancelFrame` go after), and frames added to the buffer during
flush are also drained.
(PR [#4007](https://github.com/pipecat-ai/pipecat/pull/4007))
- Fixed `TTSService` potentially canceling in-flight audio during shutdown. The
stop sequence now waits for all queued audio contexts to finish processing
before canceling the stop frame task.
(PR [#4007](https://github.com/pipecat-ai/pipecat/pull/4007))
- Fixed `Language` enum values (e.g. `Language.ES`) not being converted to
service-specific codes when passed via
`settings=Service.Settings(language=Language.ES)` at init time. This caused
API errors (e.g. 400 from Rime) because the raw enum was sent instead of the
expected language code (e.g. `"spa"`). Runtime updates via
`UpdateSettingsFrame` were unaffected. The fix centralizes conversion in the
base `TTSService` and `STTService` classes so all services handle this
consistently.
(PR [#4024](https://github.com/pipecat-ai/pipecat/pull/4024))
- Fixed `DeepgramSTTService` ignoring the `base_url` scheme when using `ws://`
or `http://`. Previously these were silently overwritten with `wss://` /
`https://`, breaking air-gapped or private deployments that don't use TLS.
All scheme choices (`wss://`, `https://`, `ws://`, `http://`, or bare
hostname) are now respected.
(PR [#4026](https://github.com/pipecat-ai/pipecat/pull/4026))
- Fixed `LLMSwitcher.register_function()` and `register_direct_function()` not
accepting or forwarding the `timeout_secs` parameter.
(PR [#4037](https://github.com/pipecat-ai/pipecat/pull/4037))
- Fixed empty user transcriptions in Nova Sonic causing spurious interruptions.
Previously, an empty transcription could trigger an interruption of the
assistant's response even though the user hadn't actually spoken.
(PR [#4042](https://github.com/pipecat-ai/pipecat/pull/4042))
- Fixed `SonioxSTTService` and `OpenAIRealtimeSTTService` crash when language
parameters contain plain strings instead of `Language` enum values.
(PR [#4046](https://github.com/pipecat-ai/pipecat/pull/4046))
- Fixed premature user turn stops caused by late transcriptions arriving
between turns. A stale transcript from the previous turn could persist into
the next turn and trigger a stop before the current turn's real transcript
arrived. Stop strategies are now reset at both turn start and turn stop to
prevent state from leaking across turn boundaries.
(PR [#4057](https://github.com/pipecat-ai/pipecat/pull/4057))
- Fixed raw language strings like `"de-DE"` silently failing when passed to
TTS/STT services (e.g. ElevenLabs producing no audio). Raw strings now go
through the same `Language` enum resolution as enum values, so regional codes
like `"de-DE"` are properly converted to service-expected formats like
`"de"`. Unrecognized strings log a warning instead of failing silently.
(PR [#4058](https://github.com/pipecat-ai/pipecat/pull/4058))
- Fixed Deepgram STT list-type settings (`keyterm`, `keywords`, `search`,
`redact`, `replace`) being stringified instead of passed as lists to the SDK,
which caused them to be sent as literal strings (e.g. `"['pipecat']"`) in the
WebSocket query params.
(PR [#4063](https://github.com/pipecat-ai/pipecat/pull/4063))
- Fixed `MinWordsUserTurnStartStrategy` including text below the word threshold
in the output by resetting aggregation when the minimum word count is not
met.
(PR [#4064](https://github.com/pipecat-ai/pipecat/pull/4064))
- Fixed audio overlap and potential dropped TTS content when multiple assistant
turns occur in quick succession. `TTSService` now flushes remaining text
before pausing frame processing on `LLMFullResponseEndFrame`/`EndFrame`,
instead of pausing first.
(PR [#4071](https://github.com/pipecat-ai/pipecat/pull/4071))
### Security
- Bumped PyJWT minimum version from 2.10.1 to 2.12.0 in the `livekit` extra to
address CVE-2026-32597 (GHSA-752w-5fwx-jx9f), where PyJWT <= 2.11.0 accepted
unknown `crit` header extensions.
(PR [#4035](https://github.com/pipecat-ai/pipecat/pull/4035))
## [0.0.105] - 2026-03-10
### Added

View File

@@ -10,7 +10,7 @@ Pipecat is an open-source Python framework for building real-time voice and mult
```bash
# Setup development environment
uv sync --group dev --all-extras --no-extra gstreamer
uv sync --group dev --all-extras --no-extra gstreamer --no-extra krisp
# Install pre-commit hooks
uv run pre-commit install

View File

@@ -23,7 +23,7 @@ Create your integration following the patterns and examples shown in the "Integr
Your repository must contain these components:
- **Source code** - Complete implementation following Pipecat patterns
- **Foundational example** - Single file example showing basic usage (see [Pipecat examples](https://github.com/pipecat-ai/pipecat/tree/main/examples))
- **Foundational example** - Single file example showing basic usage (see [Pipecat examples](https://github.com/pipecat-ai/pipecat/tree/main/examples/foundational))
- **README.md** - Must include:
- Introduction and explanation of your integration
- Installation instructions
@@ -65,25 +65,12 @@ Once your PR is submitted, post in the `#community-integrations` Discord channel
#### Websocket-based Services
**Base class:** `WebsocketSTTService`
**Use for:** Services where you manage the websocket connection directly. Combines `STTService` with `WebsocketService` for automatic reconnection and keepalive support.
**Examples:**
- [CartesiaSTTService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/cartesia/stt.py)
- [ElevenLabsRealtimeSTTService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/elevenlabs/stt.py)
#### SDK-based Streaming Services
**Base class:** `STTService`
**Use for:** Streaming services where the provider's Python SDK manages the connection internally.
**Examples:**
- [DeepgramSTTService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/deepgram/stt.py)
- [GoogleSTTService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/google/stt.py)
- [SpeechmaticsSTTService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/speechmatics/stt.py)
#### File-based Services
@@ -121,59 +108,55 @@ Once your PR is submitted, post in the `#community-integrations` Discord channel
#### Key requirements:
- **`_process_context(self, context: LLMContext)`** — The main method that processes an LLM context and generates a response. Each LLM service overrides `process_frame` to extract context from `LLMContextFrame` and calls `_process_context`.
- **`adapter_class`** — Class attribute pointing to a `BaseLLMAdapter` subclass. Defaults to `OpenAILLMAdapter`. Non-OpenAI services must implement their own adapter (see `src/pipecat/adapters/base_llm_adapter.py`) with methods:
- `get_llm_invocation_params(context)` — Extract provider-specific params from universal context
- `to_provider_tools_format(tools_schema)` — Convert standard tools to provider format
- `get_messages_for_logging(context)` — Format messages for logging
- Reference adapters: `src/pipecat/adapters/services/` (anthropic, gemini, bedrock, etc.)
- **Frame sequence:** Output must follow this frame sequence pattern:
- `LLMFullResponseStartFrame` — Signals the start of an LLM response
- `LLMTextFrame` — Contains LLM content, typically streamed as tokens
- `LLMFullResponseEndFrame` — Signals the end of an LLM response
- `LLMFullResponseStartFrame` - Signals the start of an LLM response
- `LLMTextFrame` - Contains LLM content, typically streamed as tokens
- `LLMFullResponseEndFrame` - Signals the end of an LLM response
- **Thought frames (reasoning models):** If the model supports extended thinking / chain-of-thought, emit thought frames alongside the response:
- `LLMThoughtStartFrame` — Signals the start of a thought
- `LLMThoughtTextFrame` — Contains thought content, streamed as tokens
- `LLMThoughtEndFrame` — Signals the end of a thought
- **Context aggregation** is handled by the framework via `LLMContext` + `LLMContextAggregatorPair`. The LLM service just processes context it receives — no need to implement aggregators.
- **Context aggregation:** Implement context aggregation to collect user and assistant content:
- Aggregators come in pairs with a `user()` instance and an `assistant()` instance
- Context must adhere to the `LLMContext` universal format
- Aggregators should handle adding messages, function calls, and images to the context
### TTS (Text-to-Speech) Services
#### WebsocketTTSService
#### AudioContextWordTTSService
**Use for:** Websocket-based streaming services (with or without word timestamps)
**Use for:** Websocket-based services supporting word/timestamp alignment
**Examples:**
**Example:**
- [CartesiaTTSService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/cartesia/tts.py)
- [ElevenLabsTTSService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/elevenlabs/tts.py)
#### InterruptibleTTSService
**Use for:** Websocket-based services without word timestamps that reconnect on interruption (e.g. don't support a context ID or interruption message)
**Use for:** Websocket-based services without word/timestamp alignment, requiring disconnection on interruption
**Example:**
- [SarvamTTSService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/sarvam/tts.py)
#### WordTTSService
**Use for:** HTTP-based services supporting word/timestamp alignment
**Example:**
- [ElevenLabsHttpTTSService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/elevenlabs/tts.py)
#### TTSService
**Use for:** HTTP-based services (word timestamps are supported in the base class)
**Use for:** HTTP-based services without word/timestamp alignment
**Examples:**
**Example:**
- [GoogleHttpTTSService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/google/tts.py)
- [OpenAITTSService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/openai/tts.py)
#### Key requirements:
- For websocket services, use asyncio WebSocket implementation
- For websocket services, use asyncio WebSocket implementation (required for v13+ support)
- Handle idle service timeouts with keepalives
- TTS services push both audio (`TTSAudioRawFrame`) and text (`TTSTextFrame`) frames
- TTSServices push both audio (`TTSAudioRawFrame`) and text (`TTSTextFrame`) frames
### Telephony Serializers
@@ -217,25 +200,14 @@ Vision services process images and provide analysis such as descriptions, object
#### Key requirements:
- Must implement `run_vision` method that takes a `UserImageRawFrame` and returns an `AsyncGenerator[Frame, None]`
- The method processes the image frame and yields frames with analysis results
- Must yield the frame sequence: `VisionFullResponseStartFrame`, `VisionTextFrame`, `VisionFullResponseEndFrame`
- Must implement `run_vision` method that takes an `LLMContext` and returns an `AsyncGenerator[Frame, None]`
- The method processes the latest image in the context and yields frames with analysis results
- Typically yields `TextFrame` objects containing descriptions or answers
## Implementation Guidelines
### Naming Conventions
#### Package and Repository Naming
Use the `pipecat-{vendor}` naming convention for your PyPI package and repository:
- `pipecat-{vendor}` — for single-service integrations (e.g., `pipecat-deepdub`)
- `pipecat-{vendor}-{type}` — when a vendor offers multiple service types (e.g., `pipecat-upliftai-stt`, `pipecat-upliftai-tts`)
This convention makes community packages easily discoverable via PyPI search and clearly identifies them as part of the Pipecat ecosystem.
#### Class Naming
- **STT:** `VendorSTTService`
- **LLM:** `VendorLLMService`
- **TTS:**
@@ -409,7 +381,7 @@ Note that `self.sample_rate` is a `@property` set in the TTSService base class,
Use Pipecat's tracing decorators:
- **STT:** `@traced_stt` - decorate `_handle_transcription(self, transcript, is_final, language)` (the standard method name convention)
- **STT:** `@traced_stt` - decorate a function that handles `transcript`, `is_final`, `language` as args
- **LLM:** `@traced_llm` - decorate the `_process_context()` method
- **TTS:** `@traced_tts` - decorate the `run_tts()` method
@@ -417,9 +389,8 @@ Use Pipecat's tracing decorators:
### Packaging and Distribution
- Name your package `pipecat-{vendor}` (see [Naming Conventions](#naming-conventions))
- Use [uv](https://docs.astral.sh/uv/) for packaging (encouraged)
- Publish to PyPI for easier installation
- Consider releasing to PyPI for easier installation
- Follow semantic versioning principles
- Maintain a changelog
@@ -432,15 +403,17 @@ For REST-based communication, use aiohttp. Pipecat includes this as a required d
- Wrap API calls in appropriate try/except blocks
- Handle rate limits and network failures gracefully
- Provide meaningful error messages
- When errors occur, raise exceptions AND push errors to notify the pipeline:
- When errors occur, raise exceptions AND push `ErrorFrame`s to notify the pipeline:
```python
from pipecat.frames.frames import ErrorFrame
try:
# Your API call
result = await self._make_api_call()
except Exception as e:
# Push error upstream to notify the pipeline
await self.push_error(f"{self} error: {e}", exception=e)
# Push error frame to pipeline
await self.push_error(ErrorFrame(error=f"{self} error: {e}"))
# Raise or handle as appropriate
raise
```

View File

@@ -8,7 +8,7 @@
**Pipecat** is an open-source Python framework for building real-time voice and multimodal conversational agents. Orchestrate audio and video, AI services, different transports, and conversation pipelines effortlessly—so you can focus on what makes your agent unique.
> Want to dive right in? Run `pipecat init quickstart` or follow the [quickstart guide](https://docs.pipecat.ai/getting-started/quickstart).
> Want to dive right in? Try the [quickstart](https://docs.pipecat.ai/getting-started/quickstart).
## 🚀 What You Can Build
@@ -65,10 +65,6 @@ claude plugin marketplace add pipecat-ai/skills
and install any of the available plugins.
### 🧩 Community Integrations
Build and share your own Pipecat service integrations! Browse existing [community integrations](https://docs.pipecat.ai/server/services/community-integrations) or check out our [guide](COMMUNITY_INTEGRATIONS.md) to create your own.
### 📺️ Pipecat TV Channel
Catch new features, interviews, and how-tos on our [Pipecat TV](https://www.youtube.com/playlist?list=PLzU2zoMTQIHjqC3v4q2XVSR3hGSzwKFwH) channel.
@@ -80,25 +76,24 @@ Catch new features, interviews, and how-tos on our [Pipecat TV](https://www.yout
<a href="https://github.com/pipecat-ai/pipecat-examples/tree/main/storytelling-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat-examples/main/storytelling-chatbot/image.png" width="400" /></a>
<br/>
<a href="https://github.com/pipecat-ai/pipecat-examples/tree/main/translation-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat-examples/main/translation-chatbot/image.png" width="400" /></a>&nbsp;
<a href="https://github.com/pipecat-ai/pipecat/blob/main/examples/vision/vision-moondream.py"><img src="https://github.com/pipecat-ai/pipecat/blob/main/examples/assets/moondream.png" width="400" /></a>
<a href="https://github.com/pipecat-ai/pipecat/blob/main/examples/foundational/12-describe-video.py"><img src="https://github.com/pipecat-ai/pipecat/blob/main/examples/foundational/assets/moondream.png" width="400" /></a>
</p>
## 🧩 Available services
| Category | Services |
| ------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/stt/elevenlabs), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Gradium](https://docs.pipecat.ai/server/services/stt/gradium), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [NVIDIA Riva](https://docs.pipecat.ai/server/services/stt/riva), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [Sarvam](https://docs.pipecat.ai/server/services/stt/sarvam), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) |
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [Mistral](https://docs.pipecat.ai/server/services/llm/mistral), [Nebius](https://docs.pipecat.ai/server/services/llm/nebius), [Novita](https://docs.pipecat.ai/server/services/llm/novita), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nvidia), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova), [Sarvam](https://docs.pipecat.ai/server/services/llm/sarvam), [Together AI](https://docs.pipecat.ai/server/services/llm/together) |
| Text-to-Speech | [Async](https://docs.pipecat.ai/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Camb AI](https://docs.pipecat.ai/server/services/tts/camb), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [Gradium](https://docs.pipecat.ai/server/services/tts/gradium), [Groq](https://docs.pipecat.ai/server/services/tts/groq), [Hume](https://docs.pipecat.ai/server/services/tts/hume), [Inworld](https://docs.pipecat.ai/server/services/tts/inworld), [Kokoro](https://docs.pipecat.ai/server/services/tts/kokoro), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [NVIDIA Riva](https://docs.pipecat.ai/server/services/tts/riva), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [Resemble](https://docs.pipecat.ai/server/services/tts/resemble), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [Smallest](https://docs.pipecat.ai/server/services/tts/smallest), [Speechmatics](https://docs.pipecat.ai/server/services/tts/speechmatics), [xAI](https://docs.pipecat.ai/server/services/tts/xai), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
| Speech-to-Speech | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [Grok Voice Agent](https://docs.pipecat.ai/server/services/s2s/grok), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai), [Ultravox](https://docs.pipecat.ai/server/services/s2s/ultravox) |
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [LiveKit (WebRTC)](https://docs.pipecat.ai/server/services/transport/livekit), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), [WhatsApp](https://docs.pipecat.ai/server/services/transport/whatsapp), Local |
| Serializers | [Exotel](https://docs.pipecat.ai/server/services/serializers/exotel), [Genesys](https://docs.pipecat.ai/server/services/serializers/genesys), [Plivo](https://docs.pipecat.ai/server/services/serializers/plivo), [Twilio](https://docs.pipecat.ai/server/services/serializers/twilio), [Telnyx](https://docs.pipecat.ai/server/services/serializers/telnyx), [Vonage](https://docs.pipecat.ai/server/services/serializers/vonage) |
| Video | [HeyGen](https://docs.pipecat.ai/server/services/video/heygen), [LemonSlice](https://docs.pipecat.ai/server/services/transport/lemonslice), [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) |
| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) |
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/google-imagen), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) |
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp Viva](https://docs.pipecat.ai/guides/features/krisp-viva), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [ai-coustics](https://docs.pipecat.ai/server/utilities/audio/aic-filter), [RNNoise](https://docs.pipecat.ai/server/utilities/audio/rnnoise-filter) |
| Analytics & Metrics | [OpenTelemetry](https://docs.pipecat.ai/server/utilities/opentelemetry), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) |
| Community | [Browse community integrations →](https://docs.pipecat.ai/server/services/community-integrations) |
| Category | Services |
| ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/stt/elevenlabs), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Gradium](https://docs.pipecat.ai/server/services/stt/gradium), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [NVIDIA Riva](https://docs.pipecat.ai/server/services/stt/riva), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [SambaNova (Whisper)](https://docs.pipecat.ai/server/services/stt/sambanova), [Sarvam](https://docs.pipecat.ai/server/services/stt/sarvam), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) |
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [Mistral](https://docs.pipecat.ai/server/services/llm/mistral), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova), [Together AI](https://docs.pipecat.ai/server/services/llm/together) |
| Text-to-Speech | [Async](https://docs.pipecat.ai/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Camb AI](https://docs.pipecat.ai/server/services/tts/camb), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [Gradium](https://docs.pipecat.ai/server/services/tts/gradium), [Groq](https://docs.pipecat.ai/server/services/tts/groq), [Hume](https://docs.pipecat.ai/server/services/tts/hume), [Inworld](https://docs.pipecat.ai/server/services/tts/inworld), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [NVIDIA Riva](https://docs.pipecat.ai/server/services/tts/riva), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [Resemble](https://docs.pipecat.ai/server/services/tts/resemble), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [Speechmatics](https://docs.pipecat.ai/server/services/tts/speechmatics), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
| Speech-to-Speech | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [Grok Voice Agent](https://docs.pipecat.ai/server/services/s2s/grok), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai), [Ultravox](https://docs.pipecat.ai/server/services/s2s/ultravox) |
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local |
| Serializers | [Exotel](https://docs.pipecat.ai/server/utilities/serializers/exotel), [Plivo](https://docs.pipecat.ai/server/utilities/serializers/plivo), [Twilio](https://docs.pipecat.ai/server/utilities/serializers/twilio), [Telnyx](https://docs.pipecat.ai/server/utilities/serializers/telnyx), [Vonage](https://docs.pipecat.ai/server/utilities/serializers/vonage) |
| Video | [HeyGen](https://docs.pipecat.ai/server/services/video/heygen), [LemonSlice](https://docs.pipecat.ai/server/services/video/lemonslice), [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) |
| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) |
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/google-imagen), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) |
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [ai-coustics](https://docs.pipecat.ai/server/utilities/audio/aic-filter) |
| Analytics & Metrics | [OpenTelemetry](https://docs.pipecat.ai/server/utilities/opentelemetry), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) |
📚 [View full services documentation →](https://docs.pipecat.ai/server/services/supported-services)
@@ -142,7 +137,7 @@ You can get started with Pipecat running on your local machine, then move your a
## 🧪 Code examples
- [Foundational](https://github.com/pipecat-ai/pipecat/tree/main/examples) — small snippets that build on each other, introducing one or two concepts at a time
- [Foundational](https://github.com/pipecat-ai/pipecat/tree/main/examples/foundational) — small snippets that build on each other, introducing one or two concepts at a time
- [Example apps](https://github.com/pipecat-ai/pipecat-examples) — complete applications that you can use as starting points for development
## 🛠️ Contributing to the framework
@@ -166,6 +161,7 @@ You can get started with Pipecat running on your local machine, then move your a
```bash
uv sync --group dev --all-extras \
--no-extra gstreamer \
--no-extra krisp \
--no-extra local
```

View File

@@ -0,0 +1 @@
- Changed tool result JSON serialization to use `ensure_ascii=False`, preserving UTF-8 characters instead of escaping them. This reduces context size and token usage for non-English languages.

View File

@@ -0,0 +1 @@
- `OpenAIRealtimeSTTService`'s `noise_reduction` parameter is now part of `OpenAIRealtimeSTTSettings`, making it runtime-updatable via `STTUpdateSettingsFrame`. The direct `noise_reduction` init argument is deprecated as of 0.0.106.

View File

@@ -0,0 +1 @@
- Updated `sarvamai` dependency from `0.1.26a2` (alpha) to `0.1.26` (stable release).

1
changelog/4000.fixed.md Normal file
View File

@@ -0,0 +1 @@
- Fixed an issue where the default model for `OpenAILLMService` and `AzureLLMService` was mistakenly reverted to `gpt-4o`. The defaults are now restored to `gpt-4.1`.

View File

@@ -0,0 +1 @@
- `SimliVideoService` now extends `AIService` instead of `FrameProcessor`, aligning it with the HeyGen and Tavus video services. It supports `SimliVideoService.Settings(...)` for configuration and uses `start()`/`stop()`/`cancel()` lifecycle methods. Existing constructor usage (`api_key`, `face_id`, etc.) remains unchanged.

View File

@@ -0,0 +1 @@
- `SimliVideoService.InputParams` is deprecated. Use the direct constructor parameters `max_session_length`, `max_idle_time`, and `enable_logging` instead.

1
changelog/4004.added.md Normal file
View File

@@ -0,0 +1 @@
- Added optional `service` field to `ServiceUpdateSettingsFrame` (and its subclasses `LLMUpdateSettingsFrame`, `TTSUpdateSettingsFrame`, `STTUpdateSettingsFrame`) to target a specific service instance. When `service` is set, only the matching service applies the settings; others forward the frame unchanged. This enables updating a single service when multiple services of the same type exist in the pipeline.

1
changelog/4005.added.md Normal file
View File

@@ -0,0 +1 @@
- Added `sip_provider` and `room_geo` parameters to `configure()` in the Daily runner. These convenience parameters let callers specify a SIP provider name and geographic region directly without manually constructing `DailyRoomProperties` and `DailyRoomSipParams`.

1
changelog/4006.fixed.md Normal file
View File

@@ -0,0 +1 @@
- Fixed a race condition where `EndTaskFrame` could cause the pipeline to shut down before in-flight frames (e.g. LLM function call responses) finished processing. `EndTaskFrame` and `StopTaskFrame` now flow through the pipeline as `ControlFrame`s, ensuring all pending work is flushed before shutdown begins. `CancelTaskFrame` and `InterruptionTaskFrame` remain immediate (`SystemFrame`).

View File

@@ -0,0 +1 @@
- Fixed `TTSService` potentially canceling in-flight audio during shutdown. The stop sequence now waits for all queued audio contexts to finish processing before canceling the stop frame task.

1
changelog/4007.fixed.md Normal file
View File

@@ -0,0 +1 @@
- Fixed `ParallelPipeline` dropping or misordering frames during lifecycle synchronization. Buffered frames are now flushed in the correct order relative to synchronization frames (`StartFrame` goes first, `EndFrame`/`CancelFrame` go after), and frames added to the buffer during flush are also drained.

1
changelog/4009.added.md Normal file
View File

@@ -0,0 +1 @@
- Added `PerplexityLLMAdapter` that automatically transforms conversation messages to satisfy Perplexity's stricter API constraints (strict role alternation, no non-initial system messages, last message must be user/tool). Previously, certain conversation histories could cause Perplexity API errors that didn't occur with OpenAI (`PerplexityLLMService` subclasses `OpenAILLMService` since Perplexity uses an OpenAI-compatible API).

View File

@@ -0,0 +1 @@
- Deprecated `LocalSmartTurnAnalyzerV2` and `LocalCoreMLSmartTurnAnalyzer`. Use `LocalSmartTurnAnalyzerV3` instead. Instantiating these analyzers will now emit a `DeprecationWarning`.

View File

@@ -0,0 +1 @@
- Updated `pipecat-ai-small-webrtc-prebuilt` to `2.4.0`.

1
changelog/4024.fixed.md Normal file
View File

@@ -0,0 +1 @@
- Fixed `Language` enum values (e.g. `Language.ES`) not being converted to service-specific codes when passed via `settings=Service.Settings(language=Language.ES)` at init time. This caused API errors (e.g. 400 from Rime) because the raw enum was sent instead of the expected language code (e.g. `"spa"`). Runtime updates via `UpdateSettingsFrame` were unaffected. The fix centralizes conversion in the base `TTSService` and `STTService` classes so all services handle this consistently.

1
changelog/4026.fixed.md Normal file
View File

@@ -0,0 +1 @@
- Fixed `DeepgramSTTService` ignoring the `base_url` scheme when using `ws://` or `http://`. Previously these were silently overwritten with `wss://` / `https://`, breaking air-gapped or private deployments that don't use TLS. All scheme choices (`wss://`, `https://`, `ws://`, `http://`, or bare hostname) are now respected.

View File

@@ -0,0 +1 @@
- Bumped PyJWT minimum version from 2.10.1 to 2.12.0 in the `livekit` extra to address CVE-2026-32597 (GHSA-752w-5fwx-jx9f), where PyJWT <= 2.11.0 accepted unknown `crit` header extensions.

1
changelog/4037.fixed.md Normal file
View File

@@ -0,0 +1 @@
- Fixed `LLMSwitcher.register_function()` and `register_direct_function()` not accepting or forwarding the `timeout_secs` parameter.

1
changelog/4046.fixed.md Normal file
View File

@@ -0,0 +1 @@
- Fixed `SonioxSTTService` and `OpenAIRealtimeSTTService` crashing when language parameters contain plain strings instead of `Language` enum values.

1
changelog/4047.added.md Normal file
View File

@@ -0,0 +1 @@
- Added DTMF input event support to the Daily transport. Incoming DTMF tones are now received via Daily's `on_dtmf_event` callback and pushed into the pipeline as `InputDTMFFrame`, enabling bots to react to keypad presses from phone callers.

View File

@@ -0,0 +1 @@
- Updated `daily-python` dependency to 0.25.0.

View File

@@ -0,0 +1 @@
- Added `enable_dialout` parameter to `configure()` in `pipecat.runner.daily` to support dial-out rooms. Also narrowed misleading `Optional` type hints and deduplicated token expiry calculation.

1
changelog/4057.fixed.md Normal file
View File

@@ -0,0 +1 @@
- Fixed premature user turn stops caused by late transcriptions arriving between turns. A stale transcript from the previous turn could persist into the next turn and trigger a stop before the current turn's real transcript arrived. Stop strategies are now reset at both turn start and turn stop to prevent state from leaking across turn boundaries.

1
changelog/4058.fixed.md Normal file
View File

@@ -0,0 +1 @@
- Fixed raw language strings like `"de-DE"` silently failing when passed to TTS/STT services (e.g. ElevenLabs producing no audio). Raw strings now go through the same `Language` enum resolution as enum values, so regional codes like `"de-DE"` are properly converted to service-expected formats like `"de"`. Unrecognized strings log a warning instead of failing silently.

1
changelog/4063.fixed.md Normal file
View File

@@ -0,0 +1 @@
- Fixed Deepgram STT list-type settings (`keyterm`, `keywords`, `search`, `redact`, `replace`) being stringified instead of passed as lists to the SDK, which caused them to be sent as literal strings (e.g. `"['pipecat']"`) in the WebSocket query params.

View File

@@ -1 +0,0 @@
- ⚠️ Added WebSocket-based `OpenAIResponsesLLMService` as the new default for the OpenAI Responses API. It maintains a persistent connection to `wss://api.openai.com/v1/responses` and automatically uses `previous_response_id` to send only incremental context, falling back to full context on reconnection or cache miss. The previous HTTP-based implementation is now available as `OpenAIResponsesHttpLLMService`.

View File

@@ -1 +0,0 @@
- ⚠️ Removed `OpenPipeLLMService` and the `openpipe` extra. OpenPipe was acquired by CoreWeave and the package is no longer maintained. If you were using `openpipe` as an LLM provider, switch to the underlying provider directly (e.g. `openai`). The OpenPipe interface can still be used with `OpenAILLMService` by specifying a `base_url`.

View File

@@ -1 +0,0 @@
- ⚠️ Updated `langchain` extra to require langchain 1.x (from 0.3.x), langchain-community 0.4.x (from 0.3.x), and langchain-openai 1.x (from 0.3.x). If you pin these packages in your project, update your pins accordingly.

View File

@@ -1 +0,0 @@
- Fixed `InworldHttpTTSService` streaming responses crashing with `UnicodeDecodeError` when multi-byte UTF-8 characters were split across chunk boundaries. This caused TTS audio to cut off mid-sentence intermittently.

View File

@@ -1 +0,0 @@
- Fixed a crash (`JSONDecodeError`) when a user interruption occurs while the LLM is streaming function call arguments. Previously, the incomplete JSON arguments were passed directly to `json.loads()`, causing an unhandled exception. Affected services: OpenAI, Google (OpenAI-compatible), and SambaNova.

View File

@@ -1 +0,0 @@
- ⚠️ Removed deprecated `observers` field from `PipelineParams`. Pass observers directly to `PipelineTask` constructor instead.

View File

@@ -1 +0,0 @@
- ⚠️ Removed deprecated `on_pipeline_ended`, `on_pipeline_cancelled`, and `on_pipeline_stopped` events from `PipelineTask`. Use `on_pipeline_finished` instead.

View File

@@ -1 +0,0 @@
- ⚠️ Removed `AudioBufferProcessor.user_continuous_stream` parameter. Use `user_audio_passthrough` instead.

View File

@@ -1 +0,0 @@
- ⚠️ Removed deprecated `camera_in_enabled`, `camera_in_is_live`, `camera_in_width`, `camera_in_height`, `camera_out_enabled`, `camera_out_is_live`, `camera_out_width`, `camera_out_height`, and `camera_out_color` transport params. Use the `video_in_*` and `video_out_*` equivalents instead.

View File

@@ -1 +0,0 @@
- ⚠️ Removed `RTVIObserver.errors_enabled` parameter.

View File

@@ -1 +0,0 @@
- ⚠️ Removed deprecated `vad_enabled` and `vad_audio_passthrough` transport params.

View File

@@ -1 +0,0 @@
- ⚠️ Removed `TTSService.say()`. Push a `TTSSpeakFrame` into the pipeline instead.

View File

@@ -1 +0,0 @@
- ⚠️ Removed `DailyRunner.configure_with_args()`. Use `PipelineRunner` with `RunnerArguments` instead.

View File

@@ -1 +0,0 @@
- ⚠️ Removed deprecated RTVI models, frames, and processor methods including `RTVIConfig`, `RTVIServiceConfig`, `RTVIServiceOptionConfig`, various `RTVI*Data` models, `RTVIActionFrame`, and `RTVIProcessor.handle_function_call`/`handle_function_call_start`. Use the updated RTVI processor API instead.

View File

@@ -1 +0,0 @@
- ⚠️ Removed `FrameProcessor.wait_for_task()`. Use `create_task()` and manage tasks with the built-in `TaskManager` instead.

View File

@@ -1 +0,0 @@
- ⚠️ Removed `KrispFilter`. The `krisp` extra has been removed from `pyproject.toml`.

View File

@@ -1 +0,0 @@
- ⚠️ Removed `LLMService.request_image_frame()`. Push a `UserImageRequestFrame` instead.

View File

@@ -1 +0,0 @@
- ⚠️ Removed `create_default_resampler()` from `pipecat.audio.utils`.

View File

@@ -1 +0,0 @@
- ⚠️ Removed `FalSmartTurnAnalyzer` and `LocalSmartTurnAnalyzer`.

View File

@@ -1 +0,0 @@
- ⚠️ Removed deprecated transport frames: `TransportMessageFrame`, `TransportMessageUrgentFrame`, `InputTransportMessageUrgentFrame`, `DailyTransportMessageFrame`, and `DailyTransportMessageUrgentFrame`. Use `OutputTransportMessageFrame`, `OutputTransportMessageUrgentFrame`, `InputTransportMessageFrame`, `DailyOutputTransportMessageFrame`, and `DailyOutputTransportMessageUrgentFrame` instead.

View File

@@ -1 +0,0 @@
- ⚠️ Removed deprecated `KeypadEntryFrame` alias.

View File

@@ -1 +0,0 @@
- ⚠️ Removed deprecated interruption frames: `StartInterruptionFrame` and `BotInterruptionFrame`. Use `InterruptionFrame` and `InterruptionTaskFrame` instead.

View File

@@ -1 +0,0 @@
- ⚠️ Removed `LLMService.start_callback` parameter. Register an `on_llm_response_start` event handler instead.

View File

@@ -1 +0,0 @@
- ⚠️ Removed single-argument function call support from `LLMService`. Functions must use named parameters instead of a single `arguments` parameter.

View File

@@ -1 +0,0 @@
- ⚠️ Removed `NoisereduceFilter`. Use system-level noise reduction or a service-based alternative instead.

View File

@@ -1 +0,0 @@
- ⚠️ Removed deprecated `pipecat.services.riva` package. Use `pipecat.services.nvidia.stt` and `pipecat.services.nvidia.tts` instead (`RivaSTTService` → `NvidiaSTTService`, `RivaTTSService` → `NvidiaTTSService`).

View File

@@ -1 +0,0 @@
- ⚠️ Removed deprecated `pipecat.services.nim` package. Use `pipecat.services.nvidia.llm` instead (`NimLLMService` → `NvidiaLLMService`).

View File

@@ -1 +0,0 @@
- ⚠️ Removed deprecated `pipecat.services.gemini_multimodal_live` package. Use `pipecat.services.google.gemini_live` instead. Note that class names no longer include "Multimodal" (e.g. `GeminiMultimodalLiveLLMService` → `GeminiLiveLLMService`).

View File

@@ -1 +0,0 @@
- ⚠️ Removed deprecated `pipecat.services.aws_nova_sonic` package. Use `pipecat.services.aws.nova_sonic` instead.

View File

@@ -1 +0,0 @@
- ⚠️ Removed deprecated `pipecat.services.openai_realtime` package. Use `pipecat.services.openai.realtime` instead.

View File

@@ -1 +0,0 @@
- ⚠️ Removed deprecated `OpenAIRealtimeBetaLLMService` and `AzureRealtimeBetaLLMService`. Use `OpenAIRealtimeLLMService` and `AzureRealtimeLLMService` from `pipecat.services.openai.realtime` and `pipecat.services.azure.realtime` instead.

View File

@@ -1 +0,0 @@
- ⚠️ Removed deprecated `pipecat.services.deepgram.stt_sagemaker` and `pipecat.services.deepgram.tts_sagemaker` modules. Use `pipecat.services.deepgram.sagemaker.stt` and `pipecat.services.deepgram.sagemaker.tts` instead.

View File

@@ -1 +0,0 @@
- ⚠️ Removed deprecated `GoogleLLMOpenAIBetaService` from `pipecat.services.google.openai`. Use `GoogleLLMService` from `pipecat.services.google.llm` instead.

View File

@@ -1 +0,0 @@
- ⚠️ Removed deprecated `pipecat.services.google.llm_vertex` module. Use `pipecat.services.google.vertex.llm` instead.

View File

@@ -1 +0,0 @@
- ⚠️ Removed deprecated `pipecat.services.google.gemini_live.llm_vertex` module. Use `pipecat.services.google.gemini_live.vertex.llm` instead.

View File

@@ -1 +0,0 @@
- ⚠️ Removed deprecated `pipecat.services.ai_services` module. Import from `pipecat.services.ai_service`, `pipecat.services.llm_service`, `pipecat.services.stt_service`, `pipecat.services.tts_service`, etc. instead.

View File

@@ -1 +0,0 @@
- Changed `GrokLLMService` default model from `grok-3-beta` to `grok-3`, now that the model is generally available.

View File

@@ -1 +0,0 @@
- `GoogleImageGenService` now defaults to `imagen-4.0-generate-001` (previously `imagen-3.0-generate-002`).

View File

@@ -1 +0,0 @@
- ⚠️ `BaseOpenAILLMService.get_chat_completions()` now accepts an `LLMContext` instead of `OpenAILLMInvocationParams`. If you override this method, update your signature accordingly.

View File

@@ -1,22 +0,0 @@
- ⚠️ Removed deprecated service-specific context and aggregator machinery, which was superseded by the universal `LLMContext` system.
Service-specific classes removed: `AnthropicLLMContext`, `AnthropicContextAggregatorPair`, `AWSBedrockLLMContext`, `AWSBedrockContextAggregatorPair`, `OpenAIContextAggregatorPair`, and their user/assistant aggregators. Also removed `create_context_aggregator()` from `LLMService`, `OpenAILLMService`, `AnthropicLLMService`, and `AWSBedrockLLMService`.
Base aggregator classes removed (from `pipecat.processors.aggregators.llm_response`): `BaseLLMResponseAggregator`, `LLMContextResponseAggregator`, `LLMUserContextAggregator`, `LLMAssistantContextAggregator`, `LLMUserResponseAggregator`, `LLMAssistantResponseAggregator`.
From the developer's point of view, migrating will usually be a matter of going from this:
```python
context = OpenAILLMContext(messages, tools)
context_aggregator = llm.create_context_aggregator(context)
```
To this:
```python
from pipecat.processors.aggregators.llm_context import LLMContext
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
context = LLMContext(messages, tools)
context_aggregator = LLMContextAggregatorPair(context)
```

View File

@@ -1 +0,0 @@
- ⚠️ Removed deprecated frame types `LLMMessagesFrame` and `OpenAILLMContextAssistantTimestampFrame` from `pipecat.frames.frames`. Instead of `LLMMessagesFrame`, use `LLMContextFrame` with the new messages, or `LLMMessagesUpdateFrame` with `run_llm=True`.

View File

@@ -1 +0,0 @@
- ⚠️ Removed `GatedOpenAILLMContextAggregator` (from `pipecat.processors.aggregators.gated_open_ai_llm_context`). Use `GatedLLMContextAggregator` (from `pipecat.processors.aggregators.gated_llm_context`) instead.

View File

@@ -1 +0,0 @@
- ⚠️ Removed `VisionImageFrameAggregator` (from `pipecat.processors.aggregators.vision_image_frame`). Vision/image handling is now built into `LLMContext` (from `pipecat.processors.aggregators.llm_context`). See the `12*` examples for the recommended replacement pattern.

View File

@@ -1 +0,0 @@
- ⚠️ Removed deprecated compatibility modules: `pipecat.services.openai_realtime_beta` (use `pipecat.services.openai.realtime`), `pipecat.services.openai_realtime.context`, `pipecat.services.openai_realtime.frames`, `pipecat.services.openai.realtime.context`, `pipecat.services.openai.realtime.frames`, `pipecat.services.gemini_multimodal_live` (use `pipecat.services.google.gemini_live`), `pipecat.services.aws_nova_sonic.context` (use `pipecat.services.aws.nova_sonic`), `pipecat.services.google.openai` and `pipecat.services.google.llm_openai` (use `pipecat.services.google.llm`).

View File

@@ -1,18 +0,0 @@
- ⚠️ Removed `OpenAILLMContext`, `OpenAILLMContextFrame`, and `OpenAILLMContext.from_messages()`. Use `LLMContext` (from `pipecat.processors.aggregators.llm_context`) and `LLMContextFrame` (from `pipecat.frames.frames`) instead. All services now exclusively use the universal `LLMContext`.
From the developer's point of view, migrating will usually be a matter of going from this:
```python
context = OpenAILLMContext(messages, tools)
context_aggregator = llm.create_context_aggregator(context)
```
To this:
```python
from pipecat.processors.aggregators.llm_context import LLMContext
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
context = LLMContext(messages, tools)
context_aggregator = LLMContextAggregatorPair(context)
```

View File

@@ -1 +0,0 @@
- Added `group_parallel_tools` parameter to `LLMService` (default `True`). When `True`, all function calls from the same LLM response batch share a group ID and the LLM is triggered exactly once after the last call completes. Set to `False` to trigger inference independently for each function call result as it arrives.

View File

@@ -1 +0,0 @@
- Added `is_async=True` support to `register_function()` and `register_direct_function()`. When enabled, the LLM continues the conversation immediately without waiting for the function result. The result is injected back into the context as a `developer` message once available, triggering a new LLM inference at that point.

View File

@@ -1 +0,0 @@
- When multiple function calls are returned in a single LLM response, the LLM is now triggered exactly once after the last call in the batch completes, rather than waiting for all function calls.

View File

@@ -1 +0,0 @@
- Fixed `BaseOutputTransport` discarding pending `UninterruptibleFrame` items (e.g. function-call context updates) when an interruption arrived. The audio task is now kept alive and only interruptible frames are drained when uninterruptible frames are present in the queue.

View File

@@ -1 +0,0 @@
- Fixed spurious LLM inference being triggered when a function call result arrived while the user was actively speaking. The context frame is now suppressed until the user stops speaking.

View File

@@ -1 +0,0 @@
- Fixed an issue where `UninterruptibleFrame` items queued in `FrameProcessor` could be incorrectly dropped on interruption. Previously only the frame currently being processed was checked; now the entire process queue is scanned so pending uninterruptible frames are always delivered.

View File

@@ -2,7 +2,7 @@
# Build docs using uv
echo "Installing dependencies with uv..."
uv sync --group docs --all-extras --no-extra gstreamer --no-extra local_smart_turn --no-extra moondream --no-extra riva --no-extra mlx-whisper
uv sync --group docs --all-extras --no-extra krisp --no-extra gstreamer --no-extra local_smart_turn --no-extra moondream --no-extra riva --no-extra mlx-whisper
# Check if sphinx-build is available
if ! uv run sphinx-build --version &> /dev/null; then

View File

@@ -48,6 +48,8 @@ autodoc_default_options = {
# Mock imports for optional dependencies
autodoc_mock_imports = [
# Krisp - has build issues on some platforms
"pipecat_ai_krisp",
"krisp",
"krisp_audio",
# System-specific GUI libraries
"_tkinter",
@@ -96,6 +98,7 @@ autodoc_mock_imports = [
"cartesia",
"camb",
"sarvamai",
"openpipe",
"openai.types.beta.realtime",
"langchain_core",
"langchain_core.messages",

View File

@@ -80,6 +80,9 @@ GOOGLE_TEST_CREDENTIALS=...
# Gradium
GRADIUM_API_KEY=...
# Grok
GROK_API_KEY=...
# Groq
GROQ_API_KEY=...
@@ -121,21 +124,18 @@ MINIMAX_GROUP_ID=...
# Mistral
MISTRAL_API_KEY=...
# Nebius
NEBIUS_API_KEY=...
# Neuphonic
NEUPHONIC_API_KEY=...
# Novita
NOVITA_API_KEY=...
# NVIDIA
NVIDIA_API_KEY=...
# OpenAI
OPENAI_API_KEY=...
# OpenPipe
OPENPIPE_API_KEY=...
# OpenRouter
OPENROUTER_API_KEY=...
@@ -176,9 +176,6 @@ SENTRY_DSN=...
SIMLI_API_KEY=...
SIMLI_FACE_ID=...
# Smallest
SMALLEST_API_KEY=...
# Smart turn
LOCAL_SMART_TURN_MODEL_PATH=...
FAL_SMART_TURN_API_KEY=...
@@ -212,6 +209,3 @@ WHATSAPP_TOKEN=...
WHATSAPP_WEBHOOK_VERIFICATION_TOKEN=...
WHATSAPP_PHONE_NUMBER_ID=...
WHATSAPP_APP_SECRET=...
# xAI / Grok
XAI_API_KEY=...

View File

@@ -1,150 +1,31 @@
# Pipecat Examples
This directory contains examples showing how to build voice and multimodal agents with Pipecat.
This directory contains examples to help you learn how to build with Pipecat.
## Setup
## Getting Started
1. Follow the [README](https://github.com/pipecat-ai/pipecat/blob/main/README.md#%EF%B8%8F-contributing-to-the-framework) steps to get your local environment configured.
New to Pipecat? Start here:
> **Run from root directory**: Make sure you are running the steps from the root directory.
- **[Quickstart](quickstart/)** - Get your first voice AI bot running in 5 minutes _(coming soon)_
- **[Client/Server Web](client-server-web/)** - Learn to build web applications with Pipecat's client SDKs _(coming soon)_
- **[Phone Bot with Twilio](phone-bot-twilio/)** - Connect your bot to a phone number _(coming soon)_
> **Using local audio?**: The `LocalAudioTransport` requires a system dependency for `portaudio`. Install the dependency to use the transport.
## Foundational Examples
2. Copy the [`env.example`](../env.example) file and add API keys for services you plan to use:
Single-file examples that introduce core Pipecat concepts one at a time. These examples:
```bash
cp env.example .env
# Edit .env with your API keys
```
- Build on each other progressively
- Focus on specific features or integrations
- Are used for testing with every Pipecat release
3. Run any example:
See the **[Foundational Examples README](foundational/)** for the complete list.
```bash
uv run python getting-started/01-say-one-thing.py
```
## More Advanced Examples
4. Open the web interface at http://localhost:7860/client/ and click "Connect"
Ready to explore complex use cases? Visit **[pipecat-examples](https://github.com/pipecat-ai/pipecat-examples)** for:
## Running examples with other transports
Most examples support running with other transports, like Twilio or Daily.
### Daily
You need to create a Daily account at https://dashboard.daily.co/u/signup. Once signed up, you can create your own room from the dashboard and set the environment variables `DAILY_ROOM_URL` and `DAILY_API_KEY`. Alternatively, you can let the example create a room for you (still needs `DAILY_API_KEY` environment variable). Then, start any example with `-t daily`:
```bash
uv run getting-started/06-voice-agent.py -t daily
```
### Twilio
It is also possible to run the example through a Twilio phone number. You will need to setup a few things:
1. Install and run [ngrok](https://ngrok.com/download).
```bash
ngrok http 7860
```
2. Configure your Twilio phone number. One way is to setup a TwiML app and set the request URL to the ngrok URL from step (1). Then, set your phone number to use the new TwiML app.
Then, run the example with:
```bash
uv run getting-started/06-voice-agent.py -t twilio -x NGROK_HOST_NAME
```
## Directory Structure
### [`getting-started/`](./getting-started/)
Progressive introduction to Pipecat, from minimal TTS to a full voice agent with function calling.
### [`voice/`](./voice/)
Full STT + LLM + TTS voice agent pipelines showcasing different speech service providers (Deepgram, ElevenLabs, Cartesia, etc.)
### [`function-calling/`](./function-calling/)
Function calling with different LLM providers (OpenAI, Anthropic, Google, etc.)
### [`transcription/`](./transcription/)
Speech-to-text examples with various STT providers.
### [`vision/`](./vision/)
Image description and vision capabilities with different multimodal LLMs.
### [`realtime/`](./realtime/)
Realtime and multimodal live APIs (OpenAI Realtime, Gemini Live, AWS Nova Sonic, Ultravox, Grok).
### [`persistent-context/`](./persistent-context/)
Maintaining conversation context across sessions with different providers.
### [`context-summarization/`](./context-summarization/)
Summarizing conversation context to manage token limits.
### [`update-settings/`](./update-settings/)
Changing service settings at runtime, organized by service type:
- **[`stt/`](./update-settings/stt/)** — Speech-to-text settings
- **[`tts/`](./update-settings/tts/)** — Text-to-speech settings
- **[`llm/`](./update-settings/llm/)** — LLM settings
### [`turn-management/`](./turn-management/)
Turn detection, interruption handling, and user input management.
### [`thinking-and-mcp/`](./thinking-and-mcp/)
LLM thinking/reasoning modes and MCP (Model Context Protocol) tool server integration.
### [`transports/`](./transports/)
Transport layer examples (WebRTC, Daily, LiveKit).
### [`video-avatar/`](./video-avatar/)
Video avatar integrations (Tavus, HeyGen, Simli, LemonSlice).
### [`video-processing/`](./video-processing/)
Video processing, mirroring, GStreamer, and custom video tracks.
### [`audio/`](./audio/)
Audio recording, background sounds, and sound effects.
### [`observability/`](./observability/)
Pipeline monitoring: observers, heartbeats, and Sentry metrics.
### [`rag/`](./rag/)
Retrieval-augmented generation, grounding, and long-term memory (Mem0, Gemini).
### [`features/`](./features/)
Miscellaneous features: wake phrases, live translation, service switching, voice switching, and more.
## Advanced Usage
### Customizing Network Settings
```bash
uv run python <example-name> --host 0.0.0.0 --port 8080
```
### Troubleshooting
- **No audio/video**: Check browser permissions for microphone and camera
- **Connection errors**: Verify API keys in `.env` file
- **Port conflicts**: Use `--port` to change the port
For more examples, visit the [pipecat-examples repository](https://github.com/pipecat-ai/pipecat-examples).
- Production-ready applications
- Multi-platform client implementations
- Telephony integrations
- Multimodal and creative applications
- Deployment and monitoring examples

View File

@@ -0,0 +1,71 @@
#
# Copyright (c) 2024-2026, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import os
import aiohttp
from dotenv import load_dotenv
from loguru import logger
from pipecat.frames.frames import EndFrame, TTSSpeakFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineTask
from pipecat.runner.types import RunnerArguments
from pipecat.runner.utils import create_transport
from pipecat.services.piper.tts import PiperHttpTTSService
from pipecat.transports.base_transport import BaseTransport, TransportParams
from pipecat.transports.daily.transport import DailyParams
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
# Populate the environment via python-dotenv before the os.getenv() lookups
# performed further down in this script.
load_dotenv(override=True)
# We use lambdas to defer transport parameter creation until the transport
# type is selected at runtime.
#
# Keys are the transport names handled by this example; each value is a
# zero-argument factory returning that transport's params object. Only audio
# output is enabled, since this bot speaks and never listens. The mapping is
# passed to `create_transport()` in `bot()`.
transport_params = {
    "daily": lambda: DailyParams(audio_out_enabled=True),
    "twilio": lambda: FastAPIWebsocketParams(audio_out_enabled=True),
    "webrtc": lambda: TransportParams(audio_out_enabled=True),
}
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    """Speak a single greeting through Piper TTS once a client connects.

    Builds a two-stage pipeline (Piper HTTP TTS -> transport output), registers
    an ``on_client_connected`` handler that queues the greeting followed by an
    ``EndFrame``, and runs the pipeline task until the runner returns.

    Args:
        transport: Transport whose ``output()`` processor receives the audio.
        runner_args: Runner arguments; ``pipeline_idle_timeout_secs`` and
            ``handle_sigint`` are read from it.
    """
    logger.info("Starting bot")

    # The HTTP session is shared with the TTS service, so the whole pipeline
    # run stays inside this context manager.
    async with aiohttp.ClientSession() as session:
        tts = PiperHttpTTSService(
            base_url=os.getenv("PIPER_BASE_URL"),
            aiohttp_session=session,
            sample_rate=24000,
        )

        task = PipelineTask(
            Pipeline([tts, transport.output()]),
            idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
        )

        # Register an event handler so we can play the audio when the client joins
        @transport.event_handler("on_client_connected")
        async def on_client_connected(transport, client):
            # Greeting first, then EndFrame (presumably ends the pipeline once
            # the greeting has been handled -- confirm against frame docs).
            await task.queue_frames([TTSSpeakFrame("Hello there!"), EndFrame()])

        runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)

        await runner.run(task)
async def bot(runner_args: RunnerArguments):
    """Pipecat Cloud-compatible entry point: create the transport, then run the bot."""
    await run_bot(
        await create_transport(runner_args, transport_params),
        runner_args,
    )
# Script entry point: delegate to the Pipecat runner's `main()`.
# NOTE(review): presumably the runner locates and invokes `bot()` above -- confirm.
if __name__ == "__main__":
    # The import is local to the guard, so it only happens when run as a script.
    from pipecat.runner.run import main
    main()

View File

@@ -0,0 +1,72 @@
#
# Copyright (c) 2024-2026, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import os
import aiohttp
from dotenv import load_dotenv
from loguru import logger
from pipecat.frames.frames import EndFrame, TTSSpeakFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineTask
from pipecat.runner.types import RunnerArguments
from pipecat.runner.utils import create_transport
from pipecat.services.rime.tts import RimeHttpTTSService
from pipecat.transports.base_transport import BaseTransport, TransportParams
from pipecat.transports.daily.transport import DailyParams
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
# Populate the environment via python-dotenv before the os.getenv() lookups
# performed further down in this script.
load_dotenv(override=True)
# We use lambdas to defer transport parameter creation until the transport
# type is selected at runtime.
#
# Keys are the transport names handled by this example; each value is a
# zero-argument factory returning that transport's params object. Only audio
# output is enabled, since this bot speaks and never listens. The mapping is
# passed to `create_transport()` in `bot()`.
transport_params = {
    "daily": lambda: DailyParams(audio_out_enabled=True),
    "twilio": lambda: FastAPIWebsocketParams(audio_out_enabled=True),
    "webrtc": lambda: TransportParams(audio_out_enabled=True),
}
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    """Speak a single greeting through Rime TTS once a client connects.

    Builds a two-stage pipeline (Rime HTTP TTS -> transport output), registers
    an ``on_client_connected`` handler that queues the greeting followed by an
    ``EndFrame``, and runs the pipeline task until the runner returns.

    Args:
        transport: Transport whose ``output()`` processor receives the audio.
        runner_args: Runner arguments; ``pipeline_idle_timeout_secs`` and
            ``handle_sigint`` are read from it.
    """
    logger.info("Starting bot")

    # The HTTP session is shared with the TTS service, so the whole pipeline
    # run stays inside this context manager.
    async with aiohttp.ClientSession() as session:
        tts = RimeHttpTTSService(
            # Falls back to an empty string when RIME_API_KEY is unset.
            api_key=os.getenv("RIME_API_KEY", ""),
            aiohttp_session=session,
            settings=RimeHttpTTSService.Settings(
                voice="rex",
            ),
        )

        task = PipelineTask(
            Pipeline([tts, transport.output()]),
            idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
        )

        # Register an event handler so we can play the audio when the client joins
        @transport.event_handler("on_client_connected")
        async def on_client_connected(transport, client):
            # Greeting first, then EndFrame (presumably ends the pipeline once
            # the greeting has been handled -- confirm against frame docs).
            await task.queue_frames([TTSSpeakFrame("Hello there!"), EndFrame()])

        runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)

        await runner.run(task)
async def bot(runner_args: RunnerArguments):
    """Pipecat Cloud-compatible entry point: create the transport, then run the bot."""
    await run_bot(
        await create_transport(runner_args, transport_params),
        runner_args,
    )
# Script entry point: delegate to the Pipecat runner's `main()`.
# NOTE(review): presumably the runner locates and invokes `bot()` above -- confirm.
if __name__ == "__main__":
    # The import is local to the guard, so it only happens when run as a script.
    from pipecat.runner.run import main
    main()

View File

@@ -0,0 +1,64 @@
#
# Copyright (c) 2024-2026, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import os
import sys
from dotenv import load_dotenv
from loguru import logger
from pipecat.frames.frames import TTSSpeakFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineTask
from pipecat.runner.livekit import configure
from pipecat.services.cartesia.tts import CartesiaTTSService
from pipecat.transports.livekit.transport import LiveKitParams, LiveKitTransport
# Populate the environment via python-dotenv before the os.getenv() lookups
# performed further down in this script.
load_dotenv(override=True)
# Drop loguru handler id 0 (presumably the default stderr sink -- confirm) and
# attach a stderr sink at DEBUG level in its place.
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
async def main():
    """Join a LiveKit room and greet the first participant with Cartesia TTS.

    Room credentials come from ``configure()``. The pipeline is TTS followed by
    the transport output; the greeting is queued one second after the first
    participant joins, and the task is run until the runner returns.
    """
    url, token, room_name = await configure()

    livekit_params = LiveKitParams(audio_out_enabled=True)
    transport = LiveKitTransport(
        url=url,
        token=token,
        room_name=room_name,
        params=livekit_params,
    )

    cartesia_key = os.getenv("CARTESIA_API_KEY")
    voice_settings = CartesiaTTSService.Settings(
        voice="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
    )
    tts = CartesiaTTSService(api_key=cartesia_key, settings=voice_settings)

    runner = PipelineRunner()

    pipeline = Pipeline([tts, transport.output()])
    task = PipelineTask(pipeline)

    # Play the greeting once somebody is actually in the room to hear it.
    @transport.event_handler("on_first_participant_joined")
    async def on_first_participant_joined(transport, participant_id):
        await asyncio.sleep(1)
        greeting = TTSSpeakFrame(
            "Hello there! How are you doing today? Would you like to talk about the weather?"
        )
        await task.queue_frame(greeting)

    await runner.run(task)
# Script entry point: run the async `main()` coroutine to completion.
if __name__ == "__main__":
    asyncio.run(main())

View File

@@ -0,0 +1,64 @@
#
# Copyright (c) 2024-2026, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import os
from dotenv import load_dotenv
from loguru import logger
from pipecat.frames.frames import EndFrame, TTSSpeakFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineTask
from pipecat.runner.types import RunnerArguments
from pipecat.runner.utils import create_transport
from pipecat.services.nvidia.tts import NvidiaTTSService
from pipecat.transports.base_transport import BaseTransport, TransportParams
from pipecat.transports.daily.transport import DailyParams
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
# Populate the environment via python-dotenv before the os.getenv() lookups
# performed further down in this script.
load_dotenv(override=True)
# We use lambdas to defer transport parameter creation until the transport
# type is selected at runtime.
#
# Keys are the transport names handled by this example; each value is a
# zero-argument factory returning that transport's params object. Only audio
# output is enabled, since this bot speaks and never listens. The mapping is
# passed to `create_transport()` in `bot()`.
transport_params = {
    "daily": lambda: DailyParams(audio_out_enabled=True),
    "twilio": lambda: FastAPIWebsocketParams(audio_out_enabled=True),
    "webrtc": lambda: TransportParams(audio_out_enabled=True),
}
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    """Speak a single greeting through NVIDIA TTS once a client connects.

    Builds a two-stage pipeline (NVIDIA TTS -> transport output), registers an
    ``on_client_connected`` handler that queues the greeting followed by an
    ``EndFrame``, and runs the pipeline task until the runner returns.

    Args:
        transport: Transport whose ``output()`` processor receives the audio.
        runner_args: Runner arguments; ``pipeline_idle_timeout_secs`` and
            ``handle_sigint`` are read from it.
    """
    logger.info("Starting bot")

    tts = NvidiaTTSService(api_key=os.getenv("NVIDIA_API_KEY"))

    task = PipelineTask(
        Pipeline([tts, transport.output()]),
        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    # Register an event handler so we can play the audio when the client joins
    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        # Greeting first, then EndFrame (presumably ends the pipeline once
        # the greeting has been handled -- confirm against frame docs).
        await task.queue_frames([TTSSpeakFrame("Hello there!"), EndFrame()])

    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)

    await runner.run(task)
async def bot(runner_args: RunnerArguments):
    """Pipecat Cloud-compatible entry point: create the transport, then run the bot."""
    await run_bot(
        await create_transport(runner_args, transport_params),
        runner_args,
    )
# Script entry point: delegate to the Pipecat runner's `main()`.
# NOTE(review): presumably the runner locates and invokes `bot()` above -- confirm.
if __name__ == "__main__":
    # The import is local to the guard, so it only happens when run as a script.
    from pipecat.runner.run import main
    main()

View File

@@ -60,7 +60,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
@transport.event_handler("on_client_connected")
async def on_client_connected(transport, client):
context = LLMContext()
context.add_message({"role": "developer", "content": "Say hello to the world."})
context.add_message({"role": "user", "content": "Say hello to the world."})
await task.queue_frames([LLMContextFrame(context), EndFrame()])
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)

View File

@@ -0,0 +1,84 @@
#
# Copyright (c) 2024-2026, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import os
import aiohttp
from dotenv import load_dotenv
from loguru import logger
from pipecat.frames.frames import TextFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineTask
from pipecat.runner.types import RunnerArguments
from pipecat.runner.utils import create_transport
from pipecat.services.fal.image import FalImageGenService
from pipecat.transports.base_transport import BaseTransport, TransportParams
from pipecat.transports.daily.transport import DailyParams
load_dotenv(override=True)
# Maps a transport name to a zero-argument factory for that transport's
# parameter object. We use lambdas to defer transport parameter creation
# until the transport type is selected at runtime.
# Both entries enable video output at 1024x1024.
transport_params = {
    "daily": lambda: DailyParams(
        video_out_enabled=True,
        video_out_width=1024,
        video_out_height=1024,
    ),
    "webrtc": lambda: TransportParams(
        video_out_enabled=True,
        video_out_width=1024,
        video_out_height=1024,
    ),
}
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments) -> None:
    """Run an image-generation pipeline driven by a fixed text prompt.

    The pipeline consists of the fal image generation service followed by
    ``transport.output()``. When the transport fires ``on_client_connected``,
    a ``TextFrame`` carrying the prompt is queued on the task; when it fires
    ``on_client_disconnected``, the task is cancelled.

    Args:
        transport: Transport whose ``output()`` is the last pipeline stage and
            on which the connection event handlers are registered.
        runner_args: Supplies ``pipeline_idle_timeout_secs`` for the task and
            ``handle_sigint`` for the runner.
    """
    logger.info("Starting bot")

    # Create an HTTP session. The whole pipeline run stays inside this
    # context so the session handed to the image service is still open
    # while the pipeline is running.
    async with aiohttp.ClientSession() as session:
        # os.getenv returns None if FAL_KEY is unset.
        imagegen = FalImageGenService(
            settings=FalImageGenService.Settings(
                image_size="square_hd",
            ),
            aiohttp_session=session,
            key=os.getenv("FAL_KEY"),
        )

        task = PipelineTask(
            Pipeline([imagegen, transport.output()]),
            idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
        )

        # Register an event handler so we can queue the image prompt when
        # the client joins.
        @transport.event_handler("on_client_connected")
        async def on_client_connected(transport, client):
            await task.queue_frame(TextFrame("a cat in the style of picasso"))

        # Stop the pipeline once the client has gone away.
        @transport.event_handler("on_client_disconnected")
        async def on_client_disconnected(transport, client):
            logger.info("Client disconnected")
            await task.cancel()

        runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)

        await runner.run(task)
async def bot(runner_args: RunnerArguments):
    """Bot entry point; the signature is compatible with Pipecat Cloud.

    Creates the transport from ``runner_args`` and the module-level
    ``transport_params`` table, then passes it to ``run_bot``.
    """
    await run_bot(
        await create_transport(runner_args, transport_params),
        runner_args,
    )
if __name__ == "__main__":
    # The runner's entry point is imported only when this file is executed
    # directly as a script, not when the module is imported.
    from pipecat.runner.run import main
    main()

View File

@@ -109,9 +109,7 @@ async def run_example(webrtc_connection: SmallWebRTCConnection):
async def on_client_connected(transport, client):
logger.info(f"Client connected")
# Kick off the conversation.
context.add_message(
{"role": "developer", "content": "Please introduce yourself to the user."}
)
context.add_message({"role": "user", "content": "Please introduce yourself to the user."})
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")

Some files were not shown because too many files have changed in this diff Show More