Compare commits
216 Commits
vp-fix/mcp
...
mb/fix-gem
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3077395ffc | ||
|
|
2e5e109bb6 | ||
|
|
677ca04a18 | ||
|
|
e8c3f73968 | ||
|
|
74d11dc0aa | ||
|
|
6d3dfd8f64 | ||
|
|
ce9c214eec | ||
|
|
8c8b76e9d2 | ||
|
|
42a6fc703c | ||
|
|
c5c18335fd | ||
|
|
3159503c7f | ||
|
|
0340e25e9f | ||
|
|
af861b7975 | ||
|
|
6bb4e8295f | ||
|
|
f5f92dea63 | ||
|
|
cb1463f9f1 | ||
|
|
4c19f5584c | ||
|
|
ab91047300 | ||
|
|
3127cc6161 | ||
|
|
36319ecbf0 | ||
|
|
c6a1837844 | ||
|
|
aa355e3d32 | ||
|
|
9bd51cd88c | ||
|
|
fc1c3b48dc | ||
|
|
4278a37ebc | ||
|
|
7e045257e8 | ||
|
|
b8a1f45d4c | ||
|
|
8ec85f981d | ||
|
|
2f52905d32 | ||
|
|
f86cf98c6d | ||
|
|
84fcba772d | ||
|
|
b3bb6fdaa5 | ||
|
|
12b8af3d89 | ||
|
|
1c4ffb7845 | ||
|
|
8d4feede23 | ||
|
|
b11a3bc43f | ||
|
|
8dce66933f | ||
|
|
7291026695 | ||
|
|
686e250db1 | ||
|
|
e8d6f611cd | ||
|
|
f094ce80fb | ||
|
|
9fbe1bf2a3 | ||
|
|
d8b0e78bc8 | ||
|
|
675b7df408 | ||
|
|
30f39d7395 | ||
|
|
fe2ef9c712 | ||
|
|
173cf39aee | ||
|
|
ac43a70d36 | ||
|
|
8e4fd10e0f | ||
|
|
aeab417cd1 | ||
|
|
d263ad3c34 | ||
|
|
f3c454dc54 | ||
|
|
fc63790657 | ||
|
|
9ffcccdd84 | ||
|
|
503782c8b2 | ||
|
|
b834a893fe | ||
|
|
ba023248d9 | ||
|
|
457f55e99a | ||
|
|
f8318289d4 | ||
|
|
958d90819f | ||
|
|
403235eb48 | ||
|
|
698c2ba92e | ||
|
|
f013d5632b | ||
|
|
570849955c | ||
|
|
84b885682f | ||
|
|
989fb4deaa | ||
|
|
ab74605a26 | ||
|
|
49998d252b | ||
|
|
84566c1110 | ||
|
|
45aa95fa10 | ||
|
|
d1f7af0330 | ||
|
|
31b5a64382 | ||
|
|
d20013d7a6 | ||
|
|
804e3ea9ec | ||
|
|
a14d257cf2 | ||
|
|
a8660aabfe | ||
|
|
7dc763d512 | ||
|
|
36b15c92ef | ||
|
|
64ed0aae13 | ||
|
|
be81dac723 | ||
|
|
d942a713af | ||
|
|
e248c4c049 | ||
|
|
1d5dcf1698 | ||
|
|
f45a410f56 | ||
|
|
e38647151d | ||
|
|
1a02b5d61a | ||
|
|
4254c1f0e0 | ||
|
|
f91a113de7 | ||
|
|
e553bb010f | ||
|
|
245339e885 | ||
|
|
812cdc6822 | ||
|
|
153814ecc2 | ||
|
|
b1204cc430 | ||
|
|
c542167065 | ||
|
|
02116c58de | ||
|
|
dcd21e7ff4 | ||
|
|
5356f3028b | ||
|
|
cb2c1868b0 | ||
|
|
dac88c0a47 | ||
|
|
8e5fe8afda | ||
|
|
d07eebff20 | ||
|
|
ef4dcca4f1 | ||
|
|
fc3307bc63 | ||
|
|
da9a55a430 | ||
|
|
094d36904c | ||
|
|
746fadc2b5 | ||
|
|
8cce25d2d2 | ||
|
|
891f00cb5f | ||
|
|
1ca094dad7 | ||
|
|
346c585290 | ||
|
|
c134110399 | ||
|
|
f9117e6d4a | ||
|
|
360e4480e0 | ||
|
|
9b7e15c9bc | ||
|
|
00ea86fda8 | ||
|
|
5f75728207 | ||
|
|
9d274f0fb3 | ||
|
|
43ddbdf1ec | ||
|
|
565349d332 | ||
|
|
2dd1170229 | ||
|
|
5cf90cba98 | ||
|
|
981b7bdcb7 | ||
|
|
c4320e7f07 | ||
|
|
ea0be4d39c | ||
|
|
dca4e1090a | ||
|
|
ec574edd53 | ||
|
|
772fb57090 | ||
|
|
76601944c6 | ||
|
|
178985ec8a | ||
|
|
edc197d050 | ||
|
|
7ece8e3c4a | ||
|
|
7b45a56119 | ||
|
|
a544f885a3 | ||
|
|
375deac912 | ||
|
|
699ca38dc1 | ||
|
|
aeda60f761 | ||
|
|
b010dd58d2 | ||
|
|
225ea907d5 | ||
|
|
1443dfb070 | ||
|
|
4bef85e363 | ||
|
|
215b2dc7f3 | ||
|
|
874e2878be | ||
|
|
9131fa5c12 | ||
|
|
68a3070ad4 | ||
|
|
a7bf9f538c | ||
|
|
0acfb4dd49 | ||
|
|
8594401024 | ||
|
|
aa7a014518 | ||
|
|
27a8a973b1 | ||
|
|
8abda808ca | ||
|
|
7f3f23dcb9 | ||
|
|
be509e5647 | ||
|
|
9f0b18b03d | ||
|
|
6eccd16543 | ||
|
|
d8dc6bc7d0 | ||
|
|
d12a8529e2 | ||
|
|
aa061f7e2c | ||
|
|
e863293198 | ||
|
|
9c7d5a9de2 | ||
|
|
a451c42dc7 | ||
|
|
bc009d8f98 | ||
|
|
67ee802772 | ||
|
|
ceaa27ee6e | ||
|
|
42335e2ef0 | ||
|
|
7585864113 | ||
|
|
18852adc28 | ||
|
|
f11b6d7151 | ||
|
|
9df1e18b43 | ||
|
|
b8f9a21e0c | ||
|
|
c18d997ad8 | ||
|
|
56aaebe1b0 | ||
|
|
916af84974 | ||
|
|
3e911b5fa0 | ||
|
|
7c08779a2f | ||
|
|
988c08a5b6 | ||
|
|
7351298849 | ||
|
|
392134be46 | ||
|
|
9266e1e7ad | ||
|
|
e9eff4626f | ||
|
|
21aa50283e | ||
|
|
70469e3c0c | ||
|
|
6111df947e | ||
|
|
4eebfd65d9 | ||
|
|
c2358b273b | ||
|
|
3a10a528c0 | ||
|
|
f078b8b867 | ||
|
|
5490820338 | ||
|
|
10697636c9 | ||
|
|
e1638a9342 | ||
|
|
bfffefa95c | ||
|
|
fbb49ffc8d | ||
|
|
eace782752 | ||
|
|
b94071d37f | ||
|
|
796a10fe9c | ||
|
|
1ab07d312f | ||
|
|
8adb38f87c | ||
|
|
33f145d70a | ||
|
|
41e46ee69e | ||
|
|
60933b7a56 | ||
|
|
64e09d592e | ||
|
|
883de8ab08 | ||
|
|
793ed8f9e3 | ||
|
|
d8ea33e1a4 | ||
|
|
1d7404ef21 | ||
|
|
7af72eee3e | ||
|
|
bbb605accc | ||
|
|
929a0e33f4 | ||
|
|
3724ecd378 | ||
|
|
4c8734c5e1 | ||
|
|
283f6df205 | ||
|
|
f6a3678f93 | ||
|
|
3af93ed257 | ||
|
|
f37bf989dd | ||
|
|
86a16d53bc | ||
|
|
fa982a05c0 | ||
|
|
419c7d4450 |
@@ -1,30 +0,0 @@
|
||||
# flyctl launch added from .gitignore
|
||||
**/.vscode
|
||||
**/env
|
||||
**/__pycache__
|
||||
**/*~
|
||||
**/venv
|
||||
#*#
|
||||
|
||||
# Distribution / packaging
|
||||
**/.Python
|
||||
**/build
|
||||
**/develop-eggs
|
||||
**/dist
|
||||
**/downloads
|
||||
**/eggs
|
||||
**/.eggs
|
||||
**/lib
|
||||
**/lib64
|
||||
**/parts
|
||||
**/sdist
|
||||
**/var
|
||||
**/wheels
|
||||
**/share/python-wheels
|
||||
**/*.egg-info
|
||||
**/.installed.cfg
|
||||
**/*.egg
|
||||
**/MANIFEST
|
||||
**/.DS_Store
|
||||
**/.env
|
||||
fly.toml
|
||||
4
.github/workflows/format.yaml
vendored
4
.github/workflows/format.yaml
vendored
@@ -41,3 +41,7 @@ jobs:
|
||||
- name: Ruff linter (all rules)
|
||||
id: ruff-check
|
||||
run: uv run ruff check
|
||||
|
||||
- name: Type check (pyright)
|
||||
id: pyright
|
||||
run: uv run pyright
|
||||
|
||||
@@ -11,7 +11,7 @@ build:
|
||||
jobs:
|
||||
post_install:
|
||||
- pip install uv
|
||||
- UV_PROJECT_ENVIRONMENT=$READTHEDOCS_VIRTUALENV_PATH uv sync --group docs --all-extras --no-extra gstreamer --no-extra local_smart_turn --no-extra moondream --no-extra riva --no-extra mlx-whisper
|
||||
- UV_PROJECT_ENVIRONMENT=$READTHEDOCS_VIRTUALENV_PATH uv sync --group docs --all-extras --no-extra gstreamer --no-extra local_smart_turn --no-extra moondream --no-extra mlx-whisper
|
||||
|
||||
sphinx:
|
||||
configuration: docs/api/conf.py
|
||||
|
||||
678
CHANGELOG.md
678
CHANGELOG.md
@@ -7,6 +7,684 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
<!-- towncrier release notes start -->
|
||||
|
||||
## [1.0.0] - 2026-04-14
|
||||
|
||||
Migration guide: https://docs.pipecat.ai/pipecat/migration/migration-1.0
|
||||
|
||||
### Added
|
||||
|
||||
- Updated LemonSlice transport:
|
||||
- Added `on_avatar_connected` and `on_avatar_disconnected` events triggered
|
||||
when the avatar joins and leaves the room.
|
||||
- Added `api_url` parameter to `LemonSliceNewSessionRequest` to allow
|
||||
overriding the LemonSlice API endpoint.
|
||||
- Added support for passing arbitrary named parameters to the LemonSlice
|
||||
API endpoint.
|
||||
(PR [#3995](https://github.com/pipecat-ai/pipecat/pull/3995))
|
||||
|
||||
- Added Inworld Realtime LLM service with WebSocket-based cascade STT/LLM/TTS,
|
||||
semantic VAD, function calling, and Router support.
|
||||
(PR [#4140](https://github.com/pipecat-ai/pipecat/pull/4140))
|
||||
|
||||
- ⚠️ Added WebSocket-based `OpenAIResponsesLLMService` as the new default for
|
||||
the OpenAI Responses API. It maintains a persistent connection to
|
||||
`wss://api.openai.com/v1/responses` and automatically uses
|
||||
`previous_response_id` to send only incremental context, falling back to full
|
||||
context on reconnection or cache miss. The previous HTTP-based implementation
|
||||
is now available as `OpenAIResponsesHttpLLMService`.
|
||||
(PR [#4141](https://github.com/pipecat-ai/pipecat/pull/4141))
|
||||
|
||||
- Added `group_parallel_tools` parameter to `LLMService` (default `True`). When
|
||||
`True`, all function calls from the same LLM response batch share a group ID
|
||||
and the LLM is triggered exactly once after the last call completes. Set to
|
||||
`False` to trigger inference independently for each function call result as
|
||||
it arrives.
|
||||
(PR [#4217](https://github.com/pipecat-ai/pipecat/pull/4217))
|
||||
|
||||
- Added async function call support to `register_function()` and
|
||||
`register_direct_function()` via `cancel_on_interruption=False`. When set to
|
||||
`False`, the LLM continues the conversation immediately without waiting for
|
||||
the function result. The result is injected back into the context as a
|
||||
`developer` message once available, triggering a new LLM inference at that
|
||||
point.
|
||||
(PR [#4217](https://github.com/pipecat-ai/pipecat/pull/4217))
|
||||
|
||||
- Added `enable_prompt_caching` setting to `AWSBedrockLLMService` for Bedrock
|
||||
ConverseStream prompt caching.
|
||||
(PR [#4219](https://github.com/pipecat-ai/pipecat/pull/4219))
|
||||
|
||||
- Added support for streaming intermediate results from async function calls.
|
||||
Call `result_callback` multiple times with
|
||||
`properties=FunctionCallResultProperties(is_final=False)` to push incremental
|
||||
updates, then call it once more (with `is_final=True`, the default) to
|
||||
deliver the final result. Only valid for functions registered with
|
||||
`cancel_on_interruption=False`.
|
||||
(PR [#4230](https://github.com/pipecat-ai/pipecat/pull/4230))
|
||||
|
||||
- Added `LLMMessagesTransformFrame` to facilitate programmatically editing
|
||||
context in a frame-based way.
|
||||
|
||||
The previous approach required the caller to directly grab a reference to
|
||||
the context object, grab a "snapshot" of its messages _at that point in
|
||||
time_, transform the messages, and then push an `LLMMessagesUpdateFrame` with
|
||||
the transformed messages. This approach can lead to problems: what if there
|
||||
had already been a change to the context queued in the pipeline? The
|
||||
transformed messages would simply overwrite it without consideration.
|
||||
(PR [#4231](https://github.com/pipecat-ai/pipecat/pull/4231))
|
||||
|
||||
- The development runner now exports a module-level `app` FastAPI instance
|
||||
(`from pipecat.runner.run import app`) so you can register custom routes
|
||||
before calling `main()`.
|
||||
(PR [#4234](https://github.com/pipecat-ai/pipecat/pull/4234))
|
||||
|
||||
- `ToolsSchema` now accepts `custom_tools` for OpenAI LLM services
|
||||
(`OpenAILLMService`, `OpenAIResponsesLLMService`,
|
||||
`OpenAIResponsesHttpLLMService`, and `OpenAIRealtimeLLMService`), letting you
|
||||
pass provider-specific tools like `tool_search` alongside standard function
|
||||
tools.
|
||||
(PR [#4248](https://github.com/pipecat-ai/pipecat/pull/4248))
|
||||
|
||||
- Added enhancements to `NvidiaTTSService`:
|
||||
|
||||
- Cross-sentence stitching: multiple sentences within an LLM turn are fed
|
||||
into a single `SynthesizeOnline` gRPC stream for seamless audio across
|
||||
sentence boundaries (requires Magpie TTS model v1.7.0+).
|
||||
- `custom_dictionary` and `encoding` parameters for IPA-based custom
|
||||
pronunciation and output audio encoding.
|
||||
- Metrics generation (`can_generate_metrics` returns `True`) and
|
||||
`stop_all_metrics()` when an audio context is interrupted.
|
||||
- gRPC error handling around synthesis config retrieval
|
||||
(`GetRivaSynthesisConfig`).
|
||||
(PR [#4249](https://github.com/pipecat-ai/pipecat/pull/4249))
|
||||
|
||||
- Added `MistralTTSService` for streaming text-to-speech using Mistral's
|
||||
Voxtral TTS API (`voxtral-mini-tts-2603`). Supports SSE-based audio streaming
|
||||
with automatic resampling from the API's native 24kHz to any requested sample
|
||||
rate. Requires the `mistral` optional extra (`pip install
|
||||
pipecat-ai[mistral]`).
|
||||
(PR [#4251](https://github.com/pipecat-ai/pipecat/pull/4251))
|
||||
|
||||
- Added `truncate_large_values` parameter to `LLMContext.get_messages()`. When
|
||||
`True`, returns compact deep copies of messages with binary data (base64
|
||||
images, audio) replaced by short placeholders and long string values in
|
||||
LLM-specific messages recursively truncated. Useful for serialization,
|
||||
logging, and debugging tools.
|
||||
(PR [#4272](https://github.com/pipecat-ai/pipecat/pull/4272))
|
||||
|
||||
- `CartesiaSTTService` now supports runtime settings updates (e.g. changing
|
||||
`language` or `model` via `STTUpdateSettingsFrame`). The service
|
||||
automatically reconnects with the new parameters. Previously, settings
|
||||
updates were silently ignored.
|
||||
(PR [#4282](https://github.com/pipecat-ai/pipecat/pull/4282))
|
||||
|
||||
- Added `pcm_32000` and `pcm_48000` sample rate support to ElevenLabs TTS
|
||||
services.
|
||||
(PR [#4293](https://github.com/pipecat-ai/pipecat/pull/4293))
|
||||
|
||||
- Added `enable_logging` parameter to `ElevenLabsHttpTTSService`. Set to
|
||||
`False` to enable zero retention mode (enterprise only).
|
||||
(PR [#4293](https://github.com/pipecat-ai/pipecat/pull/4293))
|
||||
|
||||
### Changed
|
||||
|
||||
- Updated `onnxruntime` from 1.23.2 to 1.24.3, adding support for Python 3.14.
|
||||
(PR [#3984](https://github.com/pipecat-ai/pipecat/pull/3984))
|
||||
|
||||
- `MCPClient` now requires `async with MCPClient(...) as mcp:` or explicit
|
||||
`start()`/`close()` calls to manage the connection lifecycle.
|
||||
(PR [#4034](https://github.com/pipecat-ai/pipecat/pull/4034))
|
||||
|
||||
- ⚠️ Updated `langchain` extra to require langchain 1.x (from 0.3.x),
|
||||
langchain-community 0.4.x (from 0.3.x), and langchain-openai 1.x (from
|
||||
0.3.x). If you pin these packages in your project, update your pins
|
||||
accordingly.
|
||||
(PR [#4192](https://github.com/pipecat-ai/pipecat/pull/4192))
|
||||
|
||||
- `WebsocketService` reconnection errors are now non-fatal. When a websocket
|
||||
service exhausts its reconnection attempts (either via exponential backoff or
|
||||
quick failure detection), it emits a non-fatal `ErrorFrame` instead of a
|
||||
fatal one. This allows application-level failover (e.g. `ServiceSwitcher`) to
|
||||
handle the failure instead of killing the entire pipeline.
|
||||
(PR [#4201](https://github.com/pipecat-ai/pipecat/pull/4201))
|
||||
|
||||
- Changed `GrokLLMService` default model from `grok-3-beta` to `grok-3`, now
|
||||
that the model is generally available.
|
||||
(PR [#4209](https://github.com/pipecat-ai/pipecat/pull/4209))
|
||||
|
||||
- `GoogleImageGenService` now defaults to `imagen-4.0-generate-001` (previously
|
||||
`imagen-3.0-generate-002`).
|
||||
(PR [#4213](https://github.com/pipecat-ai/pipecat/pull/4213))
|
||||
|
||||
- ⚠️ `BaseOpenAILLMService.get_chat_completions()` now accepts an `LLMContext`
|
||||
instead of `OpenAILLMInvocationParams`. If you override this method, update
|
||||
your signature accordingly.
|
||||
(PR [#4215](https://github.com/pipecat-ai/pipecat/pull/4215))
|
||||
|
||||
- When multiple function calls are returned in a single LLM response, by
|
||||
default (when `group_parallel_tools=True`) the LLM is now triggered exactly
|
||||
once after the last call in the batch completes, rather than waiting for all
|
||||
function calls.
|
||||
(PR [#4217](https://github.com/pipecat-ai/pipecat/pull/4217))
|
||||
|
||||
- ⚠️ `LLMService.function_call_timeout_secs` now defaults to `None` instead of
|
||||
`10.0`. Deferred function calls will run indefinitely unless a timeout is
|
||||
explicitly set at the service level or per-call. If you relied on the
|
||||
previous 10-second default, pass `function_call_timeout_secs=10.0`
|
||||
explicitly.
|
||||
(PR [#4224](https://github.com/pipecat-ai/pipecat/pull/4224))
|
||||
|
||||
- Updated `NvidiaTTSService`:
|
||||
|
||||
- Made `api_key` optional for local NIM deployments.
|
||||
- Voice, language, and quality can be updated without reconnecting the gRPC
|
||||
client; new values take effect on the next synthesis turn, not for the
|
||||
current turn's in-flight requests.
|
||||
- Replaced per-sentence synchronous `synthesize_online` calls with async
|
||||
queue-backed gRPC streaming.
|
||||
- Streaming now uses asyncio tasks with explicit gRPC cancellation on
|
||||
interruption and stale-response filtering when a stream is aborted or
|
||||
replaced.
|
||||
- Renamed Riva references to Nemotron Speech in docs and messages.
|
||||
- Disabled automatic TTS start frames at the service level
|
||||
(`push_start_frame=False`) and emit `TTSStartedFrame` when a stitched
|
||||
synthesis stream is started for a context.
|
||||
(PR [#4249](https://github.com/pipecat-ai/pipecat/pull/4249))
|
||||
|
||||
### Removed
|
||||
|
||||
- ⚠️ Removed `OpenPipeLLMService` and the `openpipe` extra. OpenPipe was
|
||||
acquired by CoreWeave and the package is no longer maintained. If you were
|
||||
using `openpipe` as an LLM provider, switch to the underlying provider
|
||||
directly (e.g. `openai`). The OpenPipe interface can still be used with
|
||||
`OpenAILLMService` by specifying a `base_url`.
|
||||
(PR [#4191](https://github.com/pipecat-ai/pipecat/pull/4191))
|
||||
|
||||
- ⚠️ Removed `NoisereduceFilter`. Use system-level noise reduction or a
|
||||
service-based alternative instead.
|
||||
(PR [#4204](https://github.com/pipecat-ai/pipecat/pull/4204))
|
||||
|
||||
- ⚠️ Removed deprecated `vad_enabled` and `vad_audio_passthrough` transport
|
||||
params.
|
||||
(PR [#4204](https://github.com/pipecat-ai/pipecat/pull/4204))
|
||||
|
||||
- ⚠️ Removed deprecated `camera_in_enabled`, `camera_in_is_live`,
|
||||
`camera_in_width`, `camera_in_height`, `camera_out_enabled`,
|
||||
`camera_out_is_live`, `camera_out_width`, `camera_out_height`, and
|
||||
`camera_out_color` transport params. Use the `video_in_*` and `video_out_*`
|
||||
equivalents instead.
|
||||
(PR [#4204](https://github.com/pipecat-ai/pipecat/pull/4204))
|
||||
|
||||
- ⚠️ Removed `FrameProcessor.wait_for_task()`. Use `create_task()` and manage
|
||||
tasks with the built-in `TaskManager` instead.
|
||||
(PR [#4204](https://github.com/pipecat-ai/pipecat/pull/4204))
|
||||
|
||||
- ⚠️ Removed deprecated transport frames: `TransportMessageFrame`,
|
||||
`TransportMessageUrgentFrame`, `InputTransportMessageUrgentFrame`,
|
||||
`DailyTransportMessageFrame`, and `DailyTransportMessageUrgentFrame`. Use
|
||||
`OutputTransportMessageFrame`, `OutputTransportMessageUrgentFrame`,
|
||||
`InputTransportMessageFrame`, `DailyOutputTransportMessageFrame`, and
|
||||
`DailyOutputTransportMessageUrgentFrame` instead.
|
||||
(PR [#4204](https://github.com/pipecat-ai/pipecat/pull/4204))
|
||||
|
||||
- ⚠️ Removed `create_default_resampler()` from `pipecat.audio.utils`.
|
||||
(PR [#4204](https://github.com/pipecat-ai/pipecat/pull/4204))
|
||||
|
||||
- ⚠️ Removed `DailyRunner.configure_with_args()`. Use `PipelineRunner` with
|
||||
`RunnerArguments` instead.
|
||||
(PR [#4204](https://github.com/pipecat-ai/pipecat/pull/4204))
|
||||
|
||||
- ⚠️ Removed deprecated `on_pipeline_ended`, `on_pipeline_cancelled`, and
|
||||
`on_pipeline_stopped` events from `PipelineTask`. Use `on_pipeline_finished`
|
||||
instead.
|
||||
(PR [#4204](https://github.com/pipecat-ai/pipecat/pull/4204))
|
||||
|
||||
- ⚠️ Removed single-argument function call support from `LLMService`. Functions
|
||||
must use named parameters instead of a single `arguments` parameter.
|
||||
(PR [#4204](https://github.com/pipecat-ai/pipecat/pull/4204))
|
||||
|
||||
- ⚠️ Removed `FalSmartTurnAnalyzer` and `LocalSmartTurnAnalyzer`.
|
||||
(PR [#4204](https://github.com/pipecat-ai/pipecat/pull/4204))
|
||||
|
||||
- ⚠️ Removed `RTVIObserver.errors_enabled` parameter.
|
||||
(PR [#4204](https://github.com/pipecat-ai/pipecat/pull/4204))
|
||||
|
||||
- ⚠️ Removed deprecated RTVI models, frames, and processor methods including
|
||||
`RTVIConfig`, `RTVIServiceConfig`, `RTVIServiceOptionConfig`, various
|
||||
`RTVI*Data` models, `RTVIActionFrame`, and
|
||||
`RTVIProcessor.handle_function_call`/`handle_function_call_start`. Use the
|
||||
updated RTVI processor API instead.
|
||||
(PR [#4204](https://github.com/pipecat-ai/pipecat/pull/4204))
|
||||
|
||||
- ⚠️ Removed deprecated `KeypadEntryFrame` alias.
|
||||
(PR [#4204](https://github.com/pipecat-ai/pipecat/pull/4204))
|
||||
|
||||
- ⚠️ Removed deprecated interruption frames: `StartInterruptionFrame` and
|
||||
`BotInterruptionFrame`. Use `InterruptionFrame` and `InterruptionTaskFrame`
|
||||
instead.
|
||||
(PR [#4204](https://github.com/pipecat-ai/pipecat/pull/4204))
|
||||
|
||||
- ⚠️ Removed `LLMService.request_image_frame()`. Push a `UserImageRequestFrame`
|
||||
instead.
|
||||
(PR [#4204](https://github.com/pipecat-ai/pipecat/pull/4204))
|
||||
|
||||
- ⚠️ Removed `TTSService.say()`. Push a `TTSSpeakFrame` into the pipeline
|
||||
instead.
|
||||
(PR [#4204](https://github.com/pipecat-ai/pipecat/pull/4204))
|
||||
|
||||
- ⚠️ Removed `KrispFilter`. The `krisp` extra has been removed from
|
||||
`pyproject.toml`.
|
||||
(PR [#4204](https://github.com/pipecat-ai/pipecat/pull/4204))
|
||||
|
||||
- ⚠️ Removed `AudioBufferProcessor.user_continuous_stream` parameter. Use
|
||||
`user_audio_passthrough` instead.
|
||||
(PR [#4204](https://github.com/pipecat-ai/pipecat/pull/4204))
|
||||
|
||||
- ⚠️ Removed `LLMService.start_callback` parameter. Register an
|
||||
`on_llm_response_start` event handler instead.
|
||||
(PR [#4204](https://github.com/pipecat-ai/pipecat/pull/4204))
|
||||
|
||||
- ⚠️ Removed deprecated `observers` field from `PipelineParams`. Pass observers
|
||||
directly to `PipelineTask` constructor instead.
|
||||
(PR [#4204](https://github.com/pipecat-ai/pipecat/pull/4204))
|
||||
|
||||
- ⚠️ Removed deprecated `pipecat.services.openai_realtime` package. Use
|
||||
`pipecat.services.openai.realtime` instead.
|
||||
(PR [#4208](https://github.com/pipecat-ai/pipecat/pull/4208))
|
||||
|
||||
- ⚠️ Removed deprecated `pipecat.services.google.llm_vertex` module. Use
|
||||
`pipecat.services.google.vertex.llm` instead.
|
||||
(PR [#4208](https://github.com/pipecat-ai/pipecat/pull/4208))
|
||||
|
||||
- ⚠️ Removed deprecated `GoogleLLMOpenAIBetaService` from
|
||||
`pipecat.services.google.openai`. Use `GoogleLLMService` from
|
||||
`pipecat.services.google.llm` instead.
|
||||
(PR [#4208](https://github.com/pipecat-ai/pipecat/pull/4208))
|
||||
|
||||
- ⚠️ Removed deprecated `OpenAIRealtimeBetaLLMService` and
|
||||
`AzureRealtimeBetaLLMService`. Use `OpenAIRealtimeLLMService` and
|
||||
`AzureRealtimeLLMService` from `pipecat.services.openai.realtime` and
|
||||
`pipecat.services.azure.realtime` instead.
|
||||
(PR [#4208](https://github.com/pipecat-ai/pipecat/pull/4208))
|
||||
|
||||
- ⚠️ Removed deprecated `pipecat.services.ai_services` module. Import from
|
||||
`pipecat.services.ai_service`, `pipecat.services.llm_service`,
|
||||
`pipecat.services.stt_service`, `pipecat.services.tts_service`, etc. instead.
|
||||
(PR [#4208](https://github.com/pipecat-ai/pipecat/pull/4208))
|
||||
|
||||
- ⚠️ Removed deprecated `pipecat.services.gemini_multimodal_live` package. Use
|
||||
`pipecat.services.google.gemini_live` instead. Note that class names no
|
||||
longer include "Multimodal" (e.g. `GeminiMultimodalLiveLLMService` →
|
||||
`GeminiLiveLLMService`).
|
||||
(PR [#4208](https://github.com/pipecat-ai/pipecat/pull/4208))
|
||||
|
||||
- ⚠️ Removed deprecated `pipecat.services.google.gemini_live.llm_vertex`
|
||||
module. Use `pipecat.services.google.gemini_live.vertex.llm` instead.
|
||||
(PR [#4208](https://github.com/pipecat-ai/pipecat/pull/4208))
|
||||
|
||||
- ⚠️ Removed deprecated `pipecat.services.nim` package. Use
|
||||
`pipecat.services.nvidia.llm` instead (`NimLLMService` → `NvidiaLLMService`).
|
||||
(PR [#4208](https://github.com/pipecat-ai/pipecat/pull/4208))
|
||||
|
||||
- ⚠️ Removed deprecated `pipecat.services.deepgram.stt_sagemaker` and
|
||||
`pipecat.services.deepgram.tts_sagemaker` modules. Use
|
||||
`pipecat.services.deepgram.sagemaker.stt` and
|
||||
`pipecat.services.deepgram.sagemaker.tts` instead.
|
||||
(PR [#4208](https://github.com/pipecat-ai/pipecat/pull/4208))
|
||||
|
||||
- ⚠️ Removed deprecated `pipecat.services.aws_nova_sonic` package. Use
|
||||
`pipecat.services.aws.nova_sonic` instead.
|
||||
(PR [#4208](https://github.com/pipecat-ai/pipecat/pull/4208))
|
||||
|
||||
- ⚠️ Removed deprecated `pipecat.services.riva` package. Use
|
||||
`pipecat.services.nvidia.stt` and `pipecat.services.nvidia.tts` instead
|
||||
(`RivaSTTService` → `NvidiaSTTService`, `RivaTTSService` →
|
||||
`NvidiaTTSService`).
|
||||
(PR [#4208](https://github.com/pipecat-ai/pipecat/pull/4208))
|
||||
|
||||
- ⚠️ Removed deprecated compatibility modules:
|
||||
`pipecat.services.openai_realtime_beta` (use
|
||||
`pipecat.services.openai.realtime`),
|
||||
`pipecat.services.openai_realtime.context`,
|
||||
`pipecat.services.openai_realtime.frames`,
|
||||
`pipecat.services.openai.realtime.context`,
|
||||
`pipecat.services.openai.realtime.frames`,
|
||||
`pipecat.services.gemini_multimodal_live` (use
|
||||
`pipecat.services.google.gemini_live`),
|
||||
`pipecat.services.aws_nova_sonic.context` (use
|
||||
`pipecat.services.aws.nova_sonic`), `pipecat.services.google.openai` and
|
||||
`pipecat.services.google.llm_openai` (use `pipecat.services.google.llm`).
|
||||
(PR [#4215](https://github.com/pipecat-ai/pipecat/pull/4215))
|
||||
|
||||
- ⚠️ Removed `VisionImageFrameAggregator` (from
|
||||
`pipecat.processors.aggregators.vision_image_frame`). Vision/image handling
|
||||
is now built into `LLMContext` (from
|
||||
`pipecat.processors.aggregators.llm_context`). See the `12*` examples for the
|
||||
recommended replacement pattern.
|
||||
(PR [#4215](https://github.com/pipecat-ai/pipecat/pull/4215))
|
||||
|
||||
- ⚠️ Removed `OpenAILLMContext`, `OpenAILLMContextFrame`, and
|
||||
`OpenAILLMContext.from_messages()`. Use `LLMContext` (from
|
||||
`pipecat.processors.aggregators.llm_context`) and `LLMContextFrame` (from
|
||||
`pipecat.frames.frames`) instead. All services now exclusively use the
|
||||
universal `LLMContext`.
|
||||
|
||||
From the developer's point of view, migrating will usually be a matter of
|
||||
going from this:
|
||||
|
||||
```python
|
||||
context = OpenAILLMContext(messages, tools)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
```
|
||||
|
||||
To this:
|
||||
|
||||
```python
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
|
||||
context = LLMContext(messages, tools)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
```
|
||||
(PR [#4215](https://github.com/pipecat-ai/pipecat/pull/4215))
|
||||
|
||||
- ⚠️ Removed deprecated frame types `LLMMessagesFrame` and
|
||||
`OpenAILLMContextAssistantTimestampFrame` from `pipecat.frames.frames`.
|
||||
Instead of `LLMMessagesFrame`, use `LLMContextFrame` with the new messages,
|
||||
or `LLMMessagesUpdateFrame` with `run_llm=True`.
|
||||
(PR [#4215](https://github.com/pipecat-ai/pipecat/pull/4215))
|
||||
|
||||
- ⚠️ Removed `GatedOpenAILLMContextAggregator` (from
|
||||
`pipecat.processors.aggregators.gated_open_ai_llm_context`). Use
|
||||
`GatedLLMContextAggregator` (from
|
||||
`pipecat.processors.aggregators.gated_llm_context`) instead.
|
||||
(PR [#4215](https://github.com/pipecat-ai/pipecat/pull/4215))
|
||||
|
||||
- ⚠️ Removed deprecated service-specific context and aggregator machinery,
|
||||
which was superseded by the universal `LLMContext` system.
|
||||
|
||||
Service-specific classes removed: `AnthropicLLMContext`,
|
||||
`AnthropicContextAggregatorPair`, `AWSBedrockLLMContext`,
|
||||
`AWSBedrockContextAggregatorPair`, `OpenAIContextAggregatorPair`, and their
|
||||
user/assistant aggregators. Also removed `create_context_aggregator()` from
|
||||
`LLMService`, `OpenAILLMService`, `AnthropicLLMService`, and
|
||||
`AWSBedrockLLMService`.
|
||||
|
||||
Base aggregator classes removed (from
|
||||
`pipecat.processors.aggregators.llm_response`): `BaseLLMResponseAggregator`,
|
||||
`LLMContextResponseAggregator`, `LLMUserContextAggregator`,
|
||||
`LLMAssistantContextAggregator`, `LLMUserResponseAggregator`,
|
||||
`LLMAssistantResponseAggregator`.
|
||||
|
||||
From the developer's point of view, migrating will usually be a matter of
|
||||
going from this:
|
||||
|
||||
```python
|
||||
context = OpenAILLMContext(messages, tools)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
```
|
||||
|
||||
To this:
|
||||
|
||||
```python
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
|
||||
context = LLMContext(messages, tools)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
```
|
||||
(PR [#4215](https://github.com/pipecat-ai/pipecat/pull/4215))
|
||||
|
||||
- ⚠️ Removed deprecated service parameters and shims that have been replaced by
|
||||
the `settings=Service.Settings(...)` pattern or direct `__init__` parameters:
|
||||
- `PollyTTSService` alias (use `AWSTTSService`)
|
||||
- `TTSService`: `text_aggregator`, `text_filter` init params
|
||||
- `AWSNovaSonicLLMService`: `send_transcription_frames` init param
|
||||
- `DeepgramSTTService`: `url` init param (use `base_url`)
|
||||
- `FishAudioTTSService`: `model` init param (use `reference_id` or
|
||||
`settings`)
|
||||
- `GladiaSTTService`: `language` and `confidence` from `GladiaInputParams`,
|
||||
`InputParams` class alias
|
||||
- `GeminiTTSService`: `api_key` init param
|
||||
- `GeminiLiveLLMService`: `base_url` init param (use `http_options`)
|
||||
- `GoogleVertexLLMService`: `InputParams` class with
|
||||
`location`/`project_id` fields (use direct init params); `project_id` is now
|
||||
required, `location` defaults to `"us-east4"`
|
||||
- `MiniMaxHttpTTSService`: `english_normalization` from `InputParams` (use
|
||||
`text_normalization`)
|
||||
- `SimliVideoService`: `simli_config` init param (use `api_key`/`face_id`),
|
||||
`use_turn_server` init param; `api_key` and `face_id` are now required
|
||||
- `AnthropicLLMService`: `enable_prompt_caching_beta` from `InputParams`
|
||||
(use `enable_prompt_caching`)
|
||||
(PR [#4220](https://github.com/pipecat-ai/pipecat/pull/4220))
|
||||
|
||||
- ⚠️ Removed deprecated `pipecat.transports.services` and
|
||||
`pipecat.transports.network` module aliases. Update imports to use
|
||||
`pipecat.transports.daily.transport`, `pipecat.transports.livekit.transport`,
|
||||
`pipecat.transports.websocket.*`, `pipecat.transports.webrtc.*`, and
|
||||
`pipecat.transports.daily.utils` as appropriate.
|
||||
(PR [#4225](https://github.com/pipecat-ai/pipecat/pull/4225))
|
||||
|
||||
- ⚠️ Removed deprecated `pipecat.sync` package. Use `pipecat.utils.sync`
|
||||
instead.
|
||||
(PR [#4225](https://github.com/pipecat-ai/pipecat/pull/4225))
|
||||
|
||||
- ⚠️ Removed deprecated `TranscriptionMessage`, `ThoughtTranscriptionMessage`,
|
||||
and `TranscriptionUpdateFrame` from `pipecat.frames.frames`.
|
||||
(PR [#4228](https://github.com/pipecat-ai/pipecat/pull/4228))
|
||||
|
||||
- ⚠️ Removed deprecated `allow_interruptions` parameter from `PipelineParams`,
|
||||
`StartFrame`, and `FrameProcessor`. Interruptions are now always allowed by
|
||||
default. Use `LLMUserAggregator`'s `user_turn_strategies` /
|
||||
`user_mute_strategies` parameters to control interruption behavior.
|
||||
(PR [#4228](https://github.com/pipecat-ai/pipecat/pull/4228))
|
||||
|
||||
- ⚠️ Removed deprecated `STTMuteFilter`, `STTMuteConfig`, and `STTMuteStrategy`
|
||||
from `pipecat.processors.filters.stt_mute_filter`. Use
|
||||
`pipecat.turns.user_mute` strategies with `LLMUserAggregator`'s
|
||||
`user_mute_strategies` parameter instead.
|
||||
(PR [#4228](https://github.com/pipecat-ai/pipecat/pull/4228))
|
||||
|
||||
- ⚠️ Removed deprecated `pipecat.processors.transcript_processor` module
|
||||
(`TranscriptProcessor`, `TranscriptProcessorConfig`). Use pipeline observers
|
||||
instead.
|
||||
(PR [#4228](https://github.com/pipecat-ai/pipecat/pull/4228))
|
||||
|
||||
- ⚠️ Removed deprecated `EmulateUserStartedSpeakingFrame` and
|
||||
`EmulateUserStoppedSpeakingFrame` frames, and the `emulated` field from
|
||||
`UserStartedSpeakingFrame` / `UserStoppedSpeakingFrame`.
|
||||
(PR [#4228](https://github.com/pipecat-ai/pipecat/pull/4228))
|
||||
|
||||
- ⚠️ Removed deprecated `interruption_strategies` parameter from
|
||||
`PipelineParams`, `StartFrame`, and `FrameProcessor`. Use
|
||||
`LLMUserAggregator`'s `user_turn_strategies` parameter instead.
|
||||
(PR [#4228](https://github.com/pipecat-ai/pipecat/pull/4228))
|
||||
|
||||
- ⚠️ Removed deprecated `pipecat.audio.interruptions` module
|
||||
(`BaseInterruptionStrategy`, `MinWordsInterruptionStrategy`). Use
|
||||
`pipecat.turns.user_start.MinWordsUserTurnStartStrategy` with
|
||||
`LLMUserAggregator`'s `user_turn_strategies` parameter instead.
|
||||
(PR [#4228](https://github.com/pipecat-ai/pipecat/pull/4228))
|
||||
|
||||
- ⚠️ Removed deprecated `pipecat.utils.tracing.class_decorators` module. Use
|
||||
`pipecat.utils.tracing.service_decorators` instead.
|
||||
(PR [#4228](https://github.com/pipecat-ai/pipecat/pull/4228))
|
||||
|
||||
- ⚠️ Removed deprecated `add_pattern_pair` method from `PatternPairAggregator`.
|
||||
Use `add_pattern` instead.
|
||||
(PR [#4228](https://github.com/pipecat-ai/pipecat/pull/4228))
|
||||
|
||||
- ⚠️ Removed deprecated `UserResponseAggregator` class from
|
||||
`pipecat.processors.aggregators.user_response`. Use `LLMUserAggregator`
|
||||
instead.
|
||||
(PR [#4228](https://github.com/pipecat-ai/pipecat/pull/4228))
|
||||
|
||||
- ⚠️ Removed `ExternalUserTurnStrategies` and the automatic fallback to it in
|
||||
`LLMUserAggregator` when a `SpeechControlParamsFrame` was received from the
|
||||
transport.
|
||||
(PR [#4229](https://github.com/pipecat-ai/pipecat/pull/4229))
|
||||
|
||||
- ⚠️ Removed `vad_analyzer` and `turn_analyzer` parameters from
|
||||
`TransportParams` and all transport input classes, along with all deprecated
|
||||
VAD/turn analysis logic in `BaseInputTransport`. VAD and turn detection are
|
||||
now handled entirely by `LLMUserAggregator`.
|
||||
(PR [#4229](https://github.com/pipecat-ai/pipecat/pull/4229))
|
||||
|
||||
- ⚠️ Removed deprecated `TranscriptionUserTurnStopStrategy` alias (deprecated
|
||||
in 0.0.102). Use `SpeechTimeoutUserTurnStopStrategy` instead.
|
||||
(PR [#4232](https://github.com/pipecat-ai/pipecat/pull/4232))
|
||||
|
||||
- ⚠️ Removed deprecated `vad_events` setting and `should_interrupt` parameter
|
||||
from `DeepgramSTTService` (deprecated in 0.0.99). Use Silero VAD for voice
|
||||
activity detection instead.
|
||||
(PR [#4232](https://github.com/pipecat-ai/pipecat/pull/4232))
|
||||
|
||||
- ⚠️ Removed deprecated `send_transcription_frames` parameter from
|
||||
`OpenAIRealtimeLLMService` (deprecated in 0.0.92). Transcription frames are
|
||||
always sent.
|
||||
(PR [#4232](https://github.com/pipecat-ai/pipecat/pull/4232))
|
||||
|
||||
- ⚠️ Removed deprecated `UserIdleProcessor` (deprecated in 0.0.100). Use
|
||||
`LLMUserAggregator` with the `user_idle_timeout` parameter instead.
|
||||
(PR [#4232](https://github.com/pipecat-ai/pipecat/pull/4232))
|
||||
|
||||
- ⚠️ Removed deprecated `UserBotLatencyLogObserver` (deprecated in 0.0.102).
|
||||
Use `UserBotLatencyObserver` with its `on_latency_measured` event handler
|
||||
instead.
|
||||
(PR [#4232](https://github.com/pipecat-ai/pipecat/pull/4232))
|
||||
|
||||
- ⚠️ Removed the `riva` install extra. Use `nvidia` instead (`pip install
|
||||
"pipecat-ai[nvidia]"`).
|
||||
(PR [#4235](https://github.com/pipecat-ai/pipecat/pull/4235))
|
||||
|
||||
- Removed the empty `remote-smart-turn` install extra (was already a no-op).
|
||||
(PR [#4235](https://github.com/pipecat-ai/pipecat/pull/4235))
|
||||
|
||||
- ⚠️ Removed `DeprecatedModuleProxy` and all service `__init__.py` re-export
|
||||
shims. Flat imports like `from pipecat.services.openai import
|
||||
OpenAILLMService` no longer work. Use the full submodule path instead: `from
|
||||
pipecat.services.openai.llm import OpenAILLMService`. This is already the
|
||||
established pattern across all examples and internal code.
|
||||
(PR [#4239](https://github.com/pipecat-ai/pipecat/pull/4239))
|
||||
|
||||
- ⚠️ Removed deprecated `PIPECAT_OBSERVER_FILES` environment variable support.
|
||||
Use `PIPECAT_SETUP_FILES` instead.
|
||||
(PR [#4267](https://github.com/pipecat-ai/pipecat/pull/4267))
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed `IdleFrameProcessor` where `asyncio.Event` was unconditionally cleared
|
||||
in a `finally` block instead of only on the success path.
|
||||
(PR [#3796](https://github.com/pipecat-ai/pipecat/pull/3796))
|
||||
|
||||
- Fixed `MCPClient` opening a new connection for every tool call instead of
|
||||
reusing the session.
|
||||
(PR [#4034](https://github.com/pipecat-ai/pipecat/pull/4034))
|
||||
|
||||
- `GoogleLLMService` now applies a low-latency thinking default
|
||||
(`thinking_level="minimal"`) for Gemini 3+ Flash models.
|
||||
(PR [#4067](https://github.com/pipecat-ai/pipecat/pull/4067))
|
||||
|
||||
- Fixed `WebsocketService` entering an infinite reconnection loop when a server
|
||||
accepts the WebSocket handshake but immediately closes the connection (e.g.
|
||||
invalid API key, close code 1008). The service now detects connections that
|
||||
fail repeatedly within seconds of being established and stops retrying after
|
||||
3 consecutive quick failures.
|
||||
(PR [#4201](https://github.com/pipecat-ai/pipecat/pull/4201))
|
||||
|
||||
- Fixed `InworldHttpTTSService` streaming responses crashing with
|
||||
`UnicodeDecodeError` when multi-byte UTF-8 characters were split across chunk
|
||||
boundaries. This caused TTS audio to cut off mid-sentence intermittently.
|
||||
(PR [#4202](https://github.com/pipecat-ai/pipecat/pull/4202))
|
||||
|
||||
- Fixed a crash (`JSONDecodeError`) when a user interruption occurs while the
|
||||
LLM is streaming function call arguments. Previously, the incomplete JSON
|
||||
arguments were passed directly to `json.loads()`, causing an unhandled
|
||||
exception. Affected services: OpenAI, Google (OpenAI-compatible), and
|
||||
SambaNova.
|
||||
(PR [#4203](https://github.com/pipecat-ai/pipecat/pull/4203))
|
||||
|
||||
- Fixed `BaseOutputTransport` discarding pending `UninterruptibleFrame` items
|
||||
(e.g. function-call context updates) when an interruption arrived. The audio
|
||||
task is now kept alive and only interruptible frames are drained when
|
||||
uninterruptible frames are present in the queue.
|
||||
(PR [#4217](https://github.com/pipecat-ai/pipecat/pull/4217))
|
||||
|
||||
- Fixed spurious LLM inference being triggered when a function call result
|
||||
arrived while the user was actively speaking. The context frame is now
|
||||
suppressed until the user stops speaking.
|
||||
(PR [#4217](https://github.com/pipecat-ai/pipecat/pull/4217))
|
||||
|
||||
- Fixed `CartesiaTTSService` failing with "Context has closed" errors when
|
||||
switching voice, model, or language via `TTSUpdateSettingsFrame`. The service
|
||||
now automatically flushes the current audio context and opens a fresh one
|
||||
when these settings change.
|
||||
(PR [#4220](https://github.com/pipecat-ai/pipecat/pull/4220))
|
||||
|
||||
- Fixed duplicate LLM replies that could occur when multiple async function
|
||||
call results arrived while an LLM request was already queued.
|
||||
(PR [#4230](https://github.com/pipecat-ai/pipecat/pull/4230))
|
||||
|
||||
- Fixed undefined `_warn_deprecated_param` calls in `OpenAIRealtimeLLMService`
|
||||
and `GrokRealtimeLLMService` for the deprecated `session_properties` init
|
||||
parameter.
|
||||
(PR [#4232](https://github.com/pipecat-ai/pipecat/pull/4232))
|
||||
|
||||
- Fixed Gemini Live bot hanging after a session resumption reconnect. Audio,
|
||||
video, and text input were silently dropped after reconnecting because the
|
||||
internal `_ready_for_realtime_input` flag was not being reset.
|
||||
(PR [#4242](https://github.com/pipecat-ai/pipecat/pull/4242))
|
||||
|
||||
- Fixed `VADController` getting stuck in the `SPEAKING` state when audio frames
|
||||
stop arriving mid-speech (e.g. user mutes mic). A new `audio_idle_timeout`
|
||||
parameter (default 1s, set to 0 to disable) forces a transition back to
|
||||
`QUIET` and emits `on_speech_stopped` when no audio is received while
|
||||
speaking.
|
||||
(PR [#4244](https://github.com/pipecat-ai/pipecat/pull/4244))
|
||||
|
||||
- Fixed `PipelineRunner._gc_collect()` blocking the event loop by running
|
||||
`gc.collect()` synchronously. Now offloaded via `asyncio.to_thread` to avoid
|
||||
stalling concurrent pipeline tasks.
|
||||
(PR [#4255](https://github.com/pipecat-ai/pipecat/pull/4255))
|
||||
|
||||
- Fixed `ElevenLabsTTSService` incorrectly enabling `auto_mode` when using
|
||||
`TextAggregationMode.TOKEN`. Auto mode disables server-side buffering and is
|
||||
designed for complete sentences — enabling it with token streaming degraded
|
||||
speech quality. The default is now derived automatically from the aggregation
|
||||
strategy: `auto_mode=True` for `SENTENCE`, `auto_mode=False` for `TOKEN`.
|
||||
Callers can still override by passing `auto_mode` explicitly.
|
||||
(PR [#4265](https://github.com/pipecat-ai/pipecat/pull/4265))
|
||||
|
||||
- Fixed `ValueError: write to closed file` during pipeline shutdown when
|
||||
observers were active. Observer proxy tasks are now cancelled before observer
|
||||
resources are cleaned up.
|
||||
(PR [#4267](https://github.com/pipecat-ai/pipecat/pull/4267))
|
||||
|
||||
- Fixed delayed turn completion when STT transcripts arrive after the p99
|
||||
timeout. Previously, a late transcript (beyond the p99 window) would fall
|
||||
through to the 5-second `user_turn_stop_timeout` fallback. Now the turn stop
|
||||
triggers immediately when the late transcript arrives.
|
||||
(PR [#4283](https://github.com/pipecat-ai/pipecat/pull/4283))
|
||||
|
||||
- Fixed `ElevenLabsTTSService` ignoring `enable_logging=False` and
|
||||
`enable_ssml_parsing=False`. The truthy check treated `False` the same as
|
||||
`None` (both skipped), and Python's `str(False)` produced `"False"` instead
|
||||
of the lowercase `"false"` expected by the API.
|
||||
(PR [#4293](https://github.com/pipecat-ai/pipecat/pull/4293))
|
||||
|
||||
- Fixed `on_assistant_turn_stopped` not resetting internal state when the LLM
|
||||
returned no text tokens. Added `interrupted` field to
|
||||
`AssistantTurnStoppedMessage` to indicate whether the assistant turn was
|
||||
interrupted.
|
||||
(PR [#4294](https://github.com/pipecat-ai/pipecat/pull/4294))
|
||||
|
||||
- Fixed `LLMContextSummarizer` failing with "No messages to summarize" when
|
||||
using `system_instruction` instead of a system-role message at the start of
|
||||
the context. The summarizer previously scanned the entire context for the
|
||||
first system message, which could match a mid-conversation injection (e.g.
|
||||
idle notifications) instead of the initial prompt, causing the summarization
|
||||
range to be empty.
|
||||
(PR [#4295](https://github.com/pipecat-ai/pipecat/pull/4295))
|
||||
|
||||
## [0.0.108] - 2026-03-27
|
||||
|
||||
### Added
|
||||
|
||||
@@ -1,62 +0,0 @@
|
||||
# Changelog
|
||||
|
||||
All notable changes to the **<project name>** SDK will be documented in this file.
|
||||
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
Please make sure to add your changes to the appropriate categories:
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
### Added
|
||||
|
||||
<!-- for new functionality -->
|
||||
|
||||
- n/a
|
||||
|
||||
### Changed
|
||||
|
||||
<!-- for changed functionality -->
|
||||
|
||||
- n/a
|
||||
|
||||
### Deprecated
|
||||
|
||||
<!-- for soon-to-be removed functionality -->
|
||||
|
||||
- n/a
|
||||
|
||||
### Removed
|
||||
|
||||
<!-- for removed functionality -->
|
||||
|
||||
- n/a
|
||||
|
||||
### Fixed
|
||||
|
||||
<!-- for fixed bugs -->
|
||||
|
||||
- n/a
|
||||
|
||||
### Performance
|
||||
|
||||
<!-- for performance-relevant changes -->
|
||||
|
||||
- n/a
|
||||
|
||||
### Security
|
||||
|
||||
<!-- for security-relevant changes -->
|
||||
|
||||
- n/a
|
||||
|
||||
### Other
|
||||
|
||||
<!-- for everything else -->
|
||||
|
||||
- n/a
|
||||
|
||||
## [0.1.0] - YYYY-MM-DD
|
||||
|
||||
Initial release.
|
||||
34
README.md
34
README.md
@@ -28,6 +28,10 @@
|
||||
|
||||
## 🌐 Pipecat Ecosystem
|
||||
|
||||
### 🧩 Multi-agent systems
|
||||
|
||||
Need multiple AI agents working together? [Pipecat Subagents](https://github.com/pipecat-ai/pipecat-subagents) lets you build distributed multi-agent systems where each agent runs its own pipeline and communicates through a shared message bus. Hand off conversations between specialists, dispatch background tasks, and scale agents across processes or machines.
|
||||
|
||||
### 📱 Client SDKs
|
||||
|
||||
Building client applications? You can connect to Pipecat from any platform using our official SDKs:
|
||||
@@ -79,26 +83,26 @@ Catch new features, interviews, and how-tos on our [Pipecat TV](https://www.yout
|
||||
<a href="https://github.com/pipecat-ai/pipecat-examples/tree/main/simple-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat-examples/main/simple-chatbot/image.png" width="400" /></a>
|
||||
<a href="https://github.com/pipecat-ai/pipecat-examples/tree/main/storytelling-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat-examples/main/storytelling-chatbot/image.png" width="400" /></a>
|
||||
<br/>
|
||||
<a href="https://github.com/pipecat-ai/pipecat-examples/tree/main/translation-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat-examples/main/translation-chatbot/image.png" width="400" /></a>
|
||||
<a href="https://github.com/pipecat-ai/pipecat-examples/tree/main/daily-multi-translation"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat-examples/main/daily-multi-translation/image.png" width="400" /></a>
|
||||
<a href="https://github.com/pipecat-ai/pipecat/blob/main/examples/vision/vision-moondream.py"><img src="https://github.com/pipecat-ai/pipecat/blob/main/examples/assets/moondream.png" width="400" /></a>
|
||||
</p>
|
||||
|
||||
## 🧩 Available services
|
||||
|
||||
| Category | Services |
|
||||
| ------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/stt/elevenlabs), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Gradium](https://docs.pipecat.ai/server/services/stt/gradium), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [NVIDIA Riva](https://docs.pipecat.ai/server/services/stt/riva), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [Sarvam](https://docs.pipecat.ai/server/services/stt/sarvam), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) |
|
||||
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [Mistral](https://docs.pipecat.ai/server/services/llm/mistral), [Nebius](https://docs.pipecat.ai/server/services/llm/nebius), [Novita](https://docs.pipecat.ai/server/services/llm/novita), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nvidia), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova), [Sarvam](https://docs.pipecat.ai/server/services/llm/sarvam), [Together AI](https://docs.pipecat.ai/server/services/llm/together) |
|
||||
| Text-to-Speech | [Async](https://docs.pipecat.ai/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Camb AI](https://docs.pipecat.ai/server/services/tts/camb), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [Gradium](https://docs.pipecat.ai/server/services/tts/gradium), [Groq](https://docs.pipecat.ai/server/services/tts/groq), [Hume](https://docs.pipecat.ai/server/services/tts/hume), [Inworld](https://docs.pipecat.ai/server/services/tts/inworld), [Kokoro](https://docs.pipecat.ai/server/services/tts/kokoro), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [NVIDIA Riva](https://docs.pipecat.ai/server/services/tts/riva), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [Resemble](https://docs.pipecat.ai/server/services/tts/resemble), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [Smallest](https://docs.pipecat.ai/server/services/tts/smallest), [Speechmatics](https://docs.pipecat.ai/server/services/tts/speechmatics), [xAI](https://docs.pipecat.ai/server/services/tts/xai), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
|
||||
| Speech-to-Speech | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [Grok Voice Agent](https://docs.pipecat.ai/server/services/s2s/grok), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai), [Ultravox](https://docs.pipecat.ai/server/services/s2s/ultravox) |
|
||||
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [LiveKit (WebRTC)](https://docs.pipecat.ai/server/services/transport/livekit), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), [WhatsApp](https://docs.pipecat.ai/server/services/transport/whatsapp), Local |
|
||||
| Serializers | [Exotel](https://docs.pipecat.ai/server/services/serializers/exotel), [Genesys](https://docs.pipecat.ai/server/services/serializers/genesys), [Plivo](https://docs.pipecat.ai/server/services/serializers/plivo), [Twilio](https://docs.pipecat.ai/server/services/serializers/twilio), [Telnyx](https://docs.pipecat.ai/server/services/serializers/telnyx), [Vonage](https://docs.pipecat.ai/server/services/serializers/vonage) |
|
||||
| Video | [HeyGen](https://docs.pipecat.ai/server/services/video/heygen), [LemonSlice](https://docs.pipecat.ai/server/services/transport/lemonslice), [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) |
|
||||
| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) |
|
||||
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/google-imagen), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) |
|
||||
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp Viva](https://docs.pipecat.ai/guides/features/krisp-viva), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [ai-coustics](https://docs.pipecat.ai/server/utilities/audio/aic-filter), [RNNoise](https://docs.pipecat.ai/server/utilities/audio/rnnoise-filter) |
|
||||
| Analytics & Metrics | [OpenTelemetry](https://docs.pipecat.ai/server/utilities/opentelemetry), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) |
|
||||
| Community | [Browse community integrations →](https://docs.pipecat.ai/server/services/community-integrations) |
|
||||
| Category | Services |
|
||||
| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/stt/elevenlabs), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Gradium](https://docs.pipecat.ai/server/services/stt/gradium), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [Mistral](https://docs.pipecat.ai/server/services/stt/mistral), [NVIDIA Riva](https://docs.pipecat.ai/server/services/stt/riva), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [Sarvam](https://docs.pipecat.ai/server/services/stt/sarvam), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) |
|
||||
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [Mistral](https://docs.pipecat.ai/server/services/llm/mistral), [Nebius](https://docs.pipecat.ai/server/services/llm/nebius), [Novita](https://docs.pipecat.ai/server/services/llm/novita), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nvidia), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova), [Sarvam](https://docs.pipecat.ai/server/services/llm/sarvam), [Together AI](https://docs.pipecat.ai/server/services/llm/together) |
|
||||
| Text-to-Speech | [Async](https://docs.pipecat.ai/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Camb AI](https://docs.pipecat.ai/server/services/tts/camb), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [Gradium](https://docs.pipecat.ai/server/services/tts/gradium), [Groq](https://docs.pipecat.ai/server/services/tts/groq), [Hume](https://docs.pipecat.ai/server/services/tts/hume), [Inworld](https://docs.pipecat.ai/server/services/tts/inworld), [Kokoro](https://docs.pipecat.ai/server/services/tts/kokoro), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Mistral](https://docs.pipecat.ai/server/services/tts/mistral), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [NVIDIA Riva](https://docs.pipecat.ai/server/services/tts/riva), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [Resemble](https://docs.pipecat.ai/server/services/tts/resemble), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [Smallest](https://docs.pipecat.ai/server/services/tts/smallest), [Speechmatics](https://docs.pipecat.ai/server/services/tts/speechmatics), [xAI](https://docs.pipecat.ai/server/services/tts/xai), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
|
||||
| Speech-to-Speech | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [Grok Voice Agent](https://docs.pipecat.ai/server/services/s2s/grok), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai), [Ultravox](https://docs.pipecat.ai/server/services/s2s/ultravox) |
|
||||
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [LiveKit (WebRTC)](https://docs.pipecat.ai/server/services/transport/livekit), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), [WhatsApp](https://docs.pipecat.ai/server/services/transport/whatsapp), Local |
|
||||
| Serializers | [Exotel](https://docs.pipecat.ai/server/services/serializers/exotel), [Genesys](https://docs.pipecat.ai/server/services/serializers/genesys), [Plivo](https://docs.pipecat.ai/server/services/serializers/plivo), [Twilio](https://docs.pipecat.ai/server/services/serializers/twilio), [Telnyx](https://docs.pipecat.ai/server/services/serializers/telnyx), [Vonage](https://docs.pipecat.ai/server/services/serializers/vonage) |
|
||||
| Video | [HeyGen](https://docs.pipecat.ai/server/services/video/heygen), [LemonSlice](https://docs.pipecat.ai/server/services/transport/lemonslice), [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) |
|
||||
| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) |
|
||||
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/google-imagen), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) |
|
||||
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp Viva](https://docs.pipecat.ai/guides/features/krisp-viva), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [ai-coustics](https://docs.pipecat.ai/server/utilities/audio/aic-filter), [RNNoise](https://docs.pipecat.ai/server/utilities/audio/rnnoise-filter) |
|
||||
| Analytics & Metrics | [OpenTelemetry](https://docs.pipecat.ai/server/utilities/opentelemetry), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) |
|
||||
| Community | [Browse community integrations →](https://docs.pipecat.ai/server/services/community-integrations) |
|
||||
|
||||
📚 [View full services documentation →](https://docs.pipecat.ai/server/services/supported-services)
|
||||
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
- Updated `onnxruntime` from 1.23.2 to 1.24.3, adding support for Python 3.14.
|
||||
@@ -1 +0,0 @@
|
||||
- `MCPClient` now requires either `async with MCPClient(...) as mcp:` or explicit `start()`/`close()` calls to manage the connection lifecycle.
|
||||
@@ -1 +0,0 @@
|
||||
- Fixed `MCPClient` opening a new connection for every tool call instead of reusing the session.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Added WebSocket-based `OpenAIResponsesLLMService` as the new default for the OpenAI Responses API. It maintains a persistent connection to `wss://api.openai.com/v1/responses` and automatically uses `previous_response_id` to send only incremental context, falling back to full context on reconnection or cache miss. The previous HTTP-based implementation is now available as `OpenAIResponsesHttpLLMService`.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed `OpenPipeLLMService` and the `openpipe` extra. OpenPipe was acquired by CoreWeave and the package is no longer maintained. If you were using `openpipe` as an LLM provider, switch to the underlying provider directly (e.g. `openai`). The OpenPipe interface can still be used with `OpenAILLMService` by specifying a `base_url`.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Updated `langchain` extra to require langchain 1.x (from 0.3.x), langchain-community 0.4.x (from 0.3.x), and langchain-openai 1.x (from 0.3.x). If you pin these packages in your project, update your pins accordingly.
|
||||
1
changelog/4199.added.2.md
Normal file
1
changelog/4199.added.2.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `VADUserTurnStopStrategy`, a VAD-only turn stop strategy that triggers the end of a user turn as soon as VAD reports the user stopped speaking. Intended for realtime speech-to-speech pipelines that rely solely on VAD and don't use a turn analyzer or STT transcriptions to decide end of turn.
|
||||
1
changelog/4199.added.md
Normal file
1
changelog/4199.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `VADTurnAnalyzerUserTurnStopStrategy`, a turn stop strategy that triggers immediately when the turn analyzer reports COMPLETE without waiting for STT transcriptions. This reduces end-of-turn latency for speech-to-speech pipelines (e.g. Gemini Live) where audio goes directly to the LLM.
|
||||
@@ -1 +0,0 @@
|
||||
- Fixed `InworldHttpTTSService` streaming responses crashing with `UnicodeDecodeError` when multi-byte UTF-8 characters were split across chunk boundaries. This caused TTS audio to cut off mid-sentence intermittently.
|
||||
@@ -1 +0,0 @@
|
||||
- Fixed a crash (`JSONDecodeError`) when a user interruption occurs while the LLM is streaming function call arguments. Previously, the incomplete JSON arguments were passed directly to `json.loads()`, causing an unhandled exception. Affected services: OpenAI, Google (OpenAI-compatible), and SambaNova.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed deprecated `observers` field from `PipelineParams`. Pass observers directly to the `PipelineTask` constructor instead.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed deprecated `on_pipeline_ended`, `on_pipeline_cancelled`, and `on_pipeline_stopped` events from `PipelineTask`. Use `on_pipeline_finished` instead.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed `AudioBufferProcessor.user_continuous_stream` parameter. Use `user_audio_passthrough` instead.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed deprecated `camera_in_enabled`, `camera_in_is_live`, `camera_in_width`, `camera_in_height`, `camera_out_enabled`, `camera_out_is_live`, `camera_out_width`, `camera_out_height`, and `camera_out_color` transport params. Use the `video_in_*` and `video_out_*` equivalents instead.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed `RTVIObserver.errors_enabled` parameter.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed deprecated `vad_enabled` and `vad_audio_passthrough` transport params.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed `TTSService.say()`. Push a `TTSSpeakFrame` into the pipeline instead.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed `DailyRunner.configure_with_args()`. Use `PipelineRunner` with `RunnerArguments` instead.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed deprecated RTVI models, frames, and processor methods including `RTVIConfig`, `RTVIServiceConfig`, `RTVIServiceOptionConfig`, various `RTVI*Data` models, `RTVIActionFrame`, and `RTVIProcessor.handle_function_call`/`handle_function_call_start`. Use the updated RTVI processor API instead.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed `FrameProcessor.wait_for_task()`. Use `create_task()` and manage tasks with the built-in `TaskManager` instead.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed `KrispFilter`. The `krisp` extra has been removed from `pyproject.toml`.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed `LLMService.request_image_frame()`. Push a `UserImageRequestFrame` instead.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed `create_default_resampler()` from `pipecat.audio.utils`.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed `FalSmartTurnAnalyzer` and `LocalSmartTurnAnalyzer`.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed deprecated transport frames: `TransportMessageFrame`, `TransportMessageUrgentFrame`, `InputTransportMessageUrgentFrame`, `DailyTransportMessageFrame`, and `DailyTransportMessageUrgentFrame`. Use `OutputTransportMessageFrame`, `OutputTransportMessageUrgentFrame`, `InputTransportMessageFrame`, `DailyOutputTransportMessageFrame`, and `DailyOutputTransportMessageUrgentFrame` instead.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed deprecated `KeypadEntryFrame` alias.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed deprecated interruption frames: `StartInterruptionFrame` and `BotInterruptionFrame`. Use `InterruptionFrame` and `InterruptionTaskFrame` instead.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed `LLMService.start_callback` parameter. Register an `on_llm_response_start` event handler instead.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed single-argument function call support from `LLMService`. Functions must use named parameters instead of a single `arguments` parameter.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed `NoisereduceFilter`. Use system-level noise reduction or a service-based alternative instead.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed deprecated `pipecat.services.riva` package. Use `pipecat.services.nvidia.stt` and `pipecat.services.nvidia.tts` instead (`RivaSTTService` → `NvidiaSTTService`, `RivaTTSService` → `NvidiaTTSService`).
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed deprecated `pipecat.services.nim` package. Use `pipecat.services.nvidia.llm` instead (`NimLLMService` → `NvidiaLLMService`).
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed deprecated `pipecat.services.gemini_multimodal_live` package. Use `pipecat.services.google.gemini_live` instead. Note that class names no longer include "Multimodal" (e.g. `GeminiMultimodalLiveLLMService` → `GeminiLiveLLMService`).
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed deprecated `pipecat.services.aws_nova_sonic` package. Use `pipecat.services.aws.nova_sonic` instead.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed deprecated `pipecat.services.openai_realtime` package. Use `pipecat.services.openai.realtime` instead.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed deprecated `OpenAIRealtimeBetaLLMService` and `AzureRealtimeBetaLLMService`. Use `OpenAIRealtimeLLMService` and `AzureRealtimeLLMService` from `pipecat.services.openai.realtime` and `pipecat.services.azure.realtime` instead.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed deprecated `pipecat.services.deepgram.stt_sagemaker` and `pipecat.services.deepgram.tts_sagemaker` modules. Use `pipecat.services.deepgram.sagemaker.stt` and `pipecat.services.deepgram.sagemaker.tts` instead.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed deprecated `GoogleLLMOpenAIBetaService` from `pipecat.services.google.openai`. Use `GoogleLLMService` from `pipecat.services.google.llm` instead.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed deprecated `pipecat.services.google.llm_vertex` module. Use `pipecat.services.google.vertex.llm` instead.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed deprecated `pipecat.services.google.gemini_live.llm_vertex` module. Use `pipecat.services.google.gemini_live.vertex.llm` instead.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed deprecated `pipecat.services.ai_services` module. Import from `pipecat.services.ai_service`, `pipecat.services.llm_service`, `pipecat.services.stt_service`, `pipecat.services.tts_service`, etc. instead.
|
||||
@@ -1 +0,0 @@
|
||||
- Changed `GrokLLMService` default model from `grok-3-beta` to `grok-3`, now that the model is generally available.
|
||||
@@ -1 +0,0 @@
|
||||
- `GoogleImageGenService` now defaults to `imagen-4.0-generate-001` (previously `imagen-3.0-generate-002`).
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ `BaseOpenAILLMService.get_chat_completions()` now accepts an `LLMContext` instead of `OpenAILLMInvocationParams`. If you override this method, update your signature accordingly.
|
||||
@@ -1,22 +0,0 @@
|
||||
- ⚠️ Removed deprecated service-specific context and aggregator machinery, which was superseded by the universal `LLMContext` system.
|
||||
|
||||
Service-specific classes removed: `AnthropicLLMContext`, `AnthropicContextAggregatorPair`, `AWSBedrockLLMContext`, `AWSBedrockContextAggregatorPair`, `OpenAIContextAggregatorPair`, and their user/assistant aggregators. Also removed `create_context_aggregator()` from `LLMService`, `OpenAILLMService`, `AnthropicLLMService`, and `AWSBedrockLLMService`.
|
||||
|
||||
Base aggregator classes removed (from `pipecat.processors.aggregators.llm_response`): `BaseLLMResponseAggregator`, `LLMContextResponseAggregator`, `LLMUserContextAggregator`, `LLMAssistantContextAggregator`, `LLMUserResponseAggregator`, `LLMAssistantResponseAggregator`.
|
||||
|
||||
From the developer's point of view, migrating will usually be a matter of going from this:
|
||||
|
||||
```python
|
||||
context = OpenAILLMContext(messages, tools)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
```
|
||||
|
||||
To this:
|
||||
|
||||
```python
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
|
||||
context = LLMContext(messages, tools)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
```
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed deprecated frame types `LLMMessagesFrame` and `OpenAILLMContextAssistantTimestampFrame` from `pipecat.frames.frames`. Instead of `LLMMessagesFrame`, use `LLMContextFrame` with the new messages, or `LLMMessagesUpdateFrame` with `run_llm=True`.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed `GatedOpenAILLMContextAggregator` (from `pipecat.processors.aggregators.gated_open_ai_llm_context`). Use `GatedLLMContextAggregator` (from `pipecat.processors.aggregators.gated_llm_context`) instead.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed `VisionImageFrameAggregator` (from `pipecat.processors.aggregators.vision_image_frame`). Vision/image handling is now built into `LLMContext` (from `pipecat.processors.aggregators.llm_context`). See the `12*` examples for the recommended replacement pattern.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed deprecated compatibility modules: `pipecat.services.openai_realtime_beta` (use `pipecat.services.openai.realtime`), `pipecat.services.openai_realtime.context`, `pipecat.services.openai_realtime.frames`, `pipecat.services.openai.realtime.context`, `pipecat.services.openai.realtime.frames`, `pipecat.services.gemini_multimodal_live` (use `pipecat.services.google.gemini_live`), `pipecat.services.aws_nova_sonic.context` (use `pipecat.services.aws.nova_sonic`), `pipecat.services.google.openai` and `pipecat.services.google.llm_openai` (use `pipecat.services.google.llm`).
|
||||
@@ -1,18 +0,0 @@
|
||||
- ⚠️ Removed `OpenAILLMContext`, `OpenAILLMContextFrame`, and `OpenAILLMContext.from_messages()`. Use `LLMContext` (from `pipecat.processors.aggregators.llm_context`) and `LLMContextFrame` (from `pipecat.frames.frames`) instead. All services now exclusively use the universal `LLMContext`.
|
||||
|
||||
From the developer's point of view, migrating will usually be a matter of going from this:
|
||||
|
||||
```python
|
||||
context = OpenAILLMContext(messages, tools)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
```
|
||||
|
||||
To this:
|
||||
|
||||
```python
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
|
||||
context = LLMContext(messages, tools)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
```
|
||||
@@ -1 +0,0 @@
|
||||
- Added `enable_prompt_caching` setting to `AWSBedrockLLMService` for Bedrock ConverseStream prompt caching.
|
||||
@@ -1 +0,0 @@
|
||||
- Fixed `CartesiaTTSService` failing with "Context has closed" errors when switching voice, model, or language via `TTSUpdateSettingsFrame`. The service now automatically flushes the current audio context and opens a fresh one when these settings change.
|
||||
@@ -1,13 +0,0 @@
|
||||
- ⚠️ Removed deprecated service parameters and shims that have been replaced by the `settings=Service.Settings(...)` pattern or direct `__init__` parameters:
|
||||
- `PollyTTSService` alias (use `AWSTTSService`)
|
||||
- `TTSService`: `text_aggregator`, `text_filter` init params
|
||||
- `AWSNovaSonicLLMService`: `send_transcription_frames` init param
|
||||
- `DeepgramSTTService`: `url` init param (use `base_url`)
|
||||
- `FishAudioTTSService`: `model` init param (use `reference_id` or `settings`)
|
||||
- `GladiaSTTService`: `language` and `confidence` from `GladiaInputParams`, `InputParams` class alias
|
||||
- `GeminiTTSService`: `api_key` init param
|
||||
- `GeminiLiveLLMService`: `base_url` init param (use `http_options`)
|
||||
- `GoogleVertexLLMService`: `InputParams` class with `location`/`project_id` fields (use direct init params); `project_id` is now required, `location` defaults to `"us-east4"`
|
||||
- `MiniMaxHttpTTSService`: `english_normalization` from `InputParams` (use `text_normalization`)
|
||||
- `SimliVideoService`: `simli_config` init param (use `api_key`/`face_id`), `use_turn_server` init param; `api_key` and `face_id` are now required
|
||||
- `AnthropicLLMService`: `enable_prompt_caching_beta` from `InputParams` (use `enable_prompt_caching`)
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ `LLMService.function_call_timeout_secs` now defaults to `None` instead of `10.0`. Deferred function calls will run indefinitely unless a timeout is explicitly set at the service level or per-call. If you relied on the previous 10-second default, pass `function_call_timeout_secs=10.0` explicitly.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed deprecated `pipecat.sync` package. Use `pipecat.utils.sync` instead.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed deprecated `pipecat.transports.services` and `pipecat.transports.network` module aliases. Update imports to use the corresponding new modules instead: `pipecat.transports.daily.transport`, `pipecat.transports.livekit.transport`, `pipecat.transports.websocket.*`, `pipecat.transports.webrtc.*`, and `pipecat.transports.daily.utils`.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed deprecated `add_pattern_pair` method from `PatternPairAggregator`. Use `add_pattern` instead.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed deprecated `interruption_strategies` parameter from `PipelineParams`, `StartFrame`, and `FrameProcessor`. Use `LLMUserAggregator`'s `user_turn_strategies` parameter instead.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed deprecated `EmulateUserStartedSpeakingFrame` and `EmulateUserStoppedSpeakingFrame` frames, and the `emulated` field from `UserStartedSpeakingFrame` / `UserStoppedSpeakingFrame`.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed deprecated `pipecat.audio.interruptions` module (`BaseInterruptionStrategy`, `MinWordsInterruptionStrategy`). Use `pipecat.turns.user_start.MinWordsUserTurnStartStrategy` with `LLMUserAggregator`'s `user_turn_strategies` parameter instead.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed deprecated `pipecat.processors.transcript_processor` module (`TranscriptProcessor`, `TranscriptProcessorConfig`). Use pipeline observers instead.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed deprecated `TranscriptionMessage`, `ThoughtTranscriptionMessage`, and `TranscriptionUpdateFrame` from `pipecat.frames.frames`.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed deprecated `STTMuteFilter`, `STTMuteConfig`, and `STTMuteStrategy` from `pipecat.processors.filters.stt_mute_filter`. Use `pipecat.turns.user_mute` strategies with `LLMUserAggregator`'s `user_mute_strategies` parameter instead.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed deprecated `UserResponseAggregator` class from `pipecat.processors.aggregators.user_response`. Use `LLMUserAggregator` instead.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed deprecated `pipecat.utils.tracing.class_decorators` module. Use `pipecat.utils.tracing.service_decorators` instead.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed deprecated `allow_interruptions` parameter from `PipelineParams`, `StartFrame`, and `FrameProcessor`. Interruptions are now allowed by default. Use `LLMUserAggregator`'s `user_turn_strategies` / `user_mute_strategies` parameters to control interruption behavior.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed `ExternalUserTurnStrategies` and the automatic fallback to it in `LLMUserAggregator` when a `SpeechControlParamsFrame` was received from the transport.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Removed `vad_analyzer` and `turn_analyzer` parameters from `TransportParams` and all transport input classes, along with all deprecated VAD/turn analysis logic in `BaseInputTransport`. VAD and turn detection are now handled entirely by `LLMUserAggregator`.
|
||||
1
changelog/4253.added.md
Normal file
1
changelog/4253.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `MistralSTTService` for real-time speech-to-text using Mistral's Voxtral Realtime API (`voxtral-mini-transcribe-realtime-2602`). Supports streaming transcription with interim results, automatic language detection, and VAD-driven utterance lifecycle.
|
||||
1
changelog/4304.fixed.md
Normal file
1
changelog/4304.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed `pipecat-ai[tavus]` not installing the required `daily-python` dependency. Installing the `tavus` extra now correctly pulls in `pipecat-ai[daily]`.
|
||||
1
changelog/4311.changed.md
Normal file
1
changelog/4311.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- STT services now reconnect safely when settings change: reconnection is deferred until the current user turn ends (i.e., until `UserStoppedSpeakingFrame` is received) rather than interrupting an active speech session. Audio frames received while the reconnect is in progress are buffered and replayed once the new connection is ready. `CartesiaSTTService` and `DeepgramSTTService` both use this new behavior.
|
||||
1
changelog/4311.fixed.md
Normal file
1
changelog/4311.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed audio loss and potential errors when STT settings were updated mid-speech. Previously, `CartesiaSTTService` and `DeepgramSTTService` would immediately disconnect and reconnect when settings changed, dropping any in-flight audio. Reconnection is now deferred until the user stops speaking, and audio arriving during the reconnect window is buffered and replayed.
|
||||
1
changelog/4313.added.2.md
Normal file
1
changelog/4313.added.2.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `buttons` field to `OutputDTMFFrame` and `OutputDTMFUrgentFrame` for sending multi-key DTMF sequences as a `list[KeypadEntry]`. Use `OutputDTMFFrame.from_string("123#")` (or the equivalent on `OutputDTMFUrgentFrame`) to build one from a dial string, and `to_string()` to convert back.
|
||||
1
changelog/4313.added.3.md
Normal file
1
changelog/4313.added.3.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `DailyOutputDTMFFrame` and `DailyOutputDTMFUrgentFrame` frames. In addition to the inherited `buttons`, they accept `session_id`, `digit_duration_ms` and `method`, which are forwarded to Daily's `send_dtmf` as `sessionId`, `digitDurationMs` and `method`.
|
||||
1
changelog/4313.added.md
Normal file
1
changelog/4313.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `DailyTransport.send_dtmf()` to expose the Daily call client's DTMF sending capability, enabling applications to send tones during a call (e.g. IVR navigation).
|
||||
1
changelog/4314.changed.md
Normal file
1
changelog/4314.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Reduced debug log noise for LLM services. The system instruction is now logged once when composed (e.g. when turn completion is enabled) instead of on every LLM call. Per-call logs now show only the conversation messages, consistent across Google, Anthropic, AWS, and OpenAI services.
|
||||
1
changelog/4324.added.md
Normal file
1
changelog/4324.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added incremental `pyright` type checking. A `pyrightconfig.json` at the repo root uses `typeCheckingMode: "basic"` with an explicit `include` list of modules that pass cleanly (`clocks`, `metrics`, `transcriptions`, `frames`, `observers`, `extensions`, `turns`, `pipeline`, `runner`). Remaining modules will be added in subsequent PRs. CI enforces the checked set via `uv run pyright` in the format workflow.
|
||||
1
changelog/4324.changed.md
Normal file
1
changelog/4324.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- `LiveKitRunnerArguments.token` is now a required `str` (previously `str | None` with a default of `None`). LiveKit requires a token to join a room, so the type now reflects reality. This only affects custom runners that construct `LiveKitRunnerArguments` directly; code consuming the argument from the standard runner is unaffected.
|
||||
1
changelog/4326.added.md
Normal file
1
changelog/4326.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added multilingual support to `DeepgramFluxSTTService` via a new `language_hints: list[Language]` setting. Works with Deepgram's new `flux-general-multi` model to bias transcription across English, Spanish, French, German, Hindi, Russian, Portuguese, Japanese, Italian, and Dutch. Omit the hints to use auto-detection, or pass a subset to bias toward expected languages. Hints can be updated mid-stream via `STTUpdateSettingsFrame` (sent as a Deepgram `Configure` control message, no reconnect) to support detect-then-lock flows.
|
||||
1
changelog/4326.changed.md
Normal file
1
changelog/4326.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- `TranscriptionFrame.language` and `InterimTranscriptionFrame.language` emitted by `DeepgramFluxSTTService` now reflect the language Deepgram detected for each turn (read from the `languages` field on Flux's `TurnInfo` event). On `flux-general-multi` this gives per-turn accuracy for downstream consumers (e.g. TTS voice selection). `flux-general-en` continues to emit `Language.EN`.
|
||||
@@ -1,108 +1,60 @@
|
||||
# Pipecat Documentation
|
||||
# Pipecat API Documentation
|
||||
|
||||
This directory contains the source files for auto-generating Pipecat's server API reference documentation.
|
||||
|
||||
## Setup
|
||||
|
||||
1. Install documentation dependencies:
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
2. Make the build scripts executable:
|
||||
|
||||
```bash
|
||||
chmod +x build-docs.sh rtd-test.py
|
||||
```
|
||||
This directory contains the source files for auto-generating Pipecat's API reference documentation.
|
||||
|
||||
## Building Documentation
|
||||
|
||||
From this directory, you can build the documentation in several ways:
|
||||
|
||||
### Local Build
|
||||
From this directory:
|
||||
|
||||
```bash
|
||||
# Using the build script (automatically opens docs when done)
|
||||
./build-docs.sh
|
||||
# Build docs (warnings shown but don't fail the build)
|
||||
cd docs/api && uv run ./build-docs.sh
|
||||
|
||||
# Or directly with sphinx-build
|
||||
sphinx-build -b html . _build/html -W --keep-going
|
||||
# Build with strict mode (warnings treated as errors)
|
||||
cd docs/api && uv run ./build-docs.sh --strict
|
||||
```
|
||||
|
||||
### ReadTheDocs Test Build
|
||||
The build script will:
|
||||
|
||||
To test the documentation build process exactly as it would run on ReadTheDocs:
|
||||
|
||||
```bash
|
||||
./rtd-test.py
|
||||
```
|
||||
|
||||
This script:
|
||||
|
||||
- Creates a fresh virtual environment
|
||||
- Installs all dependencies as specified in requirements files
|
||||
- Handles conflicting dependencies (like grpcio versions for Riva)
|
||||
- Builds the documentation in an isolated environment
|
||||
- Provides detailed logging of the build process
|
||||
|
||||
Use this script to verify your documentation will build correctly on ReadTheDocs before pushing changes.
|
||||
|
||||
## Viewing Documentation
|
||||
|
||||
The built documentation will be available at `_build/html/index.html`. To open:
|
||||
|
||||
```bash
|
||||
# On MacOS
|
||||
open _build/html/index.html
|
||||
|
||||
# On Linux
|
||||
xdg-open _build/html/index.html
|
||||
|
||||
# On Windows
|
||||
start _build/html/index.html
|
||||
```
|
||||
1. Install documentation dependencies via `uv sync --group docs`
|
||||
2. Clean previous build output
|
||||
3. Run `sphinx-build` to generate HTML documentation
|
||||
4. Open the result in your browser (macOS)
|
||||
|
||||
## Directory Structure
|
||||
|
||||
```
|
||||
.
|
||||
├── api/ # Auto-generated API documentation
|
||||
├── _build/ # Built documentation
|
||||
├── _static/ # Static files (images, css, etc.)
|
||||
├── conf.py # Sphinx configuration
|
||||
├── api/ # Auto-generated API documentation (created during build)
|
||||
├── _build/ # Built documentation output
|
||||
├── conf.py # Sphinx configuration (mock imports, extensions, etc.)
|
||||
├── index.rst # Main documentation entry point
|
||||
├── requirements-base.txt # Base documentation dependencies
|
||||
├── requirements-riva.txt # Riva-specific dependencies
|
||||
├── build-docs.sh # Local build script
|
||||
└── rtd-test.py # ReadTheDocs test build script
|
||||
└── rtd-test.sh # ReadTheDocs test build script (uses pip, not uv)
|
||||
```
|
||||
|
||||
## Notes
|
||||
## How It Works
|
||||
|
||||
- Documentation is auto-generated from Python docstrings
|
||||
- Service modules are automatically detected and included
|
||||
- The build process matches our ReadTheDocs configuration
|
||||
- Warnings are treated as errors (-W flag) to maintain consistency
|
||||
- The --keep-going flag ensures all errors are reported
|
||||
- Dependencies are split into multiple requirements files to handle version conflicts
|
||||
- `conf.py` runs `sphinx-apidoc` during Sphinx's `setup()` phase to generate `.rst` files from Python source
|
||||
- Sphinx autodoc imports each module to extract docstrings
|
||||
- Modules with unavailable dependencies are listed in `autodoc_mock_imports` in `conf.py`
|
||||
- Napoleon extension converts Google-style docstrings to reStructuredText
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
If you encounter missing service modules:
|
||||
**Module not appearing in docs:**
|
||||
|
||||
1. Verify the service is installed with its extras: `pip install pipecat-ai[service-name]`
|
||||
2. Check the build logs for import errors
|
||||
3. Ensure the service module is properly initialized in the package
|
||||
4. Run `./rtd-test.py` to test in an isolated environment matching ReadTheDocs
|
||||
1. Check the build output for `autodoc: failed to import` warnings
|
||||
2. If the module has an unresolvable import dependency, add it to `autodoc_mock_imports` in `conf.py`
|
||||
3. Verify the module is importable: `uv run python -c "import pipecat.module.name"`
|
||||
|
||||
For dependency conflicts:
|
||||
**Duplicate object warnings:**
|
||||
|
||||
1. Check the requirements files for version specifications
|
||||
2. Use `rtd-test.py` to verify dependency resolution
|
||||
3. Consider adding service-specific requirements files if needed
|
||||
These come from re-export modules or Sphinx discovering the same class through multiple import paths. Usually cosmetic.
|
||||
|
||||
For more information:
|
||||
**Docstring formatting warnings:**
|
||||
|
||||
- [ReadTheDocs Configuration](.readthedocs.yaml)
|
||||
- [Sphinx Documentation](https://www.sphinx-doc.org/)
|
||||
Docstrings use reStructuredText, not Markdown. Common issues:
|
||||
- Use `Example::` with indented code blocks, not `` ```python ``
|
||||
- Ensure blank lines between directive content and subsequent sections
|
||||
- Use `Parameters:` (not `Attributes:`) for dataclass field documentation to avoid duplicate entries
|
||||
|
||||
@@ -1,8 +1,16 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Usage: ./build-docs.sh [--strict]
|
||||
# --strict: Treat warnings as errors (default: warnings only)
|
||||
|
||||
SPHINX_OPTS=""
|
||||
if [ "$1" = "--strict" ]; then
|
||||
SPHINX_OPTS="-W --keep-going"
|
||||
fi
|
||||
|
||||
# Build docs using uv
|
||||
echo "Installing dependencies with uv..."
|
||||
uv sync --group docs --all-extras --no-extra gstreamer --no-extra local_smart_turn --no-extra moondream --no-extra riva --no-extra mlx-whisper
|
||||
uv sync --group docs --all-extras --no-extra gstreamer --no-extra local_smart_turn --no-extra moondream --no-extra mlx-whisper
|
||||
|
||||
# Check if sphinx-build is available
|
||||
if ! uv run sphinx-build --version &> /dev/null; then
|
||||
@@ -14,8 +22,7 @@ fi
|
||||
rm -rf _build
|
||||
|
||||
echo "Building documentation..."
|
||||
# Build docs matching ReadTheDocs configuration
|
||||
uv run sphinx-build -b html -d _build/doctrees . _build/html -W --keep-going
|
||||
uv run sphinx-build -b html -d _build/doctrees . _build/html $SPHINX_OPTS
|
||||
|
||||
if [ $? -eq 0 ]; then
|
||||
echo "Documentation built successfully!"
|
||||
|
||||
@@ -4,6 +4,19 @@ import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
# Fix Pydantic v2 + Sphinx autodoc incompatibility: ConfigDict(extra="allow") fails
|
||||
# during Sphinx's import because __pydantic_extra__ annotation on BaseModel resolves to
|
||||
# `Dict[str, Any] | None` whose get_origin() is Union, not dict. Patch the check to
|
||||
# accept Union-wrapped dict types (i.e., Optional[Dict[str, Any]]).
|
||||
import pydantic._internal._generate_schema as _pydantic_gs
|
||||
|
||||
_ORIG_DICT_TYPES = _pydantic_gs.DICT_TYPES
|
||||
# Expand the accepted types to include Union (Optional[Dict[str, Any]])
|
||||
import types
|
||||
import typing
|
||||
|
||||
_pydantic_gs.DICT_TYPES = [*_ORIG_DICT_TYPES, typing.Union, types.UnionType]
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
||||
logger = logging.getLogger("sphinx-build")
|
||||
@@ -76,16 +89,6 @@ autodoc_mock_imports = [
|
||||
"einops",
|
||||
"intel_extension_for_pytorch",
|
||||
"huggingface_hub",
|
||||
# riva dependencies
|
||||
"riva",
|
||||
"riva.client",
|
||||
"riva.client.Auth",
|
||||
"riva.client.ASRService",
|
||||
"riva.client.StreamingRecognitionConfig",
|
||||
"riva.client.RecognitionConfig",
|
||||
"riva.client.AudioEncoding",
|
||||
"riva.client.proto.riva_tts_pb2",
|
||||
"riva.client.SpeechSynthesisService",
|
||||
# MLX dependencies (Apple Silicon specific)
|
||||
"mlx",
|
||||
"mlx_whisper", # Note: might need underscore format too
|
||||
@@ -107,6 +110,8 @@ autodoc_mock_imports = [
|
||||
"fastapi.middleware",
|
||||
"fastapi.responses",
|
||||
"uvicorn",
|
||||
# Deepgram dependencies
|
||||
"deepgram",
|
||||
]
|
||||
|
||||
# HTML output settings
|
||||
@@ -133,6 +138,8 @@ def import_core_modules():
|
||||
"pipecat.runner",
|
||||
"pipecat.serializers",
|
||||
"pipecat.transcriptions",
|
||||
"pipecat.turns",
|
||||
"pipecat.extensions",
|
||||
"pipecat.utils",
|
||||
]
|
||||
|
||||
@@ -177,7 +184,6 @@ def setup(app):
|
||||
logger.info(f"Source directory: {source_dir}")
|
||||
|
||||
excludes = [
|
||||
str(project_root / "src/pipecat/pipeline/to_be_updated"),
|
||||
str(project_root / "src/pipecat/examples"),
|
||||
str(project_root / "src/pipecat/tests"),
|
||||
"**/test_*.py",
|
||||
|
||||
@@ -32,4 +32,5 @@ Quick Links
|
||||
Services <api/pipecat.services>
|
||||
Transcriptions <api/pipecat.transcriptions>
|
||||
Transports <api/pipecat.transports>
|
||||
Turns <api/pipecat.turns>
|
||||
Utils <api/pipecat.utils>
|
||||
|
||||
@@ -34,7 +34,7 @@ from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
load_dotenv(override=True)
|
||||
|
||||
OFFICE_SOUND_FILE = os.path.join(
|
||||
os.path.dirname(__file__), "assets", "office-ambience-24000-mono.mp3"
|
||||
os.path.dirname(__file__), "../assets", "office-ambience-24000-mono.mp3"
|
||||
)
|
||||
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
|
||||
@@ -36,7 +36,7 @@ from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.google import GoogleLLMService
|
||||
from pipecat.services.google.llm import GoogleLLMService
|
||||
from pipecat.services.llm_service import FunctionCallParams
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
|
||||
@@ -0,0 +1,210 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Example: async function call with intermediate updates.
|
||||
|
||||
The ``track_current_location`` tool simulates a GPS tracker reporting the
|
||||
device's position during a road trip from San Francisco to San Diego. It
|
||||
sends two intermediate updates (via ``params.result_callback`` with
|
||||
``is_final=False``) as the vehicle passes through cities along the way, then
|
||||
delivers the final destination (via ``params.result_callback``). Each update
|
||||
returns the same structure with a different city:
|
||||
|
||||
Update 1 – {gps, city: "San Francisco"} ← trip start
|
||||
Update 2 – {gps, city: "Los Angeles"} ← passing through
|
||||
Final – {gps, city: "San Diego"} ← destination reached
|
||||
|
||||
Because the function is registered with ``cancel_on_interruption=False``, the
|
||||
LLM can keep talking while the trip is in progress; each position update
|
||||
arrives as a developer message so the LLM can narrate the journey to the user.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import (
|
||||
FunctionCallResultProperties,
|
||||
LLMRunFrame,
|
||||
TTSSpeakFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.anthropic.llm import AnthropicLLMService
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.llm_service import FunctionCallParams
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
async def track_current_location(params: FunctionCallParams):
    """Simulate a GPS tracker that reports progress along a road trip.

    Three positions are delivered through ``params.result_callback``:

        1. San Francisco (trip start)      - intermediate update
        2. Los Angeles (passing through)   - intermediate update
        3. San Diego (destination)         - final result

    The first report is sent right away; each later one follows a 10 second
    pause.

    Args:
        params: Function call parameters; only ``result_callback`` is used.
    """
    # Trip start: sent immediately and marked as not final.
    departure = {"gps": {"lat": 37.7310, "lng": -122.4527}, "city": "San Francisco"}
    await params.result_callback(
        departure,
        properties=FunctionCallResultProperties(is_final=False),
    )

    # Passing through: still an intermediate update.
    await asyncio.sleep(10)
    waypoint = {"gps": {"lat": 33.96003, "lng": -118.40639}, "city": "Los Angeles"}
    await params.result_callback(
        waypoint,
        properties=FunctionCallResultProperties(is_final=False),
    )

    # Destination: no properties are passed, so this is the final result.
    await asyncio.sleep(10)
    arrival = {"gps": {"lat": 32.743569, "lng": -117.20466}, "city": "San Diego"}
    await params.result_callback(arrival)
|
||||
|
||||
|
||||
# Each entry is a zero-argument factory, so the parameter object for a transport
# is only created once that transport type has been selected at runtime.
def _daily_params():
    return DailyParams(audio_in_enabled=True, audio_out_enabled=True)


def _twilio_params():
    return FastAPIWebsocketParams(audio_in_enabled=True, audio_out_enabled=True)


def _webrtc_params():
    return TransportParams(audio_in_enabled=True, audio_out_enabled=True)


transport_params = {
    "daily": _daily_params,
    "twilio": _twilio_params,
    "webrtc": _webrtc_params,
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    """Assemble and run the voice pipeline for the location-tracking example.

    Pipeline order: transport input -> Deepgram STT -> user aggregator ->
    Anthropic LLM -> Cartesia TTS -> transport output -> assistant aggregator.

    Args:
        transport: Transport supplying the input/output processors and the
            ``on_client_connected`` / ``on_client_disconnected`` events.
        runner_args: Runner arguments; ``handle_sigint`` and
            ``pipeline_idle_timeout_secs`` are read from it.
    """
    logger.info("Starting bot")

    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

    tts = CartesiaTTSService(
        api_key=os.getenv("CARTESIA_API_KEY"),
        settings=CartesiaTTSService.Settings(
            voice="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
        ),
    )

    llm = AnthropicLLMService(
        api_key=os.getenv("ANTHROPIC_API_KEY"),
        enable_async_tool_cancellation=True,
        settings=AnthropicLLMService.Settings(
            system_instruction=(
                "You are a helpful assistant in a voice conversation. "
                "Your responses will be spoken aloud, so avoid emojis, bullet points, or other "
                "formatting that can't be spoken. "
                "You have access to a function that starts tracking the user's location and "
                "provides regular updates on it. When you receive the final location, tell the user "
                "the destination has been reached."
            ),
        ),
    )

    # cancel_on_interruption=False makes this an async function call: the LLM
    # continues the conversation immediately and receives updates/result later.
    llm.register_function(
        "track_current_location",
        track_current_location,
        cancel_on_interruption=False,
        timeout_secs=30,
    )

    @llm.event_handler("on_function_calls_cancelled")
    async def on_function_calls_cancelled(service, function_calls):
        # Log every cancelled call so cancellations are visible when testing.
        for item in function_calls:
            logger.info(f"Function call cancelled: {item.function_name} [{item.tool_call_id}]")

    # The tool takes no arguments, hence the empty properties/required.
    location_function = FunctionSchema(
        name="track_current_location",
        description="Start tracking the user's current GPS location, reporting position updates until the user reaches their destination.",
        properties={},
        required=[],
    )
    tools = ToolsSchema(standard_tools=[location_function])

    context = LLMContext(tools=tools)
    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
    )

    pipeline = Pipeline(
        [
            transport.input(),
            stt,
            user_aggregator,
            llm,
            tts,
            transport.output(),
            assistant_aggregator,
        ]
    )

    task = PipelineTask(
        pipeline,
        params=PipelineParams(
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
        # Use the runner-configured idle timeout, as the other function-calling
        # examples do, instead of silently relying on the task default.
        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info("Client connected")
        # Kick off the conversation.
        context.add_message(
            {"role": "developer", "content": "Please introduce yourself to the user."}
        )
        await task.queue_frames([LLMRunFrame()])

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info("Client disconnected")
        await task.cancel()

    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)

    await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
    """Main bot entry point compatible with Pipecat Cloud.

    Creates the transport described by ``runner_args`` from the
    ``transport_params`` factories, then hands it to ``run_bot``.
    """
    await run_bot(await create_transport(runner_args, transport_params), runner_args)


if __name__ == "__main__":
    # Imported here so the runner is only loaded when executed as a script.
    from pipecat.runner.run import main

    main()
|
||||
180
examples/function-calling/function-calling-anthropic-async.py
Normal file
180
examples/function-calling/function-calling-anthropic-async.py
Normal file
@@ -0,0 +1,180 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.anthropic.llm import AnthropicLLMService
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.llm_service import FunctionCallParams
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
async def fetch_weather_from_api(params: FunctionCallParams):
    """Return a canned weather report after a simulated slow API call.

    The 20 second delay stands in for a long-running request so that async
    function calls (``cancel_on_interruption=False``) can be exercised.

    Args:
        params: Function call parameters; only ``result_callback`` is used.
    """
    simulated_latency_secs = 20
    await asyncio.sleep(simulated_latency_secs)

    report = {"conditions": "nice", "temperature": "75"}
    await params.result_callback(report)
|
||||
|
||||
|
||||
async def fetch_restaurant_recommendation(params: FunctionCallParams):
    """Deliver a fixed restaurant recommendation through the result callback."""
    recommendation = {"name": "The Golden Dragon"}
    await params.result_callback(recommendation)
|
||||
|
||||
|
||||
# Keyword arguments shared by every transport in this example.
_AUDIO_IO = {"audio_in_enabled": True, "audio_out_enabled": True}

# The values are lambdas so a parameter object is only created for the
# transport type that is actually selected at runtime.
transport_params = {
    "daily": lambda: DailyParams(**_AUDIO_IO),
    "twilio": lambda: FastAPIWebsocketParams(**_AUDIO_IO),
    "webrtc": lambda: TransportParams(**_AUDIO_IO),
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    """Wire up and run the weather/restaurant function-calling voice bot.

    Args:
        transport: Transport supplying the input/output processors and the
            client connect/disconnect events.
        runner_args: Runner arguments; ``handle_sigint`` and
            ``pipeline_idle_timeout_secs`` are read from it.
    """
    logger.info("Starting bot")

    speech_to_text = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

    text_to_speech = CartesiaTTSService(
        api_key=os.getenv("CARTESIA_API_KEY"),
        settings=CartesiaTTSService.Settings(
            voice="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
        ),
    )

    anthropic_llm = AnthropicLLMService(
        api_key=os.getenv("ANTHROPIC_API_KEY"),
        enable_async_tool_cancellation=True,
        settings=AnthropicLLMService.Settings(
            system_instruction=(
                "You are a helpful assistant in a voice conversation. "
                "Your responses will be spoken aloud, so avoid emojis, bullet points, "
                "or other formatting that can't be spoken. "
                "Respond to what the user said in a creative, helpful, and brief way."
            ),
        ),
    )

    # Registering with a function_name of None instead would route every
    # function to one callback that receives an extra function_name parameter.
    # The weather tool is registered as an async call (cancel_on_interruption=False).
    anthropic_llm.register_function(
        "get_current_weather",
        fetch_weather_from_api,
        cancel_on_interruption=False,
        timeout_secs=30,
    )
    anthropic_llm.register_function(
        "get_restaurant_recommendation", fetch_restaurant_recommendation
    )

    @anthropic_llm.event_handler("on_function_calls_cancelled")
    async def on_function_calls_cancelled(service, function_calls):
        for cancelled in function_calls:
            logger.info(
                f"Function call cancelled: {cancelled.function_name} [{cancelled.tool_call_id}]"
            )

    def city_state_tool(name, description):
        """Build a schema for a tool whose only (required) argument is a location."""
        return FunctionSchema(
            name=name,
            description=description,
            properties={
                "location": {
                    "type": "string",
                    "description": "The city and state, e.g. San Francisco, CA",
                },
            },
            required=["location"],
        )

    weather_function = city_state_tool("get_current_weather", "Get the current weather")
    restaurant_function = city_state_tool(
        "get_restaurant_recommendation", "Get a restaurant recommendation"
    )
    tool_schema = ToolsSchema(standard_tools=[weather_function, restaurant_function])

    context = LLMContext(tools=tool_schema)
    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
    )

    processors = [
        transport.input(),  # Transport user input
        speech_to_text,
        user_aggregator,  # User spoken responses
        anthropic_llm,  # LLM
        text_to_speech,  # TTS
        transport.output(),  # Transport bot output
        assistant_aggregator,  # Assistant spoken responses and tool context
    ]
    pipeline = Pipeline(processors)

    task_params = PipelineParams(
        enable_metrics=True,
        enable_usage_metrics=True,
    )
    task = PipelineTask(
        pipeline,
        params=task_params,
        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info("Client connected")
        # Seed the context with an instruction, then ask the LLM to run.
        intro_request = {
            "role": "developer",
            "content": "Please introduce yourself to the user.",
        }
        context.add_message(intro_request)
        await task.queue_frames([LLMRunFrame()])

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info("Client disconnected")
        await task.cancel()

    pipeline_runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
    await pipeline_runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
    """Main bot entry point compatible with Pipecat Cloud.

    Builds the transport for ``runner_args`` and runs the bot on it.
    """
    selected_transport = await create_transport(runner_args, transport_params)
    return await run_bot(selected_transport, runner_args)


if __name__ == "__main__":
    # Script entry: delegate to the Pipecat runner.
    from pipecat.runner.run import main

    main()
|
||||
@@ -0,0 +1,214 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Example: async function call with intermediate updates.
|
||||
|
||||
The ``track_current_location`` tool simulates a GPS tracker reporting the
|
||||
device's position during a road trip from San Francisco to San Diego. It
|
||||
sends two intermediate updates (via ``params.result_callback`` with
|
||||
``is_final=False``) as the vehicle passes through cities along the way, then
|
||||
delivers the final destination (via ``params.result_callback``). Each update
|
||||
returns the same structure with a different city:
|
||||
|
||||
Update 1 – {gps, city: "San Francisco"} ← trip start
|
||||
Update 2 – {gps, city: "Los Angeles"} ← passing through
|
||||
Final – {gps, city: "San Diego"} ← destination reached
|
||||
|
||||
Because the function is registered with ``cancel_on_interruption=False``, the
|
||||
LLM can keep talking while the trip is in progress; each position update
|
||||
arrives as a developer message so the LLM can narrate the journey to the user.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import (
|
||||
FunctionCallResultProperties,
|
||||
LLMRunFrame,
|
||||
TTSSpeakFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.google.llm import GoogleLLMService
|
||||
from pipecat.services.llm_service import FunctionCallParams
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
async def track_current_location(params: FunctionCallParams):
    """Report a simulated road trip position by position.

    Reports, all sent via ``params.result_callback``:

        Step 1 - San Francisco (trip start)       intermediate update
        Step 2 - Los Angeles (passing through)    intermediate update
        Step 3 - San Diego (destination)          final result

    Args:
        params: Function call parameters; only ``result_callback`` is used.
    """
    pause_secs = 10

    # Cities reported while the trip is still in progress (is_final=False).
    in_progress = (
        ("San Francisco", {"lat": 37.7310, "lng": -122.4527}),
        ("Los Angeles", {"lat": 33.96003, "lng": -118.40639}),
    )
    for leg, (city, gps) in enumerate(in_progress):
        # The very first report goes out immediately; later ones wait first.
        if leg:
            await asyncio.sleep(pause_secs)
        await params.result_callback(
            {"gps": gps, "city": city},
            properties=FunctionCallResultProperties(is_final=False),
        )

    # Final result: sent without properties once the destination is reached.
    await asyncio.sleep(pause_secs)
    await params.result_callback(
        {"gps": {"lat": 32.743569, "lng": -117.20466}, "city": "San Diego"}
    )
|
||||
|
||||
|
||||
# Parameter creation is deferred behind lambdas until the transport type has
# been chosen at runtime; every transport here is audio in + audio out.
transport_params = dict(
    daily=lambda: DailyParams(audio_in_enabled=True, audio_out_enabled=True),
    twilio=lambda: FastAPIWebsocketParams(audio_in_enabled=True, audio_out_enabled=True),
    webrtc=lambda: TransportParams(audio_in_enabled=True, audio_out_enabled=True),
)
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    """Assemble and run the voice pipeline for the location-tracking example.

    Pipeline order: transport input -> Deepgram STT -> user aggregator ->
    Google LLM -> Cartesia TTS -> transport output -> assistant aggregator.

    Args:
        transport: Transport supplying the input/output processors and the
            ``on_client_connected`` / ``on_client_disconnected`` events.
        runner_args: Runner arguments; ``handle_sigint`` and
            ``pipeline_idle_timeout_secs`` are read from it.
    """
    logger.info("Starting bot")

    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

    tts = CartesiaTTSService(
        api_key=os.getenv("CARTESIA_API_KEY"),
        settings=CartesiaTTSService.Settings(
            voice="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
        ),
    )

    llm = GoogleLLMService(
        api_key=os.getenv("GOOGLE_API_KEY"),
        enable_async_tool_cancellation=True,
        settings=GoogleLLMService.Settings(
            system_instruction=(
                "You are a helpful assistant in a voice conversation. "
                "Your responses will be spoken aloud, so avoid emojis, bullet points, or other "
                "formatting that can't be spoken. "
                "You have access to a function that starts tracking the user's location and "
                "provides regular updates on it. When you receive the final location, tell the user "
                "the destination has been reached."
            ),
        ),
    )

    # cancel_on_interruption=False makes this an async function call: the LLM
    # continues the conversation immediately and receives updates/result later.
    llm.register_function(
        "track_current_location",
        track_current_location,
        cancel_on_interruption=False,
        timeout_secs=30,
    )

    @llm.event_handler("on_function_calls_started")
    async def on_function_calls_started(service, function_calls):
        # Acknowledge out loud as soon as the function call starts.
        await tts.queue_frame(TTSSpeakFrame("Sure, tracking your location now."))

    @llm.event_handler("on_function_calls_cancelled")
    async def on_function_calls_cancelled(service, function_calls):
        # Log every cancelled call so cancellations are visible when testing.
        for item in function_calls:
            logger.info(f"Function call cancelled: {item.function_name} [{item.tool_call_id}]")

    # The tool takes no arguments, hence the empty properties/required.
    location_function = FunctionSchema(
        name="track_current_location",
        description="Start tracking the user's current GPS location, reporting position updates until the user reaches their destination.",
        properties={},
        required=[],
    )
    tools = ToolsSchema(standard_tools=[location_function])

    context = LLMContext(tools=tools)
    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
    )

    pipeline = Pipeline(
        [
            transport.input(),
            stt,
            user_aggregator,
            llm,
            tts,
            transport.output(),
            assistant_aggregator,
        ]
    )

    task = PipelineTask(
        pipeline,
        params=PipelineParams(
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
        # Use the runner-configured idle timeout, as the other function-calling
        # examples do, instead of silently relying on the task default.
        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info("Client connected")
        # Kick off the conversation.
        context.add_message(
            {"role": "developer", "content": "Please introduce yourself to the user."}
        )
        await task.queue_frames([LLMRunFrame()])

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info("Client disconnected")
        await task.cancel()

    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)

    await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
    """Main bot entry point compatible with Pipecat Cloud.

    Resolves the transport from ``runner_args`` and ``transport_params``
    before starting the bot.
    """
    await run_bot(await create_transport(runner_args, transport_params), runner_args)


if __name__ == "__main__":
    # Only reached when the example is executed directly.
    from pipecat.runner.run import main

    main()
|
||||
256
examples/function-calling/function-calling-google-async.py
Normal file
256
examples/function-calling/function-calling-google-async.py
Normal file
@@ -0,0 +1,256 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame, TTSSpeakFrame, UserImageRequestFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.processors.frame_processor import FrameDirection
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import (
|
||||
create_transport,
|
||||
get_transport_client_id,
|
||||
maybe_capture_participant_camera,
|
||||
)
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.google.llm import GoogleLLMService
|
||||
from pipecat.services.llm_service import FunctionCallParams
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
async def get_weather(params: FunctionCallParams):
    """Return a canned weather report after a simulated slow API call.

    The 20 second delay stands in for a long-running request so that async
    function calls (``cancel_on_interruption=False``) can be exercised.

    Args:
        params: Function call parameters. ``arguments["location"]`` is
            required; ``arguments["format"]`` ("celsius" or "fahrenheit")
            selects the unit and falls back to fahrenheit when absent.

    Raises:
        KeyError: If ``location`` is missing from the arguments.
    """
    # Read the arguments before the delay so a malformed call fails right away
    # instead of after 20 seconds.
    location = params.arguments["location"]
    unit = params.arguments.get("format", "fahrenheit")

    # Simulate a long-running API call.
    await asyncio.sleep(20)

    # Honour the unit requested through the tool schema; the fahrenheit
    # message is unchanged from the original report.
    if unit == "celsius":
        report = f"The weather in {location} is currently 22 degrees Celsius and sunny."
    else:
        report = f"The weather in {location} is currently 72 degrees and sunny."
    await params.result_callback(report)
|
||||
|
||||
|
||||
async def fetch_restaurant_recommendation(params: FunctionCallParams):
    """Answer immediately with a fixed restaurant name."""
    suggestion = {"name": "The Golden Dragon"}
    await params.result_callback(suggestion)
|
||||
|
||||
|
||||
async def get_image(params: FunctionCallParams):
    """Ask the transport for a user image on behalf of this function call.

    A UserImageRequestFrame is pushed upstream from the LLM processor. Per the
    original example notes, the transport then requests the user image and
    pushes a UserImageRawFrame downstream, which the LLM assistant aggregator
    adds to the context; ``result_callback`` is invoked once the image has been
    retrieved and processed.

    Args:
        params: Function call parameters; ``arguments`` must contain
            ``user_id`` and ``question``.
    """
    call_args = params.arguments
    user_id = call_args["user_id"]
    question = call_args["question"]
    logger.debug(f"Requesting image with user_id={user_id}, question={question}")

    # The frame asks for the image to be appended to the context, ties it to
    # this function call, and carries the result_callback so it can be invoked
    # when the image is actually retrieved.
    image_request = UserImageRequestFrame(
        user_id=user_id,
        text=question,
        append_to_context=True,
        function_name=params.function_name,
        tool_call_id=params.tool_call_id,
        result_callback=params.result_callback,
    )
    await params.llm.push_frame(image_request, FrameDirection.UPSTREAM)
|
||||
|
||||
|
||||
# Keyword arguments shared by both transports: audio both ways plus video in.
_AV_INPUT = {"audio_in_enabled": True, "audio_out_enabled": True, "video_in_enabled": True}

# Lambdas keep parameter creation deferred until the transport type is
# selected at runtime.
transport_params = {
    "daily": lambda: DailyParams(**_AV_INPUT),
    "webrtc": lambda: TransportParams(**_AV_INPUT),
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    """Wire up and run the weather/restaurant/camera function-calling voice bot.

    Args:
        transport: Transport supplying the input/output processors and the
            client connect/disconnect events.
        runner_args: Runner arguments; ``handle_sigint`` and
            ``pipeline_idle_timeout_secs`` are read from it.
    """
    logger.info("Starting bot")

    speech_to_text = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

    text_to_speech = CartesiaTTSService(
        api_key=os.getenv("CARTESIA_API_KEY"),
        settings=CartesiaTTSService.Settings(
            voice="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
        ),
    )

    system_prompt = """\
You are a helpful assistant who converses with a user and answers questions. Respond concisely to general questions.

Your response will be turned into speech so use only simple words and punctuation.

You have access to three tools: get_weather, get_restaurant_recommendation, and get_image.

You can respond to questions about the weather using the get_weather tool.

You can answer questions about the user's video stream using the get_image tool. Some examples of phrases that \
indicate you should use the get_image tool are:
- What do you see?
- What's in the video?
- Can you describe the video?
- Tell me about what you see.
- Tell me something interesting about what you see.
- What's happening in the video?
"""

    google_llm = GoogleLLMService(
        api_key=os.getenv("GOOGLE_API_KEY"),
        enable_async_tool_cancellation=True,
        settings=GoogleLLMService.Settings(
            system_instruction=system_prompt,
        ),
    )
    # Only the weather tool is registered as an async call
    # (cancel_on_interruption=False); the other two use the defaults.
    google_llm.register_function(
        "get_weather",
        get_weather,
        cancel_on_interruption=False,
        timeout_secs=30,
    )
    google_llm.register_function("get_image", get_image)
    google_llm.register_function(
        "get_restaurant_recommendation", fetch_restaurant_recommendation
    )

    @google_llm.event_handler("on_function_calls_started")
    async def on_function_calls_started(service, function_calls):
        # Give spoken feedback as soon as any function call starts.
        await text_to_speech.queue_frame(TTSSpeakFrame("Let me check on that."))

    @google_llm.event_handler("on_function_calls_cancelled")
    async def on_function_calls_cancelled(service, function_calls):
        for cancelled in function_calls:
            logger.info(
                f"Function call cancelled: {cancelled.function_name} [{cancelled.tool_call_id}]"
            )

    def string_property(description):
        """Return a fresh JSON-schema entry for a string argument."""
        return {"type": "string", "description": description}

    weather_function = FunctionSchema(
        name="get_weather",
        description="Get the current weather",
        properties={
            "location": string_property("The city and state, e.g. San Francisco, CA"),
            "format": {
                "type": "string",
                "enum": ["celsius", "fahrenheit"],
                "description": "The temperature unit to use. Infer this from the user's location.",
            },
        },
        required=["location", "format"],
    )
    restaurant_function = FunctionSchema(
        name="get_restaurant_recommendation",
        description="Get a restaurant recommendation",
        properties={
            "location": string_property("The city and state, e.g. San Francisco, CA"),
        },
        required=["location"],
    )
    get_image_function = FunctionSchema(
        name="get_image",
        description="Called when the user requests a description of their camera feed",
        properties={
            "user_id": string_property("The ID of the user to grab the image from"),
            "question": string_property("The question that the user is asking about the image"),
        },
        required=["user_id", "question"],
    )
    tool_schema = ToolsSchema(
        standard_tools=[weather_function, get_image_function, restaurant_function]
    )

    context = LLMContext(tools=tool_schema)
    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
    )

    processors = [
        transport.input(),
        speech_to_text,
        user_aggregator,
        google_llm,
        text_to_speech,
        transport.output(),
        assistant_aggregator,
    ]
    pipeline = Pipeline(processors)

    task_params = PipelineParams(
        enable_metrics=True,
        enable_usage_metrics=True,
    )
    task = PipelineTask(
        pipeline,
        params=task_params,
        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info(f"Client connected: {client}")

        await maybe_capture_participant_camera(transport, client)

        client_id = get_transport_client_id(transport, client)

        # Tell the LLM which user ID to pass to function calls, then start it.
        intro_request = {
            "role": "developer",
            "content": f"Please introduce yourself to the user. Use '{client_id}' as the user ID during function calls.",
        }
        context.add_message(intro_request)
        await task.queue_frames([LLMRunFrame()])

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info("Client disconnected")
        await task.cancel()

    pipeline_runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
    await pipeline_runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
    """Pipecat Cloud-compatible entry point: create the transport, then run the bot.

    Args:
        runner_args: Runner-supplied arguments, used to create the transport
            and forwarded unchanged to ``run_bot``.
    """
    selected_transport = await create_transport(runner_args, transport_params)
    await run_bot(selected_transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # The runner import lives inside the guard so it only happens when this
    # file is executed as a script, not when it is imported as a module.
    from pipecat.runner.run import main

    main()
|
||||
@@ -0,0 +1,214 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Example: async function call with intermediate updates.
|
||||
|
||||
The ``track_current_location`` tool simulates a GPS tracker reporting the
|
||||
device's position during a road trip from San Francisco to San Diego. It
|
||||
sends two intermediate updates (via ``params.result_callback`` with
|
||||
``is_final=False``) as the vehicle passes through cities along the way, then
|
||||
delivers the final destination (via ``params.result_callback``). Each update
|
||||
returns the same structure with a different city:
|
||||
|
||||
Update 1 – {gps, city: "San Francisco"} ← trip start
|
||||
Update 2 – {gps, city: "Los Angeles"} ← passing through
|
||||
Final – {gps, city: "San Diego"} ← destination reached
|
||||
|
||||
Because the function is registered with ``cancel_on_interruption=False``, the
|
||||
LLM can keep talking while the trip is in progress; each position update
|
||||
arrives as a developer message so the LLM can narrate the journey to the user.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import (
|
||||
FunctionCallResultProperties,
|
||||
LLMRunFrame,
|
||||
TTSSpeakFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.llm_service import FunctionCallParams
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
async def track_current_location(params: FunctionCallParams):
    """Simulate a GPS tracker that reports three positions along a road trip.

    The first two positions are delivered as intermediate updates by calling
    ``params.result_callback`` with ``is_final=False``; the last call omits
    ``properties`` and delivers the destination. The first update is sent
    immediately and every later report follows a 10-second pause.

        Step 1 – San Francisco  (trip start)        (update)
        Step 2 – Los Angeles    (passing through)   (update)
        Step 3 – San Diego      (destination)       (final result)

    Args:
        params: Function call parameters; only ``result_callback`` is used.
    """
    # Waypoints reported as non-final updates, in travel order.
    waypoints = (
        ("San Francisco", {"lat": 37.7310, "lng": -122.4527}),
        ("Los Angeles", {"lat": 33.96003, "lng": -118.40639}),
    )

    for index, (city, gps) in enumerate(waypoints):
        if index:
            # Simulated travel time between consecutive waypoints.
            await asyncio.sleep(10)
        await params.result_callback(
            {"gps": gps, "city": city},
            properties=FunctionCallResultProperties(is_final=False),
        )

    # Destination: one more simulated leg, then the final result.
    await asyncio.sleep(10)
    destination = {"lat": 32.743569, "lng": -117.20466}
    await params.result_callback({"gps": destination, "city": "San Diego"})
|
||||
|
||||
|
||||
# Each value is a zero-argument factory rather than a params instance, so the
# transport parameters are only created once the transport type has been
# selected at runtime.
transport_params = {
    "daily": lambda: DailyParams(audio_in_enabled=True, audio_out_enabled=True),
    "twilio": lambda: FastAPIWebsocketParams(audio_in_enabled=True, audio_out_enabled=True),
    "webrtc": lambda: TransportParams(audio_in_enabled=True, audio_out_enabled=True),
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    """Assemble and run the voice pipeline for the location-tracking example.

    Creates the STT, TTS and LLM services, registers
    ``track_current_location`` as an async function call, wires the pipeline
    and runs it. The task is cancelled when the client disconnects.

    Args:
        transport: Transport that provides the pipeline's input and output.
        runner_args: Runner arguments; supplies ``handle_sigint`` for the
            runner and ``pipeline_idle_timeout_secs`` for the pipeline task.
    """
    logger.info("Starting bot")

    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

    tts = CartesiaTTSService(
        api_key=os.getenv("CARTESIA_API_KEY"),
        settings=CartesiaTTSService.Settings(
            voice="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
        ),
    )

    llm = OpenAILLMService(
        api_key=os.getenv("OPENAI_API_KEY"),
        enable_async_tool_cancellation=True,
        settings=OpenAILLMService.Settings(
            system_instruction=(
                "You are a helpful assistant in a voice conversation. "
                "Your responses will be spoken aloud, so avoid emojis, bullet points, or other "
                "formatting that can't be spoken. "
                "You have access to a function that starts tracking the user's location and "
                "provides regular updates on it. When you receive the final location, tell the user "
                "the destination has been reached."
            ),
        ),
    )

    # cancel_on_interruption=False makes this an async function call: the LLM
    # continues the conversation immediately and receives updates/result later.
    llm.register_function(
        "track_current_location",
        track_current_location,
        cancel_on_interruption=False,
        timeout_secs=30,
    )

    @llm.event_handler("on_function_calls_started")
    async def on_function_calls_started(service, function_calls):
        # Spoken acknowledgement while the function call runs.
        await tts.queue_frame(TTSSpeakFrame("Sure, tracking your location now."))

    @llm.event_handler("on_function_calls_cancelled")
    async def on_function_calls_cancelled(service, function_calls):
        for item in function_calls:
            logger.info(f"Function call cancelled: {item.function_name} [{item.tool_call_id}]")

    # The tool takes no arguments, hence the empty properties/required.
    location_function = FunctionSchema(
        name="track_current_location",
        description="Start tracking the user's current GPS location, reporting position updates until the user reaches their destination.",
        properties={},
        required=[],
    )
    tools = ToolsSchema(standard_tools=[location_function])

    context = LLMContext(tools=tools)
    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
    )

    pipeline = Pipeline(
        [
            transport.input(),
            stt,
            user_aggregator,
            llm,
            tts,
            transport.output(),
            assistant_aggregator,
        ]
    )

    task = PipelineTask(
        pipeline,
        params=PipelineParams(
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
        # Honour the idle timeout configured by the runner, as the sibling
        # function-calling examples do.
        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info("Client connected")
        # Kick off the conversation.
        context.add_message(
            {"role": "developer", "content": "Please introduce yourself to the user."}
        )
        await task.queue_frames([LLMRunFrame()])

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info("Client disconnected")
        await task.cancel()

    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)

    await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
    """Pipecat Cloud-compatible entry point: create the transport, then run the bot.

    Args:
        runner_args: Runner-supplied arguments, used to create the transport
            and forwarded unchanged to ``run_bot``.
    """
    selected_transport = await create_transport(runner_args, transport_params)
    await run_bot(selected_transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # The runner import lives inside the guard so it only happens when this
    # file is executed as a script, not when it is imported as a module.
    from pipecat.runner.run import main

    main()
|
||||
198
examples/function-calling/function-calling-openai-async.py
Normal file
198
examples/function-calling/function-calling-openai-async.py
Normal file
@@ -0,0 +1,198 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import (
|
||||
LLMRunFrame,
|
||||
TTSSpeakFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.llm_service import FunctionCallParams
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.services.openai.stt import OpenAISTTService
|
||||
from pipecat.services.openai.tts import OpenAITTSService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
async def fetch_weather_from_api(params: FunctionCallParams):
    """Return a canned weather report after a simulated 20-second API delay.

    The delay stands in for a long-running call so async function calls can
    be exercised.

    Args:
        params: Function call parameters; only ``result_callback`` is used.
    """
    await asyncio.sleep(20)
    report = {"conditions": "nice", "temperature": "75"}
    await params.result_callback(report)
|
||||
|
||||
|
||||
async def fetch_restaurant_recommendation(params: FunctionCallParams):
    """Deliver a fixed restaurant recommendation through the result callback.

    Args:
        params: Function call parameters; only ``result_callback`` is used.
    """
    recommendation = {"name": "The Golden Dragon"}
    await params.result_callback(recommendation)
|
||||
|
||||
|
||||
# Each value is a zero-argument factory rather than a params instance, so the
# transport parameters are only created once the transport type has been
# selected at runtime.
transport_params = {
    "daily": lambda: DailyParams(audio_in_enabled=True, audio_out_enabled=True),
    "twilio": lambda: FastAPIWebsocketParams(audio_in_enabled=True, audio_out_enabled=True),
    "webrtc": lambda: TransportParams(audio_in_enabled=True, audio_out_enabled=True),
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    """Assemble and run the voice pipeline for the async weather/restaurant example.

    Creates the OpenAI STT, TTS and LLM services, registers the weather tool
    as an async function call and the restaurant tool as a regular one, wires
    the pipeline and runs it. The task is cancelled when the client
    disconnects.

    Args:
        transport: Transport that provides the pipeline's input and output.
        runner_args: Runner arguments; supplies ``handle_sigint`` for the
            runner and ``pipeline_idle_timeout_secs`` for the pipeline task.
    """
    logger.info("Starting bot")

    stt = OpenAISTTService(
        api_key=os.getenv("OPENAI_API_KEY"),
        settings=OpenAISTTService.Settings(
            model="gpt-4o-transcribe",
            prompt="Expect words related weather, such as temperature and conditions. And restaurant names.",
        ),
    )

    tts = OpenAITTSService(
        api_key=os.getenv("OPENAI_API_KEY"),
        settings=OpenAITTSService.Settings(
            voice="ballad",
        ),
        instructions="Please speak clearly and at a moderate pace.",
    )

    llm = OpenAILLMService(
        api_key=os.getenv("OPENAI_API_KEY"),
        enable_async_tool_cancellation=True,
        settings=OpenAILLMService.Settings(
            system_instruction="You are a helpful assistant in a voice conversation. Your responses will be spoken aloud, so avoid emojis, bullet points, or other formatting that can't be spoken. Respond to what the user said in a creative, helpful, and brief way.",
        ),
    )

    # You can also register a function_name of None to get all functions
    # sent to the same callback with an additional function_name parameter.
    # The weather call is registered with cancel_on_interruption=False and a
    # 30-second timeout, which covers its simulated 20-second delay.
    llm.register_function(
        "get_current_weather",
        fetch_weather_from_api,
        cancel_on_interruption=False,
        timeout_secs=30,
    )
    llm.register_function("get_restaurant_recommendation", fetch_restaurant_recommendation)

    @llm.event_handler("on_function_calls_started")
    async def on_function_calls_started(service, function_calls):
        # Spoken acknowledgement while the function call runs.
        await tts.queue_frame(TTSSpeakFrame("Let me check on that."))

    @llm.event_handler("on_function_calls_cancelled")
    async def on_function_calls_cancelled(service, function_calls):
        for item in function_calls:
            logger.info(f"Function call cancelled: {item.function_name} [{item.tool_call_id}]")

    weather_function = FunctionSchema(
        name="get_current_weather",
        description="Get the current weather",
        properties={
            "location": {
                "type": "string",
                "description": "The city and state, e.g. San Francisco, CA",
            },
            "format": {
                "type": "string",
                "enum": ["celsius", "fahrenheit"],
                "description": "The temperature unit to use. Infer this from the user's location.",
            },
        },
        required=["location", "format"],
    )
    restaurant_function = FunctionSchema(
        name="get_restaurant_recommendation",
        description="Get a restaurant recommendation",
        properties={
            "location": {
                "type": "string",
                "description": "The city and state, e.g. San Francisco, CA",
            },
        },
        required=["location"],
    )
    tools = ToolsSchema(standard_tools=[weather_function, restaurant_function])

    context = LLMContext(tools=tools)
    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
    )

    pipeline = Pipeline(
        [
            transport.input(),
            stt,
            user_aggregator,
            llm,
            tts,
            transport.output(),
            assistant_aggregator,
        ]
    )

    task = PipelineTask(
        pipeline,
        params=PipelineParams(
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
        # Honour the idle timeout configured by the runner, as the sibling
        # function-calling examples do.
        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info("Client connected")
        # Kick off the conversation.
        context.add_message(
            {"role": "developer", "content": "Please introduce yourself to the user."}
        )
        await task.queue_frames([LLMRunFrame()])

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info("Client disconnected")
        await task.cancel()

    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)

    await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
    """Pipecat Cloud-compatible entry point: create the transport, then run the bot.

    Args:
        runner_args: Runner-supplied arguments, used to create the transport
            and forwarded unchanged to ``run_bot``.
    """
    selected_transport = await create_transport(runner_args, transport_params)
    await run_bot(selected_transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # The runner import lives inside the guard so it only happens when this
    # file is executed as a script, not when it is imported as a module.
    from pipecat.runner.run import main

    main()
|
||||
@@ -0,0 +1,211 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Example: async function call with intermediate updates.
|
||||
|
||||
The ``track_current_location`` tool simulates a GPS tracker reporting the
|
||||
device's position during a road trip from San Francisco to San Diego. It
|
||||
sends two intermediate updates (via ``params.result_callback`` with
|
||||
``is_final=False``) as the vehicle passes through cities along the way, then
|
||||
delivers the final destination (via ``params.result_callback``). Each update
returns the same structure with a different city:
|
||||
|
||||
Update 1 – {gps, city: "San Francisco"} ← trip start
|
||||
Update 2 – {gps, city: "Los Angeles"} ← passing through
|
||||
Final – {gps, city: "San Diego"} ← destination reached
|
||||
|
||||
Because the function is registered with ``cancel_on_interruption=False``, the
|
||||
LLM can keep talking while the trip is in progress; each position update
|
||||
arrives as a developer message so the LLM can narrate the journey to the user.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import (
|
||||
FunctionCallResultProperties,
|
||||
LLMRunFrame,
|
||||
TTSSpeakFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.llm_service import FunctionCallParams
|
||||
from pipecat.services.openai.responses.llm import OpenAIResponsesLLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
async def track_current_location(params: FunctionCallParams):
    """Simulate a GPS tracker that reports three positions along a road trip.

    The first two positions are delivered as intermediate updates by calling
    ``params.result_callback`` with ``is_final=False``; the last call omits
    ``properties`` and delivers the destination. The first update is sent
    immediately and every later report follows a 10-second pause.

        Step 1 – San Francisco  (trip start)        (update)
        Step 2 – Los Angeles    (passing through)   (update)
        Step 3 – San Diego      (destination)       (final result)

    Args:
        params: Function call parameters; only ``result_callback`` is used.
    """
    # Waypoints reported as non-final updates, in travel order.
    waypoints = (
        ("San Francisco", {"lat": 37.7310, "lng": -122.4527}),
        ("Los Angeles", {"lat": 33.96003, "lng": -118.40639}),
    )

    for index, (city, gps) in enumerate(waypoints):
        if index:
            # Simulated travel time between consecutive waypoints.
            await asyncio.sleep(10)
        await params.result_callback(
            {"gps": gps, "city": city},
            properties=FunctionCallResultProperties(is_final=False),
        )

    # Destination: one more simulated leg, then the final result.
    await asyncio.sleep(10)
    destination = {"lat": 32.743569, "lng": -117.20466}
    await params.result_callback({"gps": destination, "city": "San Diego"})
|
||||
|
||||
|
||||
# Each value is a zero-argument factory rather than a params instance, so the
# transport parameters are only created once the transport type has been
# selected at runtime.
transport_params = {
    "daily": lambda: DailyParams(audio_in_enabled=True, audio_out_enabled=True),
    "twilio": lambda: FastAPIWebsocketParams(audio_in_enabled=True, audio_out_enabled=True),
    "webrtc": lambda: TransportParams(audio_in_enabled=True, audio_out_enabled=True),
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    """Assemble and run the location-tracking pipeline on the OpenAI Responses LLM.

    Creates the STT, TTS and LLM services, registers
    ``track_current_location`` as an async function call, wires the pipeline
    and runs it. The task is cancelled when the client disconnects.

    Args:
        transport: Transport that provides the pipeline's input and output.
        runner_args: Runner arguments; supplies ``handle_sigint`` for the
            runner and ``pipeline_idle_timeout_secs`` for the pipeline task.
    """
    logger.info("Starting bot")

    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

    tts = CartesiaTTSService(
        api_key=os.getenv("CARTESIA_API_KEY"),
        settings=CartesiaTTSService.Settings(
            voice="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
        ),
    )

    llm = OpenAIResponsesLLMService(
        api_key=os.getenv("OPENAI_API_KEY"),
        enable_async_tool_cancellation=True,
        settings=OpenAIResponsesLLMService.Settings(
            system_instruction=(
                "You are a helpful assistant in a voice conversation. "
                "Your responses will be spoken aloud, so avoid emojis, bullet points, or other "
                "formatting that can't be spoken. "
                "You have access to a function that starts tracking a moving device's location and "
                "provides regular updates on it. When you receive the final location, tell the user "
                "the destination has been reached and announce the final city."
            ),
        ),
    )

    # cancel_on_interruption=False makes this an async function call: the LLM
    # continues the conversation immediately and receives updates/result later.
    llm.register_function(
        "track_current_location",
        track_current_location,
        cancel_on_interruption=False,
        timeout_secs=30,
    )

    @llm.event_handler("on_function_calls_started")
    async def on_function_calls_started(service, function_calls):
        # Keep this filler message out of the LLM context: appending it would
        # alter the conversation history and prevent
        # OpenAIResponsesLLMService's previous_response_id optimization from
        # matching, forcing a full context resend.
        await tts.queue_frame(
            TTSSpeakFrame("Sure, tracking your location now.", append_to_context=False)
        )

    @llm.event_handler("on_function_calls_cancelled")
    async def on_function_calls_cancelled(service, function_calls):
        for item in function_calls:
            logger.info(f"Function call cancelled: {item.function_name} [{item.tool_call_id}]")

    # The tool takes no arguments, hence the empty properties/required.
    location_function = FunctionSchema(
        name="track_current_location",
        description="Track the device's current GPS location during a road trip, reporting position updates as the vehicle moves through cities until it reaches the final destination.",
        properties={},
        required=[],
    )
    tools = ToolsSchema(standard_tools=[location_function])

    context = LLMContext(tools=tools)
    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
    )

    pipeline = Pipeline(
        [
            transport.input(),
            stt,
            user_aggregator,
            llm,
            tts,
            transport.output(),
            assistant_aggregator,
        ]
    )

    task = PipelineTask(
        pipeline,
        params=PipelineParams(
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
        # Honour the idle timeout configured by the runner, as the sibling
        # function-calling examples do.
        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info("Client connected")
        # Kick off the conversation.
        # NOTE(review): unlike the sibling examples, no developer message is
        # added to the context before the first LLMRunFrame - confirm that
        # starting from an empty context is intended here.
        await task.queue_frames([LLMRunFrame()])

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info("Client disconnected")
        await task.cancel()

    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)

    await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
    """Pipecat Cloud-compatible entry point: create the transport, then run the bot.

    Args:
        runner_args: Runner-supplied arguments, used to create the transport
            and forwarded unchanged to ``run_bot``.
    """
    selected_transport = await create_transport(runner_args, transport_params)
    await run_bot(selected_transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # The runner import lives inside the guard so it only happens when this
    # file is executed as a script, not when it is imported as a module.
    from pipecat.runner.run import main

    main()
|
||||
@@ -0,0 +1,197 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame, TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.llm_service import FunctionCallParams
|
||||
from pipecat.services.openai.responses.llm import OpenAIResponsesLLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
async def fetch_weather_from_api(params: FunctionCallParams):
    """Return a canned weather report after a simulated 20-second API delay.

    The delay stands in for a long-running call so async function calls can
    be exercised.

    Args:
        params: Function call parameters; only ``result_callback`` is used.
    """
    await asyncio.sleep(20)
    report = {"conditions": "nice", "temperature": "75"}
    await params.result_callback(report)
|
||||
|
||||
|
||||
async def fetch_restaurant_recommendation(params: FunctionCallParams):
    """Deliver a fixed restaurant recommendation through the result callback.

    Args:
        params: Function call parameters; only ``result_callback`` is used.
    """
    recommendation = {"name": "The Golden Dragon"}
    await params.result_callback(recommendation)
|
||||
|
||||
|
||||
# Each value is a zero-argument factory rather than a params instance, so the
# transport parameters are only created once the transport type has been
# selected at runtime.
transport_params = {
    "daily": lambda: DailyParams(audio_in_enabled=True, audio_out_enabled=True),
    "twilio": lambda: FastAPIWebsocketParams(audio_in_enabled=True, audio_out_enabled=True),
    "webrtc": lambda: TransportParams(audio_in_enabled=True, audio_out_enabled=True),
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    """Assemble and run the async weather/restaurant pipeline on the OpenAI Responses LLM.

    Creates the STT, TTS and LLM services, registers the weather tool as an
    async function call and the restaurant tool as a regular one, wires the
    pipeline and runs it. The task is cancelled when the client disconnects.

    Args:
        transport: Transport that provides the pipeline's input and output.
        runner_args: Runner arguments; supplies ``handle_sigint`` for the
            runner and ``pipeline_idle_timeout_secs`` for the pipeline task.
    """
    logger.info("Starting bot")

    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

    voice_settings = CartesiaTTSService.Settings(
        voice="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
    )
    tts = CartesiaTTSService(api_key=os.getenv("CARTESIA_API_KEY"), settings=voice_settings)

    llm_settings = OpenAIResponsesLLMService.Settings(
        system_instruction="You are a helpful assistant in a voice conversation. Your responses will be spoken aloud, so avoid emojis, bullet points, or other formatting that can't be spoken. Respond to what the user said in a creative, helpful, and brief way.",
    )
    llm = OpenAIResponsesLLMService(
        api_key=os.getenv("OPENAI_API_KEY"),
        enable_async_tool_cancellation=True,
        settings=llm_settings,
    )

    # Registering with a function_name of None instead routes every function
    # to one callback, which then receives an extra function_name parameter.
    llm.register_function(
        "get_current_weather",
        fetch_weather_from_api,
        cancel_on_interruption=False,
        timeout_secs=30,
    )
    llm.register_function("get_restaurant_recommendation", fetch_restaurant_recommendation)

    @llm.event_handler("on_connection_error")
    async def on_connection_error(service, error):
        logger.error(f"LLM connection error: {error}")

    @llm.event_handler("on_function_calls_started")
    async def on_function_calls_started(service, function_calls):
        # The filler is spoken but kept out of the LLM context: appending it
        # would alter the conversation history and prevent
        # OpenAIResponsesLLMService's previous_response_id optimization from
        # matching, forcing a full context resend.
        filler = TTSSpeakFrame("Let me check on that.", append_to_context=False)
        await tts.queue_frame(filler)

    @llm.event_handler("on_function_calls_cancelled")
    async def on_function_calls_cancelled(service, function_calls):
        for call in function_calls:
            logger.info(f"Function call cancelled: {call.function_name} [{call.tool_call_id}]")

    weather_function = FunctionSchema(
        name="get_current_weather",
        description="Get the current weather",
        properties={
            "location": {
                "type": "string",
                "description": "The city and state, e.g. San Francisco, CA",
            },
            "format": {
                "type": "string",
                "enum": ["celsius", "fahrenheit"],
                "description": "The temperature unit to use. Infer this from the user's location.",
            },
        },
        required=["location", "format"],
    )
    restaurant_function = FunctionSchema(
        name="get_restaurant_recommendation",
        description="Get a restaurant recommendation",
        properties={
            "location": {
                "type": "string",
                "description": "The city and state, e.g. San Francisco, CA",
            },
        },
        required=["location"],
    )

    context = LLMContext(
        tools=ToolsSchema(standard_tools=[weather_function, restaurant_function])
    )
    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
    )

    # Processors in pipeline order, from transport input to assistant aggregator.
    processors = [
        transport.input(),
        stt,
        user_aggregator,
        llm,
        tts,
        transport.output(),
        assistant_aggregator,
    ]
    pipeline = Pipeline(processors)

    task = PipelineTask(
        pipeline,
        params=PipelineParams(enable_metrics=True, enable_usage_metrics=True),
        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info("Client connected")
        # Seed the context with an introduction request, then trigger the LLM.
        intro_request = {"role": "developer", "content": "Please introduce yourself to the user."}
        context.add_message(intro_request)
        await task.queue_frames([LLMRunFrame()])

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info("Client disconnected")
        await task.cancel()

    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
    await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
    """Pipecat Cloud-compatible entry point: create the transport, then run the bot.

    Args:
        runner_args: Runner-supplied arguments, used to create the transport
            and forwarded unchanged to ``run_bot``.
    """
    selected_transport = await create_transport(runner_args, transport_params)
    await run_bot(selected_transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # The runner import lives inside the guard so it only happens when this
    # file is executed as a script, not when it is imported as a module.
    from pipecat.runner.run import main

    main()
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user