Compare commits
375 Commits
filipi/syn
...
aleix/mode
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cde4024b21 | ||
|
|
84fcba772d | ||
|
|
b3bb6fdaa5 | ||
|
|
12b8af3d89 | ||
|
|
1c4ffb7845 | ||
|
|
8d4feede23 | ||
|
|
b11a3bc43f | ||
|
|
8dce66933f | ||
|
|
7291026695 | ||
|
|
f094ce80fb | ||
|
|
9fbe1bf2a3 | ||
|
|
d8b0e78bc8 | ||
|
|
675b7df408 | ||
|
|
30f39d7395 | ||
|
|
fe2ef9c712 | ||
|
|
173cf39aee | ||
|
|
ac43a70d36 | ||
|
|
8e4fd10e0f | ||
|
|
aeab417cd1 | ||
|
|
d263ad3c34 | ||
|
|
f3c454dc54 | ||
|
|
fc63790657 | ||
|
|
9ffcccdd84 | ||
|
|
503782c8b2 | ||
|
|
b834a893fe | ||
|
|
ba023248d9 | ||
|
|
457f55e99a | ||
|
|
f8318289d4 | ||
|
|
958d90819f | ||
|
|
403235eb48 | ||
|
|
698c2ba92e | ||
|
|
f013d5632b | ||
|
|
570849955c | ||
|
|
84b885682f | ||
|
|
989fb4deaa | ||
|
|
ab74605a26 | ||
|
|
49998d252b | ||
|
|
84566c1110 | ||
|
|
45aa95fa10 | ||
|
|
d1f7af0330 | ||
|
|
31b5a64382 | ||
|
|
d20013d7a6 | ||
|
|
804e3ea9ec | ||
|
|
a14d257cf2 | ||
|
|
a8660aabfe | ||
|
|
7dc763d512 | ||
|
|
36b15c92ef | ||
|
|
64ed0aae13 | ||
|
|
be81dac723 | ||
|
|
d942a713af | ||
|
|
e248c4c049 | ||
|
|
1d5dcf1698 | ||
|
|
f45a410f56 | ||
|
|
e38647151d | ||
|
|
1a02b5d61a | ||
|
|
4254c1f0e0 | ||
|
|
f91a113de7 | ||
|
|
e553bb010f | ||
|
|
245339e885 | ||
|
|
812cdc6822 | ||
|
|
153814ecc2 | ||
|
|
b1204cc430 | ||
|
|
c542167065 | ||
|
|
02116c58de | ||
|
|
dcd21e7ff4 | ||
|
|
5356f3028b | ||
|
|
cb2c1868b0 | ||
|
|
dac88c0a47 | ||
|
|
8e5fe8afda | ||
|
|
d07eebff20 | ||
|
|
ef4dcca4f1 | ||
|
|
fc3307bc63 | ||
|
|
da9a55a430 | ||
|
|
094d36904c | ||
|
|
746fadc2b5 | ||
|
|
8cce25d2d2 | ||
|
|
891f00cb5f | ||
|
|
1ca094dad7 | ||
|
|
346c585290 | ||
|
|
c134110399 | ||
|
|
f9117e6d4a | ||
|
|
360e4480e0 | ||
|
|
9b7e15c9bc | ||
|
|
00ea86fda8 | ||
|
|
5f75728207 | ||
|
|
9d274f0fb3 | ||
|
|
43ddbdf1ec | ||
|
|
565349d332 | ||
|
|
2dd1170229 | ||
|
|
5cf90cba98 | ||
|
|
981b7bdcb7 | ||
|
|
c4320e7f07 | ||
|
|
ea0be4d39c | ||
|
|
dca4e1090a | ||
|
|
ec574edd53 | ||
|
|
772fb57090 | ||
|
|
76601944c6 | ||
|
|
178985ec8a | ||
|
|
edc197d050 | ||
|
|
7ece8e3c4a | ||
|
|
7b45a56119 | ||
|
|
a544f885a3 | ||
|
|
375deac912 | ||
|
|
699ca38dc1 | ||
|
|
aeda60f761 | ||
|
|
b010dd58d2 | ||
|
|
225ea907d5 | ||
|
|
1443dfb070 | ||
|
|
4bef85e363 | ||
|
|
215b2dc7f3 | ||
|
|
874e2878be | ||
|
|
9131fa5c12 | ||
|
|
68a3070ad4 | ||
|
|
a7bf9f538c | ||
|
|
0acfb4dd49 | ||
|
|
8594401024 | ||
|
|
aa7a014518 | ||
|
|
27a8a973b1 | ||
|
|
8abda808ca | ||
|
|
7f3f23dcb9 | ||
|
|
be509e5647 | ||
|
|
9f0b18b03d | ||
|
|
6eccd16543 | ||
|
|
d8dc6bc7d0 | ||
|
|
d12a8529e2 | ||
|
|
aa061f7e2c | ||
|
|
e863293198 | ||
|
|
9c7d5a9de2 | ||
|
|
a451c42dc7 | ||
|
|
bc009d8f98 | ||
|
|
67ee802772 | ||
|
|
ceaa27ee6e | ||
|
|
42335e2ef0 | ||
|
|
7585864113 | ||
|
|
18852adc28 | ||
|
|
f11b6d7151 | ||
|
|
9df1e18b43 | ||
|
|
b8f9a21e0c | ||
|
|
c18d997ad8 | ||
|
|
56aaebe1b0 | ||
|
|
916af84974 | ||
|
|
3e911b5fa0 | ||
|
|
7c08779a2f | ||
|
|
988c08a5b6 | ||
|
|
7351298849 | ||
|
|
392134be46 | ||
|
|
9266e1e7ad | ||
|
|
e9eff4626f | ||
|
|
21aa50283e | ||
|
|
70469e3c0c | ||
|
|
6111df947e | ||
|
|
4eebfd65d9 | ||
|
|
c2358b273b | ||
|
|
3a10a528c0 | ||
|
|
f078b8b867 | ||
|
|
5490820338 | ||
|
|
10697636c9 | ||
|
|
e1638a9342 | ||
|
|
bfffefa95c | ||
|
|
fbb49ffc8d | ||
|
|
eace782752 | ||
|
|
b94071d37f | ||
|
|
796a10fe9c | ||
|
|
1ab07d312f | ||
|
|
8adb38f87c | ||
|
|
33f145d70a | ||
|
|
41e46ee69e | ||
|
|
60933b7a56 | ||
|
|
64e09d592e | ||
|
|
883de8ab08 | ||
|
|
793ed8f9e3 | ||
|
|
d8ea33e1a4 | ||
|
|
1d7404ef21 | ||
|
|
dc909e2713 | ||
|
|
e22f9f84bb | ||
|
|
7af72eee3e | ||
|
|
57068f1b38 | ||
|
|
bbb605accc | ||
|
|
929a0e33f4 | ||
|
|
3724ecd378 | ||
|
|
4c8734c5e1 | ||
|
|
283f6df205 | ||
|
|
a29be38f48 | ||
|
|
976c644f90 | ||
|
|
34aa37f395 | ||
|
|
380867a87a | ||
|
|
cc3af59db4 | ||
|
|
f93d13efff | ||
|
|
c28b7e8f26 | ||
|
|
d1a2dee7a1 | ||
|
|
da1a1a59a4 | ||
|
|
134790b17c | ||
|
|
e5aa3bbc20 | ||
|
|
3be0ea05ef | ||
|
|
0c59819682 | ||
|
|
5b67dcd9e7 | ||
|
|
d503383c23 | ||
|
|
fa30268b84 | ||
|
|
2a118084bd | ||
|
|
87e8ed109a | ||
|
|
a5e1bbf4a3 | ||
|
|
f8267f1ea6 | ||
|
|
74acb0b7d0 | ||
|
|
41e3afbc2f | ||
|
|
d4824ffe8a | ||
|
|
2426f80789 | ||
|
|
5ce46df599 | ||
|
|
a6013ba437 | ||
|
|
279ca5a87b | ||
|
|
c6f79592d8 | ||
|
|
e74e497b8d | ||
|
|
d245b79bba | ||
|
|
8a794424dd | ||
|
|
f4743a6c91 | ||
|
|
ba32a48510 | ||
|
|
a9cafa2a3b | ||
|
|
58b1b7249e | ||
|
|
db8e73e5ca | ||
|
|
170f6dfe8b | ||
|
|
c763abc4ae | ||
|
|
197d96fc49 | ||
|
|
c8e9bf77fd | ||
|
|
48b25962e2 | ||
|
|
5d093c9ad7 | ||
|
|
d93f63deb5 | ||
|
|
09a57972f5 | ||
|
|
f83d062df9 | ||
|
|
a2a42b8703 | ||
|
|
e60a72e2d4 | ||
|
|
83f4989a78 | ||
|
|
5d2b288274 | ||
|
|
52ece87ac9 | ||
|
|
bc4bbb1895 | ||
|
|
eb014fffc4 | ||
|
|
e74930b954 | ||
|
|
6ed4109da9 | ||
|
|
53f809b7d5 | ||
|
|
a3c7f6c2af | ||
|
|
df68665ec1 | ||
|
|
bd6cbd7fe7 | ||
|
|
33ef6b3174 | ||
|
|
3ca656cae5 | ||
|
|
6a84d02156 | ||
|
|
080da8b94c | ||
|
|
d3021b4590 | ||
|
|
92e34ea6e8 | ||
|
|
ebab75765d | ||
|
|
110c88bf92 | ||
|
|
19e521b75a | ||
|
|
394599d031 | ||
|
|
0f47076703 | ||
|
|
3e255f3d21 | ||
|
|
565b9b961d | ||
|
|
692c3c74d1 | ||
|
|
7d309b3340 | ||
|
|
04e8444096 | ||
|
|
7501effad5 | ||
|
|
0c8ff9c4c3 | ||
|
|
53f6426b0b | ||
|
|
9e32ade44b | ||
|
|
2574d24400 | ||
|
|
27cb078716 | ||
|
|
ca636813a8 | ||
|
|
47b41a0ff7 | ||
|
|
f14638a1fd | ||
|
|
e1939ecd44 | ||
|
|
dc5b94f9e0 | ||
|
|
1d85aedcae | ||
|
|
e719cbbe6d | ||
|
|
f2ce7ececc | ||
|
|
bd7496fa27 | ||
|
|
0a8bcf58c4 | ||
|
|
0fb45c6114 | ||
|
|
657a5def57 | ||
|
|
30903042e5 | ||
|
|
9936ec16cb | ||
|
|
212aff15c9 | ||
|
|
f2b3f87661 | ||
|
|
77cfb181f6 | ||
|
|
0b256936c6 | ||
|
|
3922963c7a | ||
|
|
ab9f2a35b6 | ||
|
|
f19d1183d8 | ||
|
|
9ad4fe6344 | ||
|
|
04882f6f2a | ||
|
|
712e42533d | ||
|
|
7d8b436018 | ||
|
|
bf1856f610 | ||
|
|
248e0a4c90 | ||
|
|
89dcd57577 | ||
|
|
32022a952e | ||
|
|
65d9fcc315 | ||
|
|
b78ae40d3c | ||
|
|
ece4d0661e | ||
|
|
82a852c1ff | ||
|
|
5be1b9c8cb | ||
|
|
7913d4e188 | ||
|
|
c8dd7c2b57 | ||
|
|
77e5f4acc1 | ||
|
|
be8d4dfd87 | ||
|
|
bb2c60a998 | ||
|
|
7c644ed810 | ||
|
|
96ceec2a43 | ||
|
|
d249473f0b | ||
|
|
1da2018c85 | ||
|
|
af126ec7cf | ||
|
|
340e58bf5c | ||
|
|
7873159d0f | ||
|
|
c783101741 | ||
|
|
73b8bbf963 | ||
|
|
ebbe5acc8f | ||
|
|
dd1bea2a5f | ||
|
|
136e6a58be | ||
|
|
f0d04dde1c | ||
|
|
742a278c05 | ||
|
|
b16befc9e9 | ||
|
|
0c11eb6fd0 | ||
|
|
ea39389e03 | ||
|
|
4adf0fd585 | ||
|
|
465b9bcbc6 | ||
|
|
3f4814cf84 | ||
|
|
f6a3678f93 | ||
|
|
3af93ed257 | ||
|
|
f37bf989dd | ||
|
|
86a16d53bc | ||
|
|
0efef19d60 | ||
|
|
87b8f38a48 | ||
|
|
e1a3ddbb57 | ||
|
|
b5683556d4 | ||
|
|
26f85687d6 | ||
|
|
670ce30a1c | ||
|
|
1c8d31de70 | ||
|
|
9defff2a34 | ||
|
|
59d28f9fd2 | ||
|
|
f2a8a9e753 | ||
|
|
d1eb2699f3 | ||
|
|
2e0f5fc6e9 | ||
|
|
dd3ca6fbba | ||
|
|
171692aa30 | ||
|
|
81ddd103f9 | ||
|
|
8c9e189394 | ||
|
|
b6579dc763 | ||
|
|
abd63336e4 | ||
|
|
ccb9dc20f8 | ||
|
|
2177e28ee1 | ||
|
|
3eb7c2bcd9 | ||
|
|
878940f94e | ||
|
|
a3aeafcb2d | ||
|
|
63254fe337 | ||
|
|
39919f7889 | ||
|
|
f2e0f5d20c | ||
|
|
2724ef6d6f | ||
|
|
33fb8852e6 | ||
|
|
5fe48da2fb | ||
|
|
dccd98ec8a | ||
|
|
a84c69858e | ||
|
|
ca224219dc | ||
|
|
83dc979d19 | ||
|
|
fc76b3f2fb | ||
|
|
4670370dbb | ||
|
|
47e53890e3 | ||
|
|
195180b6f4 | ||
|
|
8b64166bb7 | ||
|
|
1d18995435 | ||
|
|
ea7324b2ba | ||
|
|
52ed7137af | ||
|
|
b33df03724 | ||
|
|
28fbe1db08 | ||
|
|
9240e92d9f | ||
|
|
5caf53f086 | ||
|
|
ac2716811c | ||
|
|
d313d56776 | ||
|
|
de8ba68589 | ||
|
|
fa982a05c0 | ||
|
|
419c7d4450 |
@@ -144,7 +144,7 @@ class InputParams(BaseModel):
|
||||
|
||||
#### Examples
|
||||
|
||||
Validated against `examples/foundational/07-interruptible.py`:
|
||||
Validated against `examples/07-interruptible.py`:
|
||||
|
||||
- Proper `create_transport()` usage
|
||||
- Correct pipeline structure
|
||||
|
||||
@@ -1,30 +0,0 @@
|
||||
# flyctl launch added from .gitignore
|
||||
**/.vscode
|
||||
**/env
|
||||
**/__pycache__
|
||||
**/*~
|
||||
**/venv
|
||||
#*#
|
||||
|
||||
# Distribution / packaging
|
||||
**/.Python
|
||||
**/build
|
||||
**/develop-eggs
|
||||
**/dist
|
||||
**/downloads
|
||||
**/eggs
|
||||
**/.eggs
|
||||
**/lib
|
||||
**/lib64
|
||||
**/parts
|
||||
**/sdist
|
||||
**/var
|
||||
**/wheels
|
||||
**/share/python-wheels
|
||||
**/*.egg-info
|
||||
**/.installed.cfg
|
||||
**/*.egg
|
||||
**/MANIFEST
|
||||
**/.DS_Store
|
||||
**/.env
|
||||
fly.toml
|
||||
4
.github/workflows/python-compatibility.yaml
vendored
@@ -14,7 +14,7 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: ['3.10.19', '3.11.14', '3.12.12', '3.13.12']
|
||||
python-version: ['3.11.15', '3.12.13', '3.13.12', '3.14.3']
|
||||
|
||||
name: Python ${{ matrix.python-version }}
|
||||
steps:
|
||||
@@ -42,7 +42,7 @@ jobs:
|
||||
|
||||
- name: Test uv sync with all extras
|
||||
run: |
|
||||
uv sync --group dev --all-extras --no-extra krisp
|
||||
uv sync --group dev --all-extras
|
||||
|
||||
- name: Verify installation
|
||||
run: |
|
||||
|
||||
51
.github/workflows/sync-quickstart.yaml
vendored
@@ -1,51 +0,0 @@
|
||||
name: Sync Quickstart to pipecat-quickstart repo
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'examples/quickstart/**'
|
||||
workflow_dispatch: # Manual trigger
|
||||
|
||||
jobs:
|
||||
sync-quickstart:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout main repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Checkout quickstart repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
repository: pipecat-ai/pipecat-quickstart
|
||||
token: ${{ secrets.QUICKSTART_SYNC_TOKEN }}
|
||||
path: quickstart-repo
|
||||
|
||||
- name: Sync files (excluding uv.lock and README.md)
|
||||
run: |
|
||||
# Copy all files except uv.lock and README.md
|
||||
find examples/quickstart -type f \
|
||||
-not -name "README.md" \
|
||||
-not -name "uv.lock" \
|
||||
-exec cp {} quickstart-repo/ \;
|
||||
|
||||
- name: Commit and push changes
|
||||
run: |
|
||||
cd quickstart-repo
|
||||
git config user.name "GitHub Action"
|
||||
git config user.email "action@github.com"
|
||||
git add .
|
||||
|
||||
# Only commit if there are changes
|
||||
if ! git diff --staged --quiet; then
|
||||
git commit -m "Sync from pipecat main repo
|
||||
|
||||
Updated files from examples/quickstart/
|
||||
Commit: ${{ github.sha }}
|
||||
"
|
||||
git push
|
||||
else
|
||||
echo "No changes to sync"
|
||||
fi
|
||||
@@ -1,8 +1,13 @@
|
||||
repos:
|
||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||
rev: v0.12.1
|
||||
- repo: local
|
||||
hooks:
|
||||
- id: ruff
|
||||
language_version: python3
|
||||
args: [--fix]
|
||||
name: ruff
|
||||
entry: uv run ruff check --fix
|
||||
language: system
|
||||
types: [python]
|
||||
- id: ruff-format
|
||||
name: ruff-format
|
||||
entry: uv run ruff format
|
||||
language: system
|
||||
types: [python]
|
||||
|
||||
@@ -11,7 +11,7 @@ build:
|
||||
jobs:
|
||||
post_install:
|
||||
- pip install uv
|
||||
- UV_PROJECT_ENVIRONMENT=$READTHEDOCS_VIRTUALENV_PATH uv sync --group docs --all-extras --no-extra krisp --no-extra gstreamer --no-extra local_smart_turn --no-extra moondream --no-extra riva --no-extra mlx-whisper
|
||||
- UV_PROJECT_ENVIRONMENT=$READTHEDOCS_VIRTUALENV_PATH uv sync --group docs --all-extras --no-extra gstreamer --no-extra local_smart_turn --no-extra moondream --no-extra mlx-whisper
|
||||
|
||||
sphinx:
|
||||
configuration: docs/api/conf.py
|
||||
|
||||
980
CHANGELOG.md
@@ -7,6 +7,986 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
<!-- towncrier release notes start -->
|
||||
|
||||
## [1.0.0] - 2026-04-14
|
||||
|
||||
Migration guide: https://docs.pipecat.ai/pipecat/migration/migration-1.0
|
||||
|
||||
### Added
|
||||
|
||||
- Updated LemonSlice transport:
|
||||
- Added `on_avatar_connected` and `on_avatar_disconnected` events triggered
|
||||
when the avatar joins and leaves the room.
|
||||
- Added `api_url` parameter to `LemonSliceNewSessionRequest` to allow
|
||||
overriding the LemonSlice API endpoint.
|
||||
- Added support for passing arbitrary named parameters to the LemonSlice
|
||||
API endpoint.
|
||||
(PR [#3995](https://github.com/pipecat-ai/pipecat/pull/3995))
|
||||
|
||||
- Added Inworld Realtime LLM service with WebSocket-based cascade STT/LLM/TTS,
|
||||
semantic VAD, function calling, and Router support.
|
||||
(PR [#4140](https://github.com/pipecat-ai/pipecat/pull/4140))
|
||||
|
||||
- ⚠️ Added WebSocket-based `OpenAIResponsesLLMService` as the new default for
|
||||
the OpenAI Responses API. It maintains a persistent connection to
|
||||
`wss://api.openai.com/v1/responses` and automatically uses
|
||||
`previous_response_id` to send only incremental context, falling back to full
|
||||
context on reconnection or cache miss. The previous HTTP-based implementation
|
||||
is now available as `OpenAIResponsesHttpLLMService`.
|
||||
(PR [#4141](https://github.com/pipecat-ai/pipecat/pull/4141))
|
||||
|
||||
- Added `group_parallel_tools` parameter to `LLMService` (default `True`). When
|
||||
`True`, all function calls from the same LLM response batch share a group ID
|
||||
and the LLM is triggered exactly once after the last call completes. Set to
|
||||
`False` to trigger inference independently for each function call result as
|
||||
it arrives.
|
||||
(PR [#4217](https://github.com/pipecat-ai/pipecat/pull/4217))
|
||||
|
||||
- Added async function call support to `register_function()` and
|
||||
`register_direct_function()` via `cancel_on_interruption=False`. When set to
|
||||
`False`, the LLM continues the conversation immediately without waiting for
|
||||
the function result. The result is injected back into the context as a
|
||||
`developer` message once available, triggering a new LLM inference at that
|
||||
point.
|
||||
(PR [#4217](https://github.com/pipecat-ai/pipecat/pull/4217))
|
||||
|
||||
- Added `enable_prompt_caching` setting to `AWSBedrockLLMService` for Bedrock
|
||||
ConverseStream prompt caching.
|
||||
(PR [#4219](https://github.com/pipecat-ai/pipecat/pull/4219))
|
||||
|
||||
- Added support for streaming intermediate results from async function calls.
|
||||
Call `result_callback` multiple times with
|
||||
`properties=FunctionCallResultProperties(is_final=False)` to push incremental
|
||||
updates, then call it once more (with `is_final=True`, the default) to
|
||||
deliver the final result. Only valid for functions registered with
|
||||
`cancel_on_interruption=False`.
|
||||
(PR [#4230](https://github.com/pipecat-ai/pipecat/pull/4230))
|
||||
|
||||
- Added `LLMMessagesTransformFrame` to facilitate programmatically editing
|
||||
context in a frame-based way.
|
||||
|
||||
The previous approach required the caller to directly grab a reference to
|
||||
the context object, grab a "snapshot" of its messages _at that point in
|
||||
time_, transform the messages, and then push an `LLMMessagesUpdateFrame` with
|
||||
the transformed messages. This approach can lead to problems: what if there
|
||||
had already been a change to the context queued in the pipeline? The
|
||||
transformed messages would simply overwrite it without consideration.
|
||||
(PR [#4231](https://github.com/pipecat-ai/pipecat/pull/4231))
|
||||
|
||||
- The development runner now exports a module-level `app` FastAPI instance
|
||||
(`from pipecat.runner.run import app`) so you can register custom routes
|
||||
before calling `main()`.
|
||||
(PR [#4234](https://github.com/pipecat-ai/pipecat/pull/4234))
|
||||
|
||||
- `ToolsSchema` now accepts `custom_tools` for OpenAI LLM services
|
||||
(`OpenAILLMService`, `OpenAIResponsesLLMService`,
|
||||
`OpenAIResponsesHttpLLMService`, and `OpenAIRealtimeLLMService`), letting you
|
||||
pass provider-specific tools like `tool_search` alongside standard function
|
||||
tools.
|
||||
(PR [#4248](https://github.com/pipecat-ai/pipecat/pull/4248))
|
||||
|
||||
- Added enhancements to `NvidiaTTSService`:
|
||||
|
||||
- Cross-sentence stitching: multiple sentences within an LLM turn are fed
|
||||
into a single `SynthesizeOnline` gRPC stream for seamless audio across
|
||||
sentence boundaries (requires Magpie TTS model v1.7.0+).
|
||||
- `custom_dictionary` and `encoding` parameters for IPA-based custom
|
||||
pronunciation and output audio encoding.
|
||||
- Metrics generation (`can_generate_metrics` returns `True`) and
|
||||
`stop_all_metrics()` when an audio context is interrupted.
|
||||
- gRPC error handling around synthesis config retrieval
|
||||
(`GetRivaSynthesisConfig`).
|
||||
(PR [#4249](https://github.com/pipecat-ai/pipecat/pull/4249))
|
||||
|
||||
- Added `MistralTTSService` for streaming text-to-speech using Mistral's
|
||||
Voxtral TTS API (`voxtral-mini-tts-2603`). Supports SSE-based audio streaming
|
||||
with automatic resampling from the API's native 24kHz to any requested sample
|
||||
rate. Requires the `mistral` optional extra (`pip install
|
||||
pipecat-ai[mistral]`).
|
||||
(PR [#4251](https://github.com/pipecat-ai/pipecat/pull/4251))
|
||||
|
||||
- Added `truncate_large_values` parameter to `LLMContext.get_messages()`. When
|
||||
`True`, returns compact deep copies of messages with binary data (base64
|
||||
images, audio) replaced by short placeholders and long string values in
|
||||
LLM-specific messages recursively truncated. Useful for serialization,
|
||||
logging, and debugging tools.
|
||||
(PR [#4272](https://github.com/pipecat-ai/pipecat/pull/4272))
|
||||
|
||||
- `CartesiaSTTService` now supports runtime settings updates (e.g. changing
|
||||
`language` or `model` via `STTUpdateSettingsFrame`). The service
|
||||
automatically reconnects with the new parameters. Previously, settings
|
||||
updates were silently ignored.
|
||||
(PR [#4282](https://github.com/pipecat-ai/pipecat/pull/4282))
|
||||
|
||||
- Added `pcm_32000` and `pcm_48000` sample rate support to ElevenLabs TTS
|
||||
services.
|
||||
(PR [#4293](https://github.com/pipecat-ai/pipecat/pull/4293))
|
||||
|
||||
- Added `enable_logging` parameter to `ElevenLabsHttpTTSService`. Set to
|
||||
`False` to enable zero retention mode (enterprise only).
|
||||
(PR [#4293](https://github.com/pipecat-ai/pipecat/pull/4293))
|
||||
|
||||
### Changed
|
||||
|
||||
- Updated `onnxruntime` from 1.23.2 to 1.24.3, adding support for Python 3.14.
|
||||
(PR [#3984](https://github.com/pipecat-ai/pipecat/pull/3984))
|
||||
|
||||
- `MCPClient` now requires `async with MCPClient(...) as mcp:` or explicit
|
||||
`start()`/`close()` calls to manage the connection lifecycle.
|
||||
(PR [#4034](https://github.com/pipecat-ai/pipecat/pull/4034))
|
||||
|
||||
- ⚠️ Updated `langchain` extra to require langchain 1.x (from 0.3.x),
|
||||
langchain-community 0.4.x (from 0.3.x), and langchain-openai 1.x (from
|
||||
0.3.x). If you pin these packages in your project, update your pins
|
||||
accordingly.
|
||||
(PR [#4192](https://github.com/pipecat-ai/pipecat/pull/4192))
|
||||
|
||||
- `WebsocketService` reconnection errors are now non-fatal. When a websocket
|
||||
service exhausts its reconnection attempts (either via exponential backoff or
|
||||
quick failure detection), it emits a non-fatal `ErrorFrame` instead of a
|
||||
fatal one. This allows application-level failover (e.g. `ServiceSwitcher`) to
|
||||
handle the failure instead of killing the entire pipeline.
|
||||
(PR [#4201](https://github.com/pipecat-ai/pipecat/pull/4201))
|
||||
|
||||
- Changed `GrokLLMService` default model from `grok-3-beta` to `grok-3`, now
|
||||
that the model is generally available.
|
||||
(PR [#4209](https://github.com/pipecat-ai/pipecat/pull/4209))
|
||||
|
||||
- `GoogleImageGenService` now defaults to `imagen-4.0-generate-001` (previously
|
||||
`imagen-3.0-generate-002`).
|
||||
(PR [#4213](https://github.com/pipecat-ai/pipecat/pull/4213))
|
||||
|
||||
- ⚠️ `BaseOpenAILLMService.get_chat_completions()` now accepts an `LLMContext`
|
||||
instead of `OpenAILLMInvocationParams`. If you override this method, update
|
||||
your signature accordingly.
|
||||
(PR [#4215](https://github.com/pipecat-ai/pipecat/pull/4215))
|
||||
|
||||
- When multiple function calls are returned in a single LLM response, by
|
||||
default (when `group_parallel_tools=True`) the LLM is now triggered exactly
|
||||
once after the last call in the batch completes, rather than waiting for all
|
||||
function calls.
|
||||
(PR [#4217](https://github.com/pipecat-ai/pipecat/pull/4217))
|
||||
|
||||
- ⚠️ `LLMService.function_call_timeout_secs` now defaults to `None` instead of
|
||||
`10.0`. Deferred function calls will run indefinitely unless a timeout is
|
||||
explicitly set at the service level or per-call. If you relied on the
|
||||
previous 10-second default, pass `function_call_timeout_secs=10.0`
|
||||
explicitly.
|
||||
(PR [#4224](https://github.com/pipecat-ai/pipecat/pull/4224))
|
||||
|
||||
- Updated `NvidiaTTSService`:
|
||||
|
||||
- Made `api_key` optional for local NIM deployments.
|
||||
- Voice, language, and quality can be updated without reconnecting the gRPC
|
||||
client; new values take effect on the next synthesis turn, not for the
|
||||
current turn's in-flight requests.
|
||||
- Replaced per-sentence synchronous `synthesize_online` calls with async
|
||||
queue-backed gRPC streaming.
|
||||
- Streaming now uses asyncio tasks with explicit gRPC cancellation on
|
||||
interruption and stale-response filtering when a stream is aborted or
|
||||
replaced.
|
||||
- Renamed Riva references to Nemotron Speech in docs and messages.
|
||||
- Disabled automatic TTS start frames at the service level
|
||||
(`push_start_frame=False`) and emit `TTSStartedFrame` when a stitched
|
||||
synthesis stream is started for a context.
|
||||
(PR [#4249](https://github.com/pipecat-ai/pipecat/pull/4249))
|
||||
|
||||
### Removed
|
||||
|
||||
- ⚠️ Removed `OpenPipeLLMService` and the `openpipe` extra. OpenPipe was
|
||||
acquired by CoreWeave and the package is no longer maintained. If you were
|
||||
using `openpipe` as an LLM provider, switch to the underlying provider
|
||||
directly (e.g. `openai`). The OpenPipe interface can still be used with
|
||||
`OpenAILLMService` by specifying a `base_url`.
|
||||
(PR [#4191](https://github.com/pipecat-ai/pipecat/pull/4191))
|
||||
|
||||
- ⚠️ Removed `NoisereduceFilter`. Use system-level noise reduction or a
|
||||
service-based alternative instead.
|
||||
(PR [#4204](https://github.com/pipecat-ai/pipecat/pull/4204))
|
||||
|
||||
- ⚠️ Removed deprecated `vad_enabled` and `vad_audio_passthrough` transport
|
||||
params.
|
||||
(PR [#4204](https://github.com/pipecat-ai/pipecat/pull/4204))
|
||||
|
||||
- ⚠️ Removed deprecated `camera_in_enabled`, `camera_in_is_live`,
|
||||
`camera_in_width`, `camera_in_height`, `camera_out_enabled`,
|
||||
`camera_out_is_live`, `camera_out_width`, `camera_out_height`, and
|
||||
`camera_out_color` transport params. Use the `video_in_*` and `video_out_*`
|
||||
equivalents instead.
|
||||
(PR [#4204](https://github.com/pipecat-ai/pipecat/pull/4204))
|
||||
|
||||
- ⚠️ Removed `FrameProcessor.wait_for_task()`. Use `create_task()` and manage
|
||||
tasks with the built-in `TaskManager` instead.
|
||||
(PR [#4204](https://github.com/pipecat-ai/pipecat/pull/4204))
|
||||
|
||||
- ⚠️ Removed deprecated transport frames: `TransportMessageFrame`,
|
||||
`TransportMessageUrgentFrame`, `InputTransportMessageUrgentFrame`,
|
||||
`DailyTransportMessageFrame`, and `DailyTransportMessageUrgentFrame`. Use
|
||||
`OutputTransportMessageFrame`, `OutputTransportMessageUrgentFrame`,
|
||||
`InputTransportMessageFrame`, `DailyOutputTransportMessageFrame`, and
|
||||
`DailyOutputTransportMessageUrgentFrame` instead.
|
||||
(PR [#4204](https://github.com/pipecat-ai/pipecat/pull/4204))
|
||||
|
||||
- ⚠️ Removed `create_default_resampler()` from `pipecat.audio.utils`.
|
||||
(PR [#4204](https://github.com/pipecat-ai/pipecat/pull/4204))
|
||||
|
||||
- ⚠️ Removed `DailyRunner.configure_with_args()`. Use `PipelineRunner` with
|
||||
`RunnerArguments` instead.
|
||||
(PR [#4204](https://github.com/pipecat-ai/pipecat/pull/4204))
|
||||
|
||||
- ⚠️ Removed deprecated `on_pipeline_ended`, `on_pipeline_cancelled`, and
|
||||
`on_pipeline_stopped` events from `PipelineTask`. Use `on_pipeline_finished`
|
||||
instead.
|
||||
(PR [#4204](https://github.com/pipecat-ai/pipecat/pull/4204))
|
||||
|
||||
- ⚠️ Removed single-argument function call support from `LLMService`. Functions
|
||||
must use named parameters instead of a single `arguments` parameter.
|
||||
(PR [#4204](https://github.com/pipecat-ai/pipecat/pull/4204))
|
||||
|
||||
- ⚠️ Removed `FalSmartTurnAnalyzer` and `LocalSmartTurnAnalyzer`.
|
||||
(PR [#4204](https://github.com/pipecat-ai/pipecat/pull/4204))
|
||||
|
||||
- ⚠️ Removed `RTVIObserver.errors_enabled` parameter.
|
||||
(PR [#4204](https://github.com/pipecat-ai/pipecat/pull/4204))
|
||||
|
||||
- ⚠️ Removed deprecated RTVI models, frames, and processor methods including
|
||||
`RTVIConfig`, `RTVIServiceConfig`, `RTVIServiceOptionConfig`, various
|
||||
`RTVI*Data` models, `RTVIActionFrame`, and
|
||||
`RTVIProcessor.handle_function_call`/`handle_function_call_start`. Use the
|
||||
updated RTVI processor API instead.
|
||||
(PR [#4204](https://github.com/pipecat-ai/pipecat/pull/4204))
|
||||
|
||||
- ⚠️ Removed deprecated `KeypadEntryFrame` alias.
|
||||
(PR [#4204](https://github.com/pipecat-ai/pipecat/pull/4204))
|
||||
|
||||
- ⚠️ Removed deprecated interruption frames: `StartInterruptionFrame` and
|
||||
`BotInterruptionFrame`. Use `InterruptionFrame` and `InterruptionTaskFrame`
|
||||
instead.
|
||||
(PR [#4204](https://github.com/pipecat-ai/pipecat/pull/4204))
|
||||
|
||||
- ⚠️ Removed `LLMService.request_image_frame()`. Push a `UserImageRequestFrame`
|
||||
instead.
|
||||
(PR [#4204](https://github.com/pipecat-ai/pipecat/pull/4204))
|
||||
|
||||
- ⚠️ Removed `TTSService.say()`. Push a `TTSSpeakFrame` into the pipeline
|
||||
instead.
|
||||
(PR [#4204](https://github.com/pipecat-ai/pipecat/pull/4204))
|
||||
|
||||
- ⚠️ Removed `KrispFilter`. The `krisp` extra has been removed from
|
||||
`pyproject.toml`.
|
||||
(PR [#4204](https://github.com/pipecat-ai/pipecat/pull/4204))
|
||||
|
||||
- ⚠️ Removed `AudioBufferProcessor.user_continuous_stream` parameter. Use
|
||||
`user_audio_passthrough` instead.
|
||||
(PR [#4204](https://github.com/pipecat-ai/pipecat/pull/4204))
|
||||
|
||||
- ⚠️ Removed `LLMService.start_callback` parameter. Register an
|
||||
`on_llm_response_start` event handler instead.
|
||||
(PR [#4204](https://github.com/pipecat-ai/pipecat/pull/4204))
|
||||
|
||||
- ⚠️ Removed deprecated `observers` field from `PipelineParams`. Pass observers
|
||||
directly to `PipelineTask` constructor instead.
|
||||
(PR [#4204](https://github.com/pipecat-ai/pipecat/pull/4204))
|
||||
|
||||
- ⚠️ Removed deprecated `pipecat.services.openai_realtime` package. Use
|
||||
`pipecat.services.openai.realtime` instead.
|
||||
(PR [#4208](https://github.com/pipecat-ai/pipecat/pull/4208))
|
||||
|
||||
- ⚠️ Removed deprecated `pipecat.services.google.llm_vertex` module. Use
|
||||
`pipecat.services.google.vertex.llm` instead.
|
||||
(PR [#4208](https://github.com/pipecat-ai/pipecat/pull/4208))
|
||||
|
||||
- ⚠️ Removed deprecated `GoogleLLMOpenAIBetaService` from
|
||||
`pipecat.services.google.openai`. Use `GoogleLLMService` from
|
||||
`pipecat.services.google.llm` instead.
|
||||
(PR [#4208](https://github.com/pipecat-ai/pipecat/pull/4208))
|
||||
|
||||
- ⚠️ Removed deprecated `OpenAIRealtimeBetaLLMService` and
|
||||
`AzureRealtimeBetaLLMService`. Use `OpenAIRealtimeLLMService` and
|
||||
`AzureRealtimeLLMService` from `pipecat.services.openai.realtime` and
|
||||
`pipecat.services.azure.realtime` instead.
|
||||
(PR [#4208](https://github.com/pipecat-ai/pipecat/pull/4208))
|
||||
|
||||
- ⚠️ Removed deprecated `pipecat.services.ai_services` module. Import from
|
||||
`pipecat.services.ai_service`, `pipecat.services.llm_service`,
|
||||
`pipecat.services.stt_service`, `pipecat.services.tts_service`, etc. instead.
|
||||
(PR [#4208](https://github.com/pipecat-ai/pipecat/pull/4208))
|
||||
|
||||
- ⚠️ Removed deprecated `pipecat.services.gemini_multimodal_live` package. Use
|
||||
`pipecat.services.google.gemini_live` instead. Note that class names no
|
||||
longer include "Multimodal" (e.g. `GeminiMultimodalLiveLLMService` →
|
||||
`GeminiLiveLLMService`).
|
||||
(PR [#4208](https://github.com/pipecat-ai/pipecat/pull/4208))
|
||||
|
||||
- ⚠️ Removed deprecated `pipecat.services.google.gemini_live.llm_vertex`
|
||||
module. Use `pipecat.services.google.gemini_live.vertex.llm` instead.
|
||||
(PR [#4208](https://github.com/pipecat-ai/pipecat/pull/4208))
|
||||
|
||||
- ⚠️ Removed deprecated `pipecat.services.nim` package. Use
|
||||
`pipecat.services.nvidia.llm` instead (`NimLLMService` → `NvidiaLLMService`).
|
||||
(PR [#4208](https://github.com/pipecat-ai/pipecat/pull/4208))
|
||||
|
||||
- ⚠️ Removed deprecated `pipecat.services.deepgram.stt_sagemaker` and
|
||||
`pipecat.services.deepgram.tts_sagemaker` modules. Use
|
||||
`pipecat.services.deepgram.sagemaker.stt` and
|
||||
`pipecat.services.deepgram.sagemaker.tts` instead.
|
||||
(PR [#4208](https://github.com/pipecat-ai/pipecat/pull/4208))
|
||||
|
||||
- ⚠️ Removed deprecated `pipecat.services.aws_nova_sonic` package. Use
|
||||
`pipecat.services.aws.nova_sonic` instead.
|
||||
(PR [#4208](https://github.com/pipecat-ai/pipecat/pull/4208))
|
||||
|
||||
- ⚠️ Removed deprecated `pipecat.services.riva` package. Use
|
||||
`pipecat.services.nvidia.stt` and `pipecat.services.nvidia.tts` instead
|
||||
(`RivaSTTService` → `NvidiaSTTService`, `RivaTTSService` →
|
||||
`NvidiaTTSService`).
|
||||
(PR [#4208](https://github.com/pipecat-ai/pipecat/pull/4208))
|
||||
|
||||
- ⚠️ Removed deprecated compatibility modules:
|
||||
`pipecat.services.openai_realtime_beta` (use
|
||||
`pipecat.services.openai.realtime`),
|
||||
`pipecat.services.openai_realtime.context`,
|
||||
`pipecat.services.openai_realtime.frames`,
|
||||
`pipecat.services.openai.realtime.context`,
|
||||
`pipecat.services.openai.realtime.frames`,
|
||||
`pipecat.services.gemini_multimodal_live` (use
|
||||
`pipecat.services.google.gemini_live`),
|
||||
`pipecat.services.aws_nova_sonic.context` (use
|
||||
`pipecat.services.aws.nova_sonic`), `pipecat.services.google.openai` and
|
||||
`pipecat.services.google.llm_openai` (use `pipecat.services.google.llm`).
|
||||
(PR [#4215](https://github.com/pipecat-ai/pipecat/pull/4215))
|
||||
|
||||
- ⚠️ Removed `VisionImageFrameAggregator` (from
|
||||
`pipecat.processors.aggregators.vision_image_frame`). Vision/image handling
|
||||
is now built into `LLMContext` (from
|
||||
`pipecat.processors.aggregators.llm_context`). See the `12*` examples for the
|
||||
recommended replacement pattern.
|
||||
(PR [#4215](https://github.com/pipecat-ai/pipecat/pull/4215))
|
||||
|
||||
- ⚠️ Removed `OpenAILLMContext`, `OpenAILLMContextFrame`, and
|
||||
`OpenAILLMContext.from_messages()`. Use `LLMContext` (from
|
||||
`pipecat.processors.aggregators.llm_context`) and `LLMContextFrame` (from
|
||||
`pipecat.frames.frames`) instead. All services now exclusively use the
|
||||
universal `LLMContext`.
|
||||
|
||||
From the developer's point of view, migrating will usually be a matter of
|
||||
going from this:
|
||||
|
||||
```python
|
||||
context = OpenAILLMContext(messages, tools)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
```
|
||||
|
||||
To this:
|
||||
|
||||
```python
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
|
||||
context = LLMContext(messages, tools)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
```
|
||||
(PR [#4215](https://github.com/pipecat-ai/pipecat/pull/4215))
|
||||
|
||||
- ⚠️ Removed deprecated frame types `LLMMessagesFrame` and
|
||||
`OpenAILLMContextAssistantTimestampFrame` from `pipecat.frames.frames`.
|
||||
Instead of `LLMMessagesFrame`, use `LLMContextFrame` with the new messages,
|
||||
or `LLMMessagesUpdateFrame` with `run_llm=True`.
|
||||
(PR [#4215](https://github.com/pipecat-ai/pipecat/pull/4215))
|
||||
|
||||
- ⚠️ Removed `GatedOpenAILLMContextAggregator` (from
|
||||
`pipecat.processors.aggregators.gated_open_ai_llm_context`). Use
|
||||
`GatedLLMContextAggregator` (from
|
||||
`pipecat.processors.aggregators.gated_llm_context`) instead.
|
||||
(PR [#4215](https://github.com/pipecat-ai/pipecat/pull/4215))
|
||||
|
||||
- ⚠️ Removed deprecated service-specific context and aggregator machinery,
|
||||
which was superseded by the universal `LLMContext` system.
|
||||
|
||||
Service-specific classes removed: `AnthropicLLMContext`,
|
||||
`AnthropicContextAggregatorPair`, `AWSBedrockLLMContext`,
|
||||
`AWSBedrockContextAggregatorPair`, `OpenAIContextAggregatorPair`, and their
|
||||
user/assistant aggregators. Also removed `create_context_aggregator()` from
|
||||
`LLMService`, `OpenAILLMService`, `AnthropicLLMService`, and
|
||||
`AWSBedrockLLMService`.
|
||||
|
||||
Base aggregator classes removed (from
|
||||
`pipecat.processors.aggregators.llm_response`): `BaseLLMResponseAggregator`,
|
||||
`LLMContextResponseAggregator`, `LLMUserContextAggregator`,
|
||||
`LLMAssistantContextAggregator`, `LLMUserResponseAggregator`,
|
||||
`LLMAssistantResponseAggregator`.
|
||||
|
||||
From the developer's point of view, migrating will usually be a matter of
|
||||
going from this:
|
||||
|
||||
```python
|
||||
context = OpenAILLMContext(messages, tools)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
```
|
||||
|
||||
To this:
|
||||
|
||||
```python
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
|
||||
context = LLMContext(messages, tools)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
```
|
||||
(PR [#4215](https://github.com/pipecat-ai/pipecat/pull/4215))
|
||||
|
||||
- ⚠️ Removed deprecated service parameters and shims that have been replaced by
|
||||
the `settings=Service.Settings(...)` pattern or direct `__init__` parameters:
|
||||
- `PollyTTSService` alias (use `AWSTTSService`)
|
||||
- `TTSService`: `text_aggregator`, `text_filter` init params
|
||||
- `AWSNovaSonicLLMService`: `send_transcription_frames` init param
|
||||
- `DeepgramSTTService`: `url` init param (use `base_url`)
|
||||
- `FishAudioTTSService`: `model` init param (use `reference_id` or
|
||||
`settings`)
|
||||
- `GladiaSTTService`: `language` and `confidence` from `GladiaInputParams`,
|
||||
`InputParams` class alias
|
||||
- `GeminiTTSService`: `api_key` init param
|
||||
- `GeminiLiveLLMService`: `base_url` init param (use `http_options`)
|
||||
- `GoogleVertexLLMService`: `InputParams` class with
|
||||
`location`/`project_id` fields (use direct init params); `project_id` is now
|
||||
required, `location` defaults to `"us-east4"`
|
||||
- `MiniMaxHttpTTSService`: `english_normalization` from `InputParams` (use
|
||||
`text_normalization`)
|
||||
- `SimliVideoService`: `simli_config` init param (use `api_key`/`face_id`),
|
||||
`use_turn_server` init param; `api_key` and `face_id` are now required
|
||||
- `AnthropicLLMService`: `enable_prompt_caching_beta` from `InputParams`
|
||||
(use `enable_prompt_caching`)
|
||||
(PR [#4220](https://github.com/pipecat-ai/pipecat/pull/4220))
|
||||
|
||||
- ⚠️ Removed deprecated `pipecat.transports.services` and
|
||||
`pipecat.transports.network` module aliases. Update imports to use
|
||||
`pipecat.transports.daily.transport`, `pipecat.transports.livekit.transport`,
|
||||
`pipecat.transports.websocket.*`, `pipecat.transports.webrtc.*`, and
|
||||
`pipecat.transports.daily.utils` respectively.
|
||||
(PR [#4225](https://github.com/pipecat-ai/pipecat/pull/4225))
|
||||
|
||||
- ⚠️ Removed deprecated `pipecat.sync` package. Use `pipecat.utils.sync`
|
||||
instead.
|
||||
(PR [#4225](https://github.com/pipecat-ai/pipecat/pull/4225))
|
||||
|
||||
- ⚠️ Removed deprecated `TranscriptionMessage`, `ThoughtTranscriptionMessage`,
|
||||
and `TranscriptionUpdateFrame` from `pipecat.frames.frames`.
|
||||
(PR [#4228](https://github.com/pipecat-ai/pipecat/pull/4228))
|
||||
|
||||
- ⚠️ Removed deprecated `allow_interruptions` parameter from `PipelineParams`,
|
||||
`StartFrame`, and `FrameProcessor`. Interruptions are now always allowed by
|
||||
default. Use `LLMUserAggregator`'s `user_turn_strategies` /
|
||||
`user_mute_strategies` parameters to control interruption behavior.
|
||||
(PR [#4228](https://github.com/pipecat-ai/pipecat/pull/4228))
|
||||
|
||||
- ⚠️ Removed deprecated `STTMuteFilter`, `STTMuteConfig`, and `STTMuteStrategy`
|
||||
from `pipecat.processors.filters.stt_mute_filter`. Use
|
||||
`pipecat.turns.user_mute` strategies with `LLMUserAggregator`'s
|
||||
`user_mute_strategies` parameter instead.
|
||||
(PR [#4228](https://github.com/pipecat-ai/pipecat/pull/4228))
|
||||
|
||||
- ⚠️ Removed deprecated `pipecat.processors.transcript_processor` module
|
||||
(`TranscriptProcessor`, `TranscriptProcessorConfig`). Use pipeline observers
|
||||
instead.
|
||||
(PR [#4228](https://github.com/pipecat-ai/pipecat/pull/4228))
|
||||
|
||||
- ⚠️ Removed deprecated `EmulateUserStartedSpeakingFrame` and
|
||||
`EmulateUserStoppedSpeakingFrame` frames, and the `emulated` field from
|
||||
`UserStartedSpeakingFrame` / `UserStoppedSpeakingFrame`.
|
||||
(PR [#4228](https://github.com/pipecat-ai/pipecat/pull/4228))
|
||||
|
||||
- ⚠️ Removed deprecated `interruption_strategies` parameter from
|
||||
`PipelineParams`, `StartFrame`, and `FrameProcessor`. Use
|
||||
`LLMUserAggregator`'s `user_turn_strategies` parameter instead.
|
||||
(PR [#4228](https://github.com/pipecat-ai/pipecat/pull/4228))
|
||||
|
||||
- ⚠️ Removed deprecated `pipecat.audio.interruptions` module
|
||||
(`BaseInterruptionStrategy`, `MinWordsInterruptionStrategy`). Use
|
||||
`pipecat.turns.user_start.MinWordsUserTurnStartStrategy` with
|
||||
`LLMUserAggregator`'s `user_turn_strategies` parameter instead.
|
||||
(PR [#4228](https://github.com/pipecat-ai/pipecat/pull/4228))
|
||||
|
||||
- ⚠️ Removed deprecated `pipecat.utils.tracing.class_decorators` module. Use
|
||||
`pipecat.utils.tracing.service_decorators` instead.
|
||||
(PR [#4228](https://github.com/pipecat-ai/pipecat/pull/4228))
|
||||
|
||||
- ⚠️ Removed deprecated `add_pattern_pair` method from `PatternPairAggregator`.
|
||||
Use `add_pattern` instead.
|
||||
(PR [#4228](https://github.com/pipecat-ai/pipecat/pull/4228))
|
||||
|
||||
- ⚠️ Removed deprecated `UserResponseAggregator` class from
|
||||
`pipecat.processors.aggregators.user_response`. Use `LLMUserAggregator`
|
||||
instead.
|
||||
(PR [#4228](https://github.com/pipecat-ai/pipecat/pull/4228))
|
||||
|
||||
- ⚠️ Removed `ExternalUserTurnStrategies` and the automatic fallback to it in
|
||||
`LLMUserAggregator` when a `SpeechControlParamsFrame` was received from the
|
||||
transport.
|
||||
(PR [#4229](https://github.com/pipecat-ai/pipecat/pull/4229))
|
||||
|
||||
- ⚠️ Removed `vad_analyzer` and `turn_analyzer` parameters from
|
||||
`TransportParams` and all transport input classes, along with all deprecated
|
||||
VAD/turn analysis logic in `BaseInputTransport`. VAD and turn detection are
|
||||
now handled entirely by `LLMUserAggregator`.
|
||||
(PR [#4229](https://github.com/pipecat-ai/pipecat/pull/4229))
|
||||
|
||||
- ⚠️ Removed deprecated `TranscriptionUserTurnStopStrategy` alias (deprecated
|
||||
in 0.0.102). Use `SpeechTimeoutUserTurnStopStrategy` instead.
|
||||
(PR [#4232](https://github.com/pipecat-ai/pipecat/pull/4232))
|
||||
|
||||
- ⚠️ Removed deprecated `vad_events` setting and `should_interrupt` parameter
|
||||
from `DeepgramSTTService` (deprecated in 0.0.99). Use Silero VAD for voice
|
||||
activity detection instead.
|
||||
(PR [#4232](https://github.com/pipecat-ai/pipecat/pull/4232))
|
||||
|
||||
- ⚠️ Removed deprecated `send_transcription_frames` parameter from
|
||||
`OpenAIRealtimeLLMService` (deprecated in 0.0.92). Transcription frames are
|
||||
always sent.
|
||||
(PR [#4232](https://github.com/pipecat-ai/pipecat/pull/4232))
|
||||
|
||||
- ⚠️ Removed deprecated `UserIdleProcessor` (deprecated in 0.0.100). Use
|
||||
`LLMUserAggregator` with the `user_idle_timeout` parameter instead.
|
||||
(PR [#4232](https://github.com/pipecat-ai/pipecat/pull/4232))
|
||||
|
||||
- ⚠️ Removed deprecated `UserBotLatencyLogObserver` (deprecated in 0.0.102).
|
||||
Use `UserBotLatencyObserver` with its `on_latency_measured` event handler
|
||||
instead.
|
||||
(PR [#4232](https://github.com/pipecat-ai/pipecat/pull/4232))
|
||||
|
||||
- ⚠️ Removed the `riva` install extra. Use `nvidia` instead (`pip install
|
||||
"pipecat-ai[nvidia]"`).
|
||||
(PR [#4235](https://github.com/pipecat-ai/pipecat/pull/4235))
|
||||
|
||||
- Removed the empty `remote-smart-turn` install extra (was already a no-op).
|
||||
(PR [#4235](https://github.com/pipecat-ai/pipecat/pull/4235))
|
||||
|
||||
- ⚠️ Removed `DeprecatedModuleProxy` and all service `__init__.py` re-export
|
||||
shims. Flat imports like `from pipecat.services.openai import
|
||||
OpenAILLMService` no longer work. Use the full submodule path instead: `from
|
||||
pipecat.services.openai.llm import OpenAILLMService`. This is already the
|
||||
established pattern across all examples and internal code.
|
||||
(PR [#4239](https://github.com/pipecat-ai/pipecat/pull/4239))
|
||||
|
||||
- ⚠️ Removed deprecated `PIPECAT_OBSERVER_FILES` environment variable support.
|
||||
Use `PIPECAT_SETUP_FILES` instead.
|
||||
(PR [#4267](https://github.com/pipecat-ai/pipecat/pull/4267))
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed `IdleFrameProcessor` where `asyncio.Event` was unconditionally cleared
|
||||
in a `finally` block instead of only on the success path.
|
||||
(PR [#3796](https://github.com/pipecat-ai/pipecat/pull/3796))
|
||||
|
||||
- Fixed `MCPClient` opening a new connection for every tool call instead of
|
||||
reusing the session.
|
||||
(PR [#4034](https://github.com/pipecat-ai/pipecat/pull/4034))
|
||||
|
||||
- `GoogleLLMService` now applies a low-latency thinking default
|
||||
(`thinking_level="minimal"`) for Gemini 3+ Flash models.
|
||||
(PR [#4067](https://github.com/pipecat-ai/pipecat/pull/4067))
|
||||
|
||||
- Fixed `WebsocketService` entering an infinite reconnection loop when a server
|
||||
accepts the WebSocket handshake but immediately closes the connection (e.g.
|
||||
invalid API key, close code 1008). The service now detects connections that
|
||||
fail repeatedly within seconds of being established and stops retrying after
|
||||
3 consecutive quick failures.
|
||||
(PR [#4201](https://github.com/pipecat-ai/pipecat/pull/4201))
|
||||
|
||||
- Fixed `InworldHttpTTSService` streaming responses crashing with
|
||||
`UnicodeDecodeError` when multi-byte UTF-8 characters were split across chunk
|
||||
boundaries. This caused TTS audio to cut off mid-sentence intermittently.
|
||||
(PR [#4202](https://github.com/pipecat-ai/pipecat/pull/4202))
|
||||
|
||||
- Fixed a crash (`JSONDecodeError`) when a user interruption occurs while the
|
||||
LLM is streaming function call arguments. Previously, the incomplete JSON
|
||||
arguments were passed directly to `json.loads()`, causing an unhandled
|
||||
exception. Affected services: OpenAI, Google (OpenAI-compatible), and
|
||||
SambaNova.
|
||||
(PR [#4203](https://github.com/pipecat-ai/pipecat/pull/4203))
|
||||
|
||||
- Fixed `BaseOutputTransport` discarding pending `UninterruptibleFrame` items
|
||||
(e.g. function-call context updates) when an interruption arrived. The audio
|
||||
task is now kept alive and only interruptible frames are drained when
|
||||
uninterruptible frames are present in the queue.
|
||||
(PR [#4217](https://github.com/pipecat-ai/pipecat/pull/4217))
|
||||
|
||||
- Fixed spurious LLM inference being triggered when a function call result
|
||||
arrived while the user was actively speaking. The context frame is now
|
||||
suppressed until the user stops speaking.
|
||||
(PR [#4217](https://github.com/pipecat-ai/pipecat/pull/4217))
|
||||
|
||||
- Fixed `CartesiaTTSService` failing with "Context has closed" errors when
|
||||
switching voice, model, or language via `TTSUpdateSettingsFrame`. The service
|
||||
now automatically flushes the current audio context and opens a fresh one
|
||||
when these settings change.
|
||||
(PR [#4220](https://github.com/pipecat-ai/pipecat/pull/4220))
|
||||
|
||||
- Fixed duplicate LLM replies that could occur when multiple async function
|
||||
call results arrived while an LLM request was already queued.
|
||||
(PR [#4230](https://github.com/pipecat-ai/pipecat/pull/4230))
|
||||
|
||||
- Fixed undefined `_warn_deprecated_param` calls in `OpenAIRealtimeLLMService`
|
||||
and `GrokRealtimeLLMService` for the deprecated `session_properties` init
|
||||
parameter.
|
||||
(PR [#4232](https://github.com/pipecat-ai/pipecat/pull/4232))
|
||||
|
||||
- Fixed Gemini Live bot hanging after a session resumption reconnect. Audio,
|
||||
video, and text input were silently dropped after reconnecting because the
|
||||
internal `_ready_for_realtime_input` flag was not being reset.
|
||||
(PR [#4242](https://github.com/pipecat-ai/pipecat/pull/4242))
|
||||
|
||||
- Fixed `VADController` getting stuck in the `SPEAKING` state when audio frames
|
||||
stop arriving mid-speech (e.g. user mutes mic). A new `audio_idle_timeout`
|
||||
parameter (default 1s, set to 0 to disable) forces a transition back to
|
||||
`QUIET` and emits `on_speech_stopped` when no audio is received while
|
||||
speaking.
|
||||
(PR [#4244](https://github.com/pipecat-ai/pipecat/pull/4244))
|
||||
|
||||
- Fixed `PipelineRunner._gc_collect()` blocking the event loop by running
|
||||
`gc.collect()` synchronously. Now offloaded via `asyncio.to_thread` to avoid
|
||||
stalling concurrent pipeline tasks.
|
||||
(PR [#4255](https://github.com/pipecat-ai/pipecat/pull/4255))
|
||||
|
||||
- Fixed `ElevenLabsTTSService` incorrectly enabling `auto_mode` when using
|
||||
`TextAggregationMode.TOKEN`. Auto mode disables server-side buffering and is
|
||||
designed for complete sentences — enabling it with token streaming degraded
|
||||
speech quality. The default is now derived automatically from the aggregation
|
||||
strategy: `auto_mode=True` for `SENTENCE`, `auto_mode=False` for `TOKEN`.
|
||||
Callers can still override by passing `auto_mode` explicitly.
|
||||
(PR [#4265](https://github.com/pipecat-ai/pipecat/pull/4265))
|
||||
|
||||
- Fixed `ValueError: write to closed file` during pipeline shutdown when
|
||||
observers were active. Observer proxy tasks are now cancelled before observer
|
||||
resources are cleaned up.
|
||||
(PR [#4267](https://github.com/pipecat-ai/pipecat/pull/4267))
|
||||
|
||||
- Fixed delayed turn completion when STT transcripts arrive after the p99
|
||||
timeout. Previously, a late transcript (beyond the p99 window) would fall
|
||||
through to the 5-second `user_turn_stop_timeout` fallback. Now the turn stop
|
||||
triggers immediately when the late transcript arrives.
|
||||
(PR [#4283](https://github.com/pipecat-ai/pipecat/pull/4283))
|
||||
|
||||
- Fixed `ElevenLabsTTSService` ignoring `enable_logging=False` and
|
||||
`enable_ssml_parsing=False`. The truthy check treated `False` the same as
|
||||
`None` (both skipped), and Python's `str(False)` produced `"False"` instead
|
||||
of the lowercase `"false"` expected by the API.
|
||||
(PR [#4293](https://github.com/pipecat-ai/pipecat/pull/4293))
|
||||
|
||||
- Fixed `on_assistant_turn_stopped` not resetting internal state when the LLM
|
||||
returned no text tokens. Added an `interrupted` field to
|
||||
`AssistantTurnStoppedMessage` to indicate whether the assistant turn was
|
||||
interrupted.
|
||||
(PR [#4294](https://github.com/pipecat-ai/pipecat/pull/4294))
|
||||
|
||||
- Fixed `LLMContextSummarizer` failing with "No messages to summarize" when
|
||||
using `system_instruction` instead of a system-role message at the start of
|
||||
the context. The summarizer previously scanned the entire context for the
|
||||
first system message, which could match a mid-conversation injection (e.g.
|
||||
idle notifications) instead of the initial prompt, causing the summarization
|
||||
range to be empty.
|
||||
(PR [#4295](https://github.com/pipecat-ai/pipecat/pull/4295))
|
||||
|
||||
## [0.0.108] - 2026-03-27
|
||||
|
||||
### Added
|
||||
|
||||
- Added `SarvamLLMService` with support for `sarvam-30b`, `sarvam-30b-16k`,
|
||||
`sarvam-105b` and `sarvam-105b-32k`.
|
||||
(PR [#3978](https://github.com/pipecat-ai/pipecat/pull/3978))
|
||||
|
||||
- Added `on_turn_context_created(context_id)` hook to `TTSService`. Override
|
||||
this to perform provider-specific setup (e.g. eagerly opening a server-side
|
||||
context) before text starts flowing. Called each time a new turn context ID
|
||||
is created.
|
||||
(PR [#4013](https://github.com/pipecat-ai/pipecat/pull/4013))
|
||||
|
||||
- Added `XAIHttpTTSService` for text-to-speech using xAI's HTTP TTS API.
|
||||
(PR [#4031](https://github.com/pipecat-ai/pipecat/pull/4031))
|
||||
|
||||
- Added support for "developer" role messages in conversation context across
|
||||
all LLM adapters. For non-OpenAI services (Anthropic, Google, AWS Bedrock),
|
||||
"developer" messages are converted to "user" messages (use
|
||||
`system_instruction` to set the system instruction). For OpenAI services,
|
||||
"developer" messages pass through in conversation history. For the Responses
|
||||
API, they are kept as "developer" role (matching the existing "system" →
|
||||
"developer" conversion).
|
||||
(PR [#4089](https://github.com/pipecat-ai/pipecat/pull/4089))
|
||||
|
||||
- Added `SmallestTTSService`, a WebSocket-based TTS service integration with
|
||||
Smallest AI's Waves API. Supports the Lightning v2 and v3.1 models with
|
||||
configurable voice, language, speed, consistency, similarity, and enhancement
|
||||
settings.
|
||||
(PR [#4092](https://github.com/pipecat-ai/pipecat/pull/4092))
|
||||
|
||||
- Added warnings in turn stop strategies when `VADParams.stop_secs` differs
|
||||
from the recommended default (0.2s) or when `stop_secs >= STT p99 latency`,
|
||||
which collapses the STT wait timeout to 0s and may cause delayed turn
|
||||
detection. The warnings guide developers to re-run the
|
||||
[stt-benchmark](https://github.com/pipecat-ai/stt-benchmark) with their VAD
|
||||
settings.
|
||||
(PR [#4115](https://github.com/pipecat-ai/pipecat/pull/4115))
|
||||
|
||||
- Added `domain` parameter to `AssemblyAISTTSettings` for specialized
|
||||
recognition modes such as Medical Mode (`domain="medical-v1"`).
|
||||
(PR [#4117](https://github.com/pipecat-ai/pipecat/pull/4117))
|
||||
|
||||
- Added `NovitaLLMService` for using Novita AI's LLM models via their
|
||||
OpenAI-compatible API.
|
||||
(PR [#4119](https://github.com/pipecat-ai/pipecat/pull/4119))
|
||||
|
||||
- Added `cleanup()` method to `VADAnalyzer` and `VADController` so VAD analyzer
|
||||
resources are properly released when no longer needed. Custom `VADAnalyzer`
|
||||
subclasses can override `cleanup()` to free any held resources.
|
||||
(PR [#4120](https://github.com/pipecat-ai/pipecat/pull/4120))
|
||||
|
||||
- Added `on_end_of_turn` event handler to `AssemblyAISTTService`. This fires
|
||||
after the final transcript is pushed, providing a reliable hook for
|
||||
end-of-turn logic that doesn't race with `TranscriptionFrame`. Works in both
|
||||
Pipecat and AssemblyAI turn detection modes.
|
||||
(PR [#4128](https://github.com/pipecat-ai/pipecat/pull/4128))
|
||||
|
||||
- Added `DeepgramFluxSageMakerSTTService` for running Deepgram Flux
|
||||
speech-to-text on AWS SageMaker endpoints. Use with
|
||||
`ExternalUserTurnStrategies` to take advantage of Flux's turn detection.
|
||||
(PR [#4143](https://github.com/pipecat-ai/pipecat/pull/4143))
|
||||
|
||||
- Added `Mem0MemoryService.get_memories()` convenience method for retrieving
|
||||
all stored memories outside the pipeline (e.g. to build a personalized
|
||||
greeting at connection time). This avoids the need to manually handle client
|
||||
type branching, filter construction, and async wrapping.
|
||||
(PR [#4156](https://github.com/pipecat-ai/pipecat/pull/4156))
|
||||
|
||||
### Changed
|
||||
|
||||
- Added context prewarming path for `InworldTTSService` to improve first audio
|
||||
latency.
|
||||
(PR [#4013](https://github.com/pipecat-ai/pipecat/pull/4013))
|
||||
|
||||
- Added `KrispVivaVadAnalyzer` for Voice Activity Detection using the Krisp
|
||||
VIVA SDK (requires `krisp_audio`).
|
||||
(PR [#4022](https://github.com/pipecat-ai/pipecat/pull/4022))
|
||||
|
||||
- Modified `InworldTTSService` to close context at end of turn instead of
|
||||
relying on idle timeout.
|
||||
(PR [#4028](https://github.com/pipecat-ai/pipecat/pull/4028))
|
||||
|
||||
- Added Gemini 3 support to the Gemini Live service.
|
||||
(PR [#4078](https://github.com/pipecat-ai/pipecat/pull/4078))
|
||||
|
||||
- `TTSService`: the default `stop_frame_timeout_s` (idle time before an
|
||||
automatic `TTSStoppedFrame` is pushed when `push_stop_frames=True`) has
|
||||
changed from `2.0` to `3.0` seconds.
|
||||
(PR [#4084](https://github.com/pipecat-ai/pipecat/pull/4084))
|
||||
|
||||
- ⚠️ `GeminiLLMAdapter` now only treats `messages[0]` as the initial system
|
||||
message, matching all other adapters. Previously it searched for the first
|
||||
"system" message anywhere in the conversation history. A "system" message
|
||||
appearing later in the list will now be converted to "user" instead of being
|
||||
extracted as the system instruction.
|
||||
(PR [#4089](https://github.com/pipecat-ai/pipecat/pull/4089))
|
||||
|
||||
- Fixed `InworldTTSService` to fall back to full text when TTS timestamps are
|
||||
not received.
|
||||
(PR [#4113](https://github.com/pipecat-ai/pipecat/pull/4113))
|
||||
|
||||
- ⚠️ Realtime services (Gemini Live, OpenAI Realtime, Grok Realtime, Nova
|
||||
Sonic) now prefer `system_instruction` from service settings over an initial
|
||||
system message in the LLM context, matching the behavior of non-realtime
|
||||
services. Previously, context-provided system instructions took precedence. A
|
||||
warning is now logged when both are set.
|
||||
(PR [#4130](https://github.com/pipecat-ai/pipecat/pull/4130))
|
||||
|
||||
- Bumped `nvidia-riva-client` minimum version to `>=2.25.1`.
|
||||
(PR [#4136](https://github.com/pipecat-ai/pipecat/pull/4136))
|
||||
|
||||
- Upgraded `protobuf` from 5.x to 6.x (`>=6.31.1,<7`).
|
||||
(PR [#4136](https://github.com/pipecat-ai/pipecat/pull/4136))
|
||||
|
||||
- Unrecognized language strings (e.g. Deepgram's `"multi"`) no longer produce a
|
||||
warning at startup. The log message has been downgraded to debug level since
|
||||
these are valid service-specific values that are passed through correctly.
|
||||
(PR [#4137](https://github.com/pipecat-ai/pipecat/pull/4137))
|
||||
|
||||
- `GrokLLMService` and `GrokRealtimeLLMService` now live in the
|
||||
`pipecat.services.xai` module alongside `XAIHttpTTSService`, since all three
|
||||
use the same xAI API. Update imports from `pipecat.services.grok.*` to
|
||||
`pipecat.services.xai.*` (e.g. `from pipecat.services.xai.llm import
|
||||
GrokLLMService`).
|
||||
(PR [#4142](https://github.com/pipecat-ai/pipecat/pull/4142))
|
||||
|
||||
- ⚠️ Bumped `mem0ai` dependency from `~=0.1.94` to `>=1.0.8,<2`. Users of the
|
||||
`mem0` extra will need to update their mem0ai package.
|
||||
(PR [#4156](https://github.com/pipecat-ai/pipecat/pull/4156))
|
||||
|
||||
### Deprecated
|
||||
|
||||
- `pipecat.services.grok.llm`, `pipecat.services.grok.realtime.llm`, and
|
||||
`pipecat.services.grok.realtime.events` are deprecated. The old import paths
|
||||
still work but emit a `DeprecationWarning`; use `pipecat.services.xai.llm`,
|
||||
`pipecat.services.xai.realtime.llm`, and
|
||||
`pipecat.services.xai.realtime.events` instead.
|
||||
(PR [#4142](https://github.com/pipecat-ai/pipecat/pull/4142))
|
||||
|
||||
### Removed
|
||||
|
||||
- ⚠️ `TTSService.add_word_timestamps()` no longer supports the `"Reset"` and
|
||||
`"TTSStoppedFrame"` sentinel strings. If you have a custom TTS service that
|
||||
called `await self.add_word_timestamps([("Reset", 0)])` or `await
|
||||
self.add_word_timestamps([("TTSStoppedFrame", 0), ("Reset", 0)], ctx_id)`,
|
||||
replace them with `await self.append_to_audio_context(ctx_id,
|
||||
TTSStoppedFrame(context_id=ctx_id))` and let `_handle_audio_context` manage
|
||||
the word-timestamp reset automatically.
|
||||
(PR [#4145](https://github.com/pipecat-ai/pipecat/pull/4145))
|
||||
|
||||
- Removed `SambaNovaSTTService`. SambaNova no longer offers speech-to-text
|
||||
audio models. Use another STT provider instead.
|
||||
(PR [#4154](https://github.com/pipecat-ai/pipecat/pull/4154))
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed Gemini Live (`GoogleGeminiLiveLLMService`) not honoring
|
||||
`settings.system_instruction`. The system instruction was being read from a
|
||||
deprecated constructor parameter instead of the settings object, causing it
|
||||
to be silently ignored.
|
||||
(PR [#4089](https://github.com/pipecat-ai/pipecat/pull/4089))
|
||||
|
||||
- Fixed `AWSBedrockLLMAdapter` sending an empty message list to the API when
|
||||
the only message in context was a system message. The lone system message is
|
||||
now converted to "user" role instead of being extracted, matching the
|
||||
existing Anthropic adapter behavior.
|
||||
(PR [#4089](https://github.com/pipecat-ai/pipecat/pull/4089))
|
||||
|
||||
- Fixed Gemini Live pipeline hanging indefinitely when an `EndFrame` was
|
||||
deferred while waiting for the bot to finish responding and `turn_complete`
|
||||
never arrived. As a possible root-cause fix, `turn_complete` messages are now
|
||||
handled even if they lack `usage_metadata`. As a fallback, the deferred
|
||||
`EndFrame` now has a 30-second safety timeout.
|
||||
(PR [#4125](https://github.com/pipecat-ai/pipecat/pull/4125))
|
||||
|
||||
- Fixed ElevenLabs WebSocket disconnections (1008 "Maximum simultaneous
|
||||
contexts exceeded") caused by rapid user interruptions. When interruptions
|
||||
arrived before any TTS text was generated, phantom contexts were created on
|
||||
the ElevenLabs server that were never closed, eventually exceeding the
|
||||
5-context limit.
|
||||
(PR [#4126](https://github.com/pipecat-ai/pipecat/pull/4126))
|
||||
|
||||
- Fixed the final sentence being dropped from the conversation context when
|
||||
using RTVI text input with non-word-timestamp TTS services. The
|
||||
`LLMFullResponseEndFrame` was racing ahead of the last `TTSTextFrame`,
|
||||
causing the `LLMAssistantAggregator` to finalize the context before the final
|
||||
sentence arrived.
|
||||
(PR [#4127](https://github.com/pipecat-ai/pipecat/pull/4127))
|
||||
|
||||
- Fixed audio crackling and popping in recordings when both user and bot are
|
||||
speaking. `AudioBufferProcessor` no longer injects silence into a track's
|
||||
buffer while that track is actively producing audio, preventing mid-utterance
|
||||
interruptions in the recorded output.
|
||||
(PR [#4135](https://github.com/pipecat-ai/pipecat/pull/4135))
|
||||
|
||||
- Fixed websocket TTS word timestamps so interrupted contexts cannot leak stale
|
||||
words or backward PTS values into later turns.
|
||||
(PR [#4145](https://github.com/pipecat-ai/pipecat/pull/4145))
|
||||
|
||||
- Fixed a race condition in `InterruptibleTTSService` where, if `run_tts` had
|
||||
been invoked but `BotStartedSpeakingFrame` had not yet been received, a user
|
||||
interruption could allow stale audio to leak through.
|
||||
(PR [#4145](https://github.com/pipecat-ai/pipecat/pull/4145))
|
||||
|
||||
- Fixed Gemini Live local VAD mode (`GeminiVADParams(disabled=True)` with
|
||||
external VAD) not working. The bot now correctly detects user speech and
|
||||
signals turn boundaries to the Gemini API.
|
||||
(PR [#4146](https://github.com/pipecat-ai/pipecat/pull/4146))
|
||||
|
||||
- Fixed Gemini Live message handling to process all `server_content` fields
|
||||
independently. Gemini 3.x can bundle multiple fields (e.g. `model_turn` and
|
||||
`output_transcription`) on the same message, but the previous `elif` chain
|
||||
only processed the first match, silently dropping the rest.
|
||||
(PR [#4147](https://github.com/pipecat-ai/pipecat/pull/4147))
|
||||
|
||||
- Fixed `ServiceSwitcher` with `ServiceSwitcherStrategyFailover` incorrectly
|
||||
triggering failover when `ErrorFrame`s from other pipeline stages (e.g. TTS)
|
||||
propagated upstream through the switcher. Previously, any non-fatal error
|
||||
passing through would be misattributed to the active service and trigger an
|
||||
unwanted service switch. Now only errors originating from the switcher's own
|
||||
managed services trigger failover.
|
||||
(PR [#4149](https://github.com/pipecat-ai/pipecat/pull/4149))
|
||||
|
||||
- Fixed `LiveKitOutputTransport` not clearing the `rtc.AudioSource` internal
|
||||
buffer on interruption, causing the bot to continue speaking for several
|
||||
seconds after being interrupted.
|
||||
(PR [#4151](https://github.com/pipecat-ai/pipecat/pull/4151))
|
||||
|
||||
- Fixed a crash in OpenAI LLM processing when the provider returns
|
||||
`chunk.choices[0].delta.audio = None`, which caused `'NoneType' object has no
|
||||
attribute 'get'` errors during audio transcript handling.
|
||||
(PR [#4152](https://github.com/pipecat-ai/pipecat/pull/4152))
|
||||
|
||||
- Fixed error floods in `DeepgramSTTService` when the WebSocket connection
|
||||
drops. With Deepgram SDK 6.x, `send_media()` raises exceptions on a dead
|
||||
connection instead of silently failing, causing every queued audio frame to
|
||||
log an error. Now `send_media()` failures are caught gracefully — a single
|
||||
warning is logged and audio frames are skipped until the existing
|
||||
reconnection logic restores the connection.
|
||||
(PR [#4153](https://github.com/pipecat-ai/pipecat/pull/4153))
|
||||
|
||||
- `Mem0MemoryService` no longer blocks the event loop during memory storage and
|
||||
retrieval. All Mem0 API calls now run in a background thread, and message
|
||||
storage is fire-and-forget so it doesn't delay downstream processing.
|
||||
(PR [#4156](https://github.com/pipecat-ai/pipecat/pull/4156))
|
||||
|
||||
- Fixed `Mem0MemoryService` failing to store messages when the context
|
||||
contained system or developer role messages. The Mem0 API only accepts user
|
||||
and assistant roles, so other roles are now filtered out before storing.
|
||||
(PR [#4156](https://github.com/pipecat-ai/pipecat/pull/4156))
|
||||
|
||||
- Added missing `on_dtmf_event` callback to `LemonSliceTransportClient.setup()`
|
||||
`DailyCallbacks` construction, fixing a `ValidationError` at pipeline setup
|
||||
time.
|
||||
(PR [#4161](https://github.com/pipecat-ai/pipecat/pull/4161))
|
||||
|
||||
- Fixed an issue in `InworldTTSService` where, in cases of fast interruption,
|
||||
we would continue receiving audio from the previous context.
|
||||
(PR [#4167](https://github.com/pipecat-ai/pipecat/pull/4167))
|
||||
|
||||
- Fixed a word timestamp interleaving issue in `InworldTTSService` when
|
||||
processing multiple sentences.
|
||||
(PR [#4167](https://github.com/pipecat-ai/pipecat/pull/4167))
|
||||
|
||||
- Fixed duplicate `TTSStoppedFrame` being pushed in TTS services using
|
||||
`push_stop_frames=True`. When the stop-frame timeout fired, a second
|
||||
`TTSStoppedFrame` could be pushed after the normal one at context completion.
|
||||
(PR [#4172](https://github.com/pipecat-ai/pipecat/pull/4172))
|
||||
|
||||
- ⚠️ Fixed `DeepgramSTTService` compatibility with deepgram-sdk 6.1.0. The SDK
|
||||
now requires explicit message objects for `send_keep_alive()`,
|
||||
`send_close_stream()`, and `send_finalize()`. The minimum deepgram-sdk
|
||||
version is now 6.1.0.
|
||||
(PR [#4174](https://github.com/pipecat-ai/pipecat/pull/4174))
|
||||
|
||||
- Fixed RTVI events not being delivered to clients when using WebSocket
|
||||
transports. `ProtobufFrameSerializer` now sets `ignore_rtvi_messages=False`
|
||||
by default.
|
||||
(PR [#4176](https://github.com/pipecat-ai/pipecat/pull/4176))
|
||||
|
||||
- Fixed a timing issue where turn detection timer tasks (idle controller,
|
||||
speech timeout, turn analyzer, and turn completion) could miss their first
|
||||
tick because the newly created asyncio task was not yet scheduled when the
|
||||
caller continued.
|
||||
(PR [#4183](https://github.com/pipecat-ai/pipecat/pull/4183))
|
||||
|
||||
- Fixed `FastAPIWebsocketTransport` intermittently hanging on shutdown when the
|
||||
remote side (e.g. Twilio) disconnects while audio is being sent. A race
|
||||
condition between the send and receive paths could cause the
|
||||
`on_client_disconnected` callback to be skipped, leaving the pipeline waiting
|
||||
for a disconnect signal that never came.
|
||||
(PR [#4186](https://github.com/pipecat-ai/pipecat/pull/4186))
|
||||
|
||||
### Performance
|
||||
|
||||
- `RimeTTSService` now handles Rime's `done` WebSocket message to complete
|
||||
audio contexts immediately, eliminating the 3-second idle timeout that
|
||||
previously added latency at the end of each utterance.
|
||||
(PR [#4172](https://github.com/pipecat-ai/pipecat/pull/4172))
|
||||
|
||||
## [0.0.107] - 2026-03-23
|
||||
|
||||
### Added
|
||||
|
||||
@@ -1,62 +0,0 @@
|
||||
# Changelog
|
||||
|
||||
All notable changes to the **<project name>** SDK will be documented in this file.
|
||||
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
Please make sure to add your changes to the appropriate categories:
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
### Added
|
||||
|
||||
<!-- for new functionality -->
|
||||
|
||||
- n/a
|
||||
|
||||
### Changed
|
||||
|
||||
<!-- for changed functionality -->
|
||||
|
||||
- n/a
|
||||
|
||||
### Deprecated
|
||||
|
||||
<!-- for soon-to-be removed functionality -->
|
||||
|
||||
- n/a
|
||||
|
||||
### Removed
|
||||
|
||||
<!-- for removed functionality -->
|
||||
|
||||
- n/a
|
||||
|
||||
### Fixed
|
||||
|
||||
<!-- for fixed bugs -->
|
||||
|
||||
- n/a
|
||||
|
||||
### Performance
|
||||
|
||||
<!-- for performance-relevant changes -->
|
||||
|
||||
- n/a
|
||||
|
||||
### Security
|
||||
|
||||
<!-- for security-relevant changes -->
|
||||
|
||||
- n/a
|
||||
|
||||
### Other
|
||||
|
||||
<!-- for everything else -->
|
||||
|
||||
- n/a
|
||||
|
||||
## [0.1.0] - YYYY-MM-DD
|
||||
|
||||
Initial release.
|
||||
@@ -10,7 +10,7 @@ Pipecat is an open-source Python framework for building real-time voice and mult
|
||||
|
||||
```bash
|
||||
# Setup development environment
|
||||
uv sync --group dev --all-extras --no-extra gstreamer --no-extra krisp
|
||||
uv sync --group dev --all-extras --no-extra gstreamer
|
||||
|
||||
# Install pre-commit hooks
|
||||
uv run pre-commit install
|
||||
|
||||
@@ -23,7 +23,7 @@ Create your integration following the patterns and examples shown in the "Integr
|
||||
Your repository must contain these components:
|
||||
|
||||
- **Source code** - Complete implementation following Pipecat patterns
|
||||
- **Foundational example** - Single file example showing basic usage (see [Pipecat examples](https://github.com/pipecat-ai/pipecat/tree/main/examples/foundational))
|
||||
- **Foundational example** - Single file example showing basic usage (see [Pipecat examples](https://github.com/pipecat-ai/pipecat/tree/main/examples))
|
||||
- **README.md** - Must include:
|
||||
- Introduction and explanation of your integration
|
||||
- Installation instructions
|
||||
@@ -225,6 +225,17 @@ Vision services process images and provide analysis such as descriptions, object
|
||||
|
||||
### Naming Conventions
|
||||
|
||||
#### Package and Repository Naming
|
||||
|
||||
Use the `pipecat-{vendor}` naming convention for your PyPI package and repository:
|
||||
|
||||
- `pipecat-{vendor}` — for single-service integrations (e.g., `pipecat-deepdub`)
|
||||
- `pipecat-{vendor}-{type}` — when a vendor offers multiple service types (e.g., `pipecat-upliftai-stt`, `pipecat-upliftai-tts`)
|
||||
|
||||
This convention makes community packages easily discoverable via PyPI search and clearly identifies them as part of the Pipecat ecosystem.
|
||||
|
||||
#### Class Naming
|
||||
|
||||
- **STT:** `VendorSTTService`
|
||||
- **LLM:** `VendorLLMService`
|
||||
- **TTS:**
|
||||
@@ -406,8 +417,9 @@ Use Pipecat's tracing decorators:
|
||||
|
||||
### Packaging and Distribution
|
||||
|
||||
- Name your package `pipecat-{vendor}` (see [Naming Conventions](#naming-conventions))
|
||||
- Use [uv](https://docs.astral.sh/uv/) for packaging (encouraged)
|
||||
- Consider releasing to PyPI for easier installation
|
||||
- Publish to PyPI for easier installation
|
||||
- Follow semantic versioning principles
|
||||
- Maintain a changelog
|
||||
|
||||
|
||||
41
README.md
@@ -8,7 +8,7 @@
|
||||
|
||||
**Pipecat** is an open-source Python framework for building real-time voice and multimodal conversational agents. Orchestrate audio and video, AI services, different transports, and conversation pipelines effortlessly—so you can focus on what makes your agent unique.
|
||||
|
||||
> Want to dive right in? Try the [quickstart](https://docs.pipecat.ai/getting-started/quickstart).
|
||||
> Want to dive right in? Run `pipecat init quickstart` or follow the [quickstart guide](https://docs.pipecat.ai/getting-started/quickstart).
|
||||
|
||||
## 🚀 What You Can Build
|
||||
|
||||
@@ -79,26 +79,26 @@ Catch new features, interviews, and how-tos on our [Pipecat TV](https://www.yout
|
||||
<a href="https://github.com/pipecat-ai/pipecat-examples/tree/main/simple-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat-examples/main/simple-chatbot/image.png" width="400" /></a>
|
||||
<a href="https://github.com/pipecat-ai/pipecat-examples/tree/main/storytelling-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat-examples/main/storytelling-chatbot/image.png" width="400" /></a>
|
||||
<br/>
|
||||
<a href="https://github.com/pipecat-ai/pipecat-examples/tree/main/translation-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat-examples/main/translation-chatbot/image.png" width="400" /></a>
|
||||
<a href="https://github.com/pipecat-ai/pipecat/blob/main/examples/foundational/12-describe-video.py"><img src="https://github.com/pipecat-ai/pipecat/blob/main/examples/foundational/assets/moondream.png" width="400" /></a>
|
||||
<a href="https://github.com/pipecat-ai/pipecat-examples/tree/main/daily-multi-translation"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat-examples/main/daily-multi-translation/image.png" width="400" /></a>
|
||||
<a href="https://github.com/pipecat-ai/pipecat/blob/main/examples/vision/vision-moondream.py"><img src="https://github.com/pipecat-ai/pipecat/blob/main/examples/assets/moondream.png" width="400" /></a>
|
||||
</p>
|
||||
|
||||
## 🧩 Available services
|
||||
|
||||
| Category | Services |
|
||||
| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/stt/elevenlabs), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Gradium](https://docs.pipecat.ai/server/services/stt/gradium), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [NVIDIA Riva](https://docs.pipecat.ai/server/services/stt/riva), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [Sarvam](https://docs.pipecat.ai/server/services/stt/sarvam), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) |
|
||||
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [Mistral](https://docs.pipecat.ai/server/services/llm/mistral), [Novita](https://docs.pipecat.ai/server/services/llm/novita), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nvidia), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova), [Sarvam](https://docs.pipecat.ai/server/services/llm/sarvam), [Together AI](https://docs.pipecat.ai/server/services/llm/together) |
|
||||
| Text-to-Speech | [Async](https://docs.pipecat.ai/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Camb AI](https://docs.pipecat.ai/server/services/tts/camb), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [Gradium](https://docs.pipecat.ai/server/services/tts/gradium), [Groq](https://docs.pipecat.ai/server/services/tts/groq), [Hume](https://docs.pipecat.ai/server/services/tts/hume), [Inworld](https://docs.pipecat.ai/server/services/tts/inworld), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [NVIDIA Riva](https://docs.pipecat.ai/server/services/tts/riva), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [Resemble](https://docs.pipecat.ai/server/services/tts/resemble), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [Smallest](https://docs.pipecat.ai/server/services/tts/smallest), [Speechmatics](https://docs.pipecat.ai/server/services/tts/speechmatics), [xAI](https://docs.pipecat.ai/server/services/tts/xai), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
|
||||
| Speech-to-Speech | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [Grok Voice Agent](https://docs.pipecat.ai/server/services/s2s/grok), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai), [Ultravox](https://docs.pipecat.ai/server/services/s2s/ultravox), |
|
||||
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local |
|
||||
| Serializers | [Exotel](https://docs.pipecat.ai/server/utilities/serializers/exotel), [Plivo](https://docs.pipecat.ai/server/utilities/serializers/plivo), [Twilio](https://docs.pipecat.ai/server/utilities/serializers/twilio), [Telnyx](https://docs.pipecat.ai/server/utilities/serializers/telnyx), [Vonage](https://docs.pipecat.ai/server/utilities/serializers/vonage) |
|
||||
| Video | [HeyGen](https://docs.pipecat.ai/server/services/video/heygen), [LemonSlice](https://docs.pipecat.ai/server/services/video/lemonslice), [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) |
|
||||
| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) |
|
||||
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/google-imagen), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) |
|
||||
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [ai-coustics](https://docs.pipecat.ai/server/utilities/audio/aic-filter) |
|
||||
| Analytics & Metrics | [OpenTelemetry](https://docs.pipecat.ai/server/utilities/opentelemetry), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) |
|
||||
| Community | [Browse community integrations →](https://docs.pipecat.ai/server/services/community-integrations) |
|
||||
| Category | Services |
|
||||
| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/stt/elevenlabs), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Gradium](https://docs.pipecat.ai/server/services/stt/gradium), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [Mistral](https://docs.pipecat.ai/server/services/stt/mistral), [NVIDIA Riva](https://docs.pipecat.ai/server/services/stt/riva), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [Sarvam](https://docs.pipecat.ai/server/services/stt/sarvam), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) |
|
||||
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [Mistral](https://docs.pipecat.ai/server/services/llm/mistral), [Nebius](https://docs.pipecat.ai/server/services/llm/nebius), [Novita](https://docs.pipecat.ai/server/services/llm/novita), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nvidia), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova), [Sarvam](https://docs.pipecat.ai/server/services/llm/sarvam), [Together AI](https://docs.pipecat.ai/server/services/llm/together) |
|
||||
| Text-to-Speech | [Async](https://docs.pipecat.ai/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Camb AI](https://docs.pipecat.ai/server/services/tts/camb), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [Gradium](https://docs.pipecat.ai/server/services/tts/gradium), [Groq](https://docs.pipecat.ai/server/services/tts/groq), [Hume](https://docs.pipecat.ai/server/services/tts/hume), [Inworld](https://docs.pipecat.ai/server/services/tts/inworld), [Kokoro](https://docs.pipecat.ai/server/services/tts/kokoro), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Mistral](https://docs.pipecat.ai/server/services/tts/mistral), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [NVIDIA Riva](https://docs.pipecat.ai/server/services/tts/riva), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [Resemble](https://docs.pipecat.ai/server/services/tts/resemble), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [Smallest](https://docs.pipecat.ai/server/services/tts/smallest), [Speechmatics](https://docs.pipecat.ai/server/services/tts/speechmatics), [xAI](https://docs.pipecat.ai/server/services/tts/xai), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
|
||||
| Speech-to-Speech | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [Grok Voice Agent](https://docs.pipecat.ai/server/services/s2s/grok), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai), [Ultravox](https://docs.pipecat.ai/server/services/s2s/ultravox) |
|
||||
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [LiveKit (WebRTC)](https://docs.pipecat.ai/server/services/transport/livekit), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), [WhatsApp](https://docs.pipecat.ai/server/services/transport/whatsapp), Local |
|
||||
| Serializers | [Exotel](https://docs.pipecat.ai/server/services/serializers/exotel), [Genesys](https://docs.pipecat.ai/server/services/serializers/genesys), [Plivo](https://docs.pipecat.ai/server/services/serializers/plivo), [Twilio](https://docs.pipecat.ai/server/services/serializers/twilio), [Telnyx](https://docs.pipecat.ai/server/services/serializers/telnyx), [Vonage](https://docs.pipecat.ai/server/services/serializers/vonage) |
|
||||
| Video | [HeyGen](https://docs.pipecat.ai/server/services/video/heygen), [LemonSlice](https://docs.pipecat.ai/server/services/transport/lemonslice), [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) |
|
||||
| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) |
|
||||
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/google-imagen), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) |
|
||||
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp Viva](https://docs.pipecat.ai/guides/features/krisp-viva), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [ai-coustics](https://docs.pipecat.ai/server/utilities/audio/aic-filter), [RNNoise](https://docs.pipecat.ai/server/utilities/audio/rnnoise-filter) |
|
||||
| Analytics & Metrics | [OpenTelemetry](https://docs.pipecat.ai/server/utilities/opentelemetry), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) |
|
||||
| Community | [Browse community integrations →](https://docs.pipecat.ai/server/services/community-integrations) |
|
||||
|
||||
📚 [View full services documentation →](https://docs.pipecat.ai/server/services/supported-services)
|
||||
|
||||
@@ -142,15 +142,15 @@ You can get started with Pipecat running on your local machine, then move your a
|
||||
|
||||
## 🧪 Code examples
|
||||
|
||||
- [Foundational](https://github.com/pipecat-ai/pipecat/tree/main/examples/foundational) — small snippets that build on each other, introducing one or two concepts at a time
|
||||
- [Foundational](https://github.com/pipecat-ai/pipecat/tree/main/examples) — small snippets that build on each other, introducing one or two concepts at a time
|
||||
- [Example apps](https://github.com/pipecat-ai/pipecat-examples) — complete applications that you can use as starting points for development
|
||||
|
||||
## 🛠️ Contributing to the framework
|
||||
|
||||
### Prerequisites
|
||||
|
||||
**Minimum Python Version:** 3.10
|
||||
**Recommended Python Version:** 3.12
|
||||
**Minimum Python Version:** 3.11
|
||||
**Recommended Python Version:** >= 3.12
|
||||
|
||||
### Setup Steps
|
||||
|
||||
@@ -166,7 +166,6 @@ You can get started with Pipecat running on your local machine, then move your a
|
||||
```bash
|
||||
uv sync --group dev --all-extras \
|
||||
--no-extra gstreamer \
|
||||
--no-extra krisp \
|
||||
--no-extra local
|
||||
```
|
||||
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
- Added `SarvamLLMService` with support for `sarvam-30b`, `sarvam-30b-16k`, `sarvam-105b` and `sarvam-105b-32k`
|
||||
@@ -1 +0,0 @@
|
||||
- Added `on_turn_context_created(context_id)` hook to `TTSService`. Override this to perform provider-specific setup (e.g. eagerly opening a server-side context) before text starts flowing. Called each time a new turn context ID is created.
|
||||
@@ -1 +0,0 @@
|
||||
- Added context prewarming path for `InworldTTSService` to improve first audio latency
|
||||
@@ -1 +0,0 @@
|
||||
- Added `KrispVivaVadAnalyzer` for Voice Activity Detection using the Krisp VIVA SDK (requires `krisp_audio`).
|
||||
@@ -1 +0,0 @@
|
||||
- Modified `InworldTTSService` to close context at end of turn instead of relying on idle timeout
|
||||
@@ -1 +0,0 @@
|
||||
- Added `XAIHttpTTSService` for text-to-speech using xAI's HTTP TTS API.
|
||||
@@ -1 +0,0 @@
|
||||
- Added Gemini 3 support to the Gemini Live service.
|
||||
@@ -1 +0,0 @@
|
||||
- `TTSService`: the default `stop_frame_timeout_s` (idle time before an automatic `TTSStoppedFrame` is pushed when `push_stop_frames=True`) has changed from `2.0` to `3.0` seconds.
|
||||
@@ -1 +0,0 @@
|
||||
- Added support for "developer" role messages in conversation context across all LLM adapters. For non-OpenAI services (Anthropic, Google, AWS Bedrock), "developer" messages are converted to "user" messages (use `system_instruction` to set the system instruction). For OpenAI services, "developer" messages pass through in conversation history. For the Responses API, they are kept as "developer" role (matching the existing "system" → "developer" conversion).
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ `GeminiLLMAdapter` now only treats `messages[0]` as the initial system message, matching all other adapters. Previously it searched for the first "system" message anywhere in the conversation history. A "system" message appearing later in the list will now be converted to "user" instead of being extracted as the system instruction.
|
||||
@@ -1 +0,0 @@
|
||||
- Fixed Gemini Live (`GoogleGeminiLiveLLMService`) not honoring `settings.system_instruction`. The system instruction was being read from a deprecated constructor parameter instead of the settings object, causing it to be silently ignored.
|
||||
@@ -1 +0,0 @@
|
||||
- Fixed `AWSBedrockLLMAdapter` sending an empty message list to the API when the only message in context was a system message. The lone system message is now converted to "user" role instead of being extracted, matching the existing Anthropic adapter behavior.
|
||||
@@ -1 +0,0 @@
|
||||
- Added `SmallestTTSService`, a WebSocket-based TTS service integration with Smallest AI's Waves API. Supports the Lightning v2 and v3.1 models with configurable voice, language, speed, consistency, similarity, and enhancement settings.
|
||||
@@ -1 +0,0 @@
|
||||
- Fixed `InworldTTSService` to fall back to full text when TTS timestamps are not received
|
||||
@@ -1 +0,0 @@
|
||||
- Added warnings in turn stop strategies when `VADParams.stop_secs` differs from the recommended default (0.2s) or when `stop_secs >= STT p99 latency`, which collapses the STT wait timeout to 0s and may cause delayed turn detection. The warnings guide developers to re-run the [stt-benchmark](https://github.com/pipecat-ai/stt-benchmark) with their VAD settings.
|
||||
@@ -1 +0,0 @@
|
||||
- Added `domain` parameter to `AssemblyAISTTSettings` for specialized recognition modes such as Medical Mode (`domain="medical-v1"`).
|
||||
@@ -1 +0,0 @@
|
||||
- Added `NovitaLLMService` for using Novita AI's LLM models via their OpenAI-compatible API.
|
||||
@@ -1 +0,0 @@
|
||||
- Added `cleanup()` method to `VADAnalyzer` and `VADController` so VAD analyzer resources are properly released when no longer needed. Custom `VADAnalyzer` subclasses can override `cleanup()` to free any held resources.
|
||||
@@ -1 +0,0 @@
|
||||
- Fixed Gemini Live pipeline hanging indefinitely when an `EndFrame` was deferred while waiting for the bot to finish responding and `turn_complete` never arrived. As a possible root-cause fix, `turn_complete` messages are now handled even if they lack `usage_metadata`. As a fallback, the deferred `EndFrame` now has a 30-second safety timeout.
|
||||
@@ -1 +0,0 @@
|
||||
- Fixed ElevenLabs WebSocket disconnections (1008 "Maximum simultaneous contexts exceeded") caused by rapid user interruptions. When interruptions arrived before any TTS text was generated, phantom contexts were created on the ElevenLabs server that were never closed, eventually exceeding the 5-context limit.
|
||||
@@ -1 +0,0 @@
|
||||
- Fixed the final sentence being dropped from the conversation context when using RTVI text input with non-word-timestamp TTS services. The `LLMFullResponseEndFrame` was racing ahead of the last `TTSTextFrame`, causing the `LLMAssistantAggregator` to finalize the context before the final sentence arrived.
|
||||
@@ -1 +0,0 @@
|
||||
- Added `on_end_of_turn` event handler to `AssemblyAISTTService`. This fires after the final transcript is pushed, providing a reliable hook for end-of-turn logic that doesn't race with `TranscriptionFrame`. Works in both Pipecat and AssemblyAI turn detection modes.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Realtime services (Gemini Live, OpenAI Realtime, Grok Realtime, Nova Sonic) now prefer `system_instruction` from service settings over an initial system message in the LLM context, matching the behavior of non-realtime services. Previously, context-provided system instructions took precedence. A warning is now logged when both are set.
|
||||
@@ -1 +0,0 @@
|
||||
- Fixed audio crackling and popping in recordings when both user and bot are speaking. `AudioBufferProcessor` no longer injects silence into a track's buffer while that track is actively producing audio, preventing mid-utterance interruptions in the recorded output.
|
||||
@@ -1 +0,0 @@
|
||||
- Bumped `nvidia-riva-client` minimum version to `>=2.25.1`.
|
||||
@@ -1 +0,0 @@
|
||||
- Upgraded `protobuf` from 5.x to 6.x (`>=6.31.1,<7`).
|
||||
@@ -1 +0,0 @@
|
||||
- Unrecognized language strings (e.g. Deepgram's `"multi"`) no longer produce a warning at startup. The log message has been downgraded to debug level since these are valid service-specific values that are passed through correctly.
|
||||
@@ -1 +0,0 @@
|
||||
- `GrokLLMService` and `GrokRealtimeLLMService` now live in the `pipecat.services.xai` module alongside `XAIHttpTTSService`, since all three use the same xAI API. Update imports from `pipecat.services.grok.*` to `pipecat.services.xai.*` (e.g. `from pipecat.services.xai.llm import GrokLLMService`).
|
||||
@@ -1 +0,0 @@
|
||||
- `pipecat.services.grok.llm`, `pipecat.services.grok.realtime.llm`, and `pipecat.services.grok.realtime.events` are deprecated. The old import paths still work but emit a `DeprecationWarning`; use `pipecat.services.xai.llm`, `pipecat.services.xai.realtime.llm`, and `pipecat.services.xai.realtime.events` instead.
|
||||
@@ -1 +0,0 @@
|
||||
- Added `DeepgramFluxSageMakerSTTService` for running Deepgram Flux speech-to-text on AWS SageMaker endpoints. Use with `ExternalUserTurnStrategies` to take advantage of Flux's turn detection.
|
||||
@@ -1 +0,0 @@
|
||||
- Fixed websocket TTS word timestamps so interrupted contexts cannot leak stale words or backward PTS values into later turns.
|
||||
@@ -1 +0,0 @@
|
||||
- Fixed a race condition in `InterruptibleTTSService` where, if `run_tts` had been invoked but `BotStartedSpeakingFrame` had not yet been received, a user interruption could allow stale audio to leak through.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ `TTSService.add_word_timestamps()` no longer supports the `"Reset"` and `"TTSStoppedFrame"` sentinel strings. If you have a custom TTS service that called `await self.add_word_timestamps([("Reset", 0)])` or `await self.add_word_timestamps([("TTSStoppedFrame", 0), ("Reset", 0)], ctx_id)`, replace them with `await self.append_to_audio_context(ctx_id, TTSStoppedFrame(context_id=ctx_id))` and let `_handle_audio_context` manage the word-timestamp reset automatically.
|
||||
@@ -1 +0,0 @@
|
||||
- Fixed Gemini Live local VAD mode (`GeminiVADParams(disabled=True)` with external VAD) not working. The bot now correctly detects user speech and signals turn boundaries to the Gemini API.
|
||||
@@ -1 +0,0 @@
|
||||
- Fixed Gemini Live message handling to process all `server_content` fields independently. Gemini 3.x can bundle multiple fields (e.g. `model_turn` and `output_transcription`) on the same message, but the previous `elif` chain only processed the first match, silently dropping the rest.
|
||||
@@ -1 +0,0 @@
|
||||
- Fixed `ServiceSwitcher` with `ServiceSwitcherStrategyFailover` incorrectly triggering failover when `ErrorFrame`s from other pipeline stages (e.g. TTS) propagated upstream through the switcher. Previously, any non-fatal error passing through would be misattributed to the active service and trigger an unwanted service switch. Now only errors originating from the switcher's own managed services trigger failover.
|
||||
@@ -1 +0,0 @@
|
||||
- Fixed `LiveKitOutputTransport` not clearing the `rtc.AudioSource` internal buffer on interruption, causing the bot to continue speaking for several seconds after being interrupted.
|
||||
@@ -1 +0,0 @@
|
||||
- Fixed a crash in OpenAI LLM processing when the provider returns `chunk.choices[0].delta.audio = None`, which caused `'NoneType' object has no attribute 'get'` errors during audio transcript handling.
|
||||
@@ -1 +0,0 @@
|
||||
- Fixed error floods in `DeepgramSTTService` when the WebSocket connection drops. With Deepgram SDK 6.x, `send_media()` raises exceptions on a dead connection instead of silently failing, causing every queued audio frame to log an error. Now `send_media()` failures are caught gracefully — a single warning is logged and audio frames are skipped until the existing reconnection logic restores the connection.
|
||||
@@ -1 +0,0 @@
|
||||
- Removed `SambaNovaSTTService`. SambaNova no longer offers speech-to-text audio models. Use another STT provider instead.
|
||||
@@ -1 +0,0 @@
|
||||
- Added `Mem0MemoryService.get_memories()` convenience method for retrieving all stored memories outside the pipeline (e.g. to build a personalized greeting at connection time). This avoids the need to manually handle client type branching, filter construction, and async wrapping.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Bumped `mem0ai` dependency from `~=0.1.94` to `>=1.0.8,<2`. Users of the `mem0` extra will need to update their mem0ai package.
|
||||
@@ -1 +0,0 @@
|
||||
- Fixed `Mem0MemoryService` failing to store messages when the context contained system or developer role messages. The Mem0 API only accepts user and assistant roles, so other roles are now filtered out before storing.
|
||||
@@ -1 +0,0 @@
|
||||
- `Mem0MemoryService` no longer blocks the event loop during memory storage and retrieval. All Mem0 API calls now run in a background thread, and message storage is fire-and-forget so it doesn't delay downstream processing.
|
||||
@@ -1 +0,0 @@
|
||||
- Added missing `on_dtmf_event` callback to `LemonSliceTransportClient.setup()` `DailyCallbacks` construction, fixing a `ValidationError` at pipeline setup time.
|
||||
@@ -1 +0,0 @@
|
||||
- Fixed an issue in `InworldTTSService` where, in cases of fast interruption, we would continue receiving audio from the previous context.
|
||||
@@ -1 +0,0 @@
|
||||
- Fixed a word timestamp interleaving issue in `InworldTTSService` when processing multiple sentences.
|
||||
@@ -1 +0,0 @@
|
||||
- Fixed duplicate `TTSStoppedFrame` being pushed in TTS services using `push_stop_frames=True`. When the stop-frame timeout fired, a second `TTSStoppedFrame` could be pushed after the normal one at context completion.
|
||||
@@ -1 +0,0 @@
|
||||
- `RimeTTSService` now handles Rime's `done` WebSocket message to complete audio contexts immediately, eliminating the 3-second idle timeout that previously added latency at the end of each utterance.
|
||||
@@ -1 +0,0 @@
|
||||
- ⚠️ Fixed `DeepgramSTTService` compatibility with deepgram-sdk 6.1.0. The SDK now requires explicit message objects for `send_keep_alive()`, `send_close_stream()`, and `send_finalize()`. The minimum deepgram-sdk version is now 6.1.0.
|
||||
1
changelog/4253.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `MistralSTTService` for real-time speech-to-text using Mistral's Voxtral Realtime API (`voxtral-mini-transcribe-realtime-2602`). Supports streaming transcription with interim results, automatic language detection, and VAD-driven utterance lifecycle.
|
||||
1
changelog/4304.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed `pipecat-ai[tavus]` not installing the required `daily-python` dependency. Installing the `tavus` extra now correctly pulls in `pipecat-ai[daily]`.
|
||||
1
changelog/4311.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- STT services now reconnect safely when settings change: reconnection is deferred until the current user turn ends (i.e., until `UserStoppedSpeakingFrame` is received) rather than interrupting an active speech session. Audio frames received while the reconnect is in progress are buffered and replayed once the new connection is ready. `CartesiaSTTService` and `DeepgramSTTService` both use this new behavior.
|
||||
1
changelog/4311.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed audio loss and potential errors when STT settings were updated mid-speech. Previously, `CartesiaSTTService` and `DeepgramSTTService` would immediately disconnect and reconnect when settings changed, dropping any in-flight audio. Reconnection is now deferred until the user stops speaking, and audio arriving during the reconnect window is buffered and replayed.
|
||||
1
changelog/4313.added.2.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `buttons` field to `OutputDTMFFrame` and `OutputDTMFUrgentFrame` for sending multi-key DTMF sequences as a `list[KeypadEntry]`. Use `OutputDTMFFrame.from_string("123#")` (or the equivalent on `OutputDTMFUrgentFrame`) to build one from a dial string, and `to_string()` to convert back.
|
||||
1
changelog/4313.added.3.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `DailyOutputDTMFFrame` and `DailyOutputDTMFUrgentFrame` frames. In addition to the inherited `buttons`, they accept `session_id`, `digit_duration_ms` and `method`, which are forwarded to Daily's `send_dtmf` as `sessionId`, `digitDurationMs` and `method`.
|
||||
1
changelog/4313.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `DailyTransport.send_dtmf()` to expose the Daily call client's DTMF sending capability, enabling applications to send tones during a call (e.g. IVR navigation).
|
||||
@@ -1,108 +1,60 @@
|
||||
# Pipecat Documentation
|
||||
# Pipecat API Documentation
|
||||
|
||||
This directory contains the source files for auto-generating Pipecat's server API reference documentation.
|
||||
|
||||
## Setup
|
||||
|
||||
1. Install documentation dependencies:
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
2. Make the build scripts executable:
|
||||
|
||||
```bash
|
||||
chmod +x build-docs.sh rtd-test.py
|
||||
```
|
||||
This directory contains the source files for auto-generating Pipecat's API reference documentation.
|
||||
|
||||
## Building Documentation
|
||||
|
||||
From this directory, you can build the documentation in several ways:
|
||||
|
||||
### Local Build
|
||||
From this directory:
|
||||
|
||||
```bash
|
||||
# Using the build script (automatically opens docs when done)
|
||||
./build-docs.sh
|
||||
# Build docs (warnings shown but don't fail the build)
|
||||
cd docs/api && uv run ./build-docs.sh
|
||||
|
||||
# Or directly with sphinx-build
|
||||
sphinx-build -b html . _build/html -W --keep-going
|
||||
# Build with strict mode (warnings treated as errors)
|
||||
cd docs/api && uv run ./build-docs.sh --strict
|
||||
```
|
||||
|
||||
### ReadTheDocs Test Build
|
||||
The build script will:
|
||||
|
||||
To test the documentation build process exactly as it would run on ReadTheDocs:
|
||||
|
||||
```bash
|
||||
./rtd-test.py
|
||||
```
|
||||
|
||||
This script:
|
||||
|
||||
- Creates a fresh virtual environment
|
||||
- Installs all dependencies as specified in requirements files
|
||||
- Handles conflicting dependencies (like grpcio versions for Riva)
|
||||
- Builds the documentation in an isolated environment
|
||||
- Provides detailed logging of the build process
|
||||
|
||||
Use this script to verify your documentation will build correctly on ReadTheDocs before pushing changes.
|
||||
|
||||
## Viewing Documentation
|
||||
|
||||
The built documentation will be available at `_build/html/index.html`. To open:
|
||||
|
||||
```bash
|
||||
# On MacOS
|
||||
open _build/html/index.html
|
||||
|
||||
# On Linux
|
||||
xdg-open _build/html/index.html
|
||||
|
||||
# On Windows
|
||||
start _build/html/index.html
|
||||
```
|
||||
1. Install documentation dependencies via `uv sync --group docs`
|
||||
2. Clean previous build output
|
||||
3. Run `sphinx-build` to generate HTML documentation
|
||||
4. Open the result in your browser (macOS)
|
||||
|
||||
## Directory Structure
|
||||
|
||||
```
|
||||
.
|
||||
├── api/ # Auto-generated API documentation
|
||||
├── _build/ # Built documentation
|
||||
├── _static/ # Static files (images, css, etc.)
|
||||
├── conf.py # Sphinx configuration
|
||||
├── api/ # Auto-generated API documentation (created during build)
|
||||
├── _build/ # Built documentation output
|
||||
├── conf.py # Sphinx configuration (mock imports, extensions, etc.)
|
||||
├── index.rst # Main documentation entry point
|
||||
├── requirements-base.txt # Base documentation dependencies
|
||||
├── requirements-riva.txt # Riva-specific dependencies
|
||||
├── build-docs.sh # Local build script
|
||||
└── rtd-test.py # ReadTheDocs test build script
|
||||
└── rtd-test.sh # ReadTheDocs test build script (uses pip, not uv)
|
||||
```
|
||||
|
||||
## Notes
|
||||
## How It Works
|
||||
|
||||
- Documentation is auto-generated from Python docstrings
|
||||
- Service modules are automatically detected and included
|
||||
- The build process matches our ReadTheDocs configuration
|
||||
- Warnings are treated as errors (-W flag) to maintain consistency
|
||||
- The --keep-going flag ensures all errors are reported
|
||||
- Dependencies are split into multiple requirements files to handle version conflicts
|
||||
- `conf.py` runs `sphinx-apidoc` during Sphinx's `setup()` phase to generate `.rst` files from Python source
|
||||
- Sphinx autodoc imports each module to extract docstrings
|
||||
- Modules with unavailable dependencies are listed in `autodoc_mock_imports` in `conf.py`
|
||||
- Napoleon extension converts Google-style docstrings to reStructuredText
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
If you encounter missing service modules:
|
||||
**Module not appearing in docs:**
|
||||
|
||||
1. Verify the service is installed with its extras: `pip install pipecat-ai[service-name]`
|
||||
2. Check the build logs for import errors
|
||||
3. Ensure the service module is properly initialized in the package
|
||||
4. Run `./rtd-test.py` to test in an isolated environment matching ReadTheDocs
|
||||
1. Check the build output for `autodoc: failed to import` warnings
|
||||
2. If the module has an unresolvable import dependency, add it to `autodoc_mock_imports` in `conf.py`
|
||||
3. Verify the module is importable: `uv run python -c "import pipecat.module.name"`
|
||||
|
||||
For dependency conflicts:
|
||||
**Duplicate object warnings:**
|
||||
|
||||
1. Check the requirements files for version specifications
|
||||
2. Use `rtd-test.py` to verify dependency resolution
|
||||
3. Consider adding service-specific requirements files if needed
|
||||
These come from re-export modules or Sphinx discovering the same class through multiple import paths. Usually cosmetic.
|
||||
|
||||
For more information:
|
||||
**Docstring formatting warnings:**
|
||||
|
||||
- [ReadTheDocs Configuration](.readthedocs.yaml)
|
||||
- [Sphinx Documentation](https://www.sphinx-doc.org/)
|
||||
Docstrings use reStructuredText, not Markdown. Common issues:
|
||||
- Use `Example::` with indented code blocks, not `` ```python ``
|
||||
- Ensure blank lines between directive content and subsequent sections
|
||||
- Use `Parameters:` (not `Attributes:`) for dataclass field documentation to avoid duplicate entries
|
||||
|
||||
@@ -1,8 +1,16 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Usage: ./build-docs.sh [--strict]
|
||||
# --strict: Treat warnings as errors (default: warnings only)
|
||||
|
||||
SPHINX_OPTS=""
|
||||
if [ "$1" = "--strict" ]; then
|
||||
SPHINX_OPTS="-W --keep-going"
|
||||
fi
|
||||
|
||||
# Build docs using uv
|
||||
echo "Installing dependencies with uv..."
|
||||
uv sync --group docs --all-extras --no-extra krisp --no-extra gstreamer --no-extra local_smart_turn --no-extra moondream --no-extra riva --no-extra mlx-whisper
|
||||
uv sync --group docs --all-extras --no-extra gstreamer --no-extra local_smart_turn --no-extra moondream --no-extra mlx-whisper
|
||||
|
||||
# Check if sphinx-build is available
|
||||
if ! uv run sphinx-build --version &> /dev/null; then
|
||||
@@ -14,8 +22,7 @@ fi
|
||||
rm -rf _build
|
||||
|
||||
echo "Building documentation..."
|
||||
# Build docs matching ReadTheDocs configuration
|
||||
uv run sphinx-build -b html -d _build/doctrees . _build/html -W --keep-going
|
||||
uv run sphinx-build -b html -d _build/doctrees . _build/html $SPHINX_OPTS
|
||||
|
||||
if [ $? -eq 0 ]; then
|
||||
echo "Documentation built successfully!"
|
||||
|
||||
@@ -4,6 +4,19 @@ import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
# Fix Pydantic v2 + Sphinx autodoc incompatibility: ConfigDict(extra="allow") fails
|
||||
# during Sphinx's import because __pydantic_extra__ annotation on BaseModel resolves to
|
||||
# `Dict[str, Any] | None` whose get_origin() is Union, not dict. Patch the check to
|
||||
# accept Union-wrapped dict types (i.e., Optional[Dict[str, Any]]).
|
||||
import pydantic._internal._generate_schema as _pydantic_gs
|
||||
|
||||
_ORIG_DICT_TYPES = _pydantic_gs.DICT_TYPES
|
||||
# Expand the accepted types to include Union (Optional[Dict[str, Any]])
|
||||
import types
|
||||
import typing
|
||||
|
||||
_pydantic_gs.DICT_TYPES = [*_ORIG_DICT_TYPES, typing.Union, types.UnionType]
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
||||
logger = logging.getLogger("sphinx-build")
|
||||
@@ -48,8 +61,6 @@ autodoc_default_options = {
|
||||
# Mock imports for optional dependencies
|
||||
autodoc_mock_imports = [
|
||||
# Krisp - has build issues on some platforms
|
||||
"pipecat_ai_krisp",
|
||||
"krisp",
|
||||
"krisp_audio",
|
||||
# System-specific GUI libraries
|
||||
"_tkinter",
|
||||
@@ -78,16 +89,6 @@ autodoc_mock_imports = [
|
||||
"einops",
|
||||
"intel_extension_for_pytorch",
|
||||
"huggingface_hub",
|
||||
# riva dependencies
|
||||
"riva",
|
||||
"riva.client",
|
||||
"riva.client.Auth",
|
||||
"riva.client.ASRService",
|
||||
"riva.client.StreamingRecognitionConfig",
|
||||
"riva.client.RecognitionConfig",
|
||||
"riva.client.AudioEncoding",
|
||||
"riva.client.proto.riva_tts_pb2",
|
||||
"riva.client.SpeechSynthesisService",
|
||||
# MLX dependencies (Apple Silicon specific)
|
||||
"mlx",
|
||||
"mlx_whisper", # Note: might need underscore format too
|
||||
@@ -98,7 +99,6 @@ autodoc_mock_imports = [
|
||||
"cartesia",
|
||||
"camb",
|
||||
"sarvamai",
|
||||
"openpipe",
|
||||
"openai.types.beta.realtime",
|
||||
"langchain_core",
|
||||
"langchain_core.messages",
|
||||
@@ -110,6 +110,8 @@ autodoc_mock_imports = [
|
||||
"fastapi.middleware",
|
||||
"fastapi.responses",
|
||||
"uvicorn",
|
||||
# Deepgram dependencies
|
||||
"deepgram",
|
||||
]
|
||||
|
||||
# HTML output settings
|
||||
@@ -136,6 +138,8 @@ def import_core_modules():
|
||||
"pipecat.runner",
|
||||
"pipecat.serializers",
|
||||
"pipecat.transcriptions",
|
||||
"pipecat.turns",
|
||||
"pipecat.extensions",
|
||||
"pipecat.utils",
|
||||
]
|
||||
|
||||
@@ -180,7 +184,6 @@ def setup(app):
|
||||
logger.info(f"Source directory: {source_dir}")
|
||||
|
||||
excludes = [
|
||||
str(project_root / "src/pipecat/pipeline/to_be_updated"),
|
||||
str(project_root / "src/pipecat/examples"),
|
||||
str(project_root / "src/pipecat/tests"),
|
||||
"**/test_*.py",
|
||||
|
||||
@@ -32,4 +32,5 @@ Quick Links
|
||||
Services <api/pipecat.services>
|
||||
Transcriptions <api/pipecat.transcriptions>
|
||||
Transports <api/pipecat.transports>
|
||||
Turns <api/pipecat.turns>
|
||||
Utils <api/pipecat.utils>
|
||||
|
||||
@@ -121,6 +121,9 @@ MINIMAX_GROUP_ID=...
|
||||
# Mistral
|
||||
MISTRAL_API_KEY=...
|
||||
|
||||
# Nebius
|
||||
NEBIUS_API_KEY=...
|
||||
|
||||
# Neuphonic
|
||||
NEUPHONIC_API_KEY=...
|
||||
|
||||
@@ -133,9 +136,6 @@ NVIDIA_API_KEY=...
|
||||
# OpenAI
|
||||
OPENAI_API_KEY=...
|
||||
|
||||
# OpenPipe
|
||||
OPENPIPE_API_KEY=...
|
||||
|
||||
# OpenRouter
|
||||
OPENROUTER_API_KEY=...
|
||||
|
||||
|
||||
@@ -1,31 +1,150 @@
|
||||
# Pipecat Examples
|
||||
|
||||
This directory contains examples to help you learn how to build with Pipecat.
|
||||
This directory contains examples showing how to build voice and multimodal agents with Pipecat.
|
||||
|
||||
## Getting Started
|
||||
## Setup
|
||||
|
||||
New to Pipecat? Start here:
|
||||
1. Follow the [README](https://github.com/pipecat-ai/pipecat/blob/main/README.md#%EF%B8%8F-contributing-to-the-framework) steps to get your local environment configured.
|
||||
|
||||
- **[Quickstart](quickstart/)** - Get your first voice AI bot running in 5 minutes _(coming soon)_
|
||||
- **[Client/Server Web](client-server-web/)** - Learn to build web applications with Pipecat's client SDKs _(coming soon)_
|
||||
- **[Phone Bot with Twilio](phone-bot-twilio/)** - Connect your bot to a phone number _(coming soon)_
|
||||
> **Run from root directory**: Make sure you are running the steps from the root directory.
|
||||
|
||||
## Foundational Examples
|
||||
> **Using local audio?**: The `LocalAudioTransport` depends on the `portaudio` system library. Install it before using the transport.
|
||||
|
||||
Single-file examples that introduce core Pipecat concepts one at a time. These examples:
|
||||
2. Copy the [`env.example`](../env.example) file and add API keys for services you plan to use:
|
||||
|
||||
- Build on each other progressively
|
||||
- Focus on specific features or integrations
|
||||
- Are used for testing with every Pipecat release
|
||||
```bash
|
||||
cp env.example .env
|
||||
# Edit .env with your API keys
|
||||
```
|
||||
|
||||
See the **[Foundational Examples README](foundational/)** for the complete list.
|
||||
3. Run any example:
|
||||
|
||||
## More Advanced Examples
|
||||
```bash
|
||||
uv run python getting-started/01-say-one-thing.py
|
||||
```
|
||||
|
||||
Ready to explore complex use cases? Visit **[pipecat-examples](https://github.com/pipecat-ai/pipecat-examples)** for:
|
||||
4. Open the web interface at http://localhost:7860/client/ and click "Connect"
|
||||
|
||||
- Production-ready applications
|
||||
- Multi-platform client implementations
|
||||
- Telephony integrations
|
||||
- Multimodal and creative applications
|
||||
- Deployment and monitoring examples
|
||||
## Running examples with other transports
|
||||
|
||||
Most examples support running with other transports, like Twilio or Daily.
|
||||
|
||||
### Daily
|
||||
|
||||
You need to create a Daily account at https://dashboard.daily.co/u/signup. Once signed up, you can create your own room from the dashboard and set the environment variables `DAILY_ROOM_URL` and `DAILY_API_KEY`. Alternatively, you can let the example create a room for you (this still requires the `DAILY_API_KEY` environment variable). Then, start any example with `-t daily`:
|
||||
|
||||
```bash
|
||||
uv run getting-started/06-voice-agent.py -t daily
|
||||
```
|
||||
|
||||
### Twilio
|
||||
|
||||
It is also possible to run the example through a Twilio phone number. You will need to set up a few things:
|
||||
|
||||
1. Install and run [ngrok](https://ngrok.com/download).
|
||||
|
||||
```bash
|
||||
ngrok http 7860
|
||||
```
|
||||
|
||||
2. Configure your Twilio phone number. One way is to set up a TwiML app and set the request URL to the ngrok URL from step (1). Then, set your phone number to use the new TwiML app.
|
||||
|
||||
Then, run the example with:
|
||||
|
||||
```bash
|
||||
uv run getting-started/06-voice-agent.py -t twilio -x NGROK_HOST_NAME
|
||||
```
|
||||
|
||||
## Directory Structure
|
||||
|
||||
### [`getting-started/`](./getting-started/)
|
||||
|
||||
Progressive introduction to Pipecat, from minimal TTS to a full voice agent with function calling.
|
||||
|
||||
### [`voice/`](./voice/)
|
||||
|
||||
Full STT + LLM + TTS voice agent pipelines showcasing different speech service providers (Deepgram, ElevenLabs, Cartesia, etc.)
|
||||
|
||||
### [`function-calling/`](./function-calling/)
|
||||
|
||||
Function calling with different LLM providers (OpenAI, Anthropic, Google, etc.)
|
||||
|
||||
### [`transcription/`](./transcription/)
|
||||
|
||||
Speech-to-text examples with various STT providers.
|
||||
|
||||
### [`vision/`](./vision/)
|
||||
|
||||
Image description and vision capabilities with different multimodal LLMs.
|
||||
|
||||
### [`realtime/`](./realtime/)
|
||||
|
||||
Realtime and multimodal live APIs (OpenAI Realtime, Gemini Live, AWS Nova Sonic, Ultravox, Grok).
|
||||
|
||||
### [`persistent-context/`](./persistent-context/)
|
||||
|
||||
Maintaining conversation context across sessions with different providers.
|
||||
|
||||
### [`context-summarization/`](./context-summarization/)
|
||||
|
||||
Summarizing conversation context to manage token limits.
|
||||
|
||||
### [`update-settings/`](./update-settings/)
|
||||
|
||||
Changing service settings at runtime, organized by service type:
|
||||
|
||||
- **[`stt/`](./update-settings/stt/)** — Speech-to-text settings
|
||||
- **[`tts/`](./update-settings/tts/)** — Text-to-speech settings
|
||||
- **[`llm/`](./update-settings/llm/)** — LLM settings
|
||||
|
||||
### [`turn-management/`](./turn-management/)
|
||||
|
||||
Turn detection, interruption handling, and user input management.
|
||||
|
||||
### [`thinking-and-mcp/`](./thinking-and-mcp/)
|
||||
|
||||
LLM thinking/reasoning modes and MCP (Model Context Protocol) tool server integration.
|
||||
|
||||
### [`transports/`](./transports/)
|
||||
|
||||
Transport layer examples (WebRTC, Daily, LiveKit).
|
||||
|
||||
### [`video-avatar/`](./video-avatar/)
|
||||
|
||||
Video avatar integrations (Tavus, HeyGen, Simli, LemonSlice).
|
||||
|
||||
### [`video-processing/`](./video-processing/)
|
||||
|
||||
Video processing, mirroring, GStreamer, and custom video tracks.
|
||||
|
||||
### [`audio/`](./audio/)
|
||||
|
||||
Audio recording, background sounds, and sound effects.
|
||||
|
||||
### [`observability/`](./observability/)
|
||||
|
||||
Pipeline monitoring: observers, heartbeats, and Sentry metrics.
|
||||
|
||||
### [`rag/`](./rag/)
|
||||
|
||||
Retrieval-augmented generation, grounding, and long-term memory (Mem0, Gemini).
|
||||
|
||||
### [`features/`](./features/)
|
||||
|
||||
Miscellaneous features: wake phrases, live translation, service switching, voice switching, and more.
|
||||
|
||||
## Advanced Usage
|
||||
|
||||
### Customizing Network Settings
|
||||
|
||||
```bash
|
||||
uv run python <example-name> --host 0.0.0.0 --port 8080
|
||||
```
|
||||
|
||||
### Troubleshooting
|
||||
|
||||
- **No audio/video**: Check browser permissions for microphone and camera
|
||||
- **Connection errors**: Verify API keys in `.env` file
|
||||
- **Port conflicts**: Use `--port` to change the port
|
||||
|
||||
For more examples, visit the [pipecat-examples repository](https://github.com/pipecat-ai/pipecat-examples).
|
||||
|
||||
|
Before Width: | Height: | Size: 63 KiB After Width: | Height: | Size: 63 KiB |
|
Before Width: | Height: | Size: 1.1 MiB After Width: | Height: | Size: 1.1 MiB |
|
Before Width: | Height: | Size: 871 KiB After Width: | Height: | Size: 871 KiB |
|
Before Width: | Height: | Size: 868 KiB After Width: | Height: | Size: 868 KiB |
|
Before Width: | Height: | Size: 868 KiB After Width: | Height: | Size: 868 KiB |
|
Before Width: | Height: | Size: 870 KiB After Width: | Height: | Size: 870 KiB |
|
Before Width: | Height: | Size: 871 KiB After Width: | Height: | Size: 871 KiB |
|
Before Width: | Height: | Size: 871 KiB After Width: | Height: | Size: 871 KiB |
|
Before Width: | Height: | Size: 872 KiB After Width: | Height: | Size: 872 KiB |
|
Before Width: | Height: | Size: 868 KiB After Width: | Height: | Size: 868 KiB |
|
Before Width: | Height: | Size: 33 KiB After Width: | Height: | Size: 33 KiB |
|
Before Width: | Height: | Size: 30 KiB After Width: | Height: | Size: 30 KiB |
@@ -34,7 +34,7 @@ from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
load_dotenv(override=True)
|
||||
|
||||
OFFICE_SOUND_FILE = os.path.join(
|
||||
os.path.dirname(__file__), "assets", "office-ambience-24000-mono.mp3"
|
||||
os.path.dirname(__file__), "../assets", "office-ambience-24000-mono.mp3"
|
||||
)
|
||||
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
@@ -36,7 +36,7 @@ from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.google import GoogleLLMService
|
||||
from pipecat.services.google.llm import GoogleLLMService
|
||||
from pipecat.services.llm_service import FunctionCallParams
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||