Compare commits
411 Commits
cb/extra-l
...
mb/flush-a
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
10e31958fd | ||
|
|
373ffc48b6 | ||
|
|
5ae6229c03 | ||
|
|
615cbe966a | ||
|
|
3caccab608 | ||
|
|
78d3c77369 | ||
|
|
44b3e6cefa | ||
|
|
20ea073398 | ||
|
|
154fe65011 | ||
|
|
61f534ca34 | ||
|
|
a91c26785f | ||
|
|
d7e93551d2 | ||
|
|
06c742a2ad | ||
|
|
55b0797fd5 | ||
|
|
21443b9a08 | ||
|
|
4b167a3c3d | ||
|
|
2df77430aa | ||
|
|
2d114b15f9 | ||
|
|
26000b616d | ||
|
|
710eebab09 | ||
|
|
532423eb4c | ||
|
|
bb29e50adb | ||
|
|
4048d6782b | ||
|
|
76d36a312b | ||
|
|
2a75373c04 | ||
|
|
a840b0e815 | ||
|
|
ebcde719a6 | ||
|
|
5c912927bb | ||
|
|
0e55db054e | ||
|
|
5967ac0d4f | ||
|
|
1451483cf7 | ||
|
|
c14b85c12b | ||
|
|
9f3c0219d7 | ||
|
|
ec36fef26e | ||
|
|
5f1848d24b | ||
|
|
d6867bd12f | ||
|
|
17a1f30572 | ||
|
|
8e0dc1f256 | ||
|
|
b9100beee3 | ||
|
|
b8bc3d2565 | ||
|
|
3213e85b7d | ||
|
|
de3bcd64c4 | ||
|
|
ad7f1eec12 | ||
|
|
29310b4e92 | ||
|
|
2f4d36a146 | ||
|
|
6c9bb782b1 | ||
|
|
010d9103d4 | ||
|
|
12131eb7c5 | ||
|
|
80b830322a | ||
|
|
8db9d16174 | ||
|
|
1c92fab1fb | ||
|
|
974717d1b9 | ||
|
|
59fb631390 | ||
|
|
4824220260 | ||
|
|
55a338614d | ||
|
|
f033046963 | ||
|
|
6018fc068c | ||
|
|
d5b634301f | ||
|
|
a37eb1049d | ||
|
|
803ea9d8bc | ||
|
|
499bc25217 | ||
|
|
53d403af4b | ||
|
|
a0a8ea1641 | ||
|
|
26c68ccd7c | ||
|
|
fa010c8644 | ||
|
|
d58f398bc4 | ||
|
|
11383a86a1 | ||
|
|
daa52ff8df | ||
|
|
a5f41e22f7 | ||
|
|
530bb5233d | ||
|
|
4a64e09f6c | ||
|
|
74582bb8d5 | ||
|
|
1ca2101e3a | ||
|
|
e80311c323 | ||
|
|
2f24c422b6 | ||
|
|
0d0b9fddef | ||
|
|
1753cc99f4 | ||
|
|
4f8b036abe | ||
|
|
f83c89c202 | ||
|
|
bb89a036e5 | ||
|
|
b994a03466 | ||
|
|
27161f8e3b | ||
|
|
8acf9a488b | ||
|
|
96c6aeaada | ||
|
|
6722aae598 | ||
|
|
66564392a6 | ||
|
|
f258f5ab66 | ||
|
|
f8f0578c3d | ||
|
|
aa60a413f3 | ||
|
|
3e66f2378d | ||
|
|
9a50f33e36 | ||
|
|
4bd5e9c0a7 | ||
|
|
12092c8715 | ||
|
|
92cc6d39f2 | ||
|
|
34a50033cb | ||
|
|
e60b65228b | ||
|
|
e74864335b | ||
|
|
27a088a457 | ||
|
|
cfe72143b8 | ||
|
|
36a729cbfe | ||
|
|
d2f006682c | ||
|
|
fb7fe540f5 | ||
|
|
1ec68bd071 | ||
|
|
4536d03e82 | ||
|
|
699704732c | ||
|
|
376d969a77 | ||
|
|
68789dfcf0 | ||
|
|
fe9fc61c4e | ||
|
|
6028f0f23a | ||
|
|
e9a0959e28 | ||
|
|
f66be2cfa7 | ||
|
|
f818bed58f | ||
|
|
07b9be5308 | ||
|
|
40c2452d6e | ||
|
|
30cdd1b71a | ||
|
|
2110b79507 | ||
|
|
fc544fa61c | ||
|
|
976fe95304 | ||
|
|
408270b647 | ||
|
|
1dfb75bc9d | ||
|
|
cefc2a1088 | ||
|
|
3b9b9200ea | ||
|
|
d6f29a0f4b | ||
|
|
5b762d11ef | ||
|
|
2f3e2da6b9 | ||
|
|
45058d4a94 | ||
|
|
5b637bd826 | ||
|
|
2d4fd7e903 | ||
|
|
b5662520aa | ||
|
|
af45c170b5 | ||
|
|
65f548b2ec | ||
|
|
b29ab8c608 | ||
|
|
d6dc37f0b6 | ||
|
|
12bce2e8c0 | ||
|
|
4acf7296e0 | ||
|
|
98706d429c | ||
|
|
41720b1a13 | ||
|
|
3ef4245166 | ||
|
|
3bb0797922 | ||
|
|
7c7b4c52af | ||
|
|
01f083b7fc | ||
|
|
91fcaebe25 | ||
|
|
9c5fe5c85e | ||
|
|
7e5e167a4b | ||
|
|
d04c4b36f3 | ||
|
|
a811e53626 | ||
|
|
df57202a05 | ||
|
|
69e6f3fdb7 | ||
|
|
6809254963 | ||
|
|
81093d3bed | ||
|
|
d9a67164f6 | ||
|
|
98259af54e | ||
|
|
039d144c79 | ||
|
|
d0f67fc189 | ||
|
|
6e3f96aa83 | ||
|
|
293677588d | ||
|
|
77e777b1ce | ||
|
|
7e7926059c | ||
|
|
c948754eff | ||
|
|
83f1a8830d | ||
|
|
80f8e05fcf | ||
|
|
afd1a1e80b | ||
|
|
84ac88cad7 | ||
|
|
211163e5c7 | ||
|
|
1b0bcebef6 | ||
|
|
89736b03c4 | ||
|
|
4edda718ed | ||
|
|
22a62edc9e | ||
|
|
50b6cc8135 | ||
|
|
45cf36925a | ||
|
|
83a71e1fec | ||
|
|
e809c8680e | ||
|
|
c926063d74 | ||
|
|
0334550356 | ||
|
|
90b9dce710 | ||
|
|
a5cdd5f1b8 | ||
|
|
5f937b8479 | ||
|
|
b45f7fee6f | ||
|
|
01c06c5cac | ||
|
|
329e89c1d9 | ||
|
|
883410d8ac | ||
|
|
1f5b790dd0 | ||
|
|
a107b1cb4b | ||
|
|
63950912f0 | ||
|
|
2ce9402571 | ||
|
|
f6912c0f9a | ||
|
|
633a4d4c58 | ||
|
|
67da745bb3 | ||
|
|
5126d4de92 | ||
|
|
426d7ac213 | ||
|
|
9115692c72 | ||
|
|
c26fe3f277 | ||
|
|
47b059d387 | ||
|
|
a49d81e519 | ||
|
|
b3a575c7c7 | ||
|
|
790d0c1256 | ||
|
|
ee7e0dc3f7 | ||
|
|
f53ee79ddb | ||
|
|
aeadb40c3f | ||
|
|
cacb07f4c2 | ||
|
|
0b91d821fb | ||
|
|
af66a43056 | ||
|
|
e006dcf172 | ||
|
|
8588f8b0d8 | ||
|
|
bff54547b0 | ||
|
|
b2754bf208 | ||
|
|
9a4942b0d0 | ||
|
|
ed6201910b | ||
|
|
ac5ebc587e | ||
|
|
dff4c54e57 | ||
|
|
c744409651 | ||
|
|
7578fbeaef | ||
|
|
5909dff423 | ||
|
|
a6502df72c | ||
|
|
e0d24d7fc0 | ||
|
|
99779046a8 | ||
|
|
67cdc0063a | ||
|
|
b28f752afa | ||
|
|
463078e375 | ||
|
|
84510fd521 | ||
|
|
9f6a1c093a | ||
|
|
b602e78625 | ||
|
|
7c815121ea | ||
|
|
16a107948b | ||
|
|
839aa7d935 | ||
|
|
4cbcfe2b0b | ||
|
|
91a628d1ba | ||
|
|
50288eeaaa | ||
|
|
e1f2bbceb3 | ||
|
|
8bdd7ed0ed | ||
|
|
1b7dfe8126 | ||
|
|
d1ee851a65 | ||
|
|
0358673b46 | ||
|
|
16fe1b10e9 | ||
|
|
f001819df8 | ||
|
|
dceec60186 | ||
|
|
b96979a4ed | ||
|
|
745c40def4 | ||
|
|
42ab62716d | ||
|
|
16ba2010aa | ||
|
|
ec0ca46617 | ||
|
|
6ff1f526ff | ||
|
|
84143cc80c | ||
|
|
229dccedc6 | ||
|
|
68aaa1f8f4 | ||
|
|
f110a45c85 | ||
|
|
1e8a86de63 | ||
|
|
ee93e2a2b1 | ||
|
|
2e87a019a8 | ||
|
|
687b3d9d4c | ||
|
|
397768d872 | ||
|
|
24cdcd74e6 | ||
|
|
5d6370690c | ||
|
|
9f728aa623 | ||
|
|
32d8f6153f | ||
|
|
8c2071f248 | ||
|
|
a9c2197dc6 | ||
|
|
ce0358804b | ||
|
|
66a6a6a295 | ||
|
|
9f1732c390 | ||
|
|
b44ddf2456 | ||
|
|
17420f4d0c | ||
|
|
6cb55ec2cb | ||
|
|
e2b4554a54 | ||
|
|
fd68b82e48 | ||
|
|
cc90f5ab9f | ||
|
|
08f40d9179 | ||
|
|
80e1325621 | ||
|
|
ed76a5bfa5 | ||
|
|
69b0d9035f | ||
|
|
dcc63dd648 | ||
|
|
2d08f42870 | ||
|
|
0814c0bc82 | ||
|
|
28e233b195 | ||
|
|
6e4d2d6ade | ||
|
|
266135ec54 | ||
|
|
d81aa48262 | ||
|
|
8c7752fbc2 | ||
|
|
77fb63372a | ||
|
|
5a8279d3c2 | ||
|
|
4db620198a | ||
|
|
d35f4c6b99 | ||
|
|
0a990b2aaa | ||
|
|
97586b132d | ||
|
|
8020db350e | ||
|
|
54f64b8dad | ||
|
|
8f8a3ae7f9 | ||
|
|
344aff5681 | ||
|
|
0d2e90cff1 | ||
|
|
1a8dd6b713 | ||
|
|
2dc585aee0 | ||
|
|
a64fa44811 | ||
|
|
baeb83484d | ||
|
|
b0c3f80963 | ||
|
|
eb3c9b1e75 | ||
|
|
ad4cbdb1ec | ||
|
|
32baee924b | ||
|
|
9cc53509d1 | ||
|
|
2c62d3bf32 | ||
|
|
b06b16adb7 | ||
|
|
cd52d73027 | ||
|
|
c9d8c572c7 | ||
|
|
d9439fd398 | ||
|
|
081abcedb3 | ||
|
|
1455e24ad1 | ||
|
|
4613cf4790 | ||
|
|
7aa2e1209d | ||
|
|
76daaab6ca | ||
|
|
37cfe870cc | ||
|
|
160167758b | ||
|
|
4b634713a5 | ||
|
|
72954d5f15 | ||
|
|
f2b07271c1 | ||
|
|
32b9de5f51 | ||
|
|
71ce8f9bcf | ||
|
|
7d05728e2f | ||
|
|
dee5448b57 | ||
|
|
d67861925a | ||
|
|
0180619d44 | ||
|
|
f07e498612 | ||
|
|
57964cb929 | ||
|
|
6840c77684 | ||
|
|
a1b58115ce | ||
|
|
23eb6e3d46 | ||
|
|
74a2c38c6c | ||
|
|
90b217fda8 | ||
|
|
6855bc0ada | ||
|
|
a359434307 | ||
|
|
856c8959c3 | ||
|
|
8da7a42137 | ||
|
|
510a0f5ef5 | ||
|
|
03ac744bcf | ||
|
|
b058461a7d | ||
|
|
abd9f16b90 | ||
|
|
d07732f2e8 | ||
|
|
4d25582e16 | ||
|
|
d4b2160f9c | ||
|
|
dd7926aab5 | ||
|
|
070bf66980 | ||
|
|
962fc27dbd | ||
|
|
3d4d6132fc | ||
|
|
a96d9294b7 | ||
|
|
a6e78550d5 | ||
|
|
d9f6b7b93c | ||
|
|
969de92ad9 | ||
|
|
c4dbe92b30 | ||
|
|
684764fece | ||
|
|
c4be07693f | ||
|
|
c5d5ca8232 | ||
|
|
428e763814 | ||
|
|
0efa2711ff | ||
|
|
4904f52cee | ||
|
|
dbcf14ddb4 | ||
|
|
7c13ec10d9 | ||
|
|
29b9dccc53 | ||
|
|
e8ce826473 | ||
|
|
bbb991dfd8 | ||
|
|
4432e7e4f7 | ||
|
|
ee9cce64b2 | ||
|
|
1ae4f0150d | ||
|
|
4c77c3ed34 | ||
|
|
975b97472a | ||
|
|
c8ccf13bc7 | ||
|
|
ba59736f87 | ||
|
|
5989e1ed16 | ||
|
|
bc21a0b817 | ||
|
|
99d3227ff5 | ||
|
|
7730f59635 | ||
|
|
ba31546c32 | ||
|
|
a363d12d1f | ||
|
|
feab9c8fa2 | ||
|
|
61f6669926 | ||
|
|
3be69908d2 | ||
|
|
fcb80ec330 | ||
|
|
c9f5684e2f | ||
|
|
c257fa1573 | ||
|
|
97c55da29f | ||
|
|
49426aa9a1 | ||
|
|
0a333c26da | ||
|
|
75a29424ff | ||
|
|
cd1b429308 | ||
|
|
7f1ae4b8cc | ||
|
|
af9fd811cd | ||
|
|
69f5c9b9d3 | ||
|
|
ab45e481be | ||
|
|
ef1e4277d3 | ||
|
|
823b763b25 | ||
|
|
3cb189eb1f | ||
|
|
cc54255c41 | ||
|
|
1cdb66f889 | ||
|
|
51a86a509c | ||
|
|
824898f7b7 | ||
|
|
57dadb6359 | ||
|
|
5dcdc68ef5 | ||
|
|
aafb2db620 | ||
|
|
f3f22cf61c | ||
|
|
371c2f3704 | ||
|
|
1f14f62696 | ||
|
|
06449eff2c | ||
|
|
dcfb86583d | ||
|
|
cda34a1320 | ||
|
|
13611fd8e1 | ||
|
|
fc89aad469 | ||
|
|
d236973c0f | ||
|
|
7e3e126730 | ||
|
|
75ca0571bb | ||
|
|
a48e5d0714 | ||
|
|
2b6a992207 | ||
|
|
24cf106ed2 | ||
|
|
95c8346cb5 | ||
|
|
bc98c2e36c |
54
.github/workflows/coverage.yaml
vendored
Normal file
54
.github/workflows/coverage.yaml
vendored
Normal file
@@ -0,0 +1,54 @@
|
||||
name: coverage
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
pull_request:
|
||||
branches:
|
||||
- "**"
|
||||
paths-ignore:
|
||||
- "docs/**"
|
||||
|
||||
jobs:
|
||||
coverage:
|
||||
name: "Coverage"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repo
|
||||
uses: actions/checkout@v4
|
||||
- name: Set up Python
|
||||
id: setup_python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: "3.10"
|
||||
- name: Cache virtual environment
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
# We are hashing dev-requirements.txt and test-requirements.txt which
|
||||
# contain all dependencies needed to run the tests.
|
||||
key: venv-${{ runner.os }}-${{ steps.setup_python.outputs.python-version}}-${{ hashFiles('dev-requirements.txt') }}-${{ hashFiles('test-requirements.txt') }}
|
||||
path: .venv
|
||||
- name: Install system packages
|
||||
id: install_system_packages
|
||||
run: |
|
||||
sudo apt-get install -y portaudio19-dev
|
||||
- name: Setup virtual environment
|
||||
run: |
|
||||
python -m venv .venv
|
||||
- name: Install basic Python dependencies
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r dev-requirements.txt -r test-requirements.txt
|
||||
- name: Run tests with coverage
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
coverage run
|
||||
coverage xml
|
||||
- name: Upload coverage to Codecov
|
||||
uses: codecov/codecov-action@v5
|
||||
with:
|
||||
token: ${{ secrets.CODECOV_TOKEN }}
|
||||
slug: pipecat-ai/pipecat
|
||||
15
.gitignore
vendored
15
.gitignore
vendored
@@ -32,6 +32,21 @@ fly.toml
|
||||
|
||||
# Example files
|
||||
pipecat/examples/twilio-chatbot/templates/streams.xml
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/node_modules/
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/.expo/
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/dist/
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/npm-debug.*
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/*.jks
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/*.p8
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/*.p12
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/*.key
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/*.mobileprovision
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/*.orig.*
|
||||
pipecat/examples/bot-ready-signalling/client/react-native/web-build/
|
||||
|
||||
# macOS
|
||||
.DS_Store
|
||||
|
||||
|
||||
# Documentation
|
||||
docs/api/_build/
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
repos:
|
||||
- repo: local
|
||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||
rev: v0.9.7
|
||||
hooks:
|
||||
- id: ruff-format-hook
|
||||
name: Check ruff formatting
|
||||
entry: sh scripts/pre-commit.sh
|
||||
language: system
|
||||
- id: ruff
|
||||
language_version: python3
|
||||
args: [ --select, I, ]
|
||||
- id: ruff-format
|
||||
|
||||
442
CHANGELOG.md
442
CHANGELOG.md
@@ -9,6 +9,410 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
### Added
|
||||
|
||||
- Added a `flush_audio()` method to `AzureTTSService`, `FishTTSService`,
|
||||
`PlayHTTTSService`, and `LMNTTTSService`.
|
||||
|
||||
- Added `set_language()` and `set_model()` to `AzureSTTService`,
|
||||
`AssemblySTTService`, and `GladiaSTTService`.
|
||||
|
||||
- Added `on_user_turn_audio_data` and `on_bot_turn_audio_data` to
|
||||
`AudioBufferProcessor`. This gives the ability to grab the audio of only that
|
||||
turn for both the user and the bot.
|
||||
|
||||
- Added new base class `BaseObject` which is now the base class of
|
||||
`FrameProcessor`, `PipelineRunner`, `PipelineTask` and `BaseTransport`. The
|
||||
new `BaseObject` adds supports for event handlers.
|
||||
|
||||
- Added support for a unified format for specifying function calling across all
|
||||
LLM services.
|
||||
|
||||
```python
|
||||
weather_function = FunctionSchema(
|
||||
name="get_current_weather",
|
||||
description="Get the current weather",
|
||||
properties={
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the user's location.",
|
||||
},
|
||||
},
|
||||
required=["location"],
|
||||
)
|
||||
tools = ToolsSchema(standard_tools=[weather_function])
|
||||
```
|
||||
|
||||
- Added `speech_threshold` parameter to `GladiaSTTService`.
|
||||
|
||||
- Allow passing user (`user_kwargs`) and assistant (`assistant_kwargs`) context
|
||||
aggregator parameters when using `create_context_aggregator()`. The values are
|
||||
passed as a mapping that will then be converted to arguments.
|
||||
|
||||
- Added `speed` as an `InputParam` for both `ElevenLabsTTSService` and
|
||||
`ElevenLabsHttpTTSService`.
|
||||
|
||||
- Added new `LLMFullResponseAggregator` to aggregate full LLM completions. At
|
||||
every completion the `on_completion` event handler is triggered.
|
||||
|
||||
- Added a new frame, `RTVIServerMessageFrame`, and RTVI message
|
||||
`RTVIServerMessage` which provides a generic mechanism for sending custom
|
||||
messages from server to client. The `RTVIServerMessageFrame` is processed by
|
||||
the `RTVIObserver` and will be delivered to the client's `onServerMessage`
|
||||
callback or `ServerMessage` event.
|
||||
|
||||
- Added `GoogleLLMOpenAIBetaService` for Google LLM integration with an
|
||||
OpenAI-compatible interface. Added foundational example
|
||||
`14o-function-calling-gemini-openai-format.py`.
|
||||
|
||||
- Added `AzureRealtimeBetaLLMService` to support Azure's OpeanAI Realtime API. Added
|
||||
foundational example `19a-azure-realtime-beta.py`.
|
||||
|
||||
### Changed
|
||||
|
||||
- Moved `flush_audio()` from the `TTSService` base class to
|
||||
`WebsocketTTSService`.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue in `GoogleSTTService`, where it didn't have a `set_language`
|
||||
function. This required a name change from `set_languages` to `set_language`.
|
||||
|
||||
## [0.0.58] - 2025-02-26
|
||||
|
||||
### Added
|
||||
|
||||
- Added track-specific audio event `on_track_audio_data` to
|
||||
`AudioBufferProcessor` for accessing separate input and output audio tracks.
|
||||
|
||||
- Pipecat version will now be logged on every application startup. This will
|
||||
help us identify what version we are running in case of any issues.
|
||||
|
||||
- Added a new `StopFrame` which can be used to stop a pipeline task while
|
||||
keeping the frame processors running. The frame processors could then be used
|
||||
in a different pipeline. The difference between a `StopFrame` and a
|
||||
`StopTaskFrame` is that, as with `EndFrame` and `EndTaskFrame`, the
|
||||
`StopFrame` is pushed from the task and the `StopTaskFrame` is pushed upstream
|
||||
inside the pipeline by any processor.
|
||||
|
||||
- Added a new `PipelineTask` parameter `observers` that replaces the previous
|
||||
`PipelineParams.observers`.
|
||||
|
||||
- Added a new `PipelineTask` parameter `check_dangling_tasks` to enable or
|
||||
disable checking for frame processors' dangling tasks when the Pipeline
|
||||
finishes running.
|
||||
|
||||
- Added new `on_completion_timeout` event for LLM services (all OpenAI-based
|
||||
services, Anthropic and Google). Note that this event will only get triggered
|
||||
if LLM timeouts are setup and if the timeout was reached. It can be useful to
|
||||
retrigger another completion and see if the timeout was just a blip.
|
||||
|
||||
- Added new log observers `LLMLogObserver` and `TranscriptionLogObserver` that
|
||||
can be useful for debugging your pipelines.
|
||||
|
||||
- Added `room_url` property to `DailyTransport`.
|
||||
|
||||
- Added `addons` argument to `DeepgramSTTService`.
|
||||
|
||||
- Added `exponential_backoff_time()` to `utils.network` module.
|
||||
|
||||
### Changed
|
||||
|
||||
- ⚠️ `PipelineTask` now requires keyword arguments (except for the first one for
|
||||
the pipeline).
|
||||
|
||||
- Updated `PlayHTHttpTTSService` to take a `voice_engine` and `protocol` input
|
||||
in the constructor. The previous method of providing a `voice_engine` input
|
||||
that contains the engine and protocol is deprecated by PlayHT.
|
||||
|
||||
- The base `TTSService` class now strips leading newlines before sending text
|
||||
to the TTS provider. This change is to solve issues where some TTS providers,
|
||||
like Azure, would not output text due to newlines.
|
||||
|
||||
- `GrokLLMSService` now uses `grok-2` as the default model.
|
||||
|
||||
- `AnthropicLLMService` now uses `claude-3-7-sonnet-20250219` as the default
|
||||
model.
|
||||
|
||||
- `RimeHttpTTSService` needs an `aiohttp.ClientSession` to be passed to the
|
||||
constructor as all the other HTTP-based services.
|
||||
|
||||
- `RimeHttpTTSService` doesn't use a default voice anymore.
|
||||
|
||||
- `DeepgramSTTService` now uses the new `nova-3` model by default. If you want
|
||||
to use the previous model you can pass `LiveOptions(model="nova-2-general")`.
|
||||
(see https://deepgram.com/learn/introducing-nova-3-speech-to-text-api)
|
||||
|
||||
```python
|
||||
stt = DeepgramSTTService(..., live_options=LiveOptions(model="nova-2-general"))
|
||||
```
|
||||
|
||||
### Deprecated
|
||||
|
||||
- `PipelineParams.observers` is now deprecated, you the new `PipelineTask`
|
||||
parameter `observers`.
|
||||
|
||||
### Removed
|
||||
|
||||
- Remove `TransportParams.audio_out_is_live` since it was not being used at all.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed a `GoogleLLMService` that was causing an exception when sending inline
|
||||
audio in some cases.
|
||||
|
||||
- Fixed an `AudioContextWordTTSService` issue that would cause an `EndFrame` to
|
||||
disconnect from the TTS service before audio from all the contexts was
|
||||
received. This affected services like Cartesia and Rime.
|
||||
|
||||
- Fixed an issue that was not allowing to pass an `OpenAILLMContext` to create
|
||||
`GoogleLLMService`'s context aggregators.
|
||||
|
||||
- Fixed a `ElevenLabsTTSService`, `FishAudioTTSService`, `LMNTTTSService` and
|
||||
`PlayHTTTSService` issue that was resulting in audio requested before an
|
||||
interruption being played after an interruption.
|
||||
|
||||
- Fixed `match_endofsentence` support for ellipses.
|
||||
|
||||
- Fixed an issue that would cause undesired interruptions via
|
||||
`EmulateUserStartedSpeakingFrame` when only interim transcriptions (i.e. no
|
||||
final transcriptions) where received.
|
||||
|
||||
- Fixed an issue where `EndTaskFrame` was not triggering
|
||||
`on_client_disconnected` or closing the WebSocket in FastAPI.
|
||||
|
||||
- Fixed an issue in `DeepgramSTTService` where the `sample_rate` passed to the
|
||||
`LiveOptions` was not being used, causing the service to use the default
|
||||
sample rate of pipeline.
|
||||
|
||||
- Fixed a context aggregator issue that would not append the LLM text response
|
||||
to the context if a function call happened in the same LLM turn.
|
||||
|
||||
- Fixed an issue that was causing HTTP TTS services to push `TTSStoppedFrame`
|
||||
more than once.
|
||||
|
||||
- Fixed a `FishAudioTTSService` issue where `TTSStoppedFrame` was not being
|
||||
pushed.
|
||||
|
||||
- Fixed an issue that `start_callback` was not invoked for some LLM services.
|
||||
|
||||
- Fixed an issue that would cause `DeepgramSTTService` to stop working after an
|
||||
error occurred (e.g. sudden network loss). If the network recovered we would
|
||||
not reconnect.
|
||||
|
||||
- Fixed a `STTMuteFilter` issue that would not mute user audio frames causing
|
||||
transcriptions to be generated by the STT service.
|
||||
|
||||
### Other
|
||||
|
||||
- Added Gemini support to `examples/phone-chatbot`.
|
||||
|
||||
- Added foundational example `34-audio-recording.py` showing how to use the
|
||||
AudioBufferProcessor callbacks to save merged and track recordings.
|
||||
|
||||
## [0.0.57] - 2025-02-14
|
||||
|
||||
### Added
|
||||
|
||||
- Added new `AudioContextWordTTSService`. This is a TTS base class for TTS
|
||||
services that handling multiple separate audio requests.
|
||||
|
||||
- Added new frames `EmulateUserStartedSpeakingFrame` and
|
||||
`EmulateUserStoppedSpeakingFrame` which can be used to emulated VAD behavior
|
||||
without VAD being present or not being triggered.
|
||||
|
||||
- Added a new `audio_in_stream_on_start` field to `TransportParams`.
|
||||
|
||||
- Added a new method `start_audio_in_streaming` in the `BaseInputTransport`.
|
||||
|
||||
- This method should be used to start receiving the input audio in case the
|
||||
field `audio_in_stream_on_start` is set to `false`.
|
||||
|
||||
- Added support for the `RTVIProcessor` to handle buffered audio in `base64`
|
||||
format, converting it into InputAudioRawFrame for transport.
|
||||
|
||||
- Added support for the `RTVIProcessor` to trigger `start_audio_in_streaming`
|
||||
only after the `client-ready` message.
|
||||
|
||||
- Added new `MUTE_UNTIL_FIRST_BOT_COMPLETE` strategy to `STTMuteStrategy`. This
|
||||
strategy starts muted and remains muted until the first bot speech completes,
|
||||
ensuring the bot's first response cannot be interrupted. This complements the
|
||||
existing `FIRST_SPEECH` strategy which only mutes during the first detected
|
||||
bot speech.
|
||||
|
||||
- Added support for Google Cloud Speech-to-Text V2 through `GoogleSTTService`.
|
||||
|
||||
- Added `RimeTTSService`, a new `WordTTSService`. Updated the foundational
|
||||
example `07q-interruptible-rime.py` to use `RimeTTSService`.
|
||||
|
||||
- Added support for Groq's Whisper API through the new `GroqSTTService` and
|
||||
OpenAI's Whisper API through the new `OpenAISTTService`. Introduced a new
|
||||
base class `BaseWhisperSTTService` to handle common Whisper API
|
||||
functionality.
|
||||
|
||||
- Added `PerplexityLLMService` for Perplexity NIM API integration, with an
|
||||
OpenAI-compatible interface. Also, added foundational example
|
||||
`14n-function-calling-perplexity.py`.
|
||||
|
||||
- Added `DailyTransport.update_remote_participants()`. This allows you to update
|
||||
remote participant's settings, like their permissions or which of their
|
||||
devices are enabled. Requires that the local participant have participant
|
||||
admin permission.
|
||||
|
||||
### Changed
|
||||
|
||||
- We don't consider a colon `:` and end of sentence any more.
|
||||
|
||||
- Updated `DailyTransport` to respect the `audio_in_stream_on_start` field,
|
||||
ensuring it only starts receiving the audio input if it is enabled.
|
||||
|
||||
- Updated `FastAPIWebsocketOutputTransport` to send `TransportMessageFrame` and
|
||||
`TransportMessageUrgentFrame` to the serializer.
|
||||
|
||||
- Updated `WebsocketServerOutputTransport` to send `TransportMessageFrame` and
|
||||
`TransportMessageUrgentFrame` to the serializer.
|
||||
|
||||
- Enhanced `STTMuteConfig` to validate strategy combinations, preventing
|
||||
`MUTE_UNTIL_FIRST_BOT_COMPLETE` and `FIRST_SPEECH` from being used together
|
||||
as they handle first bot speech differently.
|
||||
|
||||
- Updated foundational example `07n-interruptible-google.py` to use all Google
|
||||
services.
|
||||
|
||||
- `RimeHttpTTSService` now uses the `mistv2` model by default.
|
||||
|
||||
- Improved error handling in `AzureTTSService` to properly detect and log
|
||||
synthesis cancellation errors.
|
||||
|
||||
- Enhanced `WhisperSTTService` with full language support and improved model
|
||||
documentation.
|
||||
|
||||
- Updated foundation example `14f-function-calling-groq.py` to use
|
||||
`GroqSTTService` for transcription.
|
||||
|
||||
- Updated `GroqLLMService` to use `llama-3.3-70b-versatile` as the default
|
||||
model.
|
||||
|
||||
- `RTVIObserver` doesn't handle `LLMSearchResponseFrame` frames anymore. For
|
||||
now, to handle those frames you need to create a `GoogleRTVIObserver`
|
||||
instead.
|
||||
|
||||
### Deprecated
|
||||
|
||||
- `STTMuteFilter` constructor's `stt_service` parameter is now deprecated and
|
||||
will be removed in a future version. The filter now manages mute state
|
||||
internally instead of querying the STT service.
|
||||
|
||||
- `RTVI.observer()` is now deprecated, instantiate an `RTVIObserver` directly
|
||||
instead.
|
||||
|
||||
- All RTVI frame processors (e.g. `RTVISpeakingProcessor`,
|
||||
`RTVIBotLLMProcessor`) are now deprecated, instantiate an `RTVIObserver`
|
||||
instead.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed a `FalImageGenService` issue that was causing the event loop to be
|
||||
blocked while loading the downloadded image.
|
||||
|
||||
- Fixed a `CartesiaTTSService` service issue that would cause audio overlapping
|
||||
in some cases.
|
||||
|
||||
- Fixed a websocket-based service issue (e.g. `CartesiaTTSService`) that was
|
||||
preventing a reconnection after the server disconnected cleanly, which was
|
||||
causing an inifite loop instead.
|
||||
|
||||
- Fixed a `BaseOutputTransport` issue that was causing upstream frames to no be
|
||||
pushed upstream.
|
||||
|
||||
- Fixed multiple issue where user transcriptions where not being handled
|
||||
properly. It was possible for short utterances to not trigger VAD which would
|
||||
cause user transcriptions to be ignored. It was also possible for one or more
|
||||
transcriptions to be generated after VAD in which case they would also be
|
||||
ignored.
|
||||
|
||||
- Fixed an issue that was causing `BotStoppedSpeakingFrame` to be generated too
|
||||
late. This could then cause issues unblocking `STTMuteFilter` later than
|
||||
desired.
|
||||
|
||||
- Fixed an issue that was causing `AudioBufferProcessor` to not record
|
||||
synchronized audio.
|
||||
|
||||
- Fixed an `RTVI` issue that was causing `bot-tts-text` messages to be sent
|
||||
before being processed by the output transport.
|
||||
|
||||
- Fixed an issue[#1192] in 11labs where we are trying to reconnect/disconnect
|
||||
the websocket connection even when the connection is already closed.
|
||||
|
||||
- Fixed an issue where `has_regular_messages` condition was always true in
|
||||
`GoogleLLMContext` due to `Part` having `function_call` & `function_response`
|
||||
with `None` values.
|
||||
|
||||
### Other
|
||||
|
||||
- Added new `instant-voice` example. This example showcases how to enable
|
||||
instant voice communication as soon as a user connects.
|
||||
|
||||
- Added new `local-input-select-stt` example. This examples allows you to play
|
||||
with local audio inputs by slecting them through a nice text interface.
|
||||
|
||||
## [0.0.56] - 2025-02-06
|
||||
|
||||
### Changed
|
||||
|
||||
- Use `gemini-2.0-flash-001` as the default model for `GoogleLLMSerivce`.
|
||||
|
||||
- Improved foundational examples 22b, 22c, and 22d to support function calling.
|
||||
With these base examples, `FunctionCallInProgressFrame` and
|
||||
`FunctionCallResultFrame` will no longer be blocked by the gates.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed a `TkLocalTransport` and `LocalAudioTransport` issues that was causing
|
||||
errors on cleanup.
|
||||
|
||||
- Fixed an issue that was causing `tests.utils` import to fail because of
|
||||
logging setup.
|
||||
|
||||
- Fixed a `SentryMetrics` issue that was preventing any metrics to be sent to
|
||||
Sentry and also was preventing from metrics frames to be pushed to the
|
||||
pipeline.
|
||||
|
||||
- Fixed an issue in `BaseOutputTransport` where incoming audio would not be
|
||||
resampled to the desired output sample rate.
|
||||
|
||||
- Fixed an issue with the `TwilioFrameSerializer` and `TelnyxFrameSerializer`
|
||||
where `twilio_sample_rate` and `telnyx_sample_rate` were incorrectly
|
||||
initialized to `audio_in_sample_rate`. Those values currently default to 8000
|
||||
and should be set manually from the serializer constructor if a different
|
||||
value is needed.
|
||||
|
||||
### Other
|
||||
|
||||
- Added a new `sentry-metrics` example.
|
||||
|
||||
## [0.0.55] - 2025-02-05
|
||||
|
||||
### Added
|
||||
|
||||
- Added a new `start_metadata` field to `PipelineParams`. The provided metadata
|
||||
will be set to the initial `StartFrame` being pushed from the `PipelineTask`.
|
||||
|
||||
- Added new fields to `PipelineParams` to control audio input and output sample
|
||||
rates for the whole pipeline. This allows controlling sample rates from a
|
||||
single place instead of having to specify sample rates in each
|
||||
service. Setting a sample rate to a service is still possible and will
|
||||
override the value from `PipelineParams`.
|
||||
|
||||
- Introduce audio resamplers (`BaseAudioResampler`). This is just a base class
|
||||
to implement audio resamplers. Currently, two implementations are provided
|
||||
`SOXRAudioResampler` and `ResampyResampler`. A new
|
||||
`create_default_resampler()` has been added (replacing the now deprecated
|
||||
`resample_audio()`).
|
||||
|
||||
- It is now possible to specify the asyncio event loop that a `PipelineTask` and
|
||||
all the processors should run on by passing it as a new argument to the
|
||||
`PipelineRunner`. This could allow running pipelines in multiple threads each
|
||||
@@ -41,6 +445,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
### Changed
|
||||
|
||||
- `GatedOpenAILLMContextAggregator` now require keyword arguments. Also, a new
|
||||
`start_open` argument has been added to set the initial state of the gate.
|
||||
|
||||
- Added `organization` and `project` level authentication to
|
||||
`OpenAILLMService`.
|
||||
|
||||
@@ -56,8 +463,27 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
- `InputDTMFFrame` is now based on `DTMFFrame`. There's also a new
|
||||
`OutputDTMFFrame` frame.
|
||||
|
||||
### Deprecated
|
||||
|
||||
- `resample_audio()` is now deprecated, use `create_default_resampler()`
|
||||
instead.
|
||||
|
||||
### Removed
|
||||
|
||||
- `AudioBufferProcessor.reset_audio_buffers()` has been removed, use
|
||||
`AudioBufferProcessor.start_recording()` and
|
||||
`AudioBufferProcessor.stop_recording()` instead.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed a `AudioBufferProcessor` that would cause crackling in some recordings.
|
||||
|
||||
- Fixed an issue in `AudioBufferProcessor` where user callback would not be
|
||||
called on task cancellation.
|
||||
|
||||
- Fixed an issue in `AudioBufferProcessor` that would cause wrong silence
|
||||
padding in some cases.
|
||||
|
||||
- Fixed an issue where `ElevenLabsTTSService` messages would return a 1009
|
||||
websocket error by increasing the max message size limit to 16MB.
|
||||
|
||||
@@ -73,6 +499,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
### Other
|
||||
|
||||
- Improved Unit Test `run_test()` to use `PipelineTask` and
|
||||
`PipelineRunner`. There's now also some control around `StartFrame` and
|
||||
`EndFrame`. The `EndTaskFrame` has been removed since it doesn't seem
|
||||
necessary with this new approach.
|
||||
|
||||
- Updated `twilio-chatbot` with a few new features: use 8000 sample rate and
|
||||
avoid resampling, a new client useful for stress testing and testing locally
|
||||
without the need to make phone calls. Also, added audio recording on both the
|
||||
client and the server to make sure the audio sounds good.
|
||||
|
||||
- Updated examples to use `task.cancel()` to immediately exit the example when a
|
||||
participant leaves or disconnects, instead of pushing an `EndFrame`. Pushing
|
||||
an `EndFrame` causes the bot to run through everything that is internally
|
||||
@@ -102,7 +538,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
- Added `enable_recording` and `geo` parameters to `DailyRoomProperties`.
|
||||
|
||||
- Added `RecordingsBucketConfig` to `DailyRoomProperties` to upload recordings to a custom AWS bucket.
|
||||
- Added `RecordingsBucketConfig` to `DailyRoomProperties` to upload recordings
|
||||
to a custom AWS bucket.
|
||||
|
||||
### Changed
|
||||
|
||||
@@ -1527,6 +1964,9 @@ async def on_connected(processor):
|
||||
|
||||
### Changed
|
||||
|
||||
- `FrameSerializer.serialize()` and `FrameSerializer.deserialize()` are now
|
||||
`async`.
|
||||
|
||||
- `Filter` has been renamed to `FrameFilter` and it's now under
|
||||
`processors/filters`.
|
||||
|
||||
|
||||
46
README.md
46
README.md
@@ -2,7 +2,7 @@
|
||||
<img alt="pipecat" width="300px" height="auto" src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/pipecat.png">
|
||||
</div></h1>
|
||||
|
||||
[](https://pypi.org/project/pipecat-ai)  [](https://docs.pipecat.ai) [](https://discord.gg/pipecat) <a href="https://app.commanddash.io/agent/github_pipecat-ai_pipecat"><img src="https://img.shields.io/badge/AI-Code%20Agent-EB9FDA"></a>
|
||||
[](https://pypi.org/project/pipecat-ai)  [](https://codecov.io/gh/pipecat-ai/pipecat) [](https://docs.pipecat.ai) [](https://discord.gg/pipecat)
|
||||
|
||||
Pipecat is an open source Python framework for building voice and multimodal conversational agents. It handles the complex orchestration of AI services, network transport, audio processing, and multimodal interactions, letting you focus on creating engaging experiences.
|
||||
|
||||
@@ -55,17 +55,17 @@ pip install "pipecat-ai[option,...]"
|
||||
|
||||
### Available services
|
||||
|
||||
| Category | Services | Install Command Example |
|
||||
| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------- |
|
||||
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) | `pip install "pipecat-ai[deepgram]"` |
|
||||
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Together AI](https://docs.pipecat.ai/server/services/llm/together) | `pip install "pipecat-ai[openai]"` |
|
||||
| Text-to-Speech | [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) | `pip install "pipecat-ai[cartesia]"` |
|
||||
| Speech-to-Speech | [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai) | `pip install "pipecat-ai[openai]"` |
|
||||
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local | `pip install "pipecat-ai[daily]"` |
|
||||
| Video | [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) | `pip install "pipecat-ai[tavus,simli]"` |
|
||||
| Vision & Image | [Moondream](https://docs.pipecat.ai/server/services/vision/moondream), [fal](https://docs.pipecat.ai/server/services/image-generation/fal) | `pip install "pipecat-ai[moondream]"` |
|
||||
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [Noisereduce](https://docs.pipecat.ai/server/utilities/audio/noisereduce-filter) | `pip install "pipecat-ai[silero]"` |
|
||||
| Analytics & Metrics | [Canonical AI](https://docs.pipecat.ai/server/services/analytics/canonical), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) | `pip install "pipecat-ai[canonical]"` |
|
||||
| Category | Services | Install Command Example |
|
||||
| ------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------- |
|
||||
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) | `pip install "pipecat-ai[deepgram]"` |
|
||||
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Together AI](https://docs.pipecat.ai/server/services/llm/together) | `pip install "pipecat-ai[openai]"` |
|
||||
| Text-to-Speech | [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) | `pip install "pipecat-ai[cartesia]"` |
|
||||
| Speech-to-Speech | [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai) | `pip install "pipecat-ai[google]"` |
|
||||
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local | `pip install "pipecat-ai[daily]"` |
|
||||
| Video | [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) | `pip install "pipecat-ai[tavus,simli]"` |
|
||||
| Vision & Image | [Moondream](https://docs.pipecat.ai/server/services/vision/moondream), [fal](https://docs.pipecat.ai/server/services/image-generation/fal) | `pip install "pipecat-ai[moondream]"` |
|
||||
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [Noisereduce](https://docs.pipecat.ai/server/utilities/audio/noisereduce-filter) | `pip install "pipecat-ai[silero]"` |
|
||||
| Analytics & Metrics | [Canonical AI](https://docs.pipecat.ai/server/services/analytics/canonical), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) | `pip install "pipecat-ai[canonical]"` |
|
||||
|
||||
📚 [View full services documentation →](https://docs.pipecat.ai/server/services/supported-services)
|
||||
|
||||
@@ -149,36 +149,40 @@ Sign up [here](https://dashboard.daily.co/u/signup) and [create a room](https://
|
||||
|
||||
## Hacking on the framework itself
|
||||
|
||||
_Note that you may need to set up a virtual environment before following the instructions below. For instance, you might need to run the following from the root of the repo:_
|
||||
_Note: You may need to set up a virtual environment before following these instructions. From the root of the repo:_
|
||||
|
||||
```shell
|
||||
python3 -m venv venv
|
||||
source venv/bin/activate
|
||||
```
|
||||
|
||||
From the root of this repo, run the following:
|
||||
Install the development dependencies:
|
||||
|
||||
```shell
|
||||
pip install -r dev-requirements.txt
|
||||
```
|
||||
|
||||
This will install the necessary development dependencies. Also, make sure you install the git pre-commit hooks:
|
||||
Install the git pre-commit hooks (these help ensure your code follows project rules):
|
||||
|
||||
```shell
|
||||
pre-commit install
|
||||
```
|
||||
|
||||
The hooks will just save you time when you submit a PR by making sure your code follows the project rules.
|
||||
|
||||
To use the package locally (e.g. to run sample files), run:
|
||||
Install the `pipecat-ai` package locally in editable mode:
|
||||
|
||||
```shell
|
||||
pip install --editable ".[option,...]"
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
The `--editable` option makes sure you don't have to run `pip install` again and you can just edit the project files locally.
|
||||
The `-e` or `--editable` option allows you to modify the code without reinstalling.
|
||||
|
||||
If you want to use this package from another directory, you can run:
|
||||
To include optional dependencies, add them to the install command. For example:
|
||||
|
||||
```shell
|
||||
pip install -e ".[daily,deepgram,cartesia,openai,silero]" # Updated for the services you're using
|
||||
```
|
||||
|
||||
If you want to use this package from another directory:
|
||||
|
||||
```shell
|
||||
pip install "path_to_this_repo[option,...]"
|
||||
|
||||
11
codecov.yml
Normal file
11
codecov.yml
Normal file
@@ -0,0 +1,11 @@
|
||||
coverage:
|
||||
range: 50..90 # coverage lower than 50 is red, higher than 90 green, between color code
|
||||
|
||||
status:
|
||||
project:
|
||||
default:
|
||||
target: auto # auto % coverage target
|
||||
threshold: 5% # allow for 5% reduction of coverage without failing
|
||||
|
||||
# do not run coverage on patch nor changes
|
||||
patch: false
|
||||
@@ -1,11 +1,12 @@
|
||||
build~=1.2.2
|
||||
grpcio-tools~=1.69.0
|
||||
coverage~=7.6.12
|
||||
grpcio-tools~=1.67.1
|
||||
pip-tools~=7.4.1
|
||||
pre-commit~=4.0.1
|
||||
pyright~=1.1.392
|
||||
pyright~=1.1.394
|
||||
pytest~=8.3.4
|
||||
pytest-asyncio~=0.25.2
|
||||
ruff~=0.9.1
|
||||
setuptools~=75.8.0
|
||||
pytest-asyncio~=0.25.3
|
||||
ruff~=0.9.7
|
||||
setuptools~=70.0.0
|
||||
setuptools_scm~=8.1.0
|
||||
python-dotenv~=1.0.1
|
||||
|
||||
@@ -18,6 +18,9 @@ AZURE_DALLE_API_KEY=...
|
||||
AZURE_DALLE_ENDPOINT=https://...
|
||||
AZURE_DALLE_MODEL=...
|
||||
|
||||
# Cartesia
|
||||
CARTESIA_API_KEY=...
|
||||
|
||||
# Daily
|
||||
DAILY_API_KEY=...
|
||||
DAILY_SAMPLE_ROOM_URL=https://...
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
"@daily-co/daily-js": "0.74.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"vite": "^6.0.2"
|
||||
"vite": "^6.0.9"
|
||||
}
|
||||
},
|
||||
"node_modules/@babel/runtime": {
|
||||
@@ -1007,15 +1007,14 @@
|
||||
}
|
||||
},
|
||||
"node_modules/vite": {
|
||||
"version": "6.0.7",
|
||||
"resolved": "https://registry.npmjs.org/vite/-/vite-6.0.7.tgz",
|
||||
"integrity": "sha512-RDt8r/7qx9940f8FcOIAH9PTViRrghKaK2K1jY3RaAURrEUbm9Du1mJ72G+jlhtG3WwodnfzY8ORQZbBavZEAQ==",
|
||||
"version": "6.1.0",
|
||||
"resolved": "https://registry.npmjs.org/vite/-/vite-6.1.0.tgz",
|
||||
"integrity": "sha512-RjjMipCKVoR4hVfPY6GQTgveinjNuyLw+qruksLDvA5ktI1150VmcMBKmQaEWJhg/j6Uaf6dNCNA0AfdzUb/hQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"esbuild": "^0.24.2",
|
||||
"postcss": "^8.4.49",
|
||||
"rollup": "^4.23.0"
|
||||
"postcss": "^8.5.1",
|
||||
"rollup": "^4.30.1"
|
||||
},
|
||||
"bin": {
|
||||
"vite": "bin/vite.js"
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
"license": "ISC",
|
||||
"description": "",
|
||||
"devDependencies": {
|
||||
"vite": "^6.0.2"
|
||||
"vite": "^6.0.9"
|
||||
},
|
||||
"dependencies": {
|
||||
"@daily-co/daily-js": "0.74.0"
|
||||
|
||||
1
examples/bot-ready-signalling/client/react-native/.nvmrc
Normal file
1
examples/bot-ready-signalling/client/react-native/.nvmrc
Normal file
@@ -0,0 +1 @@
|
||||
22.14
|
||||
60
examples/bot-ready-signalling/client/react-native/README.md
Normal file
60
examples/bot-ready-signalling/client/react-native/README.md
Normal file
@@ -0,0 +1,60 @@
|
||||
# React Native Implementation
|
||||
|
||||
Basic implementation using the [Pipecat React Native SDK](https://docs.pipecat.ai/client/react-native/introduction).
|
||||
|
||||
## Usage
|
||||
|
||||
### Expo requirements
|
||||
|
||||
This project cannot be used with an [Expo Go](https://docs.expo.dev/workflow/expo-go/) app because [it requires custom native code](https://docs.expo.io/workflow/customizing/).
|
||||
|
||||
When a project requires custom native code or a config plugin, we need to transition from using [Expo Go](https://docs.expo.dev/workflow/expo-go/)
|
||||
to a [development build](https://docs.expo.dev/development/introduction/).
|
||||
|
||||
More details about the custom native code used by this demo can be found in [rn-daily-js-expo-config-plugin](https://github.com/daily-co/rn-daily-js-expo-config-plugin).
|
||||
|
||||
### Building remotely
|
||||
|
||||
If you do not have experience with Xcode and Android Studio builds or do not have them installed locally on your computer, you will need to follow [this guide from Expo to use EAS Build](https://docs.expo.dev/development/create-development-builds/#create-and-install-eas-build).
|
||||
|
||||
### Building locally
|
||||
|
||||
You will need to have installed locally on your computer:
|
||||
- [Xcode](https://developer.apple.com/xcode/) to build for iOS;
|
||||
- [Android Studio](https://developer.android.com/studio) to build for Android;
|
||||
|
||||
#### Install the demo dependencies
|
||||
|
||||
```bash
|
||||
# Use the version of node specified in .nvmrc
|
||||
nvm i
|
||||
|
||||
# Install dependencies
|
||||
npm i
|
||||
|
||||
# Before a native app can be compiled, the native source code must be generated.
|
||||
npx expo prebuild
|
||||
|
||||
# Configure the environment variable to connect to the local server
|
||||
cp env.example .env
|
||||
# edit .env and add your local ip address, for example: http://192.168.1.16:7860
|
||||
```
|
||||
|
||||
#### Running on Android
|
||||
|
||||
After plugging in an Android device [configured for debugging](https://developer.android.com/studio/debug/dev-options), run the following command:
|
||||
|
||||
```
|
||||
npm run android
|
||||
```
|
||||
|
||||
#### Running on iOS
|
||||
|
||||
Run the following command:
|
||||
|
||||
```
|
||||
npm run ios
|
||||
```
|
||||
|
||||
#### Connect to the server
|
||||
Use the http://localhost:5173 in your app.
|
||||
75
examples/bot-ready-signalling/client/react-native/app.json
Normal file
75
examples/bot-ready-signalling/client/react-native/app.json
Normal file
@@ -0,0 +1,75 @@
|
||||
{
|
||||
"expo": {
|
||||
"name": "bot-ready-rn",
|
||||
"slug": "bot-ready-rn",
|
||||
"version": "1.0.0",
|
||||
"orientation": "portrait",
|
||||
"icon": "./assets/icon.png",
|
||||
"userInterfaceStyle": "light",
|
||||
"splash": {
|
||||
"image": "./assets/splash.png",
|
||||
"resizeMode": "contain",
|
||||
"backgroundColor": "#ffffff"
|
||||
},
|
||||
"updates": {
|
||||
"fallbackToCacheTimeout": 0
|
||||
},
|
||||
"assetBundlePatterns": [
|
||||
"**/*"
|
||||
],
|
||||
"ios": {
|
||||
"supportsTablet": true,
|
||||
"bitcode": false,
|
||||
"bundleIdentifier": "co.daily.expo.BotReady",
|
||||
"infoPlist": {
|
||||
"UIBackgroundModes": [
|
||||
"voip"
|
||||
]
|
||||
},
|
||||
"appleTeamId": "EEBGKV9N3N"
|
||||
},
|
||||
"android": {
|
||||
"adaptiveIcon": {
|
||||
"foregroundImage": "./assets/adaptive-icon.png",
|
||||
"backgroundColor": "#FFFFFF"
|
||||
},
|
||||
"package": "co.daily.expo.BotReady",
|
||||
"permissions": [
|
||||
"android.permission.ACCESS_NETWORK_STATE",
|
||||
"android.permission.BLUETOOTH",
|
||||
"android.permission.CAMERA",
|
||||
"android.permission.INTERNET",
|
||||
"android.permission.MODIFY_AUDIO_SETTINGS",
|
||||
"android.permission.RECORD_AUDIO",
|
||||
"android.permission.SYSTEM_ALERT_WINDOW",
|
||||
"android.permission.WAKE_LOCK",
|
||||
"android.permission.FOREGROUND_SERVICE",
|
||||
"android.permission.FOREGROUND_SERVICE_CAMERA",
|
||||
"android.permission.FOREGROUND_SERVICE_MICROPHONE",
|
||||
"android.permission.FOREGROUND_SERVICE_MEDIA_PROJECTION",
|
||||
"android.permission.POST_NOTIFICATIONS"
|
||||
]
|
||||
},
|
||||
"web": {
|
||||
"favicon": "./assets/favicon.png"
|
||||
},
|
||||
"plugins": [
|
||||
"@config-plugins/react-native-webrtc",
|
||||
"@daily-co/config-plugin-rn-daily-js",
|
||||
[
|
||||
"expo-build-properties",
|
||||
{
|
||||
"android": {
|
||||
"minSdkVersion": 24,
|
||||
"compileSdkVersion": 35,
|
||||
"targetSdkVersion": 34,
|
||||
"buildToolsVersion": "35.0.0"
|
||||
},
|
||||
"ios": {
|
||||
"deploymentTarget": "15.1"
|
||||
}
|
||||
}
|
||||
]
|
||||
]
|
||||
}
|
||||
}
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 17 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 1.4 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 22 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 46 KiB |
@@ -0,0 +1,7 @@
|
||||
module.exports = function(api) {
|
||||
api.cache(true);
|
||||
return {
|
||||
presets: ['babel-preset-expo'],
|
||||
plugins: [["module:react-native-dotenv"]],
|
||||
};
|
||||
};
|
||||
@@ -0,0 +1 @@
|
||||
API_BASE_URL=http://YOUR_LOCAL_IP:7860
|
||||
7
examples/bot-ready-signalling/client/react-native/index.js
vendored
Normal file
7
examples/bot-ready-signalling/client/react-native/index.js
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
import { registerRootComponent } from "expo";
|
||||
|
||||
import App from "./src/App";
|
||||
|
||||
// registerRootComponent calls AppRegistry.registerComponent('main', () => App);
|
||||
// It also ensures that the environment is set up appropriately
|
||||
registerRootComponent(App);
|
||||
@@ -0,0 +1,4 @@
|
||||
// Learn more https://docs.expo.io/guides/customizing-metro
|
||||
const { getDefaultConfig } = require('expo/metro-config');
|
||||
|
||||
module.exports = getDefaultConfig(__dirname);
|
||||
10983
examples/bot-ready-signalling/client/react-native/package-lock.json
generated
Normal file
10983
examples/bot-ready-signalling/client/react-native/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,31 @@
|
||||
{
|
||||
"name": "bot-ready-rn",
|
||||
"version": "1.0.0",
|
||||
"scripts": {
|
||||
"start": "expo start --dev-client",
|
||||
"android": "expo run:android --device",
|
||||
"ios": "expo run:ios --device",
|
||||
"web": "expo start --web"
|
||||
},
|
||||
"dependencies": {
|
||||
"@config-plugins/react-native-webrtc": "^10.0.0",
|
||||
"@daily-co/config-plugin-rn-daily-js": "0.0.7",
|
||||
"@daily-co/react-native-daily-js": "^0.70.0",
|
||||
"@daily-co/react-native-webrtc": "^118.0.3-daily.2",
|
||||
"@react-native-async-storage/async-storage": "1.23.1",
|
||||
"expo": "^52.0.0",
|
||||
"expo-build-properties": "~0.13.1",
|
||||
"expo-dev-client": "~5.0.5",
|
||||
"expo-splash-screen": "~0.29.16",
|
||||
"expo-status-bar": "~2.0.0",
|
||||
"react": "18.3.1",
|
||||
"react-native": "0.76.3",
|
||||
"react-native-background-timer": "^2.4.1",
|
||||
"react-native-dotenv": "^3.4.11",
|
||||
"react-native-get-random-values": "^1.11.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@babel/core": "^7.12.9"
|
||||
},
|
||||
"private": true
|
||||
}
|
||||
121
examples/bot-ready-signalling/client/react-native/src/App.js
vendored
Normal file
121
examples/bot-ready-signalling/client/react-native/src/App.js
vendored
Normal file
@@ -0,0 +1,121 @@
|
||||
import React, { useState, useEffect } from 'react';
|
||||
import {SafeAreaView, View, Text, Button, StyleSheet, ScrollView} from 'react-native';
|
||||
import Daily from "@daily-co/react-native-daily-js";
|
||||
import { API_BASE_URL } from "@env";
|
||||
|
||||
const CallScreen = () => {
|
||||
const [connectionStatus, setConnectionStatus] = useState('Disconnected');
|
||||
const [isConnected, setIsConnected] = useState(false);
|
||||
const [callObject, setCallObject] = useState(null);
|
||||
const [logs, setLogs] = useState([]);
|
||||
|
||||
useEffect(() => {
|
||||
if (callObject) {
|
||||
setupTrackListeners(callObject);
|
||||
}
|
||||
}, [callObject]);
|
||||
|
||||
const log = (message) => {
|
||||
setLogs((prevLogs) => [...prevLogs, `${new Date().toISOString()} - ${message}`]);
|
||||
console.log(message);
|
||||
};
|
||||
|
||||
const setupTrackListeners = (callObject) => {
|
||||
callObject.on("joined-meeting", () => {
|
||||
setConnectionStatus('Connected');
|
||||
setIsConnected(true);
|
||||
log('Client connected');
|
||||
});
|
||||
callObject.on("left-meeting", () => {
|
||||
setConnectionStatus('Disconnected');
|
||||
setIsConnected(false);
|
||||
log('Client disconnected');
|
||||
});
|
||||
callObject.on("participant-left", () => {
|
||||
// When the bot leaves, we are also disconnecting from the call
|
||||
disconnect().catch((err) => {
|
||||
log(`Failed to disconnect ${err}`);
|
||||
})
|
||||
});
|
||||
// Trigger so the bot can start sending audio
|
||||
callObject.on("track-started", (evt) => {
|
||||
if (evt.track.kind === "audio" && evt.participant.local === false) {
|
||||
handleEventToConsole(evt)
|
||||
log("Sending the message that will trigger the bot to play the audio.")
|
||||
callObject.sendAppMessage("playable")
|
||||
}
|
||||
});
|
||||
callObject.on("error", (evt) => log(`Error: ${evt.error}`));
|
||||
// Other events just for awareness
|
||||
callObject.on("track-stopped", handleEventToConsole);
|
||||
callObject.on("participant-joined", handleEventToConsole);
|
||||
callObject.on("participant-updated", handleEventToConsole);
|
||||
};
|
||||
|
||||
const handleEventToConsole = (evt) => {
|
||||
log(`Received event: ${evt.action}`);
|
||||
};
|
||||
|
||||
const connect = async () => {
|
||||
try {
|
||||
const callObject = Daily.createCallObject({ subscribeToTracksAutomatically: true });
|
||||
setCallObject(callObject);
|
||||
const connectionUrl = `${API_BASE_URL}/connect`
|
||||
const res = await fetch(connectionUrl, { method: "POST", headers: { "Content-Type": "application/json" } });
|
||||
const roomInfo = await res.json();
|
||||
await callObject.join({ url: roomInfo.room_url });
|
||||
} catch (error) {
|
||||
log(`Error connecting: ${error.message}`);
|
||||
}
|
||||
};
|
||||
|
||||
const disconnect = async () => {
|
||||
if (callObject) {
|
||||
try {
|
||||
await callObject.leave();
|
||||
await callObject.destroy();
|
||||
setCallObject(null);
|
||||
} catch (error) {
|
||||
log(`Error disconnecting: ${error.message}`);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
return (
|
||||
<SafeAreaView style={styles.safeArea}>
|
||||
<View style={styles.container}>
|
||||
<View style={styles.statusBar}>
|
||||
<Text>Status: <Text style={styles.status}>{connectionStatus}</Text></Text>
|
||||
<View style={styles.controls}>
|
||||
<Button
|
||||
title={isConnected ? "Disconnect" : "Connect"}
|
||||
onPress={isConnected ? disconnect : connect}
|
||||
/>
|
||||
</View>
|
||||
</View>
|
||||
|
||||
<View style={styles.debugPanel}>
|
||||
<Text style={styles.debugTitle}>Debug Info</Text>
|
||||
<ScrollView style={styles.debugLog}>
|
||||
{logs.map((logEntry, index) => (
|
||||
<Text key={index} style={styles.logText}>{logEntry}</Text>
|
||||
))}
|
||||
</ScrollView>
|
||||
</View>
|
||||
</View>
|
||||
</SafeAreaView>
|
||||
);
|
||||
};
|
||||
|
||||
const styles = StyleSheet.create({
|
||||
safeArea: { flex: 1, backgroundColor: '#f0f0f0', padding: 20 },
|
||||
container: { flex: 1, margin: 20 },
|
||||
statusBar: { flexDirection: 'row', justifyContent: 'space-between', alignItems: 'center', padding: 10, backgroundColor: '#fff', borderRadius: 8, marginBottom: 20 },
|
||||
status: { fontWeight: 'bold' },
|
||||
controls: { flexDirection: 'row', gap: 10 },
|
||||
debugPanel: { height: '80%', backgroundColor: '#fff', borderRadius: 8, padding: 20},
|
||||
debugTitle: { fontSize: 16, fontWeight: 'bold' },
|
||||
debugLog: { height: '100%', overflow: 'scroll', backgroundColor: '#f8f8f8', padding: 10, borderRadius: 4, fontFamily: 'monospace', fontSize: 12, lineHeight: 1.4 },
|
||||
});
|
||||
|
||||
export default CallScreen;
|
||||
@@ -6,6 +6,7 @@
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from typing import Optional
|
||||
|
||||
import aiohttp
|
||||
|
||||
@@ -18,7 +19,7 @@ async def configure(aiohttp_session: aiohttp.ClientSession):
|
||||
|
||||
|
||||
async def configure_with_args(
|
||||
aiohttp_session: aiohttp.ClientSession, parser: argparse.ArgumentParser | None = None
|
||||
aiohttp_session: aiohttp.ClientSession, parser: Optional[argparse.ArgumentParser] = None
|
||||
):
|
||||
if not parser:
|
||||
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
|
||||
|
||||
@@ -31,16 +31,15 @@ logger.add(sys.stderr, level="DEBUG")
|
||||
class SilenceFrame(OutputAudioRawFrame):
|
||||
def __init__(
|
||||
self,
|
||||
audio: bytes = None,
|
||||
sample_rate: int = 16000,
|
||||
num_channels: int = 1,
|
||||
duration: float = 0.1,
|
||||
*,
|
||||
sample_rate: int,
|
||||
duration: float,
|
||||
):
|
||||
# Initialize the parent class with the silent frame's data
|
||||
super().__init__(
|
||||
audio=self.create_silent_audio_frame(sample_rate, num_channels, duration).audio,
|
||||
audio=self.create_silent_audio_frame(sample_rate, 1, duration).audio,
|
||||
sample_rate=sample_rate,
|
||||
num_channels=num_channels,
|
||||
num_channels=1,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
@@ -80,7 +79,10 @@ async def main():
|
||||
return
|
||||
await task.queue_frames(
|
||||
[
|
||||
SilenceFrame(duration=0.5),
|
||||
SilenceFrame(
|
||||
sample_rate=task.params.audio_out_sample_rate,
|
||||
duration=0.5,
|
||||
),
|
||||
TTSSpeakFrame(f"Hello there, how are you doing today ?"),
|
||||
EndFrame(),
|
||||
]
|
||||
|
||||
@@ -65,7 +65,6 @@ async def main():
|
||||
# English
|
||||
#
|
||||
voice_id="cgSgspJ2msm6clMCkdW9",
|
||||
aiohttp_session=session,
|
||||
#
|
||||
# Spanish
|
||||
#
|
||||
@@ -114,16 +113,17 @@ async def main():
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
audio_buffer_processor, # captures audio into a buffer
|
||||
canonical, # uploads audio buffer to Canonical AI for metrics
|
||||
audio_buffer_processor, # captures audio into a buffer
|
||||
context_aggregator.assistant(),
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
await audio_buffer_processor.start_recording()
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
|
||||
@@ -32,10 +32,16 @@ load_dotenv(override=True)
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
# Create the recordings directory if it doesn't exist
|
||||
os.makedirs("recordings", exist_ok=True)
|
||||
|
||||
async def save_audio(audio: bytes, sample_rate: int, num_channels: int):
|
||||
|
||||
async def save_audio(audio: bytes, sample_rate: int, num_channels: int, name: str):
|
||||
if len(audio) > 0:
|
||||
filename = f"conversation_recording{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.wav"
|
||||
filename = os.path.join(
|
||||
"recordings",
|
||||
f"{name}_conversation_recording{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.wav",
|
||||
)
|
||||
with io.BytesIO() as buffer:
|
||||
with wave.open(buffer, "wb") as wf:
|
||||
wf.setsampwidth(2)
|
||||
@@ -82,7 +88,6 @@ async def main():
|
||||
# English
|
||||
#
|
||||
voice_id="cgSgspJ2msm6clMCkdW9",
|
||||
aiohttp_session=session,
|
||||
#
|
||||
# Spanish
|
||||
#
|
||||
@@ -109,8 +114,9 @@ async def main():
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
# Save audio every 10 seconds.
|
||||
audiobuffer = AudioBufferProcessor(buffer_size=480000)
|
||||
# NOTE: Watch out! This will save all the conversation in memory. You
|
||||
# can pass `buffer_size` to get periodic callbacks.
|
||||
audiobuffer = AudioBufferProcessor(enable_turn_audio=True)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -124,14 +130,23 @@ async def main():
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
|
||||
|
||||
@audiobuffer.event_handler("on_audio_data")
|
||||
async def on_audio_data(buffer, audio, sample_rate, num_channels):
|
||||
await save_audio(audio, sample_rate, num_channels)
|
||||
await save_audio(audio, sample_rate, num_channels, "full")
|
||||
|
||||
@audiobuffer.event_handler("on_user_turn_audio_data")
|
||||
async def on_user_turn_audio_data(buffer, audio, sample_rate, num_channels):
|
||||
await save_audio(audio, sample_rate, num_channels, "user")
|
||||
|
||||
@audiobuffer.event_handler("on_bot_turn_audio_data")
|
||||
async def on_bot_turn_audio_data(buffer, audio, sample_rate, num_channels):
|
||||
await save_audio(audio, sample_rate, num_channels, "bot")
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
await audiobuffer.start_recording()
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
|
||||
@@ -70,7 +70,7 @@ async def main(room_url: str, token: str):
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
|
||||
@@ -62,7 +62,7 @@ async def main(room_url: str, token: str):
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -16,8 +16,7 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.local.audio import LocalAudioTransport
|
||||
from pipecat.transports.local.audio import LocalAudioTransport, LocalAudioTransportParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -26,7 +25,7 @@ logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
transport = LocalAudioTransport(TransportParams(audio_out_enabled=True))
|
||||
transport = LocalAudioTransport(LocalAudioTransportParams(audio_out_enabled=True))
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
@@ -41,7 +40,7 @@ async def main():
|
||||
await asyncio.sleep(1)
|
||||
await task.queue_frames([TTSSpeakFrame("Hello there, how is it going!"), EndFrame()])
|
||||
|
||||
runner = PipelineRunner()
|
||||
runner = PipelineRunner(handle_sigint=False if sys.platform == "win32" else True)
|
||||
|
||||
await asyncio.gather(runner.run(task), say_something())
|
||||
|
||||
|
||||
@@ -18,8 +18,7 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.fal import FalImageGenService
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.local.tk import TkLocalTransport
|
||||
from pipecat.transports.local.tk import TkLocalTransport, TkTransportParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -34,7 +33,9 @@ async def main():
|
||||
|
||||
transport = TkLocalTransport(
|
||||
tk_root,
|
||||
TransportParams(camera_out_enabled=True, camera_out_width=1024, camera_out_height=1024),
|
||||
TkTransportParams(
|
||||
camera_out_enabled=True, camera_out_width=1024, camera_out_height=1024
|
||||
),
|
||||
)
|
||||
|
||||
imagegen = FalImageGenService(
|
||||
|
||||
@@ -44,7 +44,8 @@ async def main():
|
||||
runner = PipelineRunner()
|
||||
|
||||
task = PipelineTask(
|
||||
Pipeline([imagegen, transport.output()]), PipelineParams(enable_metrics=True)
|
||||
Pipeline([imagegen, transport.output()]),
|
||||
params=PipelineParams(enable_metrics=True),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
|
||||
@@ -51,7 +51,6 @@ async def main():
|
||||
)
|
||||
|
||||
elevenlabs_tts = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
)
|
||||
|
||||
@@ -30,8 +30,7 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.cartesia import CartesiaHttpTTSService
|
||||
from pipecat.services.fal import FalImageGenService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.local.tk import TkLocalTransport, TkOutputTransport
|
||||
from pipecat.transports.local.tk import TkLocalTransport, TkTransportParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -152,7 +151,7 @@ async def main():
|
||||
|
||||
transport = TkLocalTransport(
|
||||
tk_root,
|
||||
TransportParams(
|
||||
TkTransportParams(
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_width=1024,
|
||||
|
||||
@@ -105,7 +105,10 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(enable_metrics=True, enable_usage_metrics=True),
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
|
||||
@@ -27,7 +27,7 @@ from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.cartesia import CartesiaHttpTTSService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
@@ -91,7 +91,7 @@ async def main():
|
||||
),
|
||||
)
|
||||
|
||||
tts = CartesiaHttpTTSService(
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
@@ -127,7 +127,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -76,7 +76,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -74,7 +74,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -79,7 +79,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -103,7 +103,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -81,7 +81,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -74,7 +74,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
103
examples/foundational/07d-interruptible-elevenlabs-http.py
Normal file
103
examples/foundational/07d-interruptible-elevenlabs-http.py
Normal file
@@ -0,0 +1,103 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.elevenlabs import ElevenLabsHttpTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsHttpTTSService(
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY", ""),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID", ""),
|
||||
aiohttp_session=session,
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -74,7 +74,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -75,7 +75,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -77,7 +77,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -83,7 +83,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
109
examples/foundational/07g-interruptible-openai.py
Normal file
109
examples/foundational/07g-interruptible-openai.py
Normal file
@@ -0,0 +1,109 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.openai import OpenAILLMService, OpenAISTTService, OpenAITTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
audio_out_sample_rate=24000,
|
||||
transcription_enabled=False,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_audio_passthrough=True,
|
||||
),
|
||||
)
|
||||
|
||||
# You can use the OpenAI compatible API like Groq.
|
||||
# stt = OpenAISTTService(
|
||||
# base_url="https://api.groq.com/openai/v1",
|
||||
# api_key="gsk_***",
|
||||
# model="whisper-large-v3",
|
||||
# )
|
||||
stt = OpenAISTTService(api_key=os.getenv("OPENAI_API_KEY"), model="whisper-1")
|
||||
|
||||
tts = OpenAITTSService(api_key=os.getenv("OPENAI_API_KEY"), voice="alloy")
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -81,7 +81,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -75,7 +75,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -21,6 +21,7 @@ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.gladia import GladiaSTTService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
@@ -80,7 +81,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -38,7 +38,6 @@ async def main():
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
audio_out_sample_rate=24000,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
@@ -72,7 +71,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -88,7 +88,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -81,7 +81,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -18,9 +18,7 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.deepgram import DeepgramSTTService
|
||||
from pipecat.services.google import GoogleTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.google import GoogleLLMService, GoogleSTTService, GoogleTTSService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
@@ -40,21 +38,22 @@ async def main():
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
audio_out_sample_rate=24000,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_audio_passthrough=True,
|
||||
),
|
||||
)
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
stt = GoogleSTTService(
|
||||
params=GoogleSTTService.InputParams(languages=Language.EN_US),
|
||||
)
|
||||
|
||||
tts = GoogleTTSService(
|
||||
voice_id="en-US-Journey-F",
|
||||
params=GoogleTTSService.InputParams(language=Language.EN_US),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
llm = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
@@ -80,7 +79,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -80,7 +80,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -76,7 +76,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -18,7 +18,8 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.openai import OpenAILLMService, OpenAITTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.rime import RimeHttpTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
@@ -37,14 +38,17 @@ async def main():
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
audio_out_sample_rate=24000,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = OpenAITTSService(api_key=os.getenv("OPENAI_API_KEY"), voice="alloy")
|
||||
tts = RimeHttpTTSService(
|
||||
api_key=os.getenv("RIME_API_KEY", ""),
|
||||
voice_id="rex",
|
||||
aiohttp_session=session,
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
@@ -71,7 +75,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
@@ -19,7 +19,7 @@ from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.rime import RimeHttpTTSService
|
||||
from pipecat.services.rime import RimeTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
@@ -44,10 +44,9 @@ async def main():
|
||||
),
|
||||
)
|
||||
|
||||
tts = RimeHttpTTSService(
|
||||
tts = RimeTTSService(
|
||||
api_key=os.getenv("RIME_API_KEY", ""),
|
||||
voice_id="rex",
|
||||
params=RimeHttpTTSService.InputParams(reduce_latency=True),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
@@ -75,7 +74,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -74,7 +74,7 @@ async def main():
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
|
||||
@@ -216,11 +216,7 @@ async def main():
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = GoogleLLMService(
|
||||
model="gemini-1.5-flash-latest",
|
||||
# model="gemini-exp-1114",
|
||||
api_key=os.getenv("GOOGLE_API_KEY"),
|
||||
)
|
||||
llm = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY"), model="gemini-2.0-flash-001")
|
||||
|
||||
messages = [
|
||||
{
|
||||
@@ -255,7 +251,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -74,7 +74,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -48,7 +48,6 @@ async def main():
|
||||
region=os.getenv("AZURE_SPEECH_REGION"),
|
||||
)
|
||||
tts2 = ElevenLabsTTSService(
|
||||
aiohttp_session=session,
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id="jBpfuIE2acCO8z3wKNLl",
|
||||
)
|
||||
|
||||
@@ -21,7 +21,7 @@ from pipecat.frames.frames import (
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
@@ -61,7 +61,6 @@ async def main():
|
||||
"Test",
|
||||
DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_in_sample_rate=24000,
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_is_live=True,
|
||||
@@ -78,7 +77,13 @@ async def main():
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
audio_in_sample_rate=24000,
|
||||
audio_out_sample_rate=24000,
|
||||
),
|
||||
)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
@@ -22,10 +22,9 @@ from pipecat.frames.frames import (
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.local.tk import TkLocalTransport
|
||||
from pipecat.transports.local.tk import TkLocalTransport, TkTransportParams
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
@@ -62,12 +61,12 @@ async def main():
|
||||
tk_root.title("Local Mirror")
|
||||
|
||||
daily_transport = DailyTransport(
|
||||
room_url, token, "Test", DailyParams(audio_in_enabled=True, audio_in_sample_rate=24000)
|
||||
room_url, token, "Test", DailyParams(audio_in_enabled=True)
|
||||
)
|
||||
|
||||
tk_transport = TkLocalTransport(
|
||||
tk_root,
|
||||
TransportParams(
|
||||
TkTransportParams(
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_is_live=True,
|
||||
@@ -82,7 +81,13 @@ async def main():
|
||||
|
||||
pipeline = Pipeline([daily_transport.input(), MirrorProcessor(), tk_transport.output()])
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
audio_in_sample_rate=24000,
|
||||
audio_out_sample_rate=24000,
|
||||
),
|
||||
)
|
||||
|
||||
async def run_tk():
|
||||
while not task.has_finished():
|
||||
|
||||
@@ -76,7 +76,7 @@ async def main():
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
from typing import Optional
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
@@ -32,7 +33,7 @@ logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
class UserImageRequester(FrameProcessor):
|
||||
def __init__(self, participant_id: str | None = None):
|
||||
def __init__(self, participant_id: Optional[str] = None):
|
||||
super().__init__()
|
||||
self._participant_id = participant_id
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
from typing import Optional
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
@@ -32,7 +33,7 @@ logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
class UserImageRequester(FrameProcessor):
|
||||
def __init__(self, participant_id: str | None = None):
|
||||
def __init__(self, participant_id: Optional[str] = None):
|
||||
super().__init__()
|
||||
self._participant_id = participant_id
|
||||
|
||||
@@ -72,9 +73,7 @@ async def main():
|
||||
|
||||
vision_aggregator = VisionImageFrameAggregator()
|
||||
|
||||
google = GoogleLLMService(
|
||||
model="gemini-1.5-flash-latest", api_key=os.getenv("GOOGLE_API_KEY")
|
||||
)
|
||||
google = GoogleLLMService(model="gemini-2.0-flash-001", api_key=os.getenv("GOOGLE_API_KEY"))
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
from typing import Optional
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
@@ -32,7 +33,7 @@ logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
class UserImageRequester(FrameProcessor):
|
||||
def __init__(self, participant_id: str | None = None):
|
||||
def __init__(self, participant_id: Optional[str] = None):
|
||||
super().__init__()
|
||||
self._participant_id = participant_id
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
from typing import Optional
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
@@ -32,7 +33,7 @@ logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
class UserImageRequester(FrameProcessor):
|
||||
def __init__(self, participant_id: str | None = None):
|
||||
def __init__(self, participant_id: Optional[str] = None):
|
||||
super().__init__()
|
||||
self._participant_id = participant_id
|
||||
|
||||
|
||||
@@ -16,8 +16,7 @@ from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.whisper import WhisperSTTService
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.local.audio import LocalAudioTransport
|
||||
from pipecat.transports.local.audio import LocalAudioTransport, LocalAudioTransportParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -34,7 +33,7 @@ class TranscriptionLogger(FrameProcessor):
|
||||
|
||||
|
||||
async def main():
|
||||
transport = LocalAudioTransport(TransportParams(audio_in_enabled=True))
|
||||
transport = LocalAudioTransport(LocalAudioTransportParams(audio_in_enabled=True))
|
||||
|
||||
stt = WhisperSTTService()
|
||||
|
||||
@@ -44,7 +43,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
runner = PipelineRunner()
|
||||
runner = PipelineRunner(handle_sigint=False if sys.platform == "win32" else True)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
@@ -11,9 +11,10 @@ import sys
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from openai.types.chat import ChatCompletionToolParam
|
||||
from runner import configure
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
@@ -65,30 +66,24 @@ async def main():
|
||||
# sent to the same callback with an additional function_name parameter.
|
||||
llm.register_function(None, fetch_weather_from_api, start_callback=start_fetch_weather)
|
||||
|
||||
tools = [
|
||||
ChatCompletionToolParam(
|
||||
type="function",
|
||||
function={
|
||||
"name": "get_current_weather",
|
||||
"description": "Get the current weather",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the users location.",
|
||||
},
|
||||
},
|
||||
"required": ["location", "format"],
|
||||
},
|
||||
weather_function = FunctionSchema(
|
||||
name="get_current_weather",
|
||||
description="Get the current weather",
|
||||
properties={
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
)
|
||||
]
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the user's location.",
|
||||
},
|
||||
},
|
||||
required=["location", "format"],
|
||||
)
|
||||
tools = ToolsSchema(standard_tools=[weather_function])
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
@@ -112,7 +107,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -13,6 +13,8 @@ from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -59,22 +61,18 @@ async def main():
|
||||
)
|
||||
llm.register_function("get_weather", get_weather)
|
||||
|
||||
tools = [
|
||||
{
|
||||
"name": "get_weather",
|
||||
"description": "Get the current weather in a given location",
|
||||
"input_schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
}
|
||||
},
|
||||
"required": ["location"],
|
||||
weather_function = FunctionSchema(
|
||||
name="get_weather",
|
||||
description="Get the current weather",
|
||||
properties={
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
}
|
||||
]
|
||||
},
|
||||
required=["location"],
|
||||
)
|
||||
tools = ToolsSchema(standard_tools=[weather_function])
|
||||
|
||||
# todo: test with very short initial user message
|
||||
|
||||
@@ -99,7 +97,13 @@ async def main():
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True, enable_metrics=True))
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
|
||||
@@ -13,6 +13,8 @@ from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -72,36 +74,29 @@ async def main():
|
||||
llm.register_function("get_weather", get_weather)
|
||||
llm.register_function("get_image", get_image)
|
||||
|
||||
tools = [
|
||||
{
|
||||
"name": "get_weather",
|
||||
"description": "Get the current weather in a given location",
|
||||
"input_schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
}
|
||||
},
|
||||
"required": ["location"],
|
||||
weather_function = FunctionSchema(
|
||||
name="get_weather",
|
||||
description="Get the current weather",
|
||||
properties={
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
},
|
||||
{
|
||||
"name": "get_image",
|
||||
"description": "Get an image from the video stream.",
|
||||
"input_schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"question": {
|
||||
"type": "string",
|
||||
"description": "The question that the user is asking about the image.",
|
||||
}
|
||||
},
|
||||
"required": ["question"],
|
||||
},
|
||||
required=["location"],
|
||||
)
|
||||
get_image_function = FunctionSchema(
|
||||
name="get_image",
|
||||
description="Get an image from the video stream.",
|
||||
properties={
|
||||
"question": {
|
||||
"type": "string",
|
||||
"description": "The question that the user is asking about the image.",
|
||||
}
|
||||
},
|
||||
]
|
||||
required=["question"],
|
||||
)
|
||||
tools = ToolsSchema(standard_tools=[weather_function, get_image_function])
|
||||
|
||||
# todo: test with very short initial user message
|
||||
|
||||
@@ -153,7 +148,13 @@ If you need to use a tool, simply use the tool. Do not tell the user the tool yo
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True, enable_metrics=True))
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
|
||||
@@ -11,9 +11,10 @@ import sys
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from openai.types.chat import ChatCompletionToolParam
|
||||
from runner import configure
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
@@ -69,30 +70,23 @@ async def main():
|
||||
# sent to the same callback with an additional function_name parameter.
|
||||
llm.register_function(None, fetch_weather_from_api, start_callback=start_fetch_weather)
|
||||
|
||||
tools = [
|
||||
ChatCompletionToolParam(
|
||||
type="function",
|
||||
function={
|
||||
"name": "get_current_weather",
|
||||
"description": "Get the current weather",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the users location.",
|
||||
},
|
||||
},
|
||||
"required": ["location", "format"],
|
||||
},
|
||||
weather_function = FunctionSchema(
|
||||
name="get_current_weather",
|
||||
description="Get the current weather",
|
||||
properties={
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
)
|
||||
]
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the user's location.",
|
||||
},
|
||||
},
|
||||
required=["location", "format"],
|
||||
)
|
||||
tools = ToolsSchema(standard_tools=[weather_function])
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
|
||||
@@ -11,9 +11,10 @@ import sys
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from openai.types.chat import ChatCompletionToolParam
|
||||
from runner import configure
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -66,47 +67,34 @@ async def main():
|
||||
llm.register_function("get_weather", get_weather)
|
||||
llm.register_function("get_image", get_image)
|
||||
|
||||
tools = [
|
||||
ChatCompletionToolParam(
|
||||
type="function",
|
||||
function={
|
||||
"name": "get_weather",
|
||||
"description": "Get the current weather",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the users location.",
|
||||
},
|
||||
},
|
||||
"required": ["location", "format"],
|
||||
},
|
||||
weather_function = FunctionSchema(
|
||||
name="get_weather",
|
||||
description="Get the current weather",
|
||||
properties={
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
),
|
||||
ChatCompletionToolParam(
|
||||
type="function",
|
||||
function={
|
||||
"name": "get_image",
|
||||
"description": "Get an image from the video stream.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"question": {
|
||||
"type": "string",
|
||||
"description": "The question to ask the AI to generate an image of",
|
||||
},
|
||||
},
|
||||
"required": ["question"],
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the user's location.",
|
||||
},
|
||||
),
|
||||
]
|
||||
},
|
||||
required=["location"],
|
||||
)
|
||||
get_image_function = FunctionSchema(
|
||||
name="get_image",
|
||||
description="Get an image from the video stream.",
|
||||
properties={
|
||||
"question": {
|
||||
"type": "string",
|
||||
"description": "The question that the user is asking about the image.",
|
||||
}
|
||||
},
|
||||
required=["question"],
|
||||
)
|
||||
tools = ToolsSchema(standard_tools=[weather_function, get_image_function])
|
||||
|
||||
system_prompt = """\
|
||||
You are a helpful assistant who converses with a user and answers questions. Respond concisely to general questions.
|
||||
|
||||
@@ -13,7 +13,10 @@ from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
@@ -30,6 +33,12 @@ logger.add(sys.stderr, level="DEBUG")
|
||||
video_participant_id = None
|
||||
|
||||
|
||||
async def start_fetch_weather(function_name, llm, context):
|
||||
"""Push a frame to the LLM; this is handy when the LLM response might take a while."""
|
||||
await llm.push_frame(TTSSpeakFrame("Let me check on that."))
|
||||
logger.debug(f"Starting fetch_weather_from_api with function_name: {function_name}")
|
||||
|
||||
|
||||
async def get_weather(function_name, tool_call_id, arguments, llm, context, result_callback):
|
||||
location = arguments["location"]
|
||||
await result_callback(f"The weather in {location} is currently 72 degrees and sunny.")
|
||||
@@ -62,53 +71,38 @@ async def main():
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = GoogleLLMService(
|
||||
model="gemini-1.5-flash-latest",
|
||||
# model="gemini-exp-1114",
|
||||
api_key=os.getenv("GOOGLE_API_KEY"),
|
||||
)
|
||||
llm.register_function("get_weather", get_weather)
|
||||
llm = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY"), model="gemini-2.0-flash-001")
|
||||
llm.register_function("get_weather", get_weather, start_fetch_weather)
|
||||
llm.register_function("get_image", get_image)
|
||||
|
||||
tools = [
|
||||
{
|
||||
"function_declarations": [
|
||||
{
|
||||
"name": "get_weather",
|
||||
"description": "Get the current weather",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the users location.",
|
||||
},
|
||||
},
|
||||
"required": ["location", "format"],
|
||||
},
|
||||
},
|
||||
{
|
||||
"name": "get_image",
|
||||
"description": "Get and image from the camera or video stream.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"question": {
|
||||
"type": "string",
|
||||
"description": "The question to to use when running inference on the acquired image.",
|
||||
},
|
||||
},
|
||||
"required": ["question"],
|
||||
},
|
||||
},
|
||||
]
|
||||
}
|
||||
]
|
||||
weather_function = FunctionSchema(
|
||||
name="get_weather",
|
||||
description="Get the current weather",
|
||||
properties={
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the user's location.",
|
||||
},
|
||||
},
|
||||
required=["location", "format"],
|
||||
)
|
||||
get_image_function = FunctionSchema(
|
||||
name="get_image",
|
||||
description="Get an image from the video stream.",
|
||||
properties={
|
||||
"question": {
|
||||
"type": "string",
|
||||
"description": "The question that the user is asking about the image.",
|
||||
}
|
||||
},
|
||||
required=["question"],
|
||||
)
|
||||
tools = ToolsSchema(standard_tools=[weather_function, get_image_function])
|
||||
|
||||
system_prompt = """\
|
||||
You are a helpful assistant who converses with a user and answers questions. Respond concisely to general questions.
|
||||
@@ -149,7 +143,7 @@ indicate you should use the get_image tool are:
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -11,16 +11,17 @@ import sys
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from openai.types.chat import ChatCompletionToolParam
|
||||
from runner import configure
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.groq import GroqLLMService
|
||||
from pipecat.services.groq import GroqLLMService, GroqSTTService
|
||||
from pipecat.services.openai import OpenAILLMContext
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
@@ -50,48 +51,41 @@ async def main():
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_audio_passthrough=True,
|
||||
),
|
||||
)
|
||||
|
||||
stt = GroqSTTService(api_key=os.getenv("GROQ_API_KEY"), model="distil-whisper-large-v3-en")
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = GroqLLMService(
|
||||
api_key=os.getenv("GROQ_API_KEY"), model="llama3-groq-70b-8192-tool-use-preview"
|
||||
)
|
||||
llm = GroqLLMService(api_key=os.getenv("GROQ_API_KEY"), model="llama-3.3-70b-versatile")
|
||||
# Register a function_name of None to get all functions
|
||||
# sent to the same callback with an additional function_name parameter.
|
||||
llm.register_function(None, fetch_weather_from_api, start_callback=start_fetch_weather)
|
||||
|
||||
tools = [
|
||||
ChatCompletionToolParam(
|
||||
type="function",
|
||||
function={
|
||||
"name": "get_current_weather",
|
||||
"description": "Get the current weather",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"unit": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the users location.",
|
||||
},
|
||||
},
|
||||
"required": ["location"],
|
||||
},
|
||||
weather_function = FunctionSchema(
|
||||
name="get_current_weather",
|
||||
description="Get the current weather",
|
||||
properties={
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
)
|
||||
]
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the user's location.",
|
||||
},
|
||||
},
|
||||
required=["location"],
|
||||
)
|
||||
tools = ToolsSchema(standard_tools=[weather_function])
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
@@ -105,6 +99,7 @@ async def main():
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
context_aggregator.user(),
|
||||
llm,
|
||||
tts,
|
||||
@@ -115,7 +110,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -11,9 +11,10 @@ import sys
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from openai.types.chat import ChatCompletionToolParam
|
||||
from runner import configure
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
@@ -66,30 +67,23 @@ async def main():
|
||||
# sent to the same callback with an additional function_name parameter.
|
||||
llm.register_function(None, fetch_weather_from_api, start_callback=start_fetch_weather)
|
||||
|
||||
tools = [
|
||||
ChatCompletionToolParam(
|
||||
type="function",
|
||||
function={
|
||||
"name": "get_current_weather",
|
||||
"description": "Get the current weather",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the users location.",
|
||||
},
|
||||
},
|
||||
"required": ["location", "format"],
|
||||
},
|
||||
weather_function = FunctionSchema(
|
||||
name="get_current_weather",
|
||||
description="Get the current weather",
|
||||
properties={
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
)
|
||||
]
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the user's location.",
|
||||
},
|
||||
},
|
||||
required=["location", "format"],
|
||||
)
|
||||
tools = ToolsSchema(standard_tools=[weather_function])
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
@@ -113,7 +107,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -11,9 +11,10 @@ import sys
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from openai.types.chat import ChatCompletionToolParam
|
||||
from runner import configure
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
@@ -70,30 +71,23 @@ async def main():
|
||||
# sent to the same callback with an additional function_name parameter.
|
||||
llm.register_function(None, fetch_weather_from_api, start_callback=start_fetch_weather)
|
||||
|
||||
tools = [
|
||||
ChatCompletionToolParam(
|
||||
type="function",
|
||||
function={
|
||||
"name": "get_current_weather",
|
||||
"description": "Get the current weather",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the users location.",
|
||||
},
|
||||
},
|
||||
"required": ["location", "format"],
|
||||
},
|
||||
weather_function = FunctionSchema(
|
||||
name="get_current_weather",
|
||||
description="Get the current weather",
|
||||
properties={
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
)
|
||||
]
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the user's location.",
|
||||
},
|
||||
},
|
||||
required=["location", "format"],
|
||||
)
|
||||
tools = ToolsSchema(standard_tools=[weather_function])
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
@@ -117,7 +111,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -11,9 +11,10 @@ import sys
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from openai.types.chat import ChatCompletionToolParam
|
||||
from runner import configure
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
@@ -69,30 +70,23 @@ async def main():
|
||||
# sent to the same callback with an additional function_name parameter.
|
||||
llm.register_function(None, fetch_weather_from_api, start_callback=start_fetch_weather)
|
||||
|
||||
tools = [
|
||||
ChatCompletionToolParam(
|
||||
type="function",
|
||||
function={
|
||||
"name": "get_current_weather",
|
||||
"description": "Get the current weather",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the users location.",
|
||||
},
|
||||
},
|
||||
"required": ["location", "format"],
|
||||
},
|
||||
weather_function = FunctionSchema(
|
||||
name="get_current_weather",
|
||||
description="Get the current weather",
|
||||
properties={
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
)
|
||||
]
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the user's location.",
|
||||
},
|
||||
},
|
||||
required=["location", "format"],
|
||||
)
|
||||
tools = ToolsSchema(standard_tools=[weather_function])
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
@@ -116,7 +110,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -11,9 +11,10 @@ import sys
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from openai.types.chat import ChatCompletionToolParam
|
||||
from runner import configure
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
@@ -69,30 +70,23 @@ async def main():
|
||||
# sent to the same callback with an additional function_name parameter.
|
||||
llm.register_function(None, fetch_weather_from_api, start_callback=start_fetch_weather)
|
||||
|
||||
tools = [
|
||||
ChatCompletionToolParam(
|
||||
type="function",
|
||||
function={
|
||||
"name": "get_current_weather",
|
||||
"description": "Returns the current weather at a location, if one is specified, and defaults to the user's location.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The location to find the weather of, or if not provided, it's the default location.",
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "Whether to use SI or USCS units (celsius or fahrenheit).",
|
||||
},
|
||||
},
|
||||
"required": ["location", "format"],
|
||||
},
|
||||
weather_function = FunctionSchema(
|
||||
name="get_current_weather",
|
||||
description="Get the current weather",
|
||||
properties={
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
)
|
||||
]
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the user's location.",
|
||||
},
|
||||
},
|
||||
required=["location", "format"],
|
||||
)
|
||||
tools = ToolsSchema(standard_tools=[weather_function])
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
@@ -116,7 +110,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -11,9 +11,10 @@ import sys
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from openai.types.chat import ChatCompletionToolParam
|
||||
from runner import configure
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
@@ -66,30 +67,23 @@ async def main():
|
||||
# sent to the same callback with an additional function_name parameter.
|
||||
llm.register_function(None, fetch_weather_from_api, start_callback=start_fetch_weather)
|
||||
|
||||
tools = [
|
||||
ChatCompletionToolParam(
|
||||
type="function",
|
||||
function={
|
||||
"name": "get_current_weather",
|
||||
"description": "Get the current weather for a specific location. You MUST use this function whenever asked about weather.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Use fahrenheit for US locations, celsius for others.",
|
||||
},
|
||||
},
|
||||
"required": ["location", "format"],
|
||||
},
|
||||
weather_function = FunctionSchema(
|
||||
name="get_current_weather",
|
||||
description="Get the current weather",
|
||||
properties={
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
)
|
||||
]
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the user's location.",
|
||||
},
|
||||
},
|
||||
required=["location", "format"],
|
||||
)
|
||||
tools = ToolsSchema(standard_tools=[weather_function])
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
@@ -123,7 +117,7 @@ Start by asking me for my location. Then, use 'get_weather_current' to give me a
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -11,9 +11,10 @@ import sys
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from openai.types.chat import ChatCompletionToolParam
|
||||
from runner import configure
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
@@ -66,30 +67,23 @@ async def main():
|
||||
# sent to the same callback with an additional function_name parameter.
|
||||
llm.register_function(None, fetch_weather_from_api, start_callback=start_fetch_weather)
|
||||
|
||||
tools = [
|
||||
ChatCompletionToolParam(
|
||||
type="function",
|
||||
function={
|
||||
"name": "get_current_weather",
|
||||
"description": "Get the current weather for a specific location. You MUST use this function whenever asked about weather.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Use fahrenheit for US locations, celsius for others.",
|
||||
},
|
||||
},
|
||||
"required": ["location", "format"],
|
||||
},
|
||||
weather_function = FunctionSchema(
|
||||
name="get_current_weather",
|
||||
description="Get the current weather",
|
||||
properties={
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
)
|
||||
]
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the user's location.",
|
||||
},
|
||||
},
|
||||
required=["location", "format"],
|
||||
)
|
||||
tools = ToolsSchema(standard_tools=[weather_function])
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
@@ -123,7 +117,7 @@ Start by asking me for my location. Then, use 'get_weather_current' to give me a
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -11,9 +11,10 @@ import sys
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from openai.types.chat import ChatCompletionToolParam
|
||||
from runner import configure
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
@@ -70,30 +71,23 @@ async def main():
|
||||
# sent to the same callback with an additional function_name parameter.
|
||||
llm.register_function(None, fetch_weather_from_api, start_callback=start_fetch_weather)
|
||||
|
||||
tools = [
|
||||
ChatCompletionToolParam(
|
||||
type="function",
|
||||
function={
|
||||
"name": "get_current_weather",
|
||||
"description": "Get the current weather",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the users location.",
|
||||
},
|
||||
},
|
||||
"required": ["location", "format"],
|
||||
},
|
||||
weather_function = FunctionSchema(
|
||||
name="get_current_weather",
|
||||
description="Get the current weather",
|
||||
properties={
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
)
|
||||
]
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the user's location.",
|
||||
},
|
||||
},
|
||||
required=["location", "format"],
|
||||
)
|
||||
tools = ToolsSchema(standard_tools=[weather_function])
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
@@ -117,7 +111,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
106
examples/foundational/14n-function-calling-perplexity.py
Normal file
106
examples/foundational/14n-function-calling-perplexity.py
Normal file
@@ -0,0 +1,106 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""This example demonstrates using the Perplexity API as a drop-in replacement for OpenAI.
|
||||
|
||||
Note that while this file is in the function-calling examples, Perplexity's API does not
|
||||
currently support function calling. The example shows basic chat completion functionality
|
||||
using Perplexity's API while maintaining compatibility with the OpenAI interface.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from openai.types.chat import ChatCompletionToolParam
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMContext, OpenAILLMService
|
||||
from pipecat.services.perplexity import PerplexityLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = PerplexityLLMService(api_key=os.getenv("PERPLEXITY_API_KEY"), model="sonar")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
context_aggregator.user(),
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
context_aggregator.assistant(),
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -0,0 +1,131 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.google import GoogleLLMOpenAIBetaService
|
||||
from pipecat.services.openai import OpenAILLMContext
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def start_fetch_weather(function_name, llm, context):
|
||||
"""Push a frame to the LLM; this is handy when the LLM response might take a while."""
|
||||
await llm.push_frame(TTSSpeakFrame("Let me check on that."))
|
||||
logger.debug(f"Starting fetch_weather_from_api with function_name: {function_name}")
|
||||
|
||||
|
||||
async def fetch_weather_from_api(function_name, tool_call_id, args, llm, context, result_callback):
|
||||
await result_callback({"conditions": "nice", "temperature": "75"})
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY", ""),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID", ""),
|
||||
)
|
||||
|
||||
llm = GoogleLLMOpenAIBetaService(api_key=os.getenv("GEMINI_API_KEY"))
|
||||
# Register a function_name of None to get all functions
|
||||
# sent to the same callback with an additional function_name parameter.
|
||||
llm.register_function(
|
||||
"get_current_weather", fetch_weather_from_api, start_callback=start_fetch_weather
|
||||
)
|
||||
|
||||
weather_function = FunctionSchema(
|
||||
name="get_current_weather",
|
||||
description="Get the current weather",
|
||||
properties={
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the user's location.",
|
||||
},
|
||||
},
|
||||
required=["location", "format"],
|
||||
)
|
||||
tools = ToolsSchema(standard_tools=[weather_function])
|
||||
messages = [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Start a conversation with 'Hey there' to get the current weather.",
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages, tools)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
context_aggregator.user(),
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
context_aggregator.assistant(),
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -133,7 +133,7 @@ async def main():
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
|
||||
@@ -126,7 +126,7 @@ async def main():
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
|
||||
@@ -85,7 +85,13 @@ async def main():
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True, enable_metrics=True))
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
),
|
||||
)
|
||||
|
||||
# When a participant joins, start transcription for that participant so the
|
||||
# bot can "hear" and respond to them.
|
||||
|
||||
@@ -108,7 +108,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
|
||||
@@ -38,7 +38,6 @@ async def main():
|
||||
"GStreamer",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
audio_out_is_live=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_width=1280,
|
||||
camera_out_height=720,
|
||||
@@ -51,8 +50,6 @@ async def main():
|
||||
out_params=GStreamerPipelineSource.OutputParams(
|
||||
video_width=1280,
|
||||
video_height=720,
|
||||
audio_sample_rate=24000,
|
||||
audio_channels=1,
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -80,9 +80,7 @@ async def main():
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_in_sample_rate=24000,
|
||||
audio_out_enabled=True,
|
||||
audio_out_sample_rate=24000,
|
||||
transcription_enabled=False,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.8)),
|
||||
@@ -156,7 +154,7 @@ Remember, your responses should be short. Just one or two sentences, usually."""
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
179
examples/foundational/19a-azure-realtime-beta.py
Normal file
179
examples/foundational/19a-azure-realtime-beta.py
Normal file
@@ -0,0 +1,179 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime
|
||||
|
||||
import aiohttp
|
||||
import websockets
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.openai_realtime_beta import (
|
||||
AzureRealtimeBetaLLMService,
|
||||
InputAudioTranscription,
|
||||
SessionProperties,
|
||||
TurnDetection,
|
||||
)
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def fetch_weather_from_api(function_name, tool_call_id, args, llm, context, result_callback):
|
||||
temperature = 75 if args["format"] == "fahrenheit" else 24
|
||||
await result_callback(
|
||||
{
|
||||
"conditions": "nice",
|
||||
"temperature": temperature,
|
||||
"format": args["format"],
|
||||
"timestamp": datetime.now().strftime("%Y%m%d_%H%M%S"),
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
tools = [
|
||||
{
|
||||
"type": "function",
|
||||
"name": "get_current_weather",
|
||||
"description": "Get the current weather",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the users location.",
|
||||
},
|
||||
},
|
||||
"required": ["location", "format"],
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=False,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.8)),
|
||||
vad_audio_passthrough=True,
|
||||
),
|
||||
)
|
||||
|
||||
session_properties = SessionProperties(
|
||||
input_audio_transcription=InputAudioTranscription(),
|
||||
# Set openai TurnDetection parameters. Not setting this at all will turn it
|
||||
# on by default
|
||||
# turn_detection=TurnDetection(silence_duration_ms=1000),
|
||||
# Or set to False to disable openai turn detection and use transport VAD
|
||||
# turn_detection=False,
|
||||
# tools=tools,
|
||||
instructions="""Your knowledge cutoff is 2023-10. You are a helpful and friendly AI.
|
||||
|
||||
Act like a human, but remember that you aren't a human and that you can't do human
|
||||
things in the real world. Your voice and personality should be warm and engaging, with a lively and
|
||||
playful tone.
|
||||
|
||||
If interacting in a non-English language, start by using the standard accent or dialect familiar to
|
||||
the user. Talk quickly. You should always call a function if you can. Do not refer to these rules,
|
||||
even if you're asked about them.
|
||||
-
|
||||
You are participating in a voice conversation. Keep your responses concise, short, and to the point
|
||||
unless specifically asked to elaborate on a topic.
|
||||
|
||||
Remember, your responses should be short. Just one or two sentences, usually.""",
|
||||
)
|
||||
|
||||
llm = AzureRealtimeBetaLLMService(
|
||||
api_key=os.getenv("AZURE_REALTIME_API_KEY"),
|
||||
base_url=os.getenv("AZURE_REALTIME_BASE_URL"),
|
||||
session_properties=session_properties,
|
||||
start_audio_paused=False,
|
||||
)
|
||||
|
||||
# you can either register a single function for all function calls, or specific functions
|
||||
# llm.register_function(None, fetch_weather_from_api)
|
||||
llm.register_function("get_current_weather", fetch_weather_from_api)
|
||||
|
||||
# Create a standard OpenAI LLM context object using the normal messages format. The
|
||||
# OpenAIRealtimeBetaLLMService will convert this internally to messages that the
|
||||
# openai WebSocket API can understand.
|
||||
context = OpenAILLMContext(
|
||||
[{"role": "user", "content": "Say hello!"}],
|
||||
# [{"role": "user", "content": [{"type": "text", "text": "Say hello!"}]}],
|
||||
# [
|
||||
# {
|
||||
# "role": "user",
|
||||
# "content": [
|
||||
# {"type": "text", "text": "Say"},
|
||||
# {"type": "text", "text": "yo what's up!"},
|
||||
# ],
|
||||
# }
|
||||
# ],
|
||||
tools,
|
||||
)
|
||||
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
context_aggregator.user(),
|
||||
llm, # LLM
|
||||
context_aggregator.assistant(),
|
||||
transport.output(), # Transport bot output
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
# report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -212,7 +212,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -177,9 +177,7 @@ async def main():
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_in_sample_rate=24000,
|
||||
audio_out_enabled=True,
|
||||
audio_out_sample_rate=24000,
|
||||
transcription_enabled=False,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.8)),
|
||||
@@ -239,7 +237,7 @@ Remember, your responses should be short. Just one or two sentences, usually."""
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -209,7 +209,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -237,7 +237,7 @@ async def main():
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
||||
)
|
||||
|
||||
llm = GoogleLLMService(model="gemini-1.5-flash-latest", api_key=os.getenv("GOOGLE_API_KEY"))
|
||||
llm = GoogleLLMService(model="gemini-2.0-flash-001", api_key=os.getenv("GOOGLE_API_KEY"))
|
||||
|
||||
# you can either register a single function for all function calls, or specific functions
|
||||
# llm.register_function(None, fetch_weather_from_api)
|
||||
@@ -263,7 +263,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -87,7 +87,11 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
# We just use 16000 because that's what Tavus is expecting and
|
||||
# we avoid resampling.
|
||||
audio_in_sample_rate=16000,
|
||||
audio_out_sample_rate=16000,
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user