Compare commits
473 Commits
hush/realt
...
cb/multi-t
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
052b2439a5 | ||
|
|
d39e46a9e1 | ||
|
|
031879423a | ||
|
|
df1aac0cc8 | ||
|
|
676f0a7b64 | ||
|
|
f948a144f8 | ||
|
|
0b8486ce39 | ||
|
|
d4ae091ddd | ||
|
|
9e0a57a6de | ||
|
|
fc4c1e4110 | ||
|
|
9b740d9e72 | ||
|
|
b03563765f | ||
|
|
6466573b84 | ||
|
|
b42dc83696 | ||
|
|
fe5931b884 | ||
|
|
4b438ff7d7 | ||
|
|
89a8c16676 | ||
|
|
c4c92585f9 | ||
|
|
ec00edc893 | ||
|
|
c226c20e12 | ||
|
|
78e6669105 | ||
|
|
79f29e14dd | ||
|
|
d4a00fd080 | ||
|
|
d4186fa115 | ||
|
|
3536cbcd13 | ||
|
|
e3bcb70b13 | ||
|
|
19a82f9522 | ||
|
|
8c0a847449 | ||
|
|
e3704cd1a1 | ||
|
|
1ba037865b | ||
|
|
909520f76e | ||
|
|
d06cfcd597 | ||
|
|
2579d0cf57 | ||
|
|
1ec20b2e74 | ||
|
|
55a6e5aa4c | ||
|
|
2229730169 | ||
|
|
24b54c66ee | ||
|
|
a14205415f | ||
|
|
18b56d4a10 | ||
|
|
b85bd91d08 | ||
|
|
23f3285a7d | ||
|
|
94f6436619 | ||
|
|
480692971c | ||
|
|
5df5f6ae4c | ||
|
|
6940112ab9 | ||
|
|
80584e9138 | ||
|
|
1fd01e715d | ||
|
|
a7a1cd0cde | ||
|
|
e5a6b9d2b4 | ||
|
|
169b50af61 | ||
|
|
31311d8ac5 | ||
|
|
bfd06b321d | ||
|
|
3efbcab39c | ||
|
|
b40ca391f5 | ||
|
|
43008c8c5b | ||
|
|
3a37b11e56 | ||
|
|
9ea81bc982 | ||
|
|
98b499e2e9 | ||
|
|
72c8f6c8c3 | ||
|
|
ea61256ddc | ||
|
|
babafadbe4 | ||
|
|
a5660f6dc7 | ||
|
|
64ad916c5f | ||
|
|
13d0563298 | ||
|
|
20a1dd066d | ||
|
|
56f6e3ceb4 | ||
|
|
3afab63870 | ||
|
|
d3b9a0aab0 | ||
|
|
6b21081a7d | ||
|
|
648bdea64c | ||
|
|
ed387e876a | ||
|
|
2fb9aa4d76 | ||
|
|
9eba8f1637 | ||
|
|
43c255f58a | ||
|
|
121e70a029 | ||
|
|
70e28a0547 | ||
|
|
c9a93f2504 | ||
|
|
8a12470efd | ||
|
|
05d53bc66f | ||
|
|
e763cd7bee | ||
|
|
94ec5118e6 | ||
|
|
7203ef6885 | ||
|
|
3074a62bb1 | ||
|
|
31712b84ac | ||
|
|
c99ec0b0b7 | ||
|
|
cd7abd2962 | ||
|
|
c7544954cf | ||
|
|
4f390b15a3 | ||
|
|
f2a05b065d | ||
|
|
5d5041eb2b | ||
|
|
f4dc66cb13 | ||
|
|
b88744b18d | ||
|
|
209de2638d | ||
|
|
5d829fb6a9 | ||
|
|
a978a5cd4a | ||
|
|
b9ea3f0fd9 | ||
|
|
d2f5ee2915 | ||
|
|
acddddc508 | ||
|
|
0c2c6fa771 | ||
|
|
80088c6138 | ||
|
|
766639a9a4 | ||
|
|
675e2b1498 | ||
|
|
af6c23f7b1 | ||
|
|
d212e88030 | ||
|
|
d6758bf2ad | ||
|
|
5abfb15300 | ||
|
|
f576254d61 | ||
|
|
a90807a3d2 | ||
|
|
a06fc4ce50 | ||
|
|
80cb4497f0 | ||
|
|
8aa878c5e9 | ||
|
|
e982b3d919 | ||
|
|
8945fd1fc6 | ||
|
|
16b97d151b | ||
|
|
f7ac142ad2 | ||
|
|
2355067f61 | ||
|
|
76f9626d35 | ||
|
|
f82c2566e8 | ||
|
|
b6007bb3d6 | ||
|
|
311a5360ad | ||
|
|
62cb0376f2 | ||
|
|
91a69b7029 | ||
|
|
1d4d7f28a1 | ||
|
|
a55a7bbb96 | ||
|
|
a394b35e85 | ||
|
|
aa85df4fd6 | ||
|
|
3bb1f5f7a8 | ||
|
|
7c115f9d59 | ||
|
|
a82b847971 | ||
|
|
50515aa842 | ||
|
|
b348fde32b | ||
|
|
45787520b2 | ||
|
|
053bf72da2 | ||
|
|
ca4893397a | ||
|
|
c1f6a4e079 | ||
|
|
135ed811f1 | ||
|
|
055a3f1c53 | ||
|
|
750bb88586 | ||
|
|
c4f9171fe1 | ||
|
|
d223201c3f | ||
|
|
86701fd3c7 | ||
|
|
b414077a07 | ||
|
|
15f23929e9 | ||
|
|
cc9e4047d0 | ||
|
|
4ef4dcefce | ||
|
|
f3caa8cf7a | ||
|
|
e5470fec7a | ||
|
|
887c197bce | ||
|
|
f5d49fea81 | ||
|
|
e087f6ec5d | ||
|
|
406f5a395b | ||
|
|
060bb4c26b | ||
|
|
499e69846d | ||
|
|
e6e339a02e | ||
|
|
dc2ee2bf0a | ||
|
|
d982fc35d8 | ||
|
|
72d373e565 | ||
|
|
59fdfe697d | ||
|
|
97c9e0676e | ||
|
|
aeac40312e | ||
|
|
ce9f75a851 | ||
|
|
c45d852f6b | ||
|
|
55cc1fe9f6 | ||
|
|
1ba7e2d6fa | ||
|
|
1b8d326b49 | ||
|
|
077952b658 | ||
|
|
e694971423 | ||
|
|
d00ae492e5 | ||
|
|
9450b07ec5 | ||
|
|
19b464ba23 | ||
|
|
8aebf00c2d | ||
|
|
01458895c2 | ||
|
|
2082d023ef | ||
|
|
c99436b80e | ||
|
|
f884c93826 | ||
|
|
2780c6eed6 | ||
|
|
7ad36eeaf4 | ||
|
|
67a93d09c2 | ||
|
|
f3b50bc3c4 | ||
|
|
397bae29f7 | ||
|
|
3b3fdd0da1 | ||
|
|
a9b1298f3b | ||
|
|
2fcf4e6d70 | ||
|
|
fcb8b9a5b3 | ||
|
|
fee0409f63 | ||
|
|
3be6973e2c | ||
|
|
5184d178ef | ||
|
|
48e8d3968a | ||
|
|
59644a939a | ||
|
|
3311afc581 | ||
|
|
a3ccbf91f7 | ||
|
|
3ed764a769 | ||
|
|
be8d5a31f5 | ||
|
|
480bcc1ab1 | ||
|
|
dd81048ddb | ||
|
|
04d462ff02 | ||
|
|
7e7aaeddd9 | ||
|
|
e77f7c8456 | ||
|
|
442f18d47b | ||
|
|
fc78e6fc5a | ||
|
|
d71b520153 | ||
|
|
3b4d91e1c1 | ||
|
|
09c62d939a | ||
|
|
f2b9789acf | ||
|
|
1592703e77 | ||
|
|
66e42ae410 | ||
|
|
8d6dbbe293 | ||
|
|
2ac8f2ec2d | ||
|
|
41688205be | ||
|
|
541a4b6063 | ||
|
|
8f6d92ce7d | ||
|
|
96fa6c19a8 | ||
|
|
c9f7882728 | ||
|
|
0fdd577ae7 | ||
|
|
2133152e5b | ||
|
|
c3f3f4603d | ||
|
|
b20ce7d655 | ||
|
|
66ba1116a4 | ||
|
|
08956e914a | ||
|
|
5a39f146f6 | ||
|
|
de8a831ee1 | ||
|
|
efa5f133d7 | ||
|
|
44380bc8c0 | ||
|
|
721ee75887 | ||
|
|
ada68f0699 | ||
|
|
70dbf0d6fc | ||
|
|
f0774268cc | ||
|
|
2ae5bdd8a9 | ||
|
|
0d74bcacb7 | ||
|
|
f94a099111 | ||
|
|
3dd4ef7230 | ||
|
|
e707efbffa | ||
|
|
7b594093dd | ||
|
|
31317ce77d | ||
|
|
f693a3c70f | ||
|
|
39ca607bbb | ||
|
|
9840abd85b | ||
|
|
1075c25055 | ||
|
|
e91610c69e | ||
|
|
1a20d9bed7 | ||
|
|
d009b80438 | ||
|
|
fe5fc30211 | ||
|
|
be2cf6d556 | ||
|
|
e80bfe22de | ||
|
|
214c8f79eb | ||
|
|
16accafa6d | ||
|
|
4449e9a25b | ||
|
|
bfdf52bd69 | ||
|
|
2b4debec11 | ||
|
|
f4626287cd | ||
|
|
e4bb4aacb4 | ||
|
|
f298febacf | ||
|
|
c51291190b | ||
|
|
e0c3f6ad83 | ||
|
|
b1d506c137 | ||
|
|
1f6ed01ba6 | ||
|
|
3e9678db84 | ||
|
|
d455fd070e | ||
|
|
d1550d5a85 | ||
|
|
c15286b148 | ||
|
|
a98000fd1d | ||
|
|
fc06306efd | ||
|
|
039fa59165 | ||
|
|
0e14cec139 | ||
|
|
2417ec4f92 | ||
|
|
7cdcd1c3d1 | ||
|
|
b6be25ab84 | ||
|
|
e18d9f6a11 | ||
|
|
3a73346a41 | ||
|
|
8d58d1c8bb | ||
|
|
07a77e066f | ||
|
|
3024896d3d | ||
|
|
a3b5e4413a | ||
|
|
f31e77c4f6 | ||
|
|
8942c2e053 | ||
|
|
afb26be0ad | ||
|
|
48d73a2636 | ||
|
|
da531dabfd | ||
|
|
336e2f1579 | ||
|
|
fc0f404d26 | ||
|
|
54620133d4 | ||
|
|
e7224473f2 | ||
|
|
1a3a268c9d | ||
|
|
11984b89b7 | ||
|
|
1dbad2326a | ||
|
|
2e0c6c2bd1 | ||
|
|
5f28834588 | ||
|
|
7f1ccab445 | ||
|
|
7ddac4eb88 | ||
|
|
514ecda755 | ||
|
|
48b6850df4 | ||
|
|
71a38a120e | ||
|
|
79616de7a4 | ||
|
|
6368fbe0dd | ||
|
|
5dc8b48fbe | ||
|
|
9112ff114f | ||
|
|
32609b1132 | ||
|
|
4303ed4991 | ||
|
|
4677c34663 | ||
|
|
b28276446d | ||
|
|
2dee882710 | ||
|
|
6ec4052f29 | ||
|
|
ddcc1fbb2f | ||
|
|
e731a0d41f | ||
|
|
4918eab4e8 | ||
|
|
11987765d8 | ||
|
|
6f09ee25b8 | ||
|
|
83dda8a759 | ||
|
|
188677e601 | ||
|
|
dc5067407d | ||
|
|
1c19777d5e | ||
|
|
2e1a18503b | ||
|
|
c57fa93a70 | ||
|
|
6885d07e88 | ||
|
|
acd0660f66 | ||
|
|
3f002f8ffb | ||
|
|
d5776c27f4 | ||
|
|
6e6905405b | ||
|
|
571c10403f | ||
|
|
5b6b700214 | ||
|
|
1ad8e28025 | ||
|
|
3458f1b6de | ||
|
|
02dbef8f5a | ||
|
|
1baa52a17e | ||
|
|
c1382b0691 | ||
|
|
5f000efc61 | ||
|
|
fa7da8f5f6 | ||
|
|
8b86f6991d | ||
|
|
d3cd1a6c59 | ||
|
|
24220f38f0 | ||
|
|
1f8752ab03 | ||
|
|
16d7df1c9f | ||
|
|
2474211291 | ||
|
|
b632d71465 | ||
|
|
f8610a69a5 | ||
|
|
624a454f8b | ||
|
|
11ba08b7ba | ||
|
|
11b13d053b | ||
|
|
7dec8431e1 | ||
|
|
ce3f3b2edb | ||
|
|
1b3b4ee04a | ||
|
|
676c5d9ba7 | ||
|
|
6eb3a8409f | ||
|
|
526f9c2e06 | ||
|
|
c9a31ea513 | ||
|
|
2770d64a25 | ||
|
|
8a7e305619 | ||
|
|
8f2dadf5a0 | ||
|
|
c0c7c5d600 | ||
|
|
87004937be | ||
|
|
b426be3067 | ||
|
|
b71e2b97ff | ||
|
|
25dcf7def6 | ||
|
|
30432639b4 | ||
|
|
1bf964a667 | ||
|
|
08fb931ef6 | ||
|
|
c5aa931096 | ||
|
|
d33a4b3a11 | ||
|
|
9cad8bfcc6 | ||
|
|
b084a3e9e7 | ||
|
|
5c9e33bc7a | ||
|
|
93d8ddf4f2 | ||
|
|
0b9c4b2255 | ||
|
|
effb5f6cd8 | ||
|
|
ead555eb4b | ||
|
|
f843482968 | ||
|
|
23a4933af9 | ||
|
|
0d05312071 | ||
|
|
f8e33d8b7b | ||
|
|
f24c5b0aa7 | ||
|
|
d9ef19233a | ||
|
|
357334e3c9 | ||
|
|
da25e0c008 | ||
|
|
c99d02d8bb | ||
|
|
59ea94af86 | ||
|
|
4a363bebf0 | ||
|
|
c196fb5f98 | ||
|
|
5f97f6ff94 | ||
|
|
5860fe5319 | ||
|
|
3522bbb533 | ||
|
|
cfca7269f4 | ||
|
|
e6f269a903 | ||
|
|
468e936a5f | ||
|
|
ecc4411128 | ||
|
|
740ba4e759 | ||
|
|
e56c8f881c | ||
|
|
a747f08017 | ||
|
|
c6c0b73345 | ||
|
|
fde90ee01d | ||
|
|
689a844aaf | ||
|
|
aab98b61a0 | ||
|
|
a62741df94 | ||
|
|
5bd359ada9 | ||
|
|
40562402a2 | ||
|
|
98e5089fbe | ||
|
|
e1c8a09b60 | ||
|
|
154fe65011 | ||
|
|
61f534ca34 | ||
|
|
a91c26785f | ||
|
|
d7e93551d2 | ||
|
|
06c742a2ad | ||
|
|
55b0797fd5 | ||
|
|
21443b9a08 | ||
|
|
4b167a3c3d | ||
|
|
2df77430aa | ||
|
|
2d114b15f9 | ||
|
|
26000b616d | ||
|
|
710eebab09 | ||
|
|
532423eb4c | ||
|
|
bb29e50adb | ||
|
|
4048d6782b | ||
|
|
76d36a312b | ||
|
|
2a75373c04 | ||
|
|
a840b0e815 | ||
|
|
ebcde719a6 | ||
|
|
5c912927bb | ||
|
|
0e55db054e | ||
|
|
5967ac0d4f | ||
|
|
1451483cf7 | ||
|
|
3fe7c1d730 | ||
|
|
c14b85c12b | ||
|
|
9f3c0219d7 | ||
|
|
ec36fef26e | ||
|
|
5f1848d24b | ||
|
|
d6867bd12f | ||
|
|
17a1f30572 | ||
|
|
8e0dc1f256 | ||
|
|
b9100beee3 | ||
|
|
b8bc3d2565 | ||
|
|
3213e85b7d | ||
|
|
de3bcd64c4 | ||
|
|
ad7f1eec12 | ||
|
|
29310b4e92 | ||
|
|
2f4d36a146 | ||
|
|
6c9bb782b1 | ||
|
|
010d9103d4 | ||
|
|
12131eb7c5 | ||
|
|
80b830322a | ||
|
|
8db9d16174 | ||
|
|
1c92fab1fb | ||
|
|
974717d1b9 | ||
|
|
59fb631390 | ||
|
|
4824220260 | ||
|
|
55a338614d | ||
|
|
f033046963 | ||
|
|
6018fc068c | ||
|
|
d5b634301f | ||
|
|
a37eb1049d | ||
|
|
803ea9d8bc | ||
|
|
499bc25217 | ||
|
|
53d403af4b | ||
|
|
a0a8ea1641 | ||
|
|
26c68ccd7c | ||
|
|
fa010c8644 | ||
|
|
d58f398bc4 | ||
|
|
11383a86a1 | ||
|
|
daa52ff8df | ||
|
|
a5f41e22f7 | ||
|
|
530bb5233d | ||
|
|
4a64e09f6c | ||
|
|
74582bb8d5 | ||
|
|
1ca2101e3a | ||
|
|
e80311c323 | ||
|
|
2f24c422b6 | ||
|
|
0d0b9fddef | ||
|
|
1753cc99f4 | ||
|
|
4f8b036abe | ||
|
|
f83c89c202 | ||
|
|
bb89a036e5 | ||
|
|
b994a03466 | ||
|
|
27161f8e3b | ||
|
|
8acf9a488b | ||
|
|
79ac696973 |
@@ -1,7 +1,8 @@
|
|||||||
repos:
|
repos:
|
||||||
- repo: local
|
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||||
|
rev: v0.9.7
|
||||||
hooks:
|
hooks:
|
||||||
- id: ruff-format-hook
|
- id: ruff
|
||||||
name: Check ruff formatting
|
language_version: python3
|
||||||
entry: sh scripts/pre-commit.sh
|
args: [ --select, I, ]
|
||||||
language: system
|
- id: ruff-format
|
||||||
|
|||||||
534
CHANGELOG.md
534
CHANGELOG.md
@@ -9,6 +9,521 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||||||
|
|
||||||
### Added
|
### Added
|
||||||
|
|
||||||
|
- Added new processors `ProducerProcessor` and `ConsumerProcessor`. The
|
||||||
|
producer processor processes frames from the pipeline and decides whether the
|
||||||
|
consumers should consume it or not. If so, the same frame that is received by
|
||||||
|
the producer is sent to the consumer. There can be multiple consumers per
|
||||||
|
producer. These processors can be useful to push frames from one part of a
|
||||||
|
pipeline to a different one (e.g. when using `ParallelPipeline`).
|
||||||
|
|
||||||
|
- Improvements for the `SmallWebRTCTransport`:
|
||||||
|
- Wait until the pipeline is ready before triggering the `connected` event.
|
||||||
|
- Queue messages if the data channel is not ready.
|
||||||
|
- Update the aiortc dependency to fix an issue where the 'video/rtx' MIME
|
||||||
|
type was incorrectly handled as a codec retransmission.
|
||||||
|
- Avoid initial video delays.
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
|
||||||
|
- Fixed an issue in the Azure TTS services where the language was being set
|
||||||
|
incorrectly.
|
||||||
|
|
||||||
|
- Fixed `SmallWebRTCTransport` to support dynamic values for
|
||||||
|
`TransportParams.audio_out_10ms_chunks`. Previously, it only worked with 20ms
|
||||||
|
chunks.
|
||||||
|
|
||||||
|
- Fixed an issue where `LLMAssistantContextAggregator` would prevent a
|
||||||
|
`BotStoppedSpeakingFrame` from moving through the pipeline.
|
||||||
|
|
||||||
|
## [0.0.62] - 2025-04-01 "An April Fools' release"
|
||||||
|
|
||||||
|
### Added
|
||||||
|
|
||||||
|
- Added `TransportParams.audio_out_10ms_chunks` parameter to allow controlling
|
||||||
|
the amount of audio being sent by the output transport. It defaults to 4, so
|
||||||
|
40ms audio chunks are sent.
|
||||||
|
|
||||||
|
- Added `QwenLLMService` for Qwen integration with an OpenAI-compatible
|
||||||
|
interface. Added foundational example `14q-function-calling-qwen.py`.
|
||||||
|
|
||||||
|
- Added `Mem0MemoryService`. Mem0 is a self-improving memory layer for LLM
|
||||||
|
applications. Learn more at: https://mem0.ai/.
|
||||||
|
|
||||||
|
- Added `WhisperSTTServiceMLX` for Whisper transcription on Apple Silicon.
|
||||||
|
See example in `examples/foundational/13e-whisper-mlx.py`. Latency of
|
||||||
|
completed transcription using Whisper large-v3-turbo on an M4 macbook is
|
||||||
|
~500ms.
|
||||||
|
|
||||||
|
- Added `SmallWebRTCTransport`, a new P2P WebRTC transport.
|
||||||
|
|
||||||
|
- Created two examples in `p2p-webrtc`:
|
||||||
|
- **video-transform**: Demonstrates sending and receiving audio/video with
|
||||||
|
`SmallWebRTCTransport` using `TypeScript`. Includes video frame
|
||||||
|
processing with OpenCV.
|
||||||
|
- **voice-agent**: A minimal example of creating a voice agent with
|
||||||
|
`SmallWebRTCTransport`.
|
||||||
|
|
||||||
|
- `GladiaSTTService` now has comprehensive support for the latest API config
|
||||||
|
options, including model, language detection, preprocessing, custom
|
||||||
|
vocabulary, custom spelling, translation, and message filtering options.
|
||||||
|
|
||||||
|
- Added `SmallWebRTCTransport`, a new P2P WebRTC transport.
|
||||||
|
|
||||||
|
- Created two examples in `p2p-webrtc`:
|
||||||
|
- **video-transform**: Demonstrates sending and receiving audio/video with
|
||||||
|
`SmallWebRTCTransport` using `TypeScript`. Includes video frame
|
||||||
|
processing with OpenCV.
|
||||||
|
- **voice-agent**: A minimal example of creating a voice agent with
|
||||||
|
`SmallWebRTCTransport`.
|
||||||
|
|
||||||
|
- Added support to `ProtobufFrameSerializer` to send the messages from
|
||||||
|
`TransportMessageFrame` and `TransportMessageUrgentFrame`.
|
||||||
|
|
||||||
|
- Added support for a new TTS service, `PiperTTSService`.
|
||||||
|
(see https://github.com/rhasspy/piper/)
|
||||||
|
|
||||||
|
- It is now possible to tell whether `UserStartedSpeakingFrame` or
|
||||||
|
`UserStoppedSpeakingFrame` have been generated because of emulation frames.
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
|
||||||
|
- `FunctionCallResultFrame`s are now system frames. This is to prevent function
|
||||||
|
call results from being discarded during interruptions.
|
||||||
|
|
||||||
|
- Pipecat services have been reorganized into packages. Each package can have
|
||||||
|
one or more of the following modules (in the future new module names might be
|
||||||
|
needed) depending on the services implemented:
|
||||||
|
|
||||||
|
- image: for image generation services
|
||||||
|
- llm: for LLM services
|
||||||
|
- memory: for memory services
|
||||||
|
- stt: for Speech-To-Text services
|
||||||
|
- tts: for Text-To-Speech services
|
||||||
|
- video: for video generation services
|
||||||
|
- vision: for video recognition services
|
||||||
|
|
||||||
|
- Base classes for AI services have been reorganized into modules. They can now
|
||||||
|
be found in
|
||||||
|
`pipecat.services.[ai_service,image_service,llm_service,stt_service,vision_service]`.
|
||||||
|
|
||||||
|
- `GladiaSTTService` now uses the `solaria-1` model by default. Other params
|
||||||
|
use Gladia's default values. Added support for more language codes.
|
||||||
|
|
||||||
|
### Deprecated
|
||||||
|
|
||||||
|
- All Pipecat services imports have been deprecated and a warning will be shown
|
||||||
|
when using the old import. The new import should be
|
||||||
|
`pipecat.services.[service].[image,llm,memory,stt,tts,video,vision]`. For
|
||||||
|
example, `from pipecat.services.openai.llm import OpenAILLMService`.
|
||||||
|
|
||||||
|
- Import for AI services base classes from `pipecat.services.ai_services` is now
|
||||||
|
deprecated, use one of
|
||||||
|
`pipecat.services.[ai_service,image_service,llm_service,stt_service,vision_service]`.
|
||||||
|
|
||||||
|
- Deprecated the `language` parameter in `GladiaSTTService.InputParams` in
|
||||||
|
favor of `language_config`, which better aligns with Gladia's API.
|
||||||
|
|
||||||
|
- Deprecated using `GladiaSTTService.InputParams` directly. Use the new
|
||||||
|
`GladiaInputParams` class instead.
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
|
||||||
|
- Fixed a `FastAPIWebsocketTransport` and `WebsocketClientTransport` issue that
|
||||||
|
would cause the transport to be closed prematurely, preventing the internally
|
||||||
|
queued audio to be sent. The same issue could also cause an infinite loop
|
||||||
|
while using an output mixer and when sending an `EndFrame`, preventing the bot
|
||||||
|
to finish.
|
||||||
|
|
||||||
|
- Fixed an issue that could cause the `TranscriptionUpdateFrame` being pushed
|
||||||
|
because of an interruption to be discarded.
|
||||||
|
|
||||||
|
- Fixed an issue that would cause `SegmentedSTTService` based services
|
||||||
|
(e.g. `OpenAISTTService`) to try to transcribe non-spoken audio, causing
|
||||||
|
invalid transcriptions.
|
||||||
|
|
||||||
|
- Fixed an issue where `GoogleTTSService` was emitting two `TTSStoppedFrames`.
|
||||||
|
|
||||||
|
### Performance
|
||||||
|
|
||||||
|
- Output transports now send 40ms audio chunks instead of 20ms. This should
|
||||||
|
improve performance.
|
||||||
|
|
||||||
|
- `BotSpeakingFrame`s are now sent every 200ms. If the output transport audio chunks
|
||||||
|
are higher than 200ms then they will be sent at every audio chunk.
|
||||||
|
|
||||||
|
### Other
|
||||||
|
|
||||||
|
- Added foundational example `37-mem0.py` demonstrating how to use the
|
||||||
|
`Mem0MemoryService`.
|
||||||
|
|
||||||
|
- Added foundational example `13e-whisper-mlx.py` demonstrating how to use the
|
||||||
|
`WhisperSTTServiceMLX`.
|
||||||
|
|
||||||
|
## [0.0.61] - 2025-03-26
|
||||||
|
|
||||||
|
### Added
|
||||||
|
|
||||||
|
- Added a new frame, `LLMSetToolChoiceFrame`, which provides a mechanism
|
||||||
|
for modifying the `tool_choice` in the context.
|
||||||
|
|
||||||
|
- Added `GroqTTSService` which provides text-to-speech functionality using
|
||||||
|
Groq's API.
|
||||||
|
|
||||||
|
- Added support in `DailyTransport` for updating remote participants'
|
||||||
|
`canReceive` permission via the `update_remote_participants()` method, by
|
||||||
|
bumping the daily-python dependency to >= 0.16.0.
|
||||||
|
|
||||||
|
- ElevenLabs TTS services now support a sample rate of 8000.
|
||||||
|
|
||||||
|
- Added support for `instructions` in `OpenAITTSService`.
|
||||||
|
|
||||||
|
- Added support for `base_url` in `OpenAIImageGenService` and
|
||||||
|
`OpenAITTSService`.
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
|
||||||
|
- Fixed an issue in `RTVIObserver` that prevented handling of Google LLM
|
||||||
|
context messages. The observer now processes both OpenAI-style and
|
||||||
|
Google-style contexts.
|
||||||
|
|
||||||
|
- Fixed an issue in Daily involving switching virtual devices, by bumping the
|
||||||
|
daily-python dependency to >= 0.16.1.
|
||||||
|
|
||||||
|
- Fixed a `GoogleAssistantContextAggregator` issue where function calls
|
||||||
|
placeholders were not being updated when the function call result was
|
||||||
|
different from a string.
|
||||||
|
|
||||||
|
- Fixed an issue that would cause `LLMAssistantContextAggregator` to block
|
||||||
|
processing more frames while processing a function call result.
|
||||||
|
|
||||||
|
- Fixed an issue where the `RTVIObserver` would report two bot started and
|
||||||
|
stopped speaking events for each bot turn.
|
||||||
|
|
||||||
|
- Fixed an issue in `UltravoxSTTService` that caused improper audio processing
|
||||||
|
and incorrect LLM frame output.
|
||||||
|
|
||||||
|
### Other
|
||||||
|
|
||||||
|
- Added `examples/foundational/07x-interruptible-local.py` to show how a local
|
||||||
|
transport can be used.
|
||||||
|
|
||||||
|
## [0.0.60] - 2025-03-20
|
||||||
|
|
||||||
|
### Added
|
||||||
|
|
||||||
|
- Added `default_headers` parameter to `BaseOpenAILLMService` constructor.
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
|
||||||
|
- Rolled back to `deepgram-sdk` 3.8.0 since 3.10.1 was causing connection issues.
|
||||||
|
|
||||||
|
- Changed the default `InputAudioTranscription` model to `gpt-4o-transcribe`
|
||||||
|
for `OpenAIRealtimeBetaLLMService`.
|
||||||
|
|
||||||
|
### Other
|
||||||
|
|
||||||
|
- Update the `19-openai-realtime-beta.py` and `19a-azure-realtime-beta.py`
|
||||||
|
examples to use the FunctionSchema format.
|
||||||
|
|
||||||
|
## [0.0.59] - 2025-03-20
|
||||||
|
|
||||||
|
### Added
|
||||||
|
|
||||||
|
- When registering a function call it is now possible to indicate if you want
|
||||||
|
the function call to be cancelled if there's a user interruption via
|
||||||
|
`cancel_on_interruption` (defaults to False). This is now possible because
|
||||||
|
function calls are executed concurrently.
|
||||||
|
|
||||||
|
- Added support for detecting idle pipelines. By default, if no activity has
|
||||||
|
been detected during 5 minutes, the `PipelineTask` will be automatically
|
||||||
|
cancelled. It is possible to override this behavior by passing
|
||||||
|
`cancel_on_idle_timeout=False`. It is also possible to change the default
|
||||||
|
timeout with `idle_timeout_secs` or the frames that prevent the pipeline from
|
||||||
|
being idle with `idle_timeout_frames`. Finally, an `on_idle_timeout` event
|
||||||
|
handler will be triggered if the idle timeout is reached (whether the pipeline
|
||||||
|
task is cancelled or not).
|
||||||
|
|
||||||
|
- Added `FalSTTService`, which provides STT for Fal's Wizper API.
|
||||||
|
|
||||||
|
- Added a `reconnect_on_error` parameter to websocket-based TTS services as well
|
||||||
|
as a `on_connection_error` event handler. The `reconnect_on_error` indicates
|
||||||
|
whether the TTS service should reconnect on error. The `on_connection_error`
|
||||||
|
will always get called if there's any error no matter the value of
|
||||||
|
`reconnect_on_error`. This allows, for example, to fallback to a different TTS
|
||||||
|
provider if something goes wrong with the current one.
|
||||||
|
|
||||||
|
- Added new `SkipTagsAggregator` that extends `BaseTextAggregator` to aggregate
|
||||||
|
text and skips end of sentence matching if aggregated text is between
|
||||||
|
start/end tags.
|
||||||
|
|
||||||
|
- Added new `PatternPairAggregator` that extends `BaseTextAggregator` to
|
||||||
|
identify content between matching pattern pairs in streamed text. This allows
|
||||||
|
for detection and processing of structured content like XML-style tags that
|
||||||
|
may span across multiple text chunks or sentence boundaries.
|
||||||
|
|
||||||
|
- Added new `BaseTextAggregator`. Text aggregators are used by the TTS service
|
||||||
|
to aggregate LLM tokens and decide when the aggregated text should be pushed
|
||||||
|
to the TTS service. They also allow for the text to be manipulated while it's
|
||||||
|
being aggregated. A text aggregator can be passed via `text_aggregator` to the
|
||||||
|
TTS service.
|
||||||
|
|
||||||
|
- Added new `sample_rate` constructor parameter to `TavusVideoService` to allow
|
||||||
|
changing the output sample rate.
|
||||||
|
|
||||||
|
- Added new `NeuphonicTTSService`.
|
||||||
|
(see https://neuphonic.com)
|
||||||
|
|
||||||
|
- Added new `UltravoxSTTService`.
|
||||||
|
(see https://github.com/fixie-ai/ultravox)
|
||||||
|
|
||||||
|
- Added `on_frame_reached_upstream` and `on_frame_reached_downstream` event
|
||||||
|
handlers to `PipelineTask`. Those events will be called when a frame reaches
|
||||||
|
the beginning or end of the pipeline respectively. Note that by default, the
|
||||||
|
event handlers will not be called unless a filter is set with
|
||||||
|
`PipelineTask.set_reached_upstream_filter()` or
|
||||||
|
`PipelineTask.set_reached_downstream_filter()`.
|
||||||
|
|
||||||
|
- Added support for Chirp voices in `GoogleTTSService`.
|
||||||
|
|
||||||
|
- Added a `flush_audio()` method to `FishTTSService` and `LmntTTSService`.
|
||||||
|
|
||||||
|
- Added a `set_language` convenience method for `GoogleSTTService`, allowing
|
||||||
|
you to set a single language. This is in addition to the `set_languages`
|
||||||
|
method which allows you to set a list of languages.
|
||||||
|
|
||||||
|
- Added `on_user_turn_audio_data` and `on_bot_turn_audio_data` to
|
||||||
|
`AudioBufferProcessor`. This gives the ability to grab the audio of only that
|
||||||
|
turn for both the user and the bot.
|
||||||
|
|
||||||
|
- Added new base class `BaseObject` which is now the base class of
|
||||||
|
`FrameProcessor`, `PipelineRunner`, `PipelineTask` and `BaseTransport`. The
|
||||||
|
new `BaseObject` adds support for event handlers.
|
||||||
|
|
||||||
|
- Added support for a unified format for specifying function calling across all
|
||||||
|
LLM services.
|
||||||
|
|
||||||
|
```python
|
||||||
|
weather_function = FunctionSchema(
|
||||||
|
name="get_current_weather",
|
||||||
|
description="Get the current weather",
|
||||||
|
properties={
|
||||||
|
"location": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "The city and state, e.g. San Francisco, CA",
|
||||||
|
},
|
||||||
|
"format": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["celsius", "fahrenheit"],
|
||||||
|
"description": "The temperature unit to use. Infer this from the user's location.",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
required=["location"],
|
||||||
|
)
|
||||||
|
tools = ToolsSchema(standard_tools=[weather_function])
|
||||||
|
```
|
||||||
|
|
||||||
|
- Added `speech_threshold` parameter to `GladiaSTTService`.
|
||||||
|
|
||||||
|
- Allow passing user (`user_kwargs`) and assistant (`assistant_kwargs`) context
|
||||||
|
aggregator parameters when using `create_context_aggregator()`. The values are
|
||||||
|
passed as a mapping that will then be converted to arguments.
|
||||||
|
|
||||||
|
- Added `speed` as an `InputParam` for both `ElevenLabsTTSService` and
|
||||||
|
`ElevenLabsHttpTTSService`.
|
||||||
|
|
||||||
|
- Added new `LLMFullResponseAggregator` to aggregate full LLM completions. At
|
||||||
|
every completion the `on_completion` event handler is triggered.
|
||||||
|
|
||||||
|
- Added a new frame, `RTVIServerMessageFrame`, and RTVI message
|
||||||
|
`RTVIServerMessage` which provides a generic mechanism for sending custom
|
||||||
|
messages from server to client. The `RTVIServerMessageFrame` is processed by
|
||||||
|
the `RTVIObserver` and will be delivered to the client's `onServerMessage`
|
||||||
|
callback or `ServerMessage` event.
|
||||||
|
|
||||||
|
- Added `GoogleLLMOpenAIBetaService` for Google LLM integration with an
|
||||||
|
OpenAI-compatible interface. Added foundational example
|
||||||
|
`14o-function-calling-gemini-openai-format.py`.
|
||||||
|
|
||||||
|
- Added `AzureRealtimeBetaLLMService` to support Azure's OpenAI Realtime API. Added
|
||||||
|
foundational example `19a-azure-realtime-beta.py`.
|
||||||
|
|
||||||
|
- Introduced `GoogleVertexLLMService`, a new class for integrating with Vertex AI
|
||||||
|
Gemini models. Added foundational example
|
||||||
|
`14p-function-calling-gemini-vertex-ai.py`.
|
||||||
|
|
||||||
|
- Added support in `OpenAIRealtimeBetaLLMService` for a slate of new features:
|
||||||
|
|
||||||
|
- The `'gpt-4o-transcribe'` input audio transcription model, along
|
||||||
|
with new `language` and `prompt` options specific to that model.
|
||||||
|
- The `input_audio_noise_reduction` session property.
|
||||||
|
|
||||||
|
```python
|
||||||
|
session_properties = SessionProperties(
|
||||||
|
# ...
|
||||||
|
input_audio_noise_reduction=InputAudioNoiseReduction(
|
||||||
|
type="near_field" # also supported: "far_field"
|
||||||
|
)
|
||||||
|
# ...
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
- The `'semantic_vad'` `turn_detection` session property value, a more
|
||||||
|
sophisticated model for detecting when the user has stopped speaking.
|
||||||
|
- `on_conversation_item_created` and `on_conversation_item_updated`
|
||||||
|
events to `OpenAIRealtimeBetaLLMService`.
|
||||||
|
|
||||||
|
```python
|
||||||
|
@llm.event_handler("on_conversation_item_created")
|
||||||
|
async def on_conversation_item_created(llm, item_id, item):
|
||||||
|
# ...
|
||||||
|
|
||||||
|
@llm.event_handler("on_conversation_item_updated")
|
||||||
|
async def on_conversation_item_updated(llm, item_id, item):
|
||||||
|
# `item` may not always be available here
|
||||||
|
# ...
|
||||||
|
```
|
||||||
|
|
||||||
|
- The `retrieve_conversation_item(item_id)` method for introspecting a
|
||||||
|
conversation item on the server.
|
||||||
|
|
||||||
|
```python
|
||||||
|
item = await llm.retrieve_conversation_item(item_id)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
|
||||||
|
- Updated `OpenAISTTService` to use `gpt-4o-transcribe` as the default
|
||||||
|
transcription model.
|
||||||
|
|
||||||
|
- Updated `OpenAITTSService` to use `gpt-4o-mini-tts` as the default TTS model.
|
||||||
|
|
||||||
|
- Function calls are now executed in tasks. This means that the pipeline will
|
||||||
|
not be blocked while the function call is being executed.
|
||||||
|
|
||||||
|
- ⚠️ `PipelineTask` will now be automatically cancelled if no bot activity is
|
||||||
|
happening in the pipeline. There are a few settings to configure this
|
||||||
|
behavior, see `PipelineTask` documentation for more details.
|
||||||
|
|
||||||
|
- All event handlers are now executed in separate tasks in order to prevent
|
||||||
|
blocking the pipeline. It is possible that event handlers take some time to
|
||||||
|
execute in which case the pipeline would be blocked waiting for the event
|
||||||
|
handler to complete.
|
||||||
|
|
||||||
|
- Updated `TranscriptProcessor` to support text output from
|
||||||
|
`OpenAIRealtimeBetaLLMService`.
|
||||||
|
|
||||||
|
- `OpenAIRealtimeBetaLLMService` and `GeminiMultimodalLiveLLMService` now push
|
||||||
|
a `TTSTextFrame`.
|
||||||
|
|
||||||
|
- Updated the default model for `CartesiaTTSService` and
|
||||||
|
`CartesiaHttpTTSService` to `sonic-2`.
|
||||||
|
|
||||||
|
### Deprecated
|
||||||
|
|
||||||
|
- Passing a `start_callback` to `LLMService.register_function()` is now
|
||||||
|
deprecated, simply move the code from the start callback to the function call.
|
||||||
|
|
||||||
|
- `TTSService` parameter `text_filter` is now deprecated, use `text_filters`
|
||||||
|
instead which is now a list. This allows passing multiple filters that will be
|
||||||
|
executed in order.
|
||||||
|
|
||||||
|
### Removed
|
||||||
|
|
||||||
|
- Removed deprecated `audio.resample_audio()`, use `create_default_resampler()`
|
||||||
|
instead.
|
||||||
|
|
||||||
|
- Removed deprecated `stt_service` parameter from `STTMuteFilter`.
|
||||||
|
|
||||||
|
- Removed deprecated RTVI processors, use an `RTVIObserver` instead.
|
||||||
|
|
||||||
|
- Removed deprecated `AWSTTSService`, use `PollyTTSService` instead.
|
||||||
|
|
||||||
|
- Removed deprecated field `tier` from `DailyTranscriptionSettings`, use `model`
|
||||||
|
instead.
|
||||||
|
|
||||||
|
- Removed deprecated `pipecat.vad` package, use `pipecat.audio.vad` instead.
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
|
||||||
|
- Fixed an assistant aggregator issue that could cause assistant text to be
|
||||||
|
split into multiple chunks during function calls.
|
||||||
|
|
||||||
|
- Fixed an assistant aggregator issue that was causing assistant text to not be
|
||||||
|
added to the context during function calls. This could lead to duplications.
|
||||||
|
|
||||||
|
- Fixed a `SegmentedSTTService` issue that was causing audio to be sent
|
||||||
|
prematurely to the STT service. Instead of analyzing the volume in this
|
||||||
|
service we rely on VAD events which use both VAD and volume.
|
||||||
|
|
||||||
|
- Fixed a `GeminiMultimodalLiveLLMService` issue that was causing messages to be
|
||||||
|
duplicated in the context when pushing `LLMMessagesAppendFrame` frames.
|
||||||
|
|
||||||
|
- Fixed an issue with `SegmentedSTTService` based services
|
||||||
|
(e.g. `GroqSTTService`) that was not allowing audio to pass through downstream.
|
||||||
|
|
||||||
|
- Fixed a `CartesiaTTSService` and `RimeTTSService` issue that would consider
|
||||||
|
text between spelling out tags end of sentence.
|
||||||
|
|
||||||
|
- Fixed a `match_endofsentence` issue that would result in floating point
|
||||||
|
numbers to be considered an end of sentence.
|
||||||
|
|
||||||
|
- Fixed a `match_endofsentence` issue that would result in emails to be
|
||||||
|
considered an end of sentence.
|
||||||
|
|
||||||
|
- Fixed an issue where the RTVI message `disconnect-bot` was pushing an
|
||||||
|
`EndFrame`, resulting in the pipeline not shutting down. It now pushes an
|
||||||
|
`EndTaskFrame` upstream to shutdown the pipeline.
|
||||||
|
|
||||||
|
- Fixed an issue with the `GoogleSTTService` where stream timeouts during
|
||||||
|
periods of inactivity were causing connection failures. The service now
|
||||||
|
properly detects timeout errors and handles reconnection gracefully,
|
||||||
|
ensuring continuous operation even after periods of silence or when using an
|
||||||
|
`STTMuteFilter`.
|
||||||
|
|
||||||
|
- Fixed an issue in `RimeTTSService` where the last line of text sent didn't
|
||||||
|
result in an audio output being generated.
|
||||||
|
|
||||||
|
- Fixed `OpenAIRealtimeBetaLLMService` by adding proper handling for:
|
||||||
|
- The `conversation.item.input_audio_transcription.delta` server message,
|
||||||
|
which was added server-side at some point and not handled client-side.
|
||||||
|
- Errors reported by the `response.done` server message.
|
||||||
|
|
||||||
|
### Other
|
||||||
|
|
||||||
|
- Add foundational example `07w-interruptible-fal.py`, showing `FalSTTService`.
|
||||||
|
|
||||||
|
- Added a new Ultravox example
|
||||||
|
`examples/foundational/07u-interruptible-ultravox.py`.
|
||||||
|
|
||||||
|
- Added new Neuphonic examples
|
||||||
|
`examples/foundational/07v-interruptible-neuphonic.py` and
|
||||||
|
`examples/foundational/07v-interruptible-neuphonic-http.py`.
|
||||||
|
|
||||||
|
- Added a new example `examples/foundational/36-user-email-gathering.py` to show
|
||||||
|
how to gather user emails. The example uses Cartesia's `<spell></spell>`
|
||||||
|
tags and Rime `spell()` function to spell out the emails for confirmation.
|
||||||
|
|
||||||
|
- Update the `34-audio-recording.py` example to include an STT processor.
|
||||||
|
|
||||||
|
- Added foundational example `35-voice-switching.py` showing how to use the new
|
||||||
|
`PatternPairAggregator`. This example shows how to encode information for the
|
||||||
|
LLM to instruct TTS voice changes, but this can be used to encode any
|
||||||
|
information into the LLM response, which you want to parse and use in other
|
||||||
|
parts of your application.
|
||||||
|
|
||||||
|
- Added a Pipecat Cloud deployment example to the `examples` directory.
|
||||||
|
|
||||||
|
- Removed foundational examples 28b and 28c as the TranscriptProcessor no
|
||||||
|
longer has an LLM dependency. Renamed foundational example 28a to
|
||||||
|
`28-transcript-processor.py`.
|
||||||
|
|
||||||
|
## [0.0.58] - 2025-02-26
|
||||||
|
|
||||||
|
### Added
|
||||||
|
|
||||||
|
- Added track-specific audio event `on_track_audio_data` to
|
||||||
|
`AudioBufferProcessor` for accessing separate input and output audio tracks.
|
||||||
|
|
||||||
- Pipecat version will now be logged on every application startup. This will
|
- Pipecat version will now be logged on every application startup. This will
|
||||||
help us identify what version we are running in case of any issues.
|
help us identify what version we are running in case of any issues.
|
||||||
|
|
||||||
@@ -45,6 +560,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||||||
- ⚠️ `PipelineTask` now requires keyword arguments (except for the first one for
|
- ⚠️ `PipelineTask` now requires keyword arguments (except for the first one for
|
||||||
the pipeline).
|
the pipeline).
|
||||||
|
|
||||||
|
- Updated `PlayHTHttpTTSService` to take a `voice_engine` and `protocol` input
|
||||||
|
in the constructor. The previous method of providing a `voice_engine` input
|
||||||
|
that contains the engine and protocol is deprecated by PlayHT.
|
||||||
|
|
||||||
- The base `TTSService` class now strips leading newlines before sending text
|
- The base `TTSService` class now strips leading newlines before sending text
|
||||||
to the TTS provider. This change is to solve issues where some TTS providers,
|
to the TTS provider. This change is to solve issues where some TTS providers,
|
||||||
like Azure, would not output text due to newlines.
|
like Azure, would not output text due to newlines.
|
||||||
@@ -78,6 +597,12 @@ stt = DeepgramSTTService(..., live_options=LiveOptions(model="nova-2-general"))
|
|||||||
|
|
||||||
### Fixed
|
### Fixed
|
||||||
|
|
||||||
|
- Fixed an issue that would cause undesired interruptions via
|
||||||
|
`EmulateUserStartedSpeakingFrame`.
|
||||||
|
|
||||||
|
- Fixed a `GoogleLLMService` issue that was causing an exception when sending inline
|
||||||
|
audio in some cases.
|
||||||
|
|
||||||
- Fixed an `AudioContextWordTTSService` issue that would cause an `EndFrame` to
|
- Fixed an `AudioContextWordTTSService` issue that would cause an `EndFrame` to
|
||||||
disconnect from the TTS service before audio from all the contexts was
|
disconnect from the TTS service before audio from all the contexts was
|
||||||
received. This affected services like Cartesia and Rime.
|
received. This affected services like Cartesia and Rime.
|
||||||
@@ -91,10 +616,6 @@ stt = DeepgramSTTService(..., live_options=LiveOptions(model="nova-2-general"))
|
|||||||
|
|
||||||
- Fixed `match_endofsentence` support for ellipses.
|
- Fixed `match_endofsentence` support for ellipses.
|
||||||
|
|
||||||
- Fixed an issue that would cause undesired interruptions via
|
|
||||||
`EmulateUserStartedSpeakingFrame` when only interim transcriptions (i.e. no
|
|
||||||
final transcriptions) where received.
|
|
||||||
|
|
||||||
- Fixed an issue where `EndTaskFrame` was not triggering
|
- Fixed an issue where `EndTaskFrame` was not triggering
|
||||||
`on_client_disconnected` or closing the WebSocket in FastAPI.
|
`on_client_disconnected` or closing the WebSocket in FastAPI.
|
||||||
|
|
||||||
@@ -124,6 +645,9 @@ stt = DeepgramSTTService(..., live_options=LiveOptions(model="nova-2-general"))
|
|||||||
|
|
||||||
- Added Gemini support to `examples/phone-chatbot`.
|
- Added Gemini support to `examples/phone-chatbot`.
|
||||||
|
|
||||||
|
- Added foundational example `34-audio-recording.py` showing how to use the
|
||||||
|
AudioBufferProcessor callbacks to save merged and track recordings.
|
||||||
|
|
||||||
## [0.0.57] - 2025-02-14
|
## [0.0.57] - 2025-02-14
|
||||||
|
|
||||||
### Added
|
### Added
|
||||||
@@ -1756,7 +2280,7 @@ async def on_connected(processor):
|
|||||||
completed. If a task is never run, `has_finished()` will return False.
|
completed. If a task is never run, `has_finished()` will return False.
|
||||||
|
|
||||||
- `PipelineRunner` now supports SIGTERM. If received, the runner will be
|
- `PipelineRunner` now supports SIGTERM. If received, the runner will be
|
||||||
canceled.
|
cancelled.
|
||||||
|
|
||||||
### Fixed
|
### Fixed
|
||||||
|
|
||||||
|
|||||||
@@ -26,11 +26,52 @@ git commit -m "Description of your changes"
|
|||||||
git push origin your-branch-name
|
git push origin your-branch-name
|
||||||
```
|
```
|
||||||
|
|
||||||
9. **Submit a Pull Request (PR)**: Open a PR from your forked repository to the main branch of this repo.
|
8. **Submit a Pull Request (PR)**: Open a PR from your forked repository to the main branch of this repo.
|
||||||
> Important: Describe the changes you've made clearly!
|
> Important: Describe the changes you've made clearly!
|
||||||
|
|
||||||
Our maintainers will review your PR, and once everything is good, your contributions will be merged!
|
Our maintainers will review your PR, and once everything is good, your contributions will be merged!
|
||||||
|
|
||||||
|
## Code Style and Documentation
|
||||||
|
|
||||||
|
### Python Code Style
|
||||||
|
|
||||||
|
We use Ruff for code linting and formatting. Please ensure your code passes all linting checks before submitting a PR.
|
||||||
|
|
||||||
|
### Docstring Conventions
|
||||||
|
|
||||||
|
We follow Google-style docstrings with these specific conventions:
|
||||||
|
|
||||||
|
- Class docstrings should fully document all parameters used in `__init__`
|
||||||
|
- We don't require separate docstrings for `__init__` methods when parameters are documented in the class docstring
|
||||||
|
- Property methods should have docstrings explaining their purpose and return value
|
||||||
|
|
||||||
|
Example of correctly documented class:
|
||||||
|
|
||||||
|
```python
|
||||||
|
class MyClass:
|
||||||
|
"""Class description.
|
||||||
|
|
||||||
|
Additional details about the class.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
param1: Description of first parameter.
|
||||||
|
param2: Description of second parameter.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, param1, param2):
|
||||||
|
# No docstring required here as parameters are documented above
|
||||||
|
self.param1 = param1
|
||||||
|
self.param2 = param2
|
||||||
|
|
||||||
|
@property
|
||||||
|
def some_property(self) -> str:
|
||||||
|
"""Get the formatted property value.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A string representation of the property.
|
||||||
|
"""
|
||||||
|
return f"Property: {self.param1}"
|
||||||
|
```
|
||||||
|
|
||||||
# Contributor Covenant Code of Conduct
|
# Contributor Covenant Code of Conduct
|
||||||
|
|
||||||
@@ -51,23 +92,23 @@ diverse, inclusive, and healthy community.
|
|||||||
Examples of behavior that contributes to a positive environment for our
|
Examples of behavior that contributes to a positive environment for our
|
||||||
community include:
|
community include:
|
||||||
|
|
||||||
* Demonstrating empathy and kindness toward other people
|
- Demonstrating empathy and kindness toward other people
|
||||||
* Being respectful of differing opinions, viewpoints, and experiences
|
- Being respectful of differing opinions, viewpoints, and experiences
|
||||||
* Giving and gracefully accepting constructive feedback
|
- Giving and gracefully accepting constructive feedback
|
||||||
* Accepting responsibility and apologizing to those affected by our mistakes,
|
- Accepting responsibility and apologizing to those affected by our mistakes,
|
||||||
and learning from the experience
|
and learning from the experience
|
||||||
* Focusing on what is best not just for us as individuals, but for the overall
|
- Focusing on what is best not just for us as individuals, but for the overall
|
||||||
community
|
community
|
||||||
|
|
||||||
Examples of unacceptable behavior include:
|
Examples of unacceptable behavior include:
|
||||||
|
|
||||||
* The use of sexualized language or imagery, and sexual attention or advances of
|
- The use of sexualized language or imagery, and sexual attention or advances of
|
||||||
any kind
|
any kind
|
||||||
* Trolling, insulting or derogatory comments, and personal or political attacks
|
- Trolling, insulting or derogatory comments, and personal or political attacks
|
||||||
* Public or private harassment
|
- Public or private harassment
|
||||||
* Publishing others' private information, such as a physical or email address,
|
- Publishing others' private information, such as a physical or email address,
|
||||||
without their explicit permission
|
without their explicit permission
|
||||||
* Other conduct which could reasonably be considered inappropriate in a
|
- Other conduct which could reasonably be considered inappropriate in a
|
||||||
professional setting
|
professional setting
|
||||||
|
|
||||||
## Enforcement Responsibilities
|
## Enforcement Responsibilities
|
||||||
@@ -162,4 +203,4 @@ For answers to common questions about this code of conduct, see the FAQ at
|
|||||||
[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
|
[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
|
||||||
[Mozilla CoC]: https://github.com/mozilla/diversity
|
[Mozilla CoC]: https://github.com/mozilla/diversity
|
||||||
[FAQ]: https://www.contributor-covenant.org/faq
|
[FAQ]: https://www.contributor-covenant.org/faq
|
||||||
[translations]: https://www.contributor-covenant.org/translations
|
[translations]: https://www.contributor-covenant.org/translations
|
||||||
|
|||||||
23
README.md
23
README.md
@@ -55,17 +55,18 @@ pip install "pipecat-ai[option,...]"
|
|||||||
|
|
||||||
### Available services
|
### Available services
|
||||||
|
|
||||||
| Category | Services | Install Command Example |
|
| Category | Services | Install Command Example |
|
||||||
| ------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------- |
|
| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------- |
|
||||||
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) | `pip install "pipecat-ai[deepgram]"` |
|
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [Parakeet (NVIDIA)](https://docs.pipecat.ai/server/services/stt/parakeet), [Ultravox](https://docs.pipecat.ai/server/services/stt/ultravox), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) | `pip install "pipecat-ai[deepgram]"` |
|
||||||
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Together AI](https://docs.pipecat.ai/server/services/llm/together) | `pip install "pipecat-ai[openai]"` |
|
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [Together AI](https://docs.pipecat.ai/server/services/llm/together) | `pip install "pipecat-ai[openai]"` |
|
||||||
| Text-to-Speech | [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) | `pip install "pipecat-ai[cartesia]"` |
|
| Text-to-Speech | [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [FastPitch (NVIDIA)](https://docs.pipecat.ai/server/services/tts/fastpitch), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) | `pip install "pipecat-ai[cartesia]"` |
|
||||||
| Speech-to-Speech | [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai) | `pip install "pipecat-ai[google]"` |
|
| Speech-to-Speech | [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai) | `pip install "pipecat-ai[google]"` |
|
||||||
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local | `pip install "pipecat-ai[daily]"` |
|
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local | `pip install "pipecat-ai[daily]"` |
|
||||||
| Video | [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) | `pip install "pipecat-ai[tavus,simli]"` |
|
| Video | [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) | `pip install "pipecat-ai[tavus,simli]"` |
|
||||||
| Vision & Image | [Moondream](https://docs.pipecat.ai/server/services/vision/moondream), [fal](https://docs.pipecat.ai/server/services/image-generation/fal) | `pip install "pipecat-ai[moondream]"` |
|
| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) | `pip install "pipecat-ai[mem0]"` |
|
||||||
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [Noisereduce](https://docs.pipecat.ai/server/utilities/audio/noisereduce-filter) | `pip install "pipecat-ai[silero]"` |
|
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/google-imagen), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) | `pip install "pipecat-ai[moondream]"` |
|
||||||
| Analytics & Metrics | [Canonical AI](https://docs.pipecat.ai/server/services/analytics/canonical), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) | `pip install "pipecat-ai[canonical]"` |
|
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [Noisereduce](https://docs.pipecat.ai/server/utilities/audio/noisereduce-filter) | `pip install "pipecat-ai[silero]"` |
|
||||||
|
| Analytics & Metrics | [Canonical AI](https://docs.pipecat.ai/server/services/analytics/canonical), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) | `pip install "pipecat-ai[canonical]"` |
|
||||||
|
|
||||||
📚 [View full services documentation →](https://docs.pipecat.ai/server/services/supported-services)
|
📚 [View full services documentation →](https://docs.pipecat.ai/server/services/supported-services)
|
||||||
|
|
||||||
|
|||||||
@@ -3,10 +3,11 @@ coverage~=7.6.12
|
|||||||
grpcio-tools~=1.67.1
|
grpcio-tools~=1.67.1
|
||||||
pip-tools~=7.4.1
|
pip-tools~=7.4.1
|
||||||
pre-commit~=4.0.1
|
pre-commit~=4.0.1
|
||||||
pyright~=1.1.394
|
pyright~=1.1.397
|
||||||
pytest~=8.3.4
|
pytest~=8.3.4
|
||||||
pytest-asyncio~=0.25.3
|
pytest-asyncio~=0.25.3
|
||||||
ruff~=0.9.7
|
pytest-aiohttp==1.1.0
|
||||||
|
ruff~=0.11.1
|
||||||
setuptools~=70.0.0
|
setuptools~=70.0.0
|
||||||
setuptools_scm~=8.1.0
|
setuptools_scm~=8.1.0
|
||||||
python-dotenv~=1.0.1
|
python-dotenv~=1.0.1
|
||||||
|
|||||||
@@ -50,6 +50,14 @@ autodoc_mock_imports = [
|
|||||||
"pyht.protos",
|
"pyht.protos",
|
||||||
"pyht.protos.api_pb2",
|
"pyht.protos.api_pb2",
|
||||||
"pipecat_ai_playht", # PlayHT wrapper
|
"pipecat_ai_playht", # PlayHT wrapper
|
||||||
|
"vllm",
|
||||||
|
"aiortc",
|
||||||
|
"aiortc.mediastreams",
|
||||||
|
"cv2",
|
||||||
|
"av",
|
||||||
|
"pyneuphonic",
|
||||||
|
"mem0",
|
||||||
|
"mlx_whisper",
|
||||||
"anthropic",
|
"anthropic",
|
||||||
"assemblyai",
|
"assemblyai",
|
||||||
"boto3",
|
"boto3",
|
||||||
|
|||||||
@@ -45,8 +45,10 @@ Transport & Serialization
|
|||||||
Utilities
|
Utilities
|
||||||
~~~~~~~~~
|
~~~~~~~~~
|
||||||
|
|
||||||
|
* :mod:`Adapters <pipecat.adapters>`
|
||||||
* :mod:`Clocks <pipecat.clocks>`
|
* :mod:`Clocks <pipecat.clocks>`
|
||||||
* :mod:`Metrics <pipecat.metrics>`
|
* :mod:`Metrics <pipecat.metrics>`
|
||||||
|
* :mod:`Observers <pipecat.observers>`
|
||||||
* :mod:`Sync <pipecat.sync>`
|
* :mod:`Sync <pipecat.sync>`
|
||||||
* :mod:`Transcriptions <pipecat.transcriptions>`
|
* :mod:`Transcriptions <pipecat.transcriptions>`
|
||||||
* :mod:`Utils <pipecat.utils>`
|
* :mod:`Utils <pipecat.utils>`
|
||||||
@@ -56,10 +58,12 @@ Utilities
|
|||||||
:caption: API Reference
|
:caption: API Reference
|
||||||
:hidden:
|
:hidden:
|
||||||
|
|
||||||
|
Adapters <api/pipecat.adapters>
|
||||||
Audio <api/pipecat.audio>
|
Audio <api/pipecat.audio>
|
||||||
Clocks <api/pipecat.clocks>
|
Clocks <api/pipecat.clocks>
|
||||||
Frames <api/pipecat.frames>
|
Frames <api/pipecat.frames>
|
||||||
Metrics <api/pipecat.metrics>
|
Metrics <api/pipecat.metrics>
|
||||||
|
Observers <api/pipecat.observers>
|
||||||
Pipeline <api/pipecat.pipeline>
|
Pipeline <api/pipecat.pipeline>
|
||||||
Processors <api/pipecat.processors>
|
Processors <api/pipecat.processors>
|
||||||
Serializers <api/pipecat.serializers>
|
Serializers <api/pipecat.serializers>
|
||||||
|
|||||||
@@ -12,22 +12,29 @@ pipecat-ai[aws]
|
|||||||
pipecat-ai[azure]
|
pipecat-ai[azure]
|
||||||
pipecat-ai[canonical]
|
pipecat-ai[canonical]
|
||||||
pipecat-ai[cartesia]
|
pipecat-ai[cartesia]
|
||||||
|
pipecat-ai[cerebras]
|
||||||
|
pipecat-ai[deepseek]
|
||||||
pipecat-ai[daily]
|
pipecat-ai[daily]
|
||||||
pipecat-ai[deepgram]
|
pipecat-ai[deepgram]
|
||||||
pipecat-ai[elevenlabs]
|
pipecat-ai[elevenlabs]
|
||||||
pipecat-ai[fal]
|
pipecat-ai[fal]
|
||||||
pipecat-ai[fireworks]
|
pipecat-ai[fireworks]
|
||||||
|
pipecat-ai[fish]
|
||||||
pipecat-ai[gladia]
|
pipecat-ai[gladia]
|
||||||
pipecat-ai[google]
|
pipecat-ai[google]
|
||||||
pipecat-ai[grok]
|
pipecat-ai[grok]
|
||||||
pipecat-ai[groq]
|
pipecat-ai[groq]
|
||||||
# pipecat-ai[krisp] # Mocked instead
|
# pipecat-ai[krisp] # Mocked
|
||||||
|
pipecat-ai[koala]
|
||||||
pipecat-ai[langchain]
|
pipecat-ai[langchain]
|
||||||
pipecat-ai[livekit]
|
pipecat-ai[livekit]
|
||||||
pipecat-ai[lmnt]
|
pipecat-ai[lmnt]
|
||||||
pipecat-ai[local]
|
pipecat-ai[local]
|
||||||
|
# pipecat-ai[mem0] # Mocked
|
||||||
|
# pipecat-ai[mlx-whisper] # Mocked
|
||||||
pipecat-ai[moondream]
|
pipecat-ai[moondream]
|
||||||
pipecat-ai[nim]
|
pipecat-ai[nim]
|
||||||
|
# pipecat-ai[neuphonic] # Mocked
|
||||||
pipecat-ai[noisereduce]
|
pipecat-ai[noisereduce]
|
||||||
pipecat-ai[openai]
|
pipecat-ai[openai]
|
||||||
# pipecat-ai[openpipe]
|
# pipecat-ai[openpipe]
|
||||||
@@ -36,5 +43,9 @@ pipecat-ai[riva]
|
|||||||
pipecat-ai[silero]
|
pipecat-ai[silero]
|
||||||
pipecat-ai[simli]
|
pipecat-ai[simli]
|
||||||
pipecat-ai[soundfile]
|
pipecat-ai[soundfile]
|
||||||
|
pipecat-ai[tavus]
|
||||||
|
pipecat-ai[together]
|
||||||
|
# pipecat-ai[ultravox] # Mocked
|
||||||
|
# pipecat-ai[webrtc] # Mocked
|
||||||
pipecat-ai[websocket]
|
pipecat-ai[websocket]
|
||||||
pipecat-ai[whisper]
|
pipecat-ai[whisper]
|
||||||
@@ -29,6 +29,9 @@ DAILY_SAMPLE_ROOM_URL=https://...
|
|||||||
ELEVENLABS_API_KEY=...
|
ELEVENLABS_API_KEY=...
|
||||||
ELEVENLABS_VOICE_ID=...
|
ELEVENLABS_VOICE_ID=...
|
||||||
|
|
||||||
|
# Neuphonic
|
||||||
|
NEUPHONIC_API_KEY=...
|
||||||
|
|
||||||
# Fal
|
# Fal
|
||||||
FAL_KEY=...
|
FAL_KEY=...
|
||||||
|
|
||||||
@@ -87,3 +90,6 @@ ASSEMBLYAI_API_KEY=...
|
|||||||
|
|
||||||
# OpenRouter
|
# OpenRouter
|
||||||
OPENROUTER_API_KEY=...
|
OPENROUTER_API_KEY=...
|
||||||
|
|
||||||
|
# Piper
|
||||||
|
PIPER_BASE_URL=...
|
||||||
@@ -18,7 +18,7 @@ from pipecat.frames.frames import AudioRawFrame, EndFrame, OutputAudioRawFrame,
|
|||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineTask
|
from pipecat.pipeline.task import PipelineTask
|
||||||
from pipecat.services.cartesia import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
@@ -64,7 +64,7 @@ async def main():
|
|||||||
|
|
||||||
tts = CartesiaTTSService(
|
tts = CartesiaTTSService(
|
||||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||||
)
|
)
|
||||||
|
|
||||||
runner = PipelineRunner()
|
runner = PipelineRunner()
|
||||||
|
|||||||
@@ -21,9 +21,9 @@ from pipecat.pipeline.runner import PipelineRunner
|
|||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
|
from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
|
||||||
from pipecat.services.canonical import CanonicalMetricsService
|
from pipecat.services.canonical.metrics import CanonicalMetricsService
|
||||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
|
||||||
from pipecat.services.openai import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
@@ -113,8 +113,8 @@ async def main():
|
|||||||
llm,
|
llm,
|
||||||
tts,
|
tts,
|
||||||
transport.output(),
|
transport.output(),
|
||||||
audio_buffer_processor, # captures audio into a buffer
|
|
||||||
canonical, # uploads audio buffer to Canonical AI for metrics
|
canonical, # uploads audio buffer to Canonical AI for metrics
|
||||||
|
audio_buffer_processor, # captures audio into a buffer
|
||||||
context_aggregator.assistant(),
|
context_aggregator.assistant(),
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -23,8 +23,8 @@ from pipecat.pipeline.runner import PipelineRunner
|
|||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
|
from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
|
||||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
|
||||||
from pipecat.services.openai import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
@@ -32,10 +32,16 @@ load_dotenv(override=True)
|
|||||||
logger.remove(0)
|
logger.remove(0)
|
||||||
logger.add(sys.stderr, level="DEBUG")
|
logger.add(sys.stderr, level="DEBUG")
|
||||||
|
|
||||||
|
# Create the recordings directory if it doesn't exist
|
||||||
|
os.makedirs("recordings", exist_ok=True)
|
||||||
|
|
||||||
async def save_audio(audio: bytes, sample_rate: int, num_channels: int):
|
|
||||||
|
async def save_audio(audio: bytes, sample_rate: int, num_channels: int, name: str):
|
||||||
if len(audio) > 0:
|
if len(audio) > 0:
|
||||||
filename = f"conversation_recording{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.wav"
|
filename = os.path.join(
|
||||||
|
"recordings",
|
||||||
|
f"{name}_conversation_recording{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.wav",
|
||||||
|
)
|
||||||
with io.BytesIO() as buffer:
|
with io.BytesIO() as buffer:
|
||||||
with wave.open(buffer, "wb") as wf:
|
with wave.open(buffer, "wb") as wf:
|
||||||
wf.setsampwidth(2)
|
wf.setsampwidth(2)
|
||||||
@@ -110,7 +116,7 @@ async def main():
|
|||||||
|
|
||||||
# NOTE: Watch out! This will save all the conversation in memory. You
|
# NOTE: Watch out! This will save all the conversation in memory. You
|
||||||
# can pass `buffer_size` to get periodic callbacks.
|
# can pass `buffer_size` to get periodic callbacks.
|
||||||
audiobuffer = AudioBufferProcessor()
|
audiobuffer = AudioBufferProcessor(enable_turn_audio=True)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
@@ -128,7 +134,15 @@ async def main():
|
|||||||
|
|
||||||
@audiobuffer.event_handler("on_audio_data")
|
@audiobuffer.event_handler("on_audio_data")
|
||||||
async def on_audio_data(buffer, audio, sample_rate, num_channels):
|
async def on_audio_data(buffer, audio, sample_rate, num_channels):
|
||||||
await save_audio(audio, sample_rate, num_channels)
|
await save_audio(audio, sample_rate, num_channels, "full")
|
||||||
|
|
||||||
|
@audiobuffer.event_handler("on_user_turn_audio_data")
|
||||||
|
async def on_user_turn_audio_data(buffer, audio, sample_rate, num_channels):
|
||||||
|
await save_audio(audio, sample_rate, num_channels, "user")
|
||||||
|
|
||||||
|
@audiobuffer.event_handler("on_bot_turn_audio_data")
|
||||||
|
async def on_bot_turn_audio_data(buffer, audio, sample_rate, num_channels):
|
||||||
|
await save_audio(audio, sample_rate, num_channels, "bot")
|
||||||
|
|
||||||
@transport.event_handler("on_first_participant_joined")
|
@transport.event_handler("on_first_participant_joined")
|
||||||
async def on_first_participant_joined(transport, participant):
|
async def on_first_participant_joined(transport, participant):
|
||||||
|
|||||||
@@ -1,3 +1,9 @@
|
|||||||
|
#
|
||||||
|
# Copyright (c) 2024–2025, Daily
|
||||||
|
#
|
||||||
|
# SPDX-License-Identifier: BSD 2-Clause License
|
||||||
|
#
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import asyncio
|
import asyncio
|
||||||
import os
|
import os
|
||||||
@@ -12,8 +18,8 @@ from pipecat.pipeline.pipeline import Pipeline
|
|||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
|
||||||
from pipecat.services.openai import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|||||||
@@ -1,3 +1,9 @@
|
|||||||
|
#
|
||||||
|
# Copyright (c) 2024–2025, Daily
|
||||||
|
#
|
||||||
|
# SPDX-License-Identifier: BSD 2-Clause License
|
||||||
|
#
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
|
||||||
import aiohttp
|
import aiohttp
|
||||||
|
|||||||
@@ -1,3 +1,9 @@
|
|||||||
|
#
|
||||||
|
# Copyright (c) 2024–2025, Daily
|
||||||
|
#
|
||||||
|
# SPDX-License-Identifier: BSD 2-Clause License
|
||||||
|
#
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
@@ -10,8 +16,8 @@ from pipecat.pipeline.pipeline import Pipeline
|
|||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.services.cartesia import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.services.openai import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
@@ -34,7 +40,7 @@ async def main(room_url: str, token: str):
|
|||||||
)
|
)
|
||||||
|
|
||||||
tts = CartesiaTTSService(
|
tts = CartesiaTTSService(
|
||||||
api_key=os.getenv("CARTESIA_API_KEY", ""), voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22"
|
api_key=os.getenv("CARTESIA_API_KEY", ""), voice_id="71a7ad14-091c-4e8e-a314-022ece01c121"
|
||||||
)
|
)
|
||||||
|
|
||||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||||
|
|||||||
178
examples/deployment/pipecat-cloud-daily-pstn-server/README.md
Normal file
178
examples/deployment/pipecat-cloud-daily-pstn-server/README.md
Normal file
@@ -0,0 +1,178 @@
|
|||||||
|
# Handling PSTN/SIP Dial-in on Pipecat Cloud
|
||||||
|
|
||||||
|
This repository contains two server implementations for handling
|
||||||
|
the pinless dial-in workflow in Pipecat Cloud. This is the companion to the
|
||||||
|
Pipecat Cloud [pstn_sip starter image](https://github.com/daily-co/pipecat-cloud-images/tree/main/pipecat-starters/pstn_sip).
|
||||||
|
In addition you can use `/api/dial` to trigger dial-out, and
|
||||||
|
eventually, call-transfers.
|
||||||
|
|
||||||
|
1. [FastAPI Server](fastapi-webhook-server/README.md) -
|
||||||
|
A FastAPI implementation that handles PSTN (Public Switched Telephone
|
||||||
|
Network) and SIP (Session Initiation Protocol) calls using the Daily API.
|
||||||
|
|
||||||
|
2. [Next.js Serverless](nextjs-webhook-server/README.md) -
|
||||||
|
A Next.js API implementation designed for deployment on Vercel's
|
||||||
|
serverless platform.
|
||||||
|
|
||||||
|
Both implementations provide:
|
||||||
|
|
||||||
|
- HMAC signature validation for pinless webhook
|
||||||
|
- Structured logging
|
||||||
|
- Support for dial-in and dial-out settings
|
||||||
|
- Voicemail detection and call transfer functionality (coming soon)
|
||||||
|
- Test request handling
|
||||||
|
|
||||||
|
## Choosing an Implementation
|
||||||
|
|
||||||
|
- Use the **FastAPI Server** if you:
|
||||||
|
|
||||||
|
- Need a standalone server
|
||||||
|
- Prefer Python and FastAPI
|
||||||
|
- Want to deploy to traditional hosting platforms
|
||||||
|
|
||||||
|
- Use the **Next.js Serverless** implementation if you:
|
||||||
|
- Want serverless deployment
|
||||||
|
- Prefer JavaScript/TypeScript
|
||||||
|
- Already use Next.js and Vercel for other projects
|
||||||
|
- Need quick scaling and zero maintenance
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
### Environment Variables
|
||||||
|
|
||||||
|
Both implementations require similar environment variables:
|
||||||
|
|
||||||
|
- `PIPECAT_CLOUD_API_KEY`: Pipecat Cloud API Key, begins with pk\_\*
|
||||||
|
- `AGENT_NAME`: Your Daily agent name
|
||||||
|
- `PINLESS_HMAC_SECRET`: Your HMAC secret for request verification
|
||||||
|
- `LOG_LEVEL`: (Optional) Logging level (defaults to 'info')
|
||||||
|
|
||||||
|
See the individual README files in each implementation directory for
|
||||||
|
specific setup instructions.
|
||||||
|
|
||||||
|
### Phone number setup
|
||||||
|
|
||||||
|
You can buy a phone number through the Pipecat Cloud Dashboard:
|
||||||
|
|
||||||
|
1. Go to `Settings` > `Telephony`
|
||||||
|
2. Follow the UI to purchase a phone number
|
||||||
|
3. Configure the webhook URL to receive incoming calls (e.g. `https://my-webhook-url.com/api/dial`)
|
||||||
|
|
||||||
|
Or purchase the number using Daily's
|
||||||
|
[PhoneNumbers API](https://docs.daily.co/reference/rest-api/phone-numbers).
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl --request POST \
|
||||||
|
--url https://api.daily.co/v1/domain-dialin-config \
|
||||||
|
--header "Authorization: Bearer $TOKEN" \
|
||||||
|
--header 'Content-Type: application/json' \
|
||||||
|
--data-raw '{
|
||||||
|
"type": "pinless_dialin",
|
||||||
|
"name_prefix": "Customer1",
|
||||||
|
"phone_number": "+1PURCHASED_NUM",
|
||||||
|
"room_creation_api": "https://example.com/api/dial",
|
||||||
|
"hold_music_url": "https://example.com/static/ringtone.mp3",
|
||||||
|
"timeout_config": {
|
||||||
|
"message": "No agent is available right now"
|
||||||
|
}
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
The API will return a static SIP URI (`sip_uri`) that can be called
|
||||||
|
from other SIP services.
|
||||||
|
|
||||||
|
### `room_creation_api`
|
||||||
|
|
||||||
|
To make and receive calls currently you have to host a server that
|
||||||
|
handles incoming calls. In the coming weeks, incoming calls will be
|
||||||
|
directly handled within Daily and we will expose an endpoint similar
|
||||||
|
to `{service}/start` that will manage this for you.
|
||||||
|
|
||||||
|
In the meantime, the server described below serves as the webhook
|
||||||
|
handler for the `room_creation_api`. Configure your pinless phone
|
||||||
|
number or SIP interconnect to the `ngrok` tunnel or
|
||||||
|
the actual server URL, and append `/api/dial` to the webhook URL.
|
||||||
|
|
||||||
|
## Example curl commands
|
||||||
|
|
||||||
|
Note: Replace `http://localhost:3000` with your actual server URL and
|
||||||
|
phone numbers with valid values for your use case.
|
||||||
|
|
||||||
|
### Dialin Request
|
||||||
|
|
||||||
|
The server will receive a request when a call is received from Daily.
|
||||||
|
|
||||||
|
### Dialout Request
|
||||||
|
|
||||||
|
Dial a number without specifying a `callerId`; any purchased number will be used:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -X POST http://localhost:3000/api/dial \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"dialout_settings": [
|
||||||
|
{
|
||||||
|
"phoneNumber": "+1234567890"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
Dial a number with callerId, which is the UUID of a purchased number.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -X POST http://localhost:3000/api/dial \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"dialout_settings": [
|
||||||
|
{
|
||||||
|
"phoneNumber": "+1234567890",
|
||||||
|
"callerId": "purchased_phone_uuid"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
Dial a number
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -X POST http://localhost:3000/api/dial \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"dialout_settings": [
|
||||||
|
{
|
||||||
|
"phoneNumber": "+1234567890",
|
||||||
|
"callerId": "purchased_phone_uuid"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
### Advanced Request with Voicemail Detection
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -X POST http://localhost:3000/api/dial \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"To": "+1234567890",
|
||||||
|
"From": "+1987654321",
|
||||||
|
"callId": "call-uuid-123",
|
||||||
|
"callDomain": "domain-uuid-456",
|
||||||
|
"dialout_settings": [
|
||||||
|
{
|
||||||
|
"phoneNumber": "+1234567890",
|
||||||
|
"callerId": "purchased_phone_uuid"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"voicemail_detection": {
|
||||||
|
"testInPrebuilt": true
|
||||||
|
},
|
||||||
|
"call_transfer": {
|
||||||
|
"mode": "dialout",
|
||||||
|
"speakSummary": true,
|
||||||
|
"storeSummary": true,
|
||||||
|
"operatorNumber": "+1234567890",
|
||||||
|
"testInPrebuilt": true
|
||||||
|
}
|
||||||
|
}'
|
||||||
|
```
|
||||||
@@ -0,0 +1,98 @@
|
|||||||
|
# FastAPI server for handling Daily PSTN/SIP Webhook
|
||||||
|
|
||||||
|
A FastAPI server that handles PSTN (Public Switched Telephone Network) and SIP (Session Initiation Protocol) calls using the Daily API.
|
||||||
|
|
||||||
|
## Setup
|
||||||
|
|
||||||
|
1. Clone the repository
|
||||||
|
|
||||||
|
2. Navigate to the `fastapi-webhook-server` directory:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd fastapi-webhook-server
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Install dependencies:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install -r requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
4. Copy `env.example` to `.env`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cp env.example .env
|
||||||
|
```
|
||||||
|
|
||||||
|
5. Update `.env` with your credentials:
|
||||||
|
|
||||||
|
- `AGENT_NAME`: Your Daily agent name
|
||||||
|
- `PIPECAT_CLOUD_API_KEY`: Your Pipecat Cloud API key (begins with pk\_\*)
|
||||||
|
- `PINLESS_HMAC_SECRET`: Your HMAC secret for request verification
|
||||||
|
|
||||||
|
## Running the Server
|
||||||
|
|
||||||
|
Start the server:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python server.py
|
||||||
|
```
|
||||||
|
|
||||||
|
The server will run on `http://localhost:7860` and you can expose it via ngrok for testing:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ngrok http 7860
|
||||||
|
```
|
||||||
|
|
||||||
|
> Tip: Use a subdomain for a consistent URL (e.g. `ngrok http -subdomain=mydomain http://localhost:7860`)
|
||||||
|
|
||||||
|
## API Endpoints
|
||||||
|
|
||||||
|
### GET /
|
||||||
|
|
||||||
|
Health check endpoint that returns a "Hello, World!" message.
|
||||||
|
|
||||||
|
### POST /api/dial
|
||||||
|
|
||||||
|
Initiates a PSTN/SIP call with the following request body format:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"To": "+14152251493",
|
||||||
|
"From": "+14158483432",
|
||||||
|
"callId": "string-contains-uuid",
|
||||||
|
"callDomain": "string-contains-uuid",
|
||||||
|
"dialout_settings": [
|
||||||
|
{
|
||||||
|
"phoneNumber": "+14158483432",
|
||||||
|
"callerId": "+14152251493"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"voicemail_detection": {
|
||||||
|
"testInPrebuilt": true
|
||||||
|
},
|
||||||
|
"call_transfer": {
|
||||||
|
"mode": "dialout",
|
||||||
|
"speakSummary": true,
|
||||||
|
"storeSummary": true,
|
||||||
|
"operatorNumber": "+14152250006",
|
||||||
|
"testInPrebuilt": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Response
|
||||||
|
|
||||||
|
Returns a JSON object containing:
|
||||||
|
|
||||||
|
- `status`: Success/failure status
|
||||||
|
- `data`: Response from Daily API
|
||||||
|
- `room_properties`: Properties of the created Daily room
|
||||||
|
|
||||||
|
## Error Handling
|
||||||
|
|
||||||
|
- 401: Invalid signature
|
||||||
|
- 400: Invalid authorization header (e.g. missing Daily API key in bot.py)
|
||||||
|
- 405: Method not allowed (e.g. incorrect route on the webhook URL)
|
||||||
|
- 500: Server errors (missing API key, network issues)
|
||||||
|
- Other status codes are passed through from the Daily API
|
||||||
@@ -0,0 +1,3 @@
|
|||||||
|
AGENT_NAME="your-agent-name"
|
||||||
|
PIPECAT_CLOUD_API_KEY="your-daily-api-key"
|
||||||
|
PINLESS_HMAC_SECRET="hmac-secret-pinless-dialin"
|
||||||
@@ -0,0 +1,6 @@
|
|||||||
|
fastapi
|
||||||
|
uvicorn
|
||||||
|
python-dotenv
|
||||||
|
requests
|
||||||
|
pydantic
|
||||||
|
loguru
|
||||||
@@ -0,0 +1,201 @@
|
|||||||
|
#
|
||||||
|
# Copyright (c) 2025, Daily
|
||||||
|
#
|
||||||
|
# SPDX-License-Identifier: BSD 2-Clause License
|
||||||
|
#
|
||||||
|
|
||||||
|
# server.py
|
||||||
|
|
||||||
|
|
||||||
|
import base64 # for calculating hmac signature
|
||||||
|
import hmac
|
||||||
|
import os # for accessing environment variables
|
||||||
|
import time # for setting expiration time
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from fastapi import FastAPI, HTTPException, Request
|
||||||
|
from loguru import logger
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
load_dotenv(override=True)
|
||||||
|
|
||||||
|
app = FastAPI()
|
||||||
|
|
||||||
|
|
||||||
|
class RoomRequest(BaseModel):
    """Request body accepted by ``POST /api/dial``.

    Every field is optional and defaults to ``None``. The handler decides
    what to do based on which fields are present:

    * ``test`` -- a webhook verification ping.
    * ``To`` / ``From`` / ``callId`` / ``callDomain`` -- dial-in details; the
      handler only builds ``dialin_settings`` when all four are set.
    * ``dialout_settings`` / ``voicemail_detection`` / ``call_transfer`` --
      forwarded unchanged in the body of the agent start request.
    """

    test: Optional[str] = Field(None, alias="Test", description="Test field")
    To: Optional[str] = Field(None, alias="to", description="Destination phone number")
    From: Optional[str] = Field(None, alias="from", description="Source phone number")
    callId: Optional[str] = Field(None, alias="call_id", description="Unique call identifier")
    callDomain: Optional[str] = Field(
        None, alias="call_domain", description="Call domain identifier"
    )
    dialout_settings: Optional[List[Dict[str, Any]]] = Field(
        None, description="An array of phone numbers or SIP URIs to dialout to"
    )
    voicemail_detection: Optional[Dict[str, Any]] = Field(
        None, description="A flag to perform voicemail or answering-machine detection"
    )
    call_transfer: Optional[Dict[str, Any]] = Field(None, description="to initiate a call transfer")

    class Config:
        # Accept the attribute names (e.g. "To", "callId") in addition to the
        # declared aliases when parsing the incoming JSON.
        populate_by_name = True
        alias_generator = None
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
body can contain any fields, but for handling PSTN/SIP,
|
||||||
|
we recommend sending the following custom values:
|
||||||
|
dialin, dialout, voicemail detection, and call transfer
|
||||||
|
|
||||||
|
|
||||||
|
"To": "+14152251493",
|
||||||
|
"From": "+14158483432",
|
||||||
|
"callId": "string-contains-uuid",
|
||||||
|
"callDomain": "string-contains-uuid"
|
||||||
|
These need to be remapped to dialin_settings
|
||||||
|
|
||||||
|
"dialout_settings": [
|
||||||
|
{"phoneNumber": "+14158483432", "callerId": "+14152251493"},
|
||||||
|
{"sipUri": "sip:username@sip.hostname"}
|
||||||
|
],
|
||||||
|
},
|
||||||
|
|
||||||
|
voicemail_detection:{
|
||||||
|
testInPrebuilt: true
|
||||||
|
},
|
||||||
|
|
||||||
|
"call_transfer": {
|
||||||
|
"mode": "dialout",
|
||||||
|
"speakSummary": true,
|
||||||
|
"storeSummary": true,
|
||||||
|
"operatorNumber": "+14152250006",
|
||||||
|
"testInPrebuilt": true
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/")
async def read_root():
    """Health-check endpoint: always answers with a fixed greeting."""
    greeting = {"message": "Hello, World!"}
    return greeting
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/api/dial")
async def dial(request: RoomRequest, raw_request: Request):
    """Handle a pinless dial-in webhook or a dial-out request.

    Steps:
      1. Log the incoming request.
      2. If ``PINLESS_HMAC_SECRET`` is set and both signature headers are
         present, verify the HMAC-SHA256 signature of ``"<timestamp>.<body>"``.
      3. Answer webhook test pings immediately.
      4. Build the Daily room properties and forward the call details to the
         agent's ``/start`` endpoint.

    Args:
        request: Parsed request body.
        raw_request: The underlying request, used for headers and raw body.

    Returns:
        A dict with ``status``, the upstream response ``data`` and the
        ``room_properties`` that were requested.

    Raises:
        HTTPException: 401 on a signature mismatch, 500 when
            ``PIPECAT_CLOUD_API_KEY`` is missing or the upstream request could
            not be made, or the upstream status code when the API rejects the
            request.
    """
    logger.info("Incoming request to /dial:")
    logger.info(f"Headers: {dict(raw_request.headers)}")
    raw_body = await raw_request.body()
    raw_body_str = raw_body.decode()
    logger.info(f"Raw body: {raw_body_str}")
    logger.info(f"Parsed body: {request.dict()}")

    # calculate signature and compare/verify
    hmac_secret = os.getenv("PINLESS_HMAC_SECRET")
    timestamp = raw_request.headers.get("x-pinless-timestamp")
    signature = raw_request.headers.get("x-pinless-signature")

    if not hmac_secret:
        logger.debug("Skipping HMAC validation - PINLESS_HMAC_SECRET not set")
    elif timestamp and signature:
        message = timestamp + "." + raw_body_str

        base64_decoded_secret = base64.b64decode(hmac_secret)
        computed_signature = base64.b64encode(
            hmac.new(base64_decoded_secret, message.encode(), "sha256").digest()
        ).decode()

        # Constant-time comparison on bytes: avoids leaking how many leading
        # characters matched, and compare_digest() rejects non-ASCII str input.
        if not hmac.compare_digest(computed_signature.encode(), signature.encode()):
            # The computed (valid) signature is deliberately kept out of the logs.
            logger.error(f"Invalid signature. Received {signature}")
            raise HTTPException(status_code=401, detail="Invalid signature")
    else:
        logger.debug("Skipping HMAC validation - no signature headers present")

    if request.test == "test":
        logger.debug("Test request received")
        return {"status": "success", "message": "Test request received"}

    dialin_settings = None
    # these fields are camelCase in the request
    required_fields = ["To", "From", "callId", "callDomain"]
    if all(getattr(request, field) is not None for field in required_fields):
        # transform from camelCase to snake_case because daily-python expects snake_case
        dialin_settings = {
            "From": request.From,
            "To": request.To,
            "call_id": request.callId,
            "call_domain": request.callDomain,
        }
        logger.debug(f"Populated dialin_settings from request: {dialin_settings}")

    daily_room_properties = {
        "enable_dialout": request.dialout_settings is not None,
    }

    if dialin_settings is not None:
        sip_config = {
            "display_name": request.From,
            "sip_mode": "dial-in",
            # A second SIP endpoint is requested when a call transfer is asked for.
            "num_endpoints": 2 if request.call_transfer is not None else 1,
        }
        daily_room_properties["sip"] = sip_config

    # Setting default expiry to 5 minutes from now
    daily_room_properties["exp"] = int(time.time()) + (5 * 60)

    logger.debug(f"Daily room properties: {daily_room_properties}")
    payload = {
        "createDailyRoom": True,
        "dailyRoomProperties": daily_room_properties,
        "body": {
            "dialin_settings": dialin_settings,
            "dialout_settings": request.dialout_settings,
            "voicemail_detection": request.voicemail_detection,
            "call_transfer": request.call_transfer,
        },
    }

    pcc_api_key = os.getenv("PIPECAT_CLOUD_API_KEY")
    agent_name = os.getenv("AGENT_NAME", "my-first-agent")

    if not pcc_api_key:
        raise HTTPException(
            status_code=500, detail="PIPECAT_CLOUD_API_KEY environment variable is not set"
        )

    headers = {"Authorization": f"Bearer {pcc_api_key}", "Content-Type": "application/json"}

    url = f"https://api.pipecat.daily.co/v1/public/{agent_name}/start"

    # Never write the bearer token to the logs.
    loggable_headers = {**headers, "Authorization": "Bearer [REDACTED]"}
    logger.debug(f"Making API call to Daily: {url} {loggable_headers} {payload}")

    try:
        response = requests.post(url, json=payload, headers=headers)
        response.raise_for_status()
        response_data = response.json()
        logger.debug(f"Response: {response_data}")
        return {
            "status": "success",
            "data": response_data,
            "room_properties": daily_room_properties,
        }
    except requests.exceptions.HTTPError as e:
        # Pass through the status code and error details from the Daily API
        status_code = e.response.status_code
        try:
            error_detail = e.response.json() if e.response.content else str(e)
        except ValueError:
            # Error body was not JSON (e.g. an HTML error page); pass the text through.
            error_detail = e.response.text
        logger.error(f"HTTP error: {error_detail}")
        raise HTTPException(status_code=status_code, detail=error_detail)
    except requests.exceptions.RequestException as e:
        logger.error(f"Request error: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Run directly (``python server.py``): serve the app with uvicorn on all
    # interfaces, port 7860, and log a message when stopped with Ctrl+C.
    try:
        import uvicorn

        uvicorn.run(app, host="0.0.0.0", port=7860)
    except KeyboardInterrupt:
        logger.info("Server stopped manually")
|
||||||
53
examples/deployment/pipecat-cloud-daily-pstn-server/nextjs-webhook-server/.gitignore
vendored
Normal file
53
examples/deployment/pipecat-cloud-daily-pstn-server/nextjs-webhook-server/.gitignore
vendored
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
# dependencies
|
||||||
|
/node_modules
|
||||||
|
/.pnp
|
||||||
|
.pnp.js
|
||||||
|
|
||||||
|
# testing
|
||||||
|
/coverage
|
||||||
|
|
||||||
|
# next.js
|
||||||
|
/.next/
|
||||||
|
/out/
|
||||||
|
|
||||||
|
# production
|
||||||
|
/build
|
||||||
|
|
||||||
|
# misc
|
||||||
|
.DS_Store
|
||||||
|
*.pem
|
||||||
|
|
||||||
|
# debug
|
||||||
|
npm-debug.log*
|
||||||
|
yarn-debug.log*
|
||||||
|
yarn-error.log*
|
||||||
|
.pnpm-debug.log*
|
||||||
|
|
||||||
|
# local env files
|
||||||
|
.env*.local
|
||||||
|
|
||||||
|
# vercel
|
||||||
|
.vercel
|
||||||
|
|
||||||
|
# typescript
|
||||||
|
*.tsbuildinfo
|
||||||
|
next-env.d.ts
|
||||||
|
|
||||||
|
# IDE specific files
|
||||||
|
.idea/
|
||||||
|
.vscode/
|
||||||
|
*.swp
|
||||||
|
*.swo
|
||||||
|
|
||||||
|
# Logs
|
||||||
|
logs
|
||||||
|
*.log
|
||||||
|
|
||||||
|
# OS generated files
|
||||||
|
.DS_Store
|
||||||
|
.DS_Store?
|
||||||
|
._*
|
||||||
|
.Spotlight-V100
|
||||||
|
.Trashes
|
||||||
|
ehthumbs.db
|
||||||
|
Thumbs.db
|
||||||
@@ -0,0 +1,115 @@
|
|||||||
|
# Next.js server for handling Daily PSTN/SIP Webhook
|
||||||
|
|
||||||
|
Next.js API routes for handling Daily PSTN/SIP Pipecat requests.
|
||||||
|
|
||||||
|
## Features
|
||||||
|
|
||||||
|
- API endpoint for handling Daily PSTN/SIP Pipecat requests
|
||||||
|
- HMAC signature validation
|
||||||
|
- Structured logging with Pino
|
||||||
|
- Support for dial-in and dial-out settings
|
||||||
|
- Voicemail detection and call transfer functionality
|
||||||
|
- Test request handling
|
||||||
|
|
||||||
|
## Setup
|
||||||
|
|
||||||
|
1. Clone the repository
|
||||||
|
|
||||||
|
2. Navigate to the `nextjs-webhook-server` directory:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd nextjs-webhook-server
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Install dependencies:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
npm install
|
||||||
|
```
|
||||||
|
|
||||||
|
4. Create `.env.local` file with your credentials:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cp env.local.example .env.local
|
||||||
|
```
|
||||||
|
|
||||||
|
5. Update your `.env.local` with your secrets:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
PIPECAT_CLOUD_API_KEY=pk_*
|
||||||
|
AGENT_NAME=my-first-agent
|
||||||
|
PINLESS_HMAC_SECRET=your_hmac_secret
|
||||||
|
LOG_LEVEL=info
|
||||||
|
```
|
||||||
|
|
||||||
|
### Running the server
|
||||||
|
|
||||||
|
Run the development server:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
npm run dev
|
||||||
|
```
|
||||||
|
|
||||||
|
The server will run on `http://localhost:7860` and you can expose it via ngrok for testing:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ngrok http 7860
|
||||||
|
```
|
||||||
|
|
||||||
|
> Tip: Use a subdomain for a consistent URL (e.g. `ngrok http -subdomain=mydomain http://localhost:7860`)
|
||||||
|
|
||||||
|
## API Endpoints
|
||||||
|
|
||||||
|
### GET /api
|
||||||
|
|
||||||
|
Returns a simple "Hello, World!" message with a cute cat emoji to verify the server is running.
|
||||||
|
|
||||||
|
### POST /api/dial
|
||||||
|
|
||||||
|
Handles dial-in and dial-out requests for Pipecat Cloud.
|
||||||
|
|
||||||
|
#### Test Requests
|
||||||
|
|
||||||
|
The endpoint handles test requests when a webhook is configured. Send a request with `"Test": "test"` to verify your setup:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"Test": "test"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Production Request Format
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
// for dial-in from webhook
|
||||||
|
"To": "+14152251493",
|
||||||
|
"From": "+14158483432",
|
||||||
|
"callId": "string-contains-uuid",
|
||||||
|
"callDomain": "string-contains-uuid",
|
||||||
|
// for making a dial out to a phone or SIP
|
||||||
|
"dialout_settings": [
|
||||||
|
{ "phoneNumber": "+14158483432", "callerId": "purchased_phone_uuid" },
|
||||||
|
{ "sipUri": "sip:username@sip.hostname.com" }
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Deployment
|
||||||
|
|
||||||
|
The application is configured for Vercel deployment:
|
||||||
|
|
||||||
|
1. Push your code to a Git repository
|
||||||
|
2. Import your project in Vercel dashboard
|
||||||
|
3. Configure environment variables:
|
||||||
|
- `PIPECAT_CLOUD_API_KEY`
|
||||||
|
- `AGENT_NAME`
|
||||||
|
- `PINLESS_HMAC_SECRET`
|
||||||
|
- `LOG_LEVEL` (optional, defaults to 'info')
|
||||||
|
4. Deploy!
|
||||||
|
|
||||||
|
## Security
|
||||||
|
|
||||||
|
- HMAC signature validation for request authentication
|
||||||
|
- Environment variables for sensitive credentials
|
||||||
|
- Method validation (POST only for /api/dial)
|
||||||
@@ -0,0 +1,4 @@
|
|||||||
|
AGENT_NAME=my-first-agent
|
||||||
|
PIPECAT_CLOUD_API_KEY=your_daily_api_key
|
||||||
|
PINLESS_HMAC_SECRET=your_hmac_secret
|
||||||
|
LOG_LEVEL="info"
|
||||||
5447
examples/deployment/pipecat-cloud-daily-pstn-server/nextjs-webhook-server/package-lock.json
generated
Normal file
5447
examples/deployment/pipecat-cloud-daily-pstn-server/nextjs-webhook-server/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,22 @@
|
|||||||
|
{
|
||||||
|
"name": "my-daily-app",
|
||||||
|
"version": "0.1.0",
|
||||||
|
"private": true,
|
||||||
|
"scripts": {
|
||||||
|
"dev": "next dev -p 7860",
|
||||||
|
"build": "next build",
|
||||||
|
"start": "next start -p 7860",
|
||||||
|
"lint": "next lint"
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"axios": "^1.6.0",
|
||||||
|
"next": "^14.0.0",
|
||||||
|
"pino": "^8.15.0",
|
||||||
|
"react": "^18.2.0",
|
||||||
|
"react-dom": "^18.2.0"
|
||||||
|
},
|
||||||
|
"devDependencies": {
|
||||||
|
"eslint": "^8.46.0",
|
||||||
|
"eslint-config-next": "^14.0.0"
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,175 @@
|
|||||||
|
import { logger } from '../../lib/utils';
|
||||||
|
import axios from 'axios';
|
||||||
|
import crypto from 'crypto';
|
||||||
|
|
||||||
|
/**
 * Verify the pinless webhook HMAC-SHA256 signature.
 *
 * The expected signature is base64(HMAC-SHA256(base64-decoded secret,
 * `${timestamp}.${body}`)).
 *
 * @param {string} body - Request body text that was signed.
 * @param {string} signature - Value of the `x-pinless-signature` header.
 * @param {string} timestamp - Value of the `x-pinless-timestamp` header.
 * @param {string} secret - Base64-encoded HMAC secret.
 * @returns {boolean} `true` when the signature matches, or when validation is
 *   skipped because an input is missing; `false` on a mismatch or when the
 *   signature cannot be verified.
 */
const validateSignature = (body, signature, timestamp, secret) => {
  // Skip if any required fields are missing
  if (!signature || !timestamp || !secret) {
    logger.warn('Missing required fields for HMAC validation');
    return true;
  }

  try {
    const decodedSecret = Buffer.from(secret, 'base64');
    const signatureData = `${timestamp}.${body}`;
    const computedSignature = crypto
      .createHmac('sha256', decodedSecret)
      .update(signatureData)
      .digest('base64');

    logger.debug('Signature validation:', {
      timestamp,
      signatureData: signatureData.substring(0, 50) + '...',
      computedSignature,
      receivedSignature: signature
    });

    // Compare in constant time. timingSafeEqual throws on unequal lengths,
    // so a length mismatch is rejected up front.
    const expected = Buffer.from(computedSignature);
    const received = Buffer.from(String(signature));
    return expected.length === received.length && crypto.timingSafeEqual(expected, received);
  } catch (error) {
    // Fail closed: a signature that cannot be verified must not be accepted.
    logger.error('Error validating signature:', error);
    return false;
  }
};
|
||||||
|
|
||||||
|
/**
 * Next.js API route for `POST /api/dial`.
 *
 * Validates the pinless webhook signature when both signature headers are
 * present, answers webhook test pings, then builds the Daily room properties
 * and forwards the call details to the agent's `/start` endpoint.
 *
 * @param {object} req - Incoming request; reads `method`, `headers` and `body`.
 * @param {object} res - Response object; uses `status()` and `json()`.
 * @returns {Promise<*>} Whatever `res.status(...).json(...)` returns.
 */
export default async function handler(req, res) {
  // Only allow POST requests
  if (req.method !== 'POST') {
    return res.status(405).json({ error: 'Method not allowed' });
  }

  try {
    logger.info('Incoming request to /api/dial:');
    logger.info(`Headers: ${JSON.stringify(req.headers)}`);

    // NOTE(review): this is a re-serialization of the parsed body, not the
    // bytes that were signed; the HMAC only matches if the sender's JSON is
    // byte-identical to JSON.stringify's output. Confirm against the body
    // parser configuration exported by this module.
    const rawBody = JSON.stringify(req.body);
    logger.info(`Raw body: ${rawBody}`);

    const signature = req.headers['x-pinless-signature'];
    const timestamp = req.headers['x-pinless-timestamp'];

    if (signature && timestamp) {
      logger.info('Validating HMAC signature');
      if (!validateSignature(rawBody, signature, timestamp, process.env.PINLESS_HMAC_SECRET)) {
        logger.error('Invalid HMAC signature', { signature, timestamp });
        return res.status(401).json({
          error: 'Invalid signature',
          message: 'Invalid HMAC signature'
        });
      }
    } else {
      logger.info('Skipping HMAC validation - no signature headers present');
    }

    // Extract request data
    const {
      Test: test,
      To,
      From,
      callId,
      callDomain,
      dialout_settings,
      voicemail_detection,
      call_transfer
    } = req.body;

    // Handle test requests when a webhook is configured
    if (test === 'test') {
      logger.debug('Test request received');
      return res.status(200).json({ status: 'success', message: 'Test request received' });
    }

    // Process dialin settings
    let dialin_settings = null;
    const requiredFields = ['To', 'From', 'callId', 'callDomain'];

    if (requiredFields.every(field => req.body[field] !== undefined && req.body[field] !== null)) {
      dialin_settings = {
        // snake_case because pipecat expects this format
        From,
        To,
        call_id: callId,
        call_domain: callDomain,
      };
      logger.debug(`Populated dialin_settings from request: ${JSON.stringify(dialin_settings)}`);
    }

    // Set up Daily room properties
    const daily_room_properties = {
      enable_dialout: dialout_settings !== undefined && dialout_settings !== null,
      exp: Math.floor(Date.now() / 1000) + (5 * 60), // 5 minutes from now
    };

    // Configure SIP if dialin settings are provided
    if (dialin_settings !== null) {
      // A missing call_transfer is `undefined`, not `null`; check both so a
      // second endpoint is only requested when a transfer was actually asked for.
      const wantsTransfer = call_transfer !== undefined && call_transfer !== null;
      const sip_config = {
        display_name: From,
        sip_mode: 'dial-in',
        num_endpoints: wantsTransfer ? 2 : 1,
      };
      daily_room_properties.sip = sip_config;
    }

    // Prepare payload for {service}/start API call
    const payload = {
      createDailyRoom: true,
      dailyRoomProperties: daily_room_properties,
      body: {
        dialin_settings,
        dialout_settings,
        voicemail_detection,
        call_transfer,
      },
    };

    logger.debug(`Daily room properties: ${JSON.stringify(daily_room_properties)}`);

    // Get Daily API key and agent name from environment variables
    const pccApiKey = process.env.PIPECAT_CLOUD_API_KEY;
    const agentName = process.env.AGENT_NAME || 'my-first-agent';

    if (!pccApiKey) {
      throw new Error('PIPECAT_CLOUD_API_KEY environment variable is not set');
    }

    // Set up headers for Daily API call
    const headers = {
      'Authorization': `Bearer ${pccApiKey}`,
      'Content-Type': 'application/json',
    };

    const url = `https://api.pipecat.daily.co/v1/public/${agentName}/start`;
    // Never write the bearer token to the logs.
    const loggableHeaders = { ...headers, 'Authorization': 'Bearer [REDACTED]' };
    logger.debug(`Making API call to Daily: ${url} ${JSON.stringify(loggableHeaders)} ${JSON.stringify(payload)}`);

    try {
      const response = await axios.post(url, payload, { headers });
      logger.debug(`Response: ${JSON.stringify(response.data)}`);

      return res.status(200).json({
        status: 'success',
        data: response.data,
        room_properties: daily_room_properties,
      });
    } catch (error) {
      if (error.response) {
        // Pass through status code and error details from the Daily API
        const statusCode = error.response.status;
        const errorDetail = error.response.data || error.message;
        logger.error(`HTTP error: ${JSON.stringify(errorDetail)}`);
        return res.status(statusCode).json(errorDetail);
      } else {
        logger.error(`Request error: ${error.message}`);
        return res.status(500).json({ error: error.message });
      }
    }
  } catch (error) {
    logger.error(`Unexpected error: ${error.message}`);
    return res.status(500).json({ error: 'Internal server error', message: error.message });
  }
}
|
||||||
|
|
||||||
|
// Configure the body parser: cap the accepted request body size at 1 MB
|
||||||
|
// Options handed to the route's body parser: the only setting is the size limit.
const bodyParserOptions = { sizeLimit: '1mb' };

/**
 * Route-level configuration object.
 * Shape: `{ api: { bodyParser: { sizeLimit } } }`.
 */
export const config = {
  api: { bodyParser: bodyParserOptions },
};
|
||||||
@@ -0,0 +1,6 @@
|
|||||||
|
import { logger } from '../../lib/utils';
|
||||||
|
|
||||||
|
/**
 * Answers every request on this route with a fixed greeting.
 *
 * @param {object} req - Incoming request; not inspected.
 * @param {object} res - Response object supporting `status(code)` chained with `json(body)`.
 */
export default function handler(req, res) {
  logger.info('Received request to /api');

  const greeting = { message: 'Hello, World! from ᓚᘏᗢ' };
  const okResponse = res.status(200);
  okResponse.json(greeting);
}
|
||||||
@@ -0,0 +1,6 @@
|
|||||||
|
// Build/deployment settings, exported as a plain CommonJS object.
// NOTE(review): the keys look like a hosting-platform project config; confirm
// which tool consumes this file before changing any of them.
const deploymentSettings = {
  version: 2,
  buildCommand: 'next build',
  outputDirectory: '.next',
  cleanUrls: true,
};

module.exports = deploymentSettings;
|
||||||
94
examples/deployment/pipecat-cloud-example/.gitignore
vendored
Normal file
94
examples/deployment/pipecat-cloud-example/.gitignore
vendored
Normal file
@@ -0,0 +1,94 @@
|
|||||||
|
# Python
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
*.so
|
||||||
|
.Python
|
||||||
|
build/
|
||||||
|
dist/
|
||||||
|
*.egg-info/
|
||||||
|
*.egg
|
||||||
|
.installed.cfg
|
||||||
|
.eggs/
|
||||||
|
downloads/
|
||||||
|
lib/
|
||||||
|
lib64/
|
||||||
|
parts/
|
||||||
|
sdist/
|
||||||
|
var/
|
||||||
|
wheels/
|
||||||
|
share/python-wheels/
|
||||||
|
MANIFEST
|
||||||
|
|
||||||
|
# Virtual Environments
|
||||||
|
venv/
|
||||||
|
env/
|
||||||
|
.env
|
||||||
|
.venv/
|
||||||
|
ENV/
|
||||||
|
env.bak/
|
||||||
|
venv.bak/
|
||||||
|
|
||||||
|
# IDE
|
||||||
|
.idea/
|
||||||
|
.vscode/
|
||||||
|
.spyderproject
|
||||||
|
.spyproject
|
||||||
|
.ropeproject
|
||||||
|
|
||||||
|
# Testing and Coverage
|
||||||
|
.coverage
|
||||||
|
.coverage.*
|
||||||
|
htmlcov/
|
||||||
|
.pytest_cache/
|
||||||
|
.tox/
|
||||||
|
.nox/
|
||||||
|
.cache
|
||||||
|
nosetests.xml
|
||||||
|
coverage.xml
|
||||||
|
*.cover
|
||||||
|
.hypothesis/
|
||||||
|
cover/
|
||||||
|
|
||||||
|
# Logs and Databases
|
||||||
|
*.log
|
||||||
|
*.db
|
||||||
|
db.sqlite3
|
||||||
|
db.sqlite3-journal
|
||||||
|
pip-log.txt
|
||||||
|
|
||||||
|
# System Files
|
||||||
|
.DS_Store
|
||||||
|
Thumbs.db
|
||||||
|
desktop.ini
|
||||||
|
*.swp
|
||||||
|
*.swo
|
||||||
|
*.bak
|
||||||
|
*.tmp
|
||||||
|
*~
|
||||||
|
|
||||||
|
# Build and Documentation
|
||||||
|
docs/_build/
|
||||||
|
.pybuilder/
|
||||||
|
target/
|
||||||
|
instance/
|
||||||
|
.webassets-cache
|
||||||
|
.pdm.toml
|
||||||
|
.pdm-python
|
||||||
|
.pdm-build/
|
||||||
|
__pypackages__/
|
||||||
|
|
||||||
|
# Other
|
||||||
|
*.mo
|
||||||
|
*.pot
|
||||||
|
*.sage.py
|
||||||
|
.mypy_cache/
|
||||||
|
.dmypy.json
|
||||||
|
dmypy.json
|
||||||
|
.pyre/
|
||||||
|
.pytype/
|
||||||
|
cython_debug/
|
||||||
|
.ipynb_checkpoints
|
||||||
|
|
||||||
|
# Pipecat cloud
|
||||||
|
.pcc-deploy.toml
|
||||||
7
examples/deployment/pipecat-cloud-example/Dockerfile
Normal file
7
examples/deployment/pipecat-cloud-example/Dockerfile
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
FROM dailyco/pipecat-base:latest
|
||||||
|
|
||||||
|
COPY ./requirements.txt requirements.txt
|
||||||
|
|
||||||
|
RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
||||||
|
|
||||||
|
COPY ./bot.py bot.py
|
||||||
196
examples/deployment/pipecat-cloud-example/README.md
Normal file
196
examples/deployment/pipecat-cloud-example/README.md
Normal file
@@ -0,0 +1,196 @@
|
|||||||
|
# Pipecat Cloud Starter Project
|
||||||
|
|
||||||
|
[Documentation](https://docs.pipecat.daily.co) | [Discord](https://discord.gg/dailyco)
|
||||||
|
|
||||||
|
A template voice agent for [Pipecat Cloud](https://www.daily.co/products/pipecat-cloud/) that demonstrates building and deploying a conversational AI agent.
|
||||||
|
|
||||||
|
> **For a detailed step-by-step guide, see our [Quickstart Documentation](https://docs.pipecat.daily.co/quickstart).**
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
- Python 3.10+
|
||||||
|
- Linux, macOS, or Windows Subsystem for Linux (WSL)
|
||||||
|
- [Docker](https://www.docker.com) and a Docker repository (e.g., [Docker Hub](https://hub.docker.com))
|
||||||
|
- A Docker Hub account (or other container registry account)
|
||||||
|
- [Pipecat Cloud](https://pipecat.daily.co) account
|
||||||
|
|
||||||
|
> **Note**: If you haven't installed Docker yet, follow the official installation guides for your platform ([Linux](https://docs.docker.com/engine/install/), [Mac](https://docs.docker.com/desktop/setup/install/mac-install/), [Windows](https://docs.docker.com/desktop/setup/install/windows-install/)). For Docker Hub, [create a free account](https://hub.docker.com/signup) and log in via terminal with `docker login`.
|
||||||
|
|
||||||
|
## Get Started
|
||||||
|
|
||||||
|
### 1. Get the starter project
|
||||||
|
|
||||||
|
Clone the starter project from GitHub:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git clone https://github.com/daily-co/pipecat-cloud-starter
|
||||||
|
cd pipecat-cloud-starter
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Set up your Python environment
|
||||||
|
|
||||||
|
We recommend using a virtual environment to manage your Python dependencies.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Create a virtual environment
|
||||||
|
python -m venv .venv
|
||||||
|
|
||||||
|
# Activate it
|
||||||
|
source .venv/bin/activate # On Windows: .venv\Scripts\activate
|
||||||
|
|
||||||
|
# Install the Pipecat Cloud CLI
|
||||||
|
pip install pipecatcloud
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Authenticate with Pipecat Cloud
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pcc auth login
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. Acquire required API keys
|
||||||
|
|
||||||
|
This starter requires the following API keys:
|
||||||
|
|
||||||
|
- **OpenAI API Key**: Get from [platform.openai.com/api-keys](https://platform.openai.com/api-keys)
|
||||||
|
- **Cartesia API Key**: Get from [play.cartesia.ai/keys](https://play.cartesia.ai/keys)
|
||||||
|
- **Daily API Key**: Automatically provided through your Pipecat Cloud account
|
||||||
|
|
||||||
|
### 5. Configure to run locally (optional)
|
||||||
|
|
||||||
|
You can test your agent locally before deploying to Pipecat Cloud:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Set environment variables with your API keys
|
||||||
|
export CARTESIA_API_KEY="your_cartesia_key"
|
||||||
|
export DAILY_API_KEY="your_daily_key"
|
||||||
|
export OPENAI_API_KEY="your_openai_key"
|
||||||
|
```
|
||||||
|
|
||||||
|
> Your `DAILY_API_KEY` can be found at [https://pipecat.daily.co](https://pipecat.daily.co) under the `Settings` in the `Daily (WebRTC)` tab.
|
||||||
|
|
||||||
|
First install requirements:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install -r requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
Then, launch the bot.py script locally:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
LOCAL_RUN=1 python bot.py
|
||||||
|
```
|
||||||
|
|
||||||
|
## Deploy & Run
|
||||||
|
|
||||||
|
### 1. Build and push your Docker image
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Build the image (targeting ARM architecture for cloud deployment)
|
||||||
|
docker build --platform=linux/arm64 -t my-first-agent:latest .
|
||||||
|
|
||||||
|
# Tag with your Docker username and version
|
||||||
|
docker tag my-first-agent:latest your-username/my-first-agent:0.1
|
||||||
|
|
||||||
|
# Push to Docker Hub
|
||||||
|
docker push your-username/my-first-agent:0.1
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Create a secret set for your API keys
|
||||||
|
|
||||||
|
The starter project requires API keys for OpenAI and Cartesia:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Copy the example env file
|
||||||
|
cp env.example .env
|
||||||
|
|
||||||
|
# Edit .env to add your API keys:
|
||||||
|
# CARTESIA_API_KEY=your_cartesia_key
|
||||||
|
# OPENAI_API_KEY=your_openai_key
|
||||||
|
|
||||||
|
# Create a secret set from your .env file
|
||||||
|
pcc secrets set my-first-agent-secrets --file .env
|
||||||
|
```
|
||||||
|
|
||||||
|
Alternatively, you can create secrets directly via CLI:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pcc secrets set my-first-agent-secrets \
|
||||||
|
CARTESIA_API_KEY=your_cartesia_key \
|
||||||
|
OPENAI_API_KEY=your_openai_key
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Deploy to Pipecat Cloud
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pcc deploy my-first-agent your-username/my-first-agent:0.1 --secrets my-first-agent-secrets
|
||||||
|
```
|
||||||
|
|
||||||
|
> **Note (Optional)**: For a more maintainable approach, you can use the included `pcc-deploy.toml` file:
|
||||||
|
>
|
||||||
|
> ```toml
|
||||||
|
> agent_name = "my-first-agent"
|
||||||
|
> image = "your-username/my-first-agent:0.1"
|
||||||
|
> secret_set = "my-first-agent-secrets"
|
||||||
|
>
|
||||||
|
> [scaling]
|
||||||
|
> min_instances = 0
|
||||||
|
> ```
|
||||||
|
>
|
||||||
|
> Then simply run `pcc deploy` without additional arguments.
|
||||||
|
|
||||||
|
> **Note**: If your repository is private, you'll need to add credentials:
|
||||||
|
>
|
||||||
|
> ```bash
|
||||||
|
> # Create pull secret (you’ll be prompted for credentials)
|
||||||
|
> pcc secrets image-pull-secret pull-secret https://index.docker.io/v1/
|
||||||
|
>
|
||||||
|
> # Deploy with credentials
|
||||||
|
> pcc deploy my-first-agent your-username/my-first-agent:0.1 --credentials pull-secret
|
||||||
|
> ```
|
||||||
|
|
||||||
|
### 4. Check deployment and scaling (optional)
|
||||||
|
|
||||||
|
By default, your agent uses a "scale-to-zero" configuration, which means it may have a cold start of around 10 seconds when first used. With scale-to-zero, idle instances are kept for 5 minutes before being terminated.
|
||||||
|
|
||||||
|
For more responsive testing, you can scale your deployment to keep a minimum of one instance warm:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Ensure at least one warm instance is always available
|
||||||
|
pcc deploy my-first-agent your-username/my-first-agent:0.1 --min-instances 1
|
||||||
|
|
||||||
|
# Check the status of your deployment
|
||||||
|
pcc agent status my-first-agent
|
||||||
|
```
|
||||||
|
|
||||||
|
By default, idle instances are maintained for 5 minutes before being terminated when using scale-to-zero.
|
||||||
|
|
||||||
|
### 5. Create an API key
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Create a public API key for accessing your agent
|
||||||
|
pcc organizations keys create
|
||||||
|
|
||||||
|
# Set it as the default key to use with your agent
|
||||||
|
pcc organizations keys use
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6. Start your agent
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Start a session with your agent in a Daily room
|
||||||
|
pcc agent start my-first-agent --use-daily
|
||||||
|
```
|
||||||
|
|
||||||
|
This will return a URL, which you can use to connect to your running agent.
|
||||||
|
|
||||||
|
## Documentation
|
||||||
|
|
||||||
|
For more details on Pipecat Cloud and its capabilities:
|
||||||
|
|
||||||
|
- [Pipecat Cloud Documentation](https://docs.pipecat.daily.co)
|
||||||
|
- [Pipecat Project Documentation](https://docs.pipecat.ai)
|
||||||
|
|
||||||
|
## Support
|
||||||
|
|
||||||
|
Join our [Discord community](https://discord.gg/dailyco) for help and discussions.
|
||||||
161
examples/deployment/pipecat-cloud-example/bot.py
Normal file
161
examples/deployment/pipecat-cloud-example/bot.py
Normal file
@@ -0,0 +1,161 @@
|
|||||||
|
#
|
||||||
|
# Copyright (c) 2025, Daily
|
||||||
|
#
|
||||||
|
# SPDX-License-Identifier: BSD 2-Clause License
|
||||||
|
#
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from loguru import logger
|
||||||
|
from pipecatcloud.agent import DailySessionArguments
|
||||||
|
|
||||||
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
|
from pipecat.frames.frames import LLMMessagesFrame
|
||||||
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
|
# Check if we're in local development mode
|
||||||
|
LOCAL_RUN = os.getenv("LOCAL_RUN")
|
||||||
|
if LOCAL_RUN:
|
||||||
|
import asyncio
|
||||||
|
import webbrowser
|
||||||
|
|
||||||
|
try:
|
||||||
|
from local_runner import configure
|
||||||
|
except ImportError:
|
||||||
|
logger.error("Could not import local_runner module. Local development mode may not work.")
|
||||||
|
|
||||||
|
# Load environment variables
|
||||||
|
load_dotenv(override=True)
|
||||||
|
|
||||||
|
|
||||||
|
async def main(room_url: str, token: str):
    """Assemble the voice pipeline for one Daily room and run it to completion.

    Args:
        room_url: The Daily room URL
        token: The Daily room token
    """
    logger.debug("Starting bot in room: {}", room_url)

    # Transport joins the room as "bot" with audio out, transcription and VAD on.
    daily_params = DailyParams(
        audio_out_enabled=True,
        transcription_enabled=True,
        vad_enabled=True,
        vad_analyzer=SileroVADAnalyzer(),
    )
    transport = DailyTransport(room_url, token, "bot", daily_params)

    tts = CartesiaTTSService(
        api_key=os.getenv("CARTESIA_API_KEY"),
        voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22",
    )

    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")

    system_prompt = "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way."
    # This list is shared with the join handler below, which appends to it.
    messages = [{"role": "system", "content": system_prompt}]

    context = OpenAILLMContext(messages)
    context_aggregator = llm.create_context_aggregator(context)

    stages = [
        transport.input(),
        context_aggregator.user(),
        llm,
        tts,
        transport.output(),
        context_aggregator.assistant(),
    ]
    pipeline = Pipeline(stages)

    task_params = PipelineParams(
        allow_interruptions=True,
        enable_metrics=True,
        enable_usage_metrics=True,
        report_only_initial_ttfb=True,
    )
    task = PipelineTask(pipeline, params=task_params)

    @transport.event_handler("on_first_participant_joined")
    async def on_first_participant_joined(transport, participant):
        participant_id = participant["id"]
        logger.info("First participant joined: {}", participant_id)
        await transport.capture_participant_transcription(participant_id)
        # Kick off the conversation.
        greeting_instruction = {
            "role": "system",
            "content": "Please start with 'Hello World' and introduce yourself to the user.",
        }
        messages.append(greeting_instruction)
        await task.queue_frames([LLMMessagesFrame(messages)])

    @transport.event_handler("on_participant_left")
    async def on_participant_left(transport, participant, reason):
        logger.info("Participant left: {}", participant)
        # Any participant leaving cancels the pipeline task.
        await task.cancel()

    runner = PipelineRunner()

    await runner.run(task)
|
||||||
|
|
||||||
|
|
||||||
|
async def bot(args: DailySessionArguments):
    """Main bot entry point: run the pipeline for the session described by ``args``.

    Args:
        args: Session arguments. Only ``args.room_url`` and ``args.token`` are
            read here; both are forwarded to ``main``.

    Raises:
        Exception: Any error raised by ``main`` is logged with its traceback
            and then re-raised unchanged.
    """
    # Log the room URL only. The meeting token is a credential and must not
    # end up in log output.
    logger.info(f"Bot process initialized {args.room_url}")

    try:
        await main(args.room_url, args.token)
        logger.info("Bot process completed")
    except Exception as e:
        logger.exception(f"Error in bot process: {str(e)}")
        raise
|
||||||
|
|
||||||
|
|
||||||
|
# Local development functions
|
||||||
|
async def local_main():
    """Local development helper: get a room, open it in a browser, run the bot.

    Any exception is logged with its traceback and not re-raised.
    """
    try:
        async with aiohttp.ClientSession() as http_session:
            room_url, token = await configure(http_session)

            # Filler lines around the link make it easy to spot in the log output.
            banner = ("_", "_", f"Talk to your voice agent here: {room_url}", "_", "_")
            for line in banner:
                logger.warning(line)

            webbrowser.open(room_url)
            await main(room_url, token)
    except Exception as err:
        logger.exception(f"Error in local development mode: {err}")
|
||||||
|
|
||||||
|
|
||||||
|
# Local development entry point
|
||||||
|
if LOCAL_RUN and __name__ == "__main__":
|
||||||
|
try:
|
||||||
|
asyncio.run(local_main())
|
||||||
|
except Exception as e:
|
||||||
|
logger.exception(f"Failed to run in local mode: {e}")
|
||||||
2
examples/deployment/pipecat-cloud-example/env.example
Normal file
2
examples/deployment/pipecat-cloud-example/env.example
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
CARTESIA_API_KEY=
|
||||||
|
OPENAI_API_KEY=
|
||||||
46
examples/deployment/pipecat-cloud-example/local_runner.py
Normal file
46
examples/deployment/pipecat-cloud-example/local_runner.py
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
#
|
||||||
|
# Copyright (c) 2024–2025, Daily
|
||||||
|
#
|
||||||
|
# SPDX-License-Identifier: BSD 2-Clause License
|
||||||
|
#
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
|
|
||||||
|
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper, DailyRoomParams
|
||||||
|
|
||||||
|
|
||||||
|
async def configure(aiohttp_session: aiohttp.ClientSession):
    """Return the ``(url, token)`` pair produced by ``configure_with_args``.

    Args:
        aiohttp_session: HTTP session passed straight through.
    """
    room_url, room_token = await configure_with_args(aiohttp_session)
    return room_url, room_token
|
||||||
|
|
||||||
|
|
||||||
|
async def configure_with_args(aiohttp_session: aiohttp.ClientSession = None):
    """Create a Daily room and a meeting token for it.

    Reads the API key from ``DAILY_API_KEY`` and the REST endpoint from
    ``DAILY_API_URL`` (default ``https://api.daily.co/v1``).

    Args:
        aiohttp_session: HTTP session handed to ``DailyRESTHelper``.

    Returns:
        A ``(url, token)`` tuple: the new room's URL and a token for it.

    Raises:
        Exception: If ``DAILY_API_KEY`` is not set, or if the created room
            has no URL.
    """
    key = os.getenv("DAILY_API_KEY")
    if not key:
        raise Exception(
            "No Daily API key specified. set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers."
        )

    daily_rest_helper = DailyRESTHelper(
        daily_api_key=key,
        daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
        aiohttp_session=aiohttp_session,
    )

    room = await daily_rest_helper.create_room(
        DailyRoomParams(properties={"enable_prejoin_ui": False})
    )
    if not room.url:
        # Plain Exception, matching the missing-key check above: this module
        # does not import HTTPException, so raising it would fail with
        # NameError instead of reporting the real problem.
        raise Exception("Failed to create room")

    url = room.url

    # Create a meeting token for the given room with an expiration 1 hour in
    # the future.
    expiry_time: float = 60 * 60

    token = await daily_rest_helper.get_token(url, expiry_time)

    return (url, token)
|
||||||
@@ -0,0 +1,6 @@
|
|||||||
|
agent_name = "my-first-agent"
|
||||||
|
image = "your-username/my-first-agent:0.1"
|
||||||
|
secret_set = "my-first-agent-secrets"
|
||||||
|
|
||||||
|
[scaling]
|
||||||
|
min_instances = 0
|
||||||
@@ -0,0 +1,3 @@
|
|||||||
|
pipecatcloud
|
||||||
|
pipecat-ai[cartesia,daily,openai,silero]>=0.0.58
|
||||||
|
python-dotenv~=1.0.1
|
||||||
57
examples/foundational/01-say-one-thing-piper.py
Normal file
57
examples/foundational/01-say-one-thing-piper.py
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
#
|
||||||
|
# Copyright (c) 2024–2025, Daily
|
||||||
|
#
|
||||||
|
# SPDX-License-Identifier: BSD 2-Clause License
|
||||||
|
#
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from loguru import logger
|
||||||
|
from runner import configure
|
||||||
|
|
||||||
|
from pipecat.frames.frames import EndFrame, TTSSpeakFrame
|
||||||
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
|
from pipecat.pipeline.task import PipelineTask
|
||||||
|
from pipecat.services.piper.tts import PiperTTSService
|
||||||
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
|
load_dotenv(override=True)
|
||||||
|
|
||||||
|
logger.remove(0)
|
||||||
|
logger.add(sys.stderr, level="DEBUG")
|
||||||
|
|
||||||
|
|
||||||
|
async def main():
    """Join a Daily room and speak one sentence via Piper TTS when someone joins."""
    async with aiohttp.ClientSession() as session:
        # Only the room URL is used; the transport is created without a token.
        room_url, _unused_token = await configure(session)

        daily_params = DailyParams(audio_out_enabled=True)
        transport = DailyTransport(room_url, None, "Say One Thing", daily_params)

        tts = PiperTTSService(
            base_url=os.getenv("PIPER_BASE_URL"),
            aiohttp_session=session,
            sample_rate=24000,
        )

        runner = PipelineRunner()

        pipeline = Pipeline([tts, transport.output()])
        task = PipelineTask(pipeline)

        # Register an event handler so we can play the audio when the
        # participant joins.
        @transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            # Speak the greeting, then queue EndFrame right behind it.
            greeting = TTSSpeakFrame("Hello there, how are you today ?")
            await task.queue_frames([greeting, EndFrame()])

        await runner.run(task)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
asyncio.run(main())
|
||||||
@@ -17,7 +17,7 @@ from pipecat.frames.frames import EndFrame, TTSSpeakFrame
|
|||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineTask
|
from pipecat.pipeline.task import PipelineTask
|
||||||
from pipecat.services.cartesia import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
@@ -36,7 +36,7 @@ async def main():
|
|||||||
|
|
||||||
tts = CartesiaTTSService(
|
tts = CartesiaTTSService(
|
||||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||||
)
|
)
|
||||||
|
|
||||||
runner = PipelineRunner()
|
runner = PipelineRunner()
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ from pipecat.frames.frames import EndFrame, TTSSpeakFrame
|
|||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineTask
|
from pipecat.pipeline.task import PipelineTask
|
||||||
from pipecat.services.cartesia import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.transports.local.audio import LocalAudioTransport, LocalAudioTransportParams
|
from pipecat.transports.local.audio import LocalAudioTransport, LocalAudioTransportParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
@@ -29,7 +29,7 @@ async def main():
|
|||||||
|
|
||||||
tts = CartesiaTTSService(
|
tts = CartesiaTTSService(
|
||||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||||
)
|
)
|
||||||
|
|
||||||
pipeline = Pipeline([tts, transport.output()])
|
pipeline = Pipeline([tts, transport.output()])
|
||||||
|
|||||||
@@ -1,3 +1,9 @@
|
|||||||
|
#
|
||||||
|
# Copyright (c) 2024–2025, Daily
|
||||||
|
#
|
||||||
|
# SPDX-License-Identifier: BSD 2-Clause License
|
||||||
|
#
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import asyncio
|
import asyncio
|
||||||
import os
|
import os
|
||||||
@@ -12,7 +18,7 @@ from pipecat.frames.frames import TextFrame
|
|||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineTask
|
from pipecat.pipeline.task import PipelineTask
|
||||||
from pipecat.services.cartesia import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.transports.services.livekit import LiveKitParams, LiveKitTransport
|
from pipecat.transports.services.livekit import LiveKitParams, LiveKitTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
@@ -83,7 +89,7 @@ async def main():
|
|||||||
|
|
||||||
tts = CartesiaTTSService(
|
tts = CartesiaTTSService(
|
||||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||||
)
|
)
|
||||||
|
|
||||||
runner = PipelineRunner()
|
runner = PipelineRunner()
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ from pipecat.frames.frames import EndFrame, TTSSpeakFrame
|
|||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineTask
|
from pipecat.pipeline.task import PipelineTask
|
||||||
from pipecat.services.riva import FastPitchTTSService
|
from pipecat.services.riva.tts import FastPitchTTSService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|||||||
@@ -17,8 +17,8 @@ from pipecat.frames.frames import EndFrame, LLMMessagesFrame
|
|||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineTask
|
from pipecat.pipeline.task import PipelineTask
|
||||||
from pipecat.services.cartesia import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.services.openai import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
@@ -37,7 +37,7 @@ async def main():
|
|||||||
|
|
||||||
tts = CartesiaTTSService(
|
tts = CartesiaTTSService(
|
||||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||||
)
|
)
|
||||||
|
|
||||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ from pipecat.frames.frames import TextFrame
|
|||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineTask
|
from pipecat.pipeline.task import PipelineTask
|
||||||
from pipecat.services.fal import FalImageGenService
|
from pipecat.services.fal.image import FalImageGenService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ from pipecat.frames.frames import TextFrame
|
|||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineTask
|
from pipecat.pipeline.task import PipelineTask
|
||||||
from pipecat.services.fal import FalImageGenService
|
from pipecat.services.fal.image import FalImageGenService
|
||||||
from pipecat.transports.local.tk import TkLocalTransport, TkTransportParams
|
from pipecat.transports.local.tk import TkLocalTransport, TkTransportParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ from pipecat.frames.frames import EndFrame, TextFrame
|
|||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.services.google import GoogleImageGenService
|
from pipecat.services.google.image import GoogleImageGenService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|||||||
@@ -27,9 +27,9 @@ from pipecat.pipeline.sync_parallel_pipeline import SyncParallelPipeline
|
|||||||
from pipecat.pipeline.task import PipelineTask
|
from pipecat.pipeline.task import PipelineTask
|
||||||
from pipecat.processors.aggregators.sentence import SentenceAggregator
|
from pipecat.processors.aggregators.sentence import SentenceAggregator
|
||||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||||
from pipecat.services.cartesia import CartesiaHttpTTSService
|
from pipecat.services.cartesia.tts import CartesiaHttpTTSService
|
||||||
from pipecat.services.fal import FalImageGenService
|
from pipecat.services.fal.image import FalImageGenService
|
||||||
from pipecat.services.openai import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
@@ -87,7 +87,7 @@ async def main():
|
|||||||
|
|
||||||
tts = CartesiaHttpTTSService(
|
tts = CartesiaHttpTTSService(
|
||||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||||
)
|
)
|
||||||
|
|
||||||
imagegen = FalImageGenService(
|
imagegen = FalImageGenService(
|
||||||
|
|||||||
@@ -27,9 +27,9 @@ from pipecat.pipeline.sync_parallel_pipeline import SyncParallelPipeline
|
|||||||
from pipecat.pipeline.task import PipelineTask
|
from pipecat.pipeline.task import PipelineTask
|
||||||
from pipecat.processors.aggregators.sentence import SentenceAggregator
|
from pipecat.processors.aggregators.sentence import SentenceAggregator
|
||||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||||
from pipecat.services.cartesia import CartesiaHttpTTSService
|
from pipecat.services.cartesia.tts import CartesiaHttpTTSService
|
||||||
from pipecat.services.fal import FalImageGenService
|
from pipecat.services.fal.image import FalImageGenService
|
||||||
from pipecat.services.openai import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.local.tk import TkLocalTransport, TkTransportParams
|
from pipecat.transports.local.tk import TkLocalTransport, TkTransportParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
@@ -97,7 +97,7 @@ async def main():
|
|||||||
|
|
||||||
tts = CartesiaHttpTTSService(
|
tts = CartesiaHttpTTSService(
|
||||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||||
)
|
)
|
||||||
|
|
||||||
imagegen = FalImageGenService(
|
imagegen = FalImageGenService(
|
||||||
|
|||||||
@@ -26,8 +26,8 @@ from pipecat.pipeline.runner import PipelineRunner
|
|||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||||
from pipecat.services.cartesia import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.services.openai import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
@@ -74,7 +74,7 @@ async def main():
|
|||||||
|
|
||||||
tts = CartesiaTTSService(
|
tts = CartesiaTTSService(
|
||||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||||
)
|
)
|
||||||
|
|
||||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||||
|
|||||||
@@ -27,8 +27,8 @@ from pipecat.pipeline.runner import PipelineRunner
|
|||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||||
from pipecat.services.cartesia import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.services.openai import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
@@ -93,7 +93,7 @@ async def main():
|
|||||||
|
|
||||||
tts = CartesiaTTSService(
|
tts = CartesiaTTSService(
|
||||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||||
)
|
)
|
||||||
|
|
||||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||||
|
|||||||
@@ -18,8 +18,8 @@ from pipecat.pipeline.runner import PipelineRunner
|
|||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.audio.vad.silero import SileroVAD
|
from pipecat.processors.audio.vad.silero import SileroVAD
|
||||||
from pipecat.services.cartesia import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.services.openai import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
@@ -47,7 +47,7 @@ async def main():
|
|||||||
|
|
||||||
tts = CartesiaTTSService(
|
tts = CartesiaTTSService(
|
||||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||||
)
|
)
|
||||||
|
|
||||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||||
|
|||||||
@@ -18,8 +18,8 @@ from pipecat.pipeline.pipeline import Pipeline
|
|||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.services.cartesia import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.services.openai import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
@@ -46,7 +46,7 @@ async def main():
|
|||||||
|
|
||||||
tts = CartesiaTTSService(
|
tts = CartesiaTTSService(
|
||||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||||
)
|
)
|
||||||
|
|
||||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||||
|
|||||||
@@ -18,8 +18,8 @@ from pipecat.pipeline.pipeline import Pipeline
|
|||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.services.anthropic import AnthropicLLMService
|
from pipecat.services.anthropic.llm import AnthropicLLMService
|
||||||
from pipecat.services.cartesia import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
@@ -46,7 +46,7 @@ async def main():
|
|||||||
|
|
||||||
tts = CartesiaTTSService(
|
tts = CartesiaTTSService(
|
||||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||||
)
|
)
|
||||||
|
|
||||||
llm = AnthropicLLMService(
|
llm = AnthropicLLMService(
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ from pipecat.processors.aggregators.llm_response import (
|
|||||||
LLMUserResponseAggregator,
|
LLMUserResponseAggregator,
|
||||||
)
|
)
|
||||||
from pipecat.processors.frameworks.langchain import LangchainProcessor
|
from pipecat.processors.frameworks.langchain import LangchainProcessor
|
||||||
from pipecat.services.cartesia import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
@@ -64,7 +64,7 @@ async def main():
|
|||||||
|
|
||||||
tts = CartesiaTTSService(
|
tts = CartesiaTTSService(
|
||||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||||
)
|
)
|
||||||
|
|
||||||
prompt = ChatPromptTemplate.from_messages(
|
prompt = ChatPromptTemplate.from_messages(
|
||||||
|
|||||||
@@ -24,8 +24,9 @@ from pipecat.pipeline.pipeline import Pipeline
|
|||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.services.deepgram import DeepgramSTTService, DeepgramTTSService
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
from pipecat.services.openai import OpenAILLMService
|
from pipecat.services.deepgram.tts import DeepgramTTSService
|
||||||
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|||||||
@@ -18,8 +18,9 @@ from pipecat.pipeline.pipeline import Pipeline
|
|||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.services.deepgram import DeepgramSTTService, DeepgramTTSService
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
from pipecat.services.openai import OpenAILLMService
|
from pipecat.services.deepgram.tts import DeepgramTTSService
|
||||||
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|||||||
103
examples/foundational/07d-interruptible-elevenlabs-http.py
Normal file
103
examples/foundational/07d-interruptible-elevenlabs-http.py
Normal file
@@ -0,0 +1,103 @@
|
|||||||
|
#
|
||||||
|
# Copyright (c) 2024–2025, Daily
|
||||||
|
#
|
||||||
|
# SPDX-License-Identifier: BSD 2-Clause License
|
||||||
|
#
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from loguru import logger
|
||||||
|
from runner import configure
|
||||||
|
|
||||||
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
|
from pipecat.services.elevenlabs.tts import ElevenLabsHttpTTSService
|
||||||
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
|
load_dotenv(override=True)
|
||||||
|
|
||||||
|
logger.remove(0)
|
||||||
|
logger.add(sys.stderr, level="DEBUG")
|
||||||
|
|
||||||
|
|
||||||
|
async def main():
|
||||||
|
async with aiohttp.ClientSession() as session:
|
||||||
|
(room_url, token) = await configure(session)
|
||||||
|
|
||||||
|
transport = DailyTransport(
|
||||||
|
room_url,
|
||||||
|
token,
|
||||||
|
"Respond bot",
|
||||||
|
DailyParams(
|
||||||
|
audio_out_enabled=True,
|
||||||
|
transcription_enabled=True,
|
||||||
|
vad_enabled=True,
|
||||||
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
tts = ElevenLabsHttpTTSService(
|
||||||
|
api_key=os.getenv("ELEVENLABS_API_KEY", ""),
|
||||||
|
voice_id=os.getenv("ELEVENLABS_VOICE_ID", ""),
|
||||||
|
aiohttp_session=session,
|
||||||
|
)
|
||||||
|
|
||||||
|
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||||
|
|
||||||
|
messages = [
|
||||||
|
{
|
||||||
|
"role": "system",
|
||||||
|
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
context = OpenAILLMContext(messages)
|
||||||
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
|
pipeline = Pipeline(
|
||||||
|
[
|
||||||
|
transport.input(), # Transport user input
|
||||||
|
context_aggregator.user(), # User responses
|
||||||
|
llm, # LLM
|
||||||
|
tts, # TTS
|
||||||
|
transport.output(), # Transport bot output
|
||||||
|
context_aggregator.assistant(), # Assistant spoken responses
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
task = PipelineTask(
|
||||||
|
pipeline,
|
||||||
|
params=PipelineParams(
|
||||||
|
allow_interruptions=True,
|
||||||
|
enable_metrics=True,
|
||||||
|
enable_usage_metrics=True,
|
||||||
|
report_only_initial_ttfb=True,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
@transport.event_handler("on_first_participant_joined")
|
||||||
|
async def on_first_participant_joined(transport, participant):
|
||||||
|
await transport.capture_participant_transcription(participant["id"])
|
||||||
|
# Kick off the conversation.
|
||||||
|
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||||
|
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||||
|
|
||||||
|
@transport.event_handler("on_participant_left")
|
||||||
|
async def on_participant_left(transport, participant, reason):
|
||||||
|
await task.cancel()
|
||||||
|
|
||||||
|
runner = PipelineRunner()
|
||||||
|
|
||||||
|
await runner.run(task)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
asyncio.run(main())
|
||||||
@@ -18,8 +18,8 @@ from pipecat.pipeline.pipeline import Pipeline
|
|||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
|
||||||
from pipecat.services.openai import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|||||||
@@ -18,8 +18,8 @@ from pipecat.pipeline.pipeline import Pipeline
|
|||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.services.openai import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.services.playht import PlayHTHttpTTSService
|
from pipecat.services.playht.tts import PlayHTHttpTTSService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|||||||
@@ -18,8 +18,8 @@ from pipecat.pipeline.pipeline import Pipeline
|
|||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.services.openai import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.services.playht import PlayHTTTSService
|
from pipecat.services.playht.tts import PlayHTTTSService
|
||||||
from pipecat.transcriptions.language import Language
|
from pipecat.transcriptions.language import Language
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
@@ -48,7 +48,7 @@ async def main():
|
|||||||
tts = PlayHTTTSService(
|
tts = PlayHTTTSService(
|
||||||
user_id=os.getenv("PLAYHT_USER_ID"),
|
user_id=os.getenv("PLAYHT_USER_ID"),
|
||||||
api_key=os.getenv("PLAYHT_API_KEY"),
|
api_key=os.getenv("PLAYHT_API_KEY"),
|
||||||
voice_url="s3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json",
|
voice_url="s3://voice-cloning-zero-shot/e46b4027-b38d-4d24-b292-38fbca2be0ef/original/manifest.json",
|
||||||
params=PlayHTTTSService.InputParams(language=Language.EN),
|
params=PlayHTTTSService.InputParams(language=Language.EN),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -18,7 +18,9 @@ from pipecat.pipeline.pipeline import Pipeline
|
|||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.services.azure import AzureLLMService, AzureSTTService, AzureTTSService
|
from pipecat.services.azure.llm import AzureLLMService
|
||||||
|
from pipecat.services.azure.stt import AzureSTTService
|
||||||
|
from pipecat.services.azure.tts import AzureTTSService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|||||||
@@ -18,7 +18,9 @@ from pipecat.pipeline.pipeline import Pipeline
|
|||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.services.openai import OpenAILLMService, OpenAISTTService, OpenAITTSService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
|
from pipecat.services.openai.stt import OpenAISTTService
|
||||||
|
from pipecat.services.openai.tts import OpenAITTSService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
@@ -51,16 +53,20 @@ async def main():
|
|||||||
# api_key="gsk_***",
|
# api_key="gsk_***",
|
||||||
# model="whisper-large-v3",
|
# model="whisper-large-v3",
|
||||||
# )
|
# )
|
||||||
stt = OpenAISTTService(api_key=os.getenv("OPENAI_API_KEY"), model="whisper-1")
|
stt = OpenAISTTService(
|
||||||
|
api_key=os.getenv("OPENAI_API_KEY"),
|
||||||
|
model="gpt-4o-transcribe-latest",
|
||||||
|
prompt="Expect words related to dogs, such as breed names.",
|
||||||
|
)
|
||||||
|
|
||||||
tts = OpenAITTSService(api_key=os.getenv("OPENAI_API_KEY"), voice="alloy")
|
tts = OpenAITTSService(api_key=os.getenv("OPENAI_API_KEY"), voice="ballad")
|
||||||
|
|
||||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||||
|
|
||||||
messages = [
|
messages = [
|
||||||
{
|
{
|
||||||
"role": "system",
|
"role": "system",
|
||||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
"content": "You are very knowledgable about dogs. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
@@ -19,8 +19,8 @@ from pipecat.pipeline.pipeline import Pipeline
|
|||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.services.cartesia import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.services.openpipe import OpenPipeLLMService
|
from pipecat.services.openpipe.llm import OpenPipeLLMService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
@@ -47,7 +47,7 @@ async def main():
|
|||||||
|
|
||||||
tts = CartesiaTTSService(
|
tts = CartesiaTTSService(
|
||||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||||
)
|
)
|
||||||
|
|
||||||
timestamp = int(time.time())
|
timestamp = int(time.time())
|
||||||
|
|||||||
@@ -18,8 +18,8 @@ from pipecat.pipeline.pipeline import Pipeline
|
|||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.services.openai import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.services.xtts import XTTSService
|
from pipecat.services.xtts.tts import XTTSService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|||||||
@@ -18,9 +18,11 @@ from pipecat.pipeline.pipeline import Pipeline
|
|||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.services.cartesia import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.services.gladia import GladiaSTTService
|
from pipecat.services.gladia.config import GladiaInputParams, LanguageConfig
|
||||||
from pipecat.services.openai import OpenAILLMService
|
from pipecat.services.gladia.stt import GladiaSTTService
|
||||||
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
|
from pipecat.transcriptions.language import Language
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
@@ -47,11 +49,16 @@ async def main():
|
|||||||
|
|
||||||
stt = GladiaSTTService(
|
stt = GladiaSTTService(
|
||||||
api_key=os.getenv("GLADIA_API_KEY"),
|
api_key=os.getenv("GLADIA_API_KEY"),
|
||||||
|
params=GladiaInputParams(
|
||||||
|
language_config=LanguageConfig(
|
||||||
|
languages=[Language.EN],
|
||||||
|
)
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
tts = CartesiaTTSService(
|
tts = CartesiaTTSService(
|
||||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||||
)
|
)
|
||||||
|
|
||||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||||
|
|||||||
@@ -18,8 +18,8 @@ from pipecat.pipeline.pipeline import Pipeline
|
|||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.services.lmnt import LmntTTSService
|
from pipecat.services.lmnt.tts import LmntTTSService
|
||||||
from pipecat.services.openai import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|||||||
@@ -17,9 +17,9 @@ from pipecat.audio.vad.silero import SileroVADAnalyzer
|
|||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.services.ai_services import OpenAILLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.services.cartesia import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.services.together import TogetherLLMService
|
from pipecat.services.together.llm import TogetherLLMService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
@@ -46,7 +46,7 @@ async def main():
|
|||||||
|
|
||||||
tts = CartesiaTTSService(
|
tts = CartesiaTTSService(
|
||||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||||
)
|
)
|
||||||
|
|
||||||
llm = TogetherLLMService(
|
llm = TogetherLLMService(
|
||||||
|
|||||||
@@ -18,9 +18,9 @@ from pipecat.pipeline.pipeline import Pipeline
|
|||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.services.aws import PollyTTSService
|
from pipecat.services.aws.tts import PollyTTSService
|
||||||
from pipecat.services.deepgram import DeepgramSTTService
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
from pipecat.services.openai import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|||||||
@@ -18,7 +18,9 @@ from pipecat.pipeline.pipeline import Pipeline
|
|||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.services.google import GoogleLLMService, GoogleSTTService, GoogleTTSService
|
from pipecat.services.google.llm import GoogleLLMService
|
||||||
|
from pipecat.services.google.stt import GoogleSTTService
|
||||||
|
from pipecat.services.google.tts import GoogleTTSService
|
||||||
from pipecat.transcriptions.language import Language
|
from pipecat.transcriptions.language import Language
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
|
|||||||
@@ -18,9 +18,9 @@ from pipecat.pipeline.pipeline import Pipeline
|
|||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.services.assemblyai import AssemblyAISTTService
|
from pipecat.services.assemblyai.stt import AssemblyAISTTService
|
||||||
from pipecat.services.cartesia import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.services.openai import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
@@ -51,7 +51,7 @@ async def main():
|
|||||||
|
|
||||||
tts = CartesiaTTSService(
|
tts = CartesiaTTSService(
|
||||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||||
)
|
)
|
||||||
|
|
||||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||||
|
|||||||
@@ -14,14 +14,15 @@ from loguru import logger
|
|||||||
from runner import configure
|
from runner import configure
|
||||||
|
|
||||||
from pipecat.audio.filters.krisp_filter import KrispFilter
|
from pipecat.audio.filters.krisp_filter import KrispFilter
|
||||||
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.services.deepgram import DeepgramSTTService, DeepgramTTSService
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
from pipecat.services.openai import OpenAILLMService
|
from pipecat.services.deepgram.tts import DeepgramTTSService
|
||||||
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
from pipecat.vad.silero import SileroVADAnalyzer
|
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
|
|||||||
103
examples/foundational/07q-interruptible-rime-http.py
Normal file
103
examples/foundational/07q-interruptible-rime-http.py
Normal file
@@ -0,0 +1,103 @@
|
|||||||
|
#
|
||||||
|
# Copyright (c) 2024–2025, Daily
|
||||||
|
#
|
||||||
|
# SPDX-License-Identifier: BSD 2-Clause License
|
||||||
|
#
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from loguru import logger
|
||||||
|
from runner import configure
|
||||||
|
|
||||||
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
|
from pipecat.services.rime.tts import RimeHttpTTSService
|
||||||
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
|
load_dotenv(override=True)
|
||||||
|
|
||||||
|
logger.remove(0)
|
||||||
|
logger.add(sys.stderr, level="DEBUG")
|
||||||
|
|
||||||
|
|
||||||
|
async def main():
|
||||||
|
async with aiohttp.ClientSession() as session:
|
||||||
|
(room_url, token) = await configure(session)
|
||||||
|
|
||||||
|
transport = DailyTransport(
|
||||||
|
room_url,
|
||||||
|
token,
|
||||||
|
"Respond bot",
|
||||||
|
DailyParams(
|
||||||
|
audio_out_enabled=True,
|
||||||
|
transcription_enabled=True,
|
||||||
|
vad_enabled=True,
|
||||||
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
tts = RimeHttpTTSService(
|
||||||
|
api_key=os.getenv("RIME_API_KEY", ""),
|
||||||
|
voice_id="rex",
|
||||||
|
aiohttp_session=session,
|
||||||
|
)
|
||||||
|
|
||||||
|
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||||
|
|
||||||
|
messages = [
|
||||||
|
{
|
||||||
|
"role": "system",
|
||||||
|
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
context = OpenAILLMContext(messages)
|
||||||
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
|
pipeline = Pipeline(
|
||||||
|
[
|
||||||
|
transport.input(), # Transport user input
|
||||||
|
context_aggregator.user(), # User responses
|
||||||
|
llm, # LLM
|
||||||
|
tts, # TTS
|
||||||
|
transport.output(), # Transport bot output
|
||||||
|
context_aggregator.assistant(), # Assistant spoken responses
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
task = PipelineTask(
|
||||||
|
pipeline,
|
||||||
|
params=PipelineParams(
|
||||||
|
allow_interruptions=True,
|
||||||
|
enable_metrics=True,
|
||||||
|
enable_usage_metrics=True,
|
||||||
|
report_only_initial_ttfb=True,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
@transport.event_handler("on_first_participant_joined")
|
||||||
|
async def on_first_participant_joined(transport, participant):
|
||||||
|
await transport.capture_participant_transcription(participant["id"])
|
||||||
|
# Kick off the conversation.
|
||||||
|
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||||
|
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||||
|
|
||||||
|
@transport.event_handler("on_participant_left")
|
||||||
|
async def on_participant_left(transport, participant, reason):
|
||||||
|
await task.cancel()
|
||||||
|
|
||||||
|
runner = PipelineRunner()
|
||||||
|
|
||||||
|
await runner.run(task)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
asyncio.run(main())
|
||||||
@@ -18,8 +18,8 @@ from pipecat.pipeline.pipeline import Pipeline
|
|||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.services.openai import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.services.rime import RimeTTSService
|
from pipecat.services.rime.tts import RimeTTSService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|||||||
@@ -18,8 +18,9 @@ from pipecat.pipeline.pipeline import Pipeline
|
|||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.services.nim import NimLLMService
|
from pipecat.services.nim.llm import NimLLMService
|
||||||
from pipecat.services.riva import FastPitchTTSService, ParakeetSTTService
|
from pipecat.services.riva.stt import ParakeetSTTService
|
||||||
|
from pipecat.services.riva.tts import FastPitchTTSService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|||||||
@@ -32,8 +32,8 @@ from pipecat.pipeline.runner import PipelineRunner
|
|||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.frame_processor import FrameProcessor
|
from pipecat.processors.frame_processor import FrameProcessor
|
||||||
from pipecat.services.cartesia import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.services.google import GoogleLLMService
|
from pipecat.services.google.llm import GoogleLLMService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
@@ -213,7 +213,7 @@ async def main():
|
|||||||
|
|
||||||
tts = CartesiaTTSService(
|
tts = CartesiaTTSService(
|
||||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||||
)
|
)
|
||||||
|
|
||||||
llm = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY"), model="gemini-2.0-flash-001")
|
llm = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY"), model="gemini-2.0-flash-001")
|
||||||
|
|||||||
@@ -18,8 +18,8 @@ from pipecat.pipeline.pipeline import Pipeline
|
|||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.services.fish import FishAudioTTSService
|
from pipecat.services.fish.tts import FishAudioTTSService
|
||||||
from pipecat.services.openai import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|||||||
91
examples/foundational/07u-interruptible-ultravox.py
Normal file
91
examples/foundational/07u-interruptible-ultravox.py
Normal file
@@ -0,0 +1,91 @@
|
|||||||
|
#
|
||||||
|
# Copyright (c) 2024–2025, Daily
|
||||||
|
#
|
||||||
|
# SPDX-License-Identifier: BSD 2-Clause License
|
||||||
|
#
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from loguru import logger
|
||||||
|
from runner import configure
|
||||||
|
|
||||||
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
|
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||||
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
|
from pipecat.services.ultravox.stt import UltravoxSTTService
|
||||||
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
|
load_dotenv(override=True)
|
||||||
|
|
||||||
|
# NOTE: This example requires GPU resources to run efficiently.
|
||||||
|
# The Ultravox model is compute-intensive and performs best with GPU acceleration.
|
||||||
|
# This can be deployed on cloud GPU providers like Cerebrium.ai for optimal performance.
|
||||||
|
|
||||||
|
logger.remove(0)
|
||||||
|
logger.add(sys.stderr, level="DEBUG")
|
||||||
|
|
||||||
|
# Want to initialize the ultravox processor since it takes time to load the model and dont
|
||||||
|
# want to load it every time the pipeline is run
|
||||||
|
ultravox_processor = UltravoxSTTService(
|
||||||
|
model_name="fixie-ai/ultravox-v0_5-llama-3_1-8b",
|
||||||
|
hf_token=os.getenv("HF_TOKEN"),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
async def main():
|
||||||
|
async with aiohttp.ClientSession() as session:
|
||||||
|
(room_url, token) = await configure(session)
|
||||||
|
|
||||||
|
transport = DailyTransport(
|
||||||
|
room_url,
|
||||||
|
token,
|
||||||
|
"Respond bot",
|
||||||
|
DailyParams(
|
||||||
|
audio_out_enabled=True,
|
||||||
|
transcription_enabled=False,
|
||||||
|
vad_enabled=True,
|
||||||
|
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||||
|
vad_audio_passthrough=True,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
tts = CartesiaTTSService(
|
||||||
|
api_key=os.environ.get("CARTESIA_API_KEY"),
|
||||||
|
voice_id="97f4b8fb-f2fe-444b-bb9a-c109783a857a",
|
||||||
|
)
|
||||||
|
|
||||||
|
pipeline = Pipeline(
|
||||||
|
[
|
||||||
|
transport.input(), # Transport user input
|
||||||
|
ultravox_processor,
|
||||||
|
tts, # TTS
|
||||||
|
transport.output(), # Transport bot output
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
task = PipelineTask(
|
||||||
|
pipeline,
|
||||||
|
params=PipelineParams(
|
||||||
|
allow_interruptions=True,
|
||||||
|
enable_metrics=True,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
@transport.event_handler("on_participant_left")
|
||||||
|
async def on_participant_left(transport, participant, reason):
|
||||||
|
await task.cancel()
|
||||||
|
|
||||||
|
runner = PipelineRunner()
|
||||||
|
|
||||||
|
await runner.run(task)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
asyncio.run(main())
|
||||||
102
examples/foundational/07v-interruptible-neuphonic-http.py
Normal file
102
examples/foundational/07v-interruptible-neuphonic-http.py
Normal file
@@ -0,0 +1,102 @@
|
|||||||
|
#
|
||||||
|
# Copyright (c) 2024–2025, Daily
|
||||||
|
#
|
||||||
|
# SPDX-License-Identifier: BSD 2-Clause License
|
||||||
|
#
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from loguru import logger
|
||||||
|
from runner import configure
|
||||||
|
|
||||||
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
|
from pipecat.services.neuphonic.tts import NeuphonicHttpTTSService
|
||||||
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
|
load_dotenv(override=True)
|
||||||
|
|
||||||
|
logger.remove(0)
|
||||||
|
logger.add(sys.stderr, level="DEBUG")
|
||||||
|
|
||||||
|
|
||||||
|
async def main():
|
||||||
|
async with aiohttp.ClientSession() as session:
|
||||||
|
(room_url, token) = await configure(session)
|
||||||
|
|
||||||
|
transport = DailyTransport(
|
||||||
|
room_url,
|
||||||
|
token,
|
||||||
|
"Respond bot",
|
||||||
|
DailyParams(
|
||||||
|
audio_out_enabled=True,
|
||||||
|
transcription_enabled=True,
|
||||||
|
vad_enabled=True,
|
||||||
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
tts = NeuphonicHttpTTSService(
|
||||||
|
api_key=os.getenv("NEUPHONIC_API_KEY"),
|
||||||
|
voice_id="fc854436-2dac-4d21-aa69-ae17b54e98eb", # Emily
|
||||||
|
)
|
||||||
|
|
||||||
|
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||||
|
|
||||||
|
messages = [
|
||||||
|
{
|
||||||
|
"role": "system",
|
||||||
|
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
context = OpenAILLMContext(messages)
|
||||||
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
|
pipeline = Pipeline(
|
||||||
|
[
|
||||||
|
transport.input(), # Transport user input
|
||||||
|
context_aggregator.user(), # User responses
|
||||||
|
llm, # LLM
|
||||||
|
tts, # TTS
|
||||||
|
transport.output(), # Transport bot output
|
||||||
|
context_aggregator.assistant(), # Assistant spoken responses
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
task = PipelineTask(
|
||||||
|
pipeline,
|
||||||
|
params=PipelineParams(
|
||||||
|
allow_interruptions=True,
|
||||||
|
enable_metrics=True,
|
||||||
|
enable_usage_metrics=True,
|
||||||
|
report_only_initial_ttfb=True,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
@transport.event_handler("on_first_participant_joined")
|
||||||
|
async def on_first_participant_joined(transport, participant):
|
||||||
|
await transport.capture_participant_transcription(participant["id"])
|
||||||
|
# Kick off the conversation.
|
||||||
|
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||||
|
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||||
|
|
||||||
|
@transport.event_handler("on_participant_left")
|
||||||
|
async def on_participant_left(transport, participant, reason):
|
||||||
|
await task.cancel()
|
||||||
|
|
||||||
|
runner = PipelineRunner()
|
||||||
|
|
||||||
|
await runner.run(task)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
asyncio.run(main())
|
||||||
102
examples/foundational/07v-interruptible-neuphonic.py
Normal file
102
examples/foundational/07v-interruptible-neuphonic.py
Normal file
@@ -0,0 +1,102 @@
|
|||||||
|
#
|
||||||
|
# Copyright (c) 2024–2025, Daily
|
||||||
|
#
|
||||||
|
# SPDX-License-Identifier: BSD 2-Clause License
|
||||||
|
#
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from loguru import logger
|
||||||
|
from runner import configure
|
||||||
|
|
||||||
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
|
from pipecat.services.neuphonic.tts import NeuphonicTTSService
|
||||||
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
|
load_dotenv(override=True)
|
||||||
|
|
||||||
|
logger.remove(0)
|
||||||
|
logger.add(sys.stderr, level="DEBUG")
|
||||||
|
|
||||||
|
|
||||||
|
async def main():
|
||||||
|
async with aiohttp.ClientSession() as session:
|
||||||
|
(room_url, token) = await configure(session)
|
||||||
|
|
||||||
|
transport = DailyTransport(
|
||||||
|
room_url,
|
||||||
|
token,
|
||||||
|
"Respond bot",
|
||||||
|
DailyParams(
|
||||||
|
audio_out_enabled=True,
|
||||||
|
transcription_enabled=True,
|
||||||
|
vad_enabled=True,
|
||||||
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
tts = NeuphonicTTSService(
|
||||||
|
api_key=os.getenv("NEUPHONIC_API_KEY"),
|
||||||
|
voice_id="fc854436-2dac-4d21-aa69-ae17b54e98eb", # Emily
|
||||||
|
)
|
||||||
|
|
||||||
|
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||||
|
|
||||||
|
messages = [
|
||||||
|
{
|
||||||
|
"role": "system",
|
||||||
|
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
context = OpenAILLMContext(messages)
|
||||||
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
|
pipeline = Pipeline(
|
||||||
|
[
|
||||||
|
transport.input(), # Transport user input
|
||||||
|
context_aggregator.user(), # User responses
|
||||||
|
llm, # LLM
|
||||||
|
tts, # TTS
|
||||||
|
transport.output(), # Transport bot output
|
||||||
|
context_aggregator.assistant(), # Assistant spoken responses
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
task = PipelineTask(
|
||||||
|
pipeline,
|
||||||
|
params=PipelineParams(
|
||||||
|
allow_interruptions=True,
|
||||||
|
enable_metrics=True,
|
||||||
|
enable_usage_metrics=True,
|
||||||
|
report_only_initial_ttfb=True,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
@transport.event_handler("on_first_participant_joined")
|
||||||
|
async def on_first_participant_joined(transport, participant):
|
||||||
|
await transport.capture_participant_transcription(participant["id"])
|
||||||
|
# Kick off the conversation.
|
||||||
|
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||||
|
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||||
|
|
||||||
|
@transport.event_handler("on_participant_left")
|
||||||
|
async def on_participant_left(transport, participant, reason):
|
||||||
|
await task.cancel()
|
||||||
|
|
||||||
|
runner = PipelineRunner()
|
||||||
|
|
||||||
|
await runner.run(task)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
asyncio.run(main())
|
||||||
109
examples/foundational/07w-interruptible-fal.py
Normal file
109
examples/foundational/07w-interruptible-fal.py
Normal file
@@ -0,0 +1,109 @@
|
|||||||
|
#
|
||||||
|
# Copyright (c) 2024–2025, Daily
|
||||||
|
#
|
||||||
|
# SPDX-License-Identifier: BSD 2-Clause License
|
||||||
|
#
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from loguru import logger
|
||||||
|
from runner import configure
|
||||||
|
|
||||||
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
|
from pipecat.services.fal.stt import FalSTTService
|
||||||
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
|
load_dotenv(override=True)
|
||||||
|
|
||||||
|
logger.remove(0)
|
||||||
|
logger.add(sys.stderr, level="DEBUG")
|
||||||
|
|
||||||
|
|
||||||
|
async def main():
|
||||||
|
async with aiohttp.ClientSession() as session:
|
||||||
|
(room_url, token) = await configure(session)
|
||||||
|
|
||||||
|
transport = DailyTransport(
|
||||||
|
room_url,
|
||||||
|
token,
|
||||||
|
"Respond bot",
|
||||||
|
DailyParams(
|
||||||
|
audio_out_enabled=True,
|
||||||
|
vad_enabled=True,
|
||||||
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
|
vad_audio_passthrough=True,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
stt = FalSTTService(
|
||||||
|
api_key=os.getenv("FAL_KEY"),
|
||||||
|
)
|
||||||
|
|
||||||
|
tts = CartesiaTTSService(
|
||||||
|
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||||
|
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||||
|
)
|
||||||
|
|
||||||
|
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||||
|
|
||||||
|
messages = [
|
||||||
|
{
|
||||||
|
"role": "system",
|
||||||
|
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
context = OpenAILLMContext(messages)
|
||||||
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
|
pipeline = Pipeline(
|
||||||
|
[
|
||||||
|
transport.input(), # Transport user input
|
||||||
|
stt, # STT
|
||||||
|
context_aggregator.user(), # User responses
|
||||||
|
llm, # LLM
|
||||||
|
tts, # TTS
|
||||||
|
transport.output(), # Transport bot output
|
||||||
|
context_aggregator.assistant(), # Assistant spoken responses
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
task = PipelineTask(
|
||||||
|
pipeline,
|
||||||
|
params=PipelineParams(
|
||||||
|
allow_interruptions=True,
|
||||||
|
enable_metrics=True,
|
||||||
|
enable_usage_metrics=True,
|
||||||
|
report_only_initial_ttfb=True,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
@transport.event_handler("on_first_participant_joined")
|
||||||
|
async def on_first_participant_joined(transport, participant):
|
||||||
|
await transport.capture_participant_transcription(participant["id"])
|
||||||
|
# Kick off the conversation.
|
||||||
|
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||||
|
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||||
|
|
||||||
|
# Register an event handler to exit the application when the user leaves.
|
||||||
|
@transport.event_handler("on_participant_left")
|
||||||
|
async def on_participant_left(transport, participant, reason):
|
||||||
|
await task.cancel()
|
||||||
|
|
||||||
|
runner = PipelineRunner()
|
||||||
|
|
||||||
|
await runner.run(task)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
asyncio.run(main())
|
||||||
91
examples/foundational/07x-interruptible-local.py
Normal file
91
examples/foundational/07x-interruptible-local.py
Normal file
@@ -0,0 +1,91 @@
|
|||||||
|
#
|
||||||
|
# Copyright (c) 2024–2025, Daily
|
||||||
|
#
|
||||||
|
# SPDX-License-Identifier: BSD 2-Clause License
|
||||||
|
#
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
|
from pipecat.transports.local.audio import LocalAudioTransport, LocalAudioTransportParams
|
||||||
|
|
||||||
|
load_dotenv(override=True)
|
||||||
|
|
||||||
|
logger.remove(0)
|
||||||
|
logger.add(sys.stderr, level="DEBUG")
|
||||||
|
|
||||||
|
|
||||||
|
async def main():
    """Build and run an interruptible voice pipeline on the local audio transport.

    Frames flow from the transport input through Deepgram STT, the user context
    aggregator, the OpenAI LLM and Cartesia TTS to the transport output, and
    finally through the assistant context aggregator. An extra system message
    asking the bot to introduce itself is queued before the runner starts.
    """
    # Local audio in/out with Silero VAD enabled on the input.
    audio_params = LocalAudioTransportParams(
        audio_in_enabled=True,
        audio_out_enabled=True,
        vad_enabled=True,
        vad_analyzer=SileroVADAnalyzer(),
        vad_audio_passthrough=True,
    )
    transport = LocalAudioTransport(audio_params)

    # API keys are read from the environment (see load_dotenv at module level).
    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

    tts = CartesiaTTSService(
        api_key=os.getenv("CARTESIA_API_KEY"),
        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
    )

    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")

    system_prompt = {
        "role": "system",
        "content": "You are a helpful LLM. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
    }
    # The same list object is handed to the context and appended to further down.
    messages = [system_prompt]

    context = OpenAILLMContext(messages)
    context_aggregator = llm.create_context_aggregator(context)

    processors = [
        transport.input(),  # Transport user input
        stt,
        context_aggregator.user(),  # User responses
        llm,  # LLM
        tts,  # TTS
        transport.output(),  # Transport bot output
        context_aggregator.assistant(),  # Assistant spoken responses
    ]
    pipeline = Pipeline(processors)

    task_params = PipelineParams(
        allow_interruptions=True,
        enable_metrics=True,
        enable_usage_metrics=True,
        report_only_initial_ttfb=True,
    )
    task = PipelineTask(pipeline, params=task_params)

    # Kick off the conversation: ask for an introduction and queue the context.
    introduction_request = {"role": "system", "content": "Please introduce yourself to the user."}
    messages.append(introduction_request)
    await task.queue_frames([context_aggregator.user().get_context_frame()])

    runner = PipelineRunner()

    await runner.run(task)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
asyncio.run(main())
|
||||||
103
examples/foundational/07y-interruptible-groq.py
Normal file
103
examples/foundational/07y-interruptible-groq.py
Normal file
@@ -0,0 +1,103 @@
|
|||||||
|
#
|
||||||
|
# Copyright (c) 2024–2025, Daily
|
||||||
|
#
|
||||||
|
# SPDX-License-Identifier: BSD 2-Clause License
|
||||||
|
#
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from loguru import logger
|
||||||
|
from runner import configure
|
||||||
|
|
||||||
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
|
from pipecat.services.groq.llm import GroqLLMService
|
||||||
|
from pipecat.services.groq.stt import GroqSTTService
|
||||||
|
from pipecat.services.groq.tts import GroqTTSService
|
||||||
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
|
load_dotenv(override=True)
|
||||||
|
|
||||||
|
logger.remove(0)
|
||||||
|
logger.add(sys.stderr, level="DEBUG")
|
||||||
|
|
||||||
|
|
||||||
|
async def main():
    """Build and run an interruptible voice bot in a Daily room using Groq services.

    Groq provides speech-to-text, the LLM and text-to-speech. The conversation
    is started when the first participant joins, and the pipeline task is
    cancelled when a participant leaves.
    """
    async with aiohttp.ClientSession() as session:
        room_url, token = await configure(session)

        daily_params = DailyParams(
            audio_out_enabled=True,
            # transcription_enabled=True,
            vad_enabled=True,
            vad_analyzer=SileroVADAnalyzer(),
            vad_audio_passthrough=True,
        )
        transport = DailyTransport(room_url, token, "Respond bot", daily_params)

        # All three Groq services read the same GROQ_API_KEY environment variable.
        stt = GroqSTTService(api_key=os.getenv("GROQ_API_KEY"))

        llm = GroqLLMService(api_key=os.getenv("GROQ_API_KEY"), model="llama-3.3-70b-versatile")

        tts = GroqTTSService(api_key=os.getenv("GROQ_API_KEY"))

        system_prompt = {
            "role": "system",
            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
        }
        # The same list object is handed to the context and appended to by the join handler.
        messages = [system_prompt]

        context = OpenAILLMContext(messages)
        context_aggregator = llm.create_context_aggregator(context)

        processors = [
            transport.input(),  # Transport user input
            stt,
            context_aggregator.user(),  # User responses
            llm,  # LLM
            tts,  # TTS
            transport.output(),  # Transport bot output
            context_aggregator.assistant(),  # Assistant spoken responses
        ]
        pipeline = Pipeline(processors)

        task_params = PipelineParams(
            allow_interruptions=True,
            enable_metrics=True,
            enable_usage_metrics=True,
        )
        task = PipelineTask(pipeline, params=task_params)

        @transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            await transport.capture_participant_transcription(participant["id"])
            # Kick off the conversation.
            introduction_request = {
                "role": "system",
                "content": "Please introduce yourself to the user.",
            }
            messages.append(introduction_request)
            await task.queue_frames([context_aggregator.user().get_context_frame()])

        @transport.event_handler("on_participant_left")
        async def on_participant_left(transport, participant, reason):
            # Stop the pipeline when a participant leaves the room.
            await task.cancel()

        runner = PipelineRunner()

        await runner.run(task)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
asyncio.run(main())
|
||||||
@@ -19,8 +19,8 @@ from pipecat.pipeline.runner import PipelineRunner
|
|||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.filters.wake_check_filter import WakeCheckFilter
|
from pipecat.processors.filters.wake_check_filter import WakeCheckFilter
|
||||||
from pipecat.services.cartesia import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.services.openai import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
@@ -47,7 +47,7 @@ async def main():
|
|||||||
|
|
||||||
tts = CartesiaTTSService(
|
tts = CartesiaTTSService(
|
||||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||||
)
|
)
|
||||||
|
|
||||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||||
|
|||||||
@@ -29,8 +29,8 @@ from pipecat.processors.aggregators.openai_llm_context import (
|
|||||||
)
|
)
|
||||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||||
from pipecat.processors.logger import FrameLogger
|
from pipecat.processors.logger import FrameLogger
|
||||||
from pipecat.services.cartesia import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.services.openai import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
@@ -100,7 +100,7 @@ async def main():
|
|||||||
|
|
||||||
tts = CartesiaTTSService(
|
tts = CartesiaTTSService(
|
||||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||||
)
|
)
|
||||||
|
|
||||||
messages = [
|
messages = [
|
||||||
|
|||||||
@@ -22,8 +22,8 @@ from pipecat.pipeline.task import PipelineTask
|
|||||||
from pipecat.processors.aggregators.user_response import UserResponseAggregator
|
from pipecat.processors.aggregators.user_response import UserResponseAggregator
|
||||||
from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
|
from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
|
||||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||||
from pipecat.services.cartesia import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.services.moondream import MoondreamService
|
from pipecat.services.moondream.vision import MoondreamService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
@@ -77,7 +77,7 @@ async def main():
|
|||||||
|
|
||||||
tts = CartesiaTTSService(
|
tts = CartesiaTTSService(
|
||||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||||
)
|
)
|
||||||
|
|
||||||
@transport.event_handler("on_first_participant_joined")
|
@transport.event_handler("on_first_participant_joined")
|
||||||
|
|||||||
@@ -22,8 +22,8 @@ from pipecat.pipeline.task import PipelineTask
|
|||||||
from pipecat.processors.aggregators.user_response import UserResponseAggregator
|
from pipecat.processors.aggregators.user_response import UserResponseAggregator
|
||||||
from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
|
from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
|
||||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||||
from pipecat.services.cartesia import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.services.google import GoogleLLMService
|
from pipecat.services.google.llm import GoogleLLMService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
@@ -77,7 +77,7 @@ async def main():
|
|||||||
|
|
||||||
tts = CartesiaTTSService(
|
tts = CartesiaTTSService(
|
||||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||||
)
|
)
|
||||||
|
|
||||||
@transport.event_handler("on_first_participant_joined")
|
@transport.event_handler("on_first_participant_joined")
|
||||||
|
|||||||
@@ -22,8 +22,8 @@ from pipecat.pipeline.task import PipelineTask
|
|||||||
from pipecat.processors.aggregators.user_response import UserResponseAggregator
|
from pipecat.processors.aggregators.user_response import UserResponseAggregator
|
||||||
from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
|
from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
|
||||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||||
from pipecat.services.cartesia import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.services.openai import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
@@ -76,7 +76,7 @@ async def main():
|
|||||||
|
|
||||||
tts = CartesiaTTSService(
|
tts = CartesiaTTSService(
|
||||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||||
)
|
)
|
||||||
|
|
||||||
@transport.event_handler("on_first_participant_joined")
|
@transport.event_handler("on_first_participant_joined")
|
||||||
|
|||||||
@@ -22,8 +22,8 @@ from pipecat.pipeline.task import PipelineTask
|
|||||||
from pipecat.processors.aggregators.user_response import UserResponseAggregator
|
from pipecat.processors.aggregators.user_response import UserResponseAggregator
|
||||||
from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
|
from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
|
||||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||||
from pipecat.services.anthropic import AnthropicLLMService
|
from pipecat.services.anthropic.llm import AnthropicLLMService
|
||||||
from pipecat.services.cartesia import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
@@ -76,7 +76,7 @@ async def main():
|
|||||||
|
|
||||||
tts = CartesiaTTSService(
|
tts = CartesiaTTSService(
|
||||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||||
)
|
)
|
||||||
|
|
||||||
@transport.event_handler("on_first_participant_joined")
|
@transport.event_handler("on_first_participant_joined")
|
||||||
|
|||||||
@@ -12,12 +12,13 @@ from dotenv import load_dotenv
|
|||||||
from loguru import logger
|
from loguru import logger
|
||||||
from runner import configure
|
from runner import configure
|
||||||
|
|
||||||
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.frames.frames import Frame, TranscriptionFrame
|
from pipecat.frames.frames import Frame, TranscriptionFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineTask
|
from pipecat.pipeline.task import PipelineTask
|
||||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||||
from pipecat.services.whisper import WhisperSTTService
|
from pipecat.services.whisper.stt import WhisperSTTService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
@@ -39,7 +40,15 @@ async def main():
|
|||||||
(room_url, _) = await configure(session)
|
(room_url, _) = await configure(session)
|
||||||
|
|
||||||
transport = DailyTransport(
|
transport = DailyTransport(
|
||||||
room_url, None, "Transcription bot", DailyParams(audio_in_enabled=True)
|
room_url,
|
||||||
|
None,
|
||||||
|
"Transcription bot",
|
||||||
|
DailyParams(
|
||||||
|
audio_in_enabled=True,
|
||||||
|
vad_enabled=True,
|
||||||
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
|
vad_audio_passthrough=True,
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
stt = WhisperSTTService()
|
stt = WhisperSTTService()
|
||||||
|
|||||||
@@ -10,12 +10,13 @@ import sys
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.frames.frames import Frame, TranscriptionFrame
|
from pipecat.frames.frames import Frame, TranscriptionFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineTask
|
from pipecat.pipeline.task import PipelineTask
|
||||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||||
from pipecat.services.whisper import WhisperSTTService
|
from pipecat.services.whisper.stt import WhisperSTTService
|
||||||
from pipecat.transports.local.audio import LocalAudioTransport, LocalAudioTransportParams
|
from pipecat.transports.local.audio import LocalAudioTransport, LocalAudioTransportParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
@@ -33,7 +34,14 @@ class TranscriptionLogger(FrameProcessor):
|
|||||||
|
|
||||||
|
|
||||||
async def main():
|
async def main():
|
||||||
transport = LocalAudioTransport(LocalAudioTransportParams(audio_in_enabled=True))
|
transport = LocalAudioTransport(
|
||||||
|
LocalAudioTransportParams(
|
||||||
|
audio_in_enabled=True,
|
||||||
|
vad_enabled=True,
|
||||||
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
|
vad_audio_passthrough=True,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
stt = WhisperSTTService()
|
stt = WhisperSTTService()
|
||||||
|
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ from pipecat.pipeline.pipeline import Pipeline
|
|||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineTask
|
from pipecat.pipeline.task import PipelineTask
|
||||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||||
from pipecat.services.deepgram import DeepgramSTTService, Language, LiveOptions
|
from pipecat.services.deepgram.stt import DeepgramSTTService, Language, LiveOptions
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
@@ -45,7 +45,7 @@ async def main():
|
|||||||
|
|
||||||
stt = DeepgramSTTService(
|
stt = DeepgramSTTService(
|
||||||
api_key=os.getenv("DEEPGRAM_API_KEY"),
|
api_key=os.getenv("DEEPGRAM_API_KEY"),
|
||||||
# live_options=LiveOptions(language=Language.FR),
|
# live_options=LiveOptions(language=Language.FR),
|
||||||
)
|
)
|
||||||
|
|
||||||
tl = TranscriptionLogger()
|
tl = TranscriptionLogger()
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ from pipecat.pipeline.pipeline import Pipeline
|
|||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineTask
|
from pipecat.pipeline.task import PipelineTask
|
||||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||||
from pipecat.services.assemblyai import AssemblyAISTTService
|
from pipecat.services.assemblyai.stt import AssemblyAISTTService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|||||||
95
examples/foundational/13e-whisper-mlx.py
Normal file
95
examples/foundational/13e-whisper-mlx.py
Normal file
@@ -0,0 +1,95 @@
|
|||||||
|
#
|
||||||
|
# Copyright (c) 2024–2025, Daily
|
||||||
|
#
|
||||||
|
# SPDX-License-Identifier: BSD 2-Clause License
|
||||||
|
#
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from loguru import logger
|
||||||
|
from runner import configure
|
||||||
|
|
||||||
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
|
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||||
|
from pipecat.frames.frames import Frame, TranscriptionFrame, UserStoppedSpeakingFrame
|
||||||
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
|
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||||
|
from pipecat.services.whisper.stt import MLXModel, WhisperSTTServiceMLX
|
||||||
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
|
load_dotenv(override=True)
|
||||||
|
|
||||||
|
logger.remove(0)
|
||||||
|
logger.add(sys.stderr, level="DEBUG")
|
||||||
|
|
||||||
|
|
||||||
|
STOP_SECS = 2.0
|
||||||
|
|
||||||
|
|
||||||
|
class TranscriptionLogger(FrameProcessor):
    """Measures transcription latency.

    Uses the (intentionally) long STOP_SECS parameter to give the transcription time to finish,
    then outputs the timing between when the VAD first classified audio input as not-speech and
    the delivery of the last transcription frame.
    """

    def __init__(self):
        super().__init__()
        # Only differences between clock readings are used, so a monotonic clock
        # is preferred: it cannot jump when the system wall clock is adjusted.
        self._last_transcription_time = time.monotonic()

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Log the transcription latency and pass the frame along.

        Args:
            frame: The frame travelling through the pipeline.
            direction: The direction in which the frame is travelling.
        """
        await super().process_frame(frame, direction)

        if isinstance(frame, UserStoppedSpeakingFrame):
            # Time elapsed since the last transcription was delivered. Per the
            # class docstring, this frame is expected about STOP_SECS after the
            # VAD first saw non-speech, so the difference is the latency.
            since_last_transcription = time.monotonic() - self._last_transcription_time
            logger.debug(f"Transcription latency: {(STOP_SECS - since_last_transcription):.2f}")

        if isinstance(frame, TranscriptionFrame):
            self._last_transcription_time = time.monotonic()

        # Forward every frame; without this the frame stops at this processor
        # and nothing after it in the pipeline ever sees it.
        await self.push_frame(frame, direction)
|
||||||
|
|
||||||
|
|
||||||
|
async def main():
    """Run a transcription-only pipeline in a Daily room using Whisper on MLX.

    Audio from the transport input goes through the Whisper MLX speech-to-text
    service and then to a TranscriptionLogger. The VAD stop time is set to
    STOP_SECS.
    """
    async with aiohttp.ClientSession() as session:
        room_url, _ = await configure(session)

        # The VAD stop time is set to the module-level STOP_SECS.
        vad_analyzer = SileroVADAnalyzer(params=VADParams(stop_secs=STOP_SECS))
        daily_params = DailyParams(
            audio_in_enabled=True,
            vad_enabled=True,
            vad_analyzer=vad_analyzer,
            vad_audio_passthrough=True,
        )
        # No token is passed for the room.
        transport = DailyTransport(room_url, None, "Transcription bot", daily_params)

        stt = WhisperSTTServiceMLX(model=MLXModel.LARGE_V3_TURBO)

        tl = TranscriptionLogger()

        processors = [transport.input(), stt, tl]
        pipeline = Pipeline(processors)

        task_params = PipelineParams(
            enable_metrics=True,
            report_only_initial_ttfb=False,
        )
        task = PipelineTask(pipeline, params=task_params)

        runner = PipelineRunner()

        await runner.run(task)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
asyncio.run(main())
|
||||||
@@ -11,16 +11,18 @@ import sys
|
|||||||
import aiohttp
|
import aiohttp
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
from openai.types.chat import ChatCompletionToolParam
|
|
||||||
from runner import configure
|
from runner import configure
|
||||||
|
|
||||||
|
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||||
|
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.frames.frames import TTSSpeakFrame
|
from pipecat.frames.frames import TTSSpeakFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.services.cartesia import CartesiaTTSService
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.services.openai import OpenAILLMContext, OpenAILLMService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
@@ -29,13 +31,8 @@ logger.remove(0)
|
|||||||
logger.add(sys.stderr, level="DEBUG")
|
logger.add(sys.stderr, level="DEBUG")
|
||||||
|
|
||||||
|
|
||||||
async def start_fetch_weather(function_name, llm, context):
|
|
||||||
"""Push a frame to the LLM; this is handy when the LLM response might take a while."""
|
|
||||||
await llm.push_frame(TTSSpeakFrame("Let me check on that."))
|
|
||||||
logger.debug(f"Starting fetch_weather_from_api with function_name: {function_name}")
|
|
||||||
|
|
||||||
|
|
||||||
async def fetch_weather_from_api(function_name, tool_call_id, args, llm, context, result_callback):
|
async def fetch_weather_from_api(function_name, tool_call_id, args, llm, context, result_callback):
|
||||||
|
await llm.push_frame(TTSSpeakFrame("Let me check on that."))
|
||||||
await result_callback({"conditions": "nice", "temperature": "75"})
|
await result_callback({"conditions": "nice", "temperature": "75"})
|
||||||
|
|
||||||
|
|
||||||
@@ -57,38 +54,33 @@ async def main():
|
|||||||
|
|
||||||
tts = CartesiaTTSService(
|
tts = CartesiaTTSService(
|
||||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||||
)
|
)
|
||||||
|
|
||||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||||
# Register a function_name of None to get all functions
|
|
||||||
# sent to the same callback with an additional function_name parameter.
|
|
||||||
llm.register_function(None, fetch_weather_from_api, start_callback=start_fetch_weather)
|
|
||||||
|
|
||||||
tools = [
|
# You can also register a function_name of None to get all functions
|
||||||
ChatCompletionToolParam(
|
# sent to the same callback with an additional function_name parameter.
|
||||||
type="function",
|
llm.register_function("get_current_weather", fetch_weather_from_api)
|
||||||
function={
|
|
||||||
"name": "get_current_weather",
|
weather_function = FunctionSchema(
|
||||||
"description": "Get the current weather",
|
name="get_current_weather",
|
||||||
"parameters": {
|
description="Get the current weather",
|
||||||
"type": "object",
|
properties={
|
||||||
"properties": {
|
"location": {
|
||||||
"location": {
|
"type": "string",
|
||||||
"type": "string",
|
"description": "The city and state, e.g. San Francisco, CA",
|
||||||
"description": "The city and state, e.g. San Francisco, CA",
|
|
||||||
},
|
|
||||||
"format": {
|
|
||||||
"type": "string",
|
|
||||||
"enum": ["celsius", "fahrenheit"],
|
|
||||||
"description": "The temperature unit to use. Infer this from the users location.",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
"required": ["location", "format"],
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
)
|
"format": {
|
||||||
]
|
"type": "string",
|
||||||
|
"enum": ["celsius", "fahrenheit"],
|
||||||
|
"description": "The temperature unit to use. Infer this from the user's location.",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
required=["location", "format"],
|
||||||
|
)
|
||||||
|
tools = ToolsSchema(standard_tools=[weather_function])
|
||||||
|
|
||||||
messages = [
|
messages = [
|
||||||
{
|
{
|
||||||
"role": "system",
|
"role": "system",
|
||||||
|
|||||||
@@ -13,13 +13,15 @@ from dotenv import load_dotenv
|
|||||||
from loguru import logger
|
from loguru import logger
|
||||||
from runner import configure
|
from runner import configure
|
||||||
|
|
||||||
|
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||||
|
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.services.anthropic import AnthropicLLMService
|
from pipecat.services.anthropic.llm import AnthropicLLMService
|
||||||
from pipecat.services.cartesia import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
@@ -51,30 +53,26 @@ async def main():
|
|||||||
|
|
||||||
tts = CartesiaTTSService(
|
tts = CartesiaTTSService(
|
||||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||||
)
|
)
|
||||||
|
|
||||||
llm = AnthropicLLMService(
|
llm = AnthropicLLMService(
|
||||||
api_key=os.getenv("ANTHROPIC_API_KEY"), model="claude-3-5-sonnet-20240620"
|
api_key=os.getenv("ANTHROPIC_API_KEY"), model="claude-3-7-sonnet-latest"
|
||||||
)
|
)
|
||||||
llm.register_function("get_weather", get_weather)
|
llm.register_function("get_weather", get_weather)
|
||||||
|
|
||||||
tools = [
|
weather_function = FunctionSchema(
|
||||||
{
|
name="get_weather",
|
||||||
"name": "get_weather",
|
description="Get the current weather",
|
||||||
"description": "Get the current weather in a given location",
|
properties={
|
||||||
"input_schema": {
|
"location": {
|
||||||
"type": "object",
|
"type": "string",
|
||||||
"properties": {
|
"description": "The city and state, e.g. San Francisco, CA",
|
||||||
"location": {
|
|
||||||
"type": "string",
|
|
||||||
"description": "The city and state, e.g. San Francisco, CA",
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"required": ["location"],
|
|
||||||
},
|
},
|
||||||
}
|
},
|
||||||
]
|
required=["location"],
|
||||||
|
)
|
||||||
|
tools = ToolsSchema(standard_tools=[weather_function])
|
||||||
|
|
||||||
# todo: test with very short initial user message
|
# todo: test with very short initial user message
|
||||||
|
|
||||||
|
|||||||
@@ -13,13 +13,15 @@ from dotenv import load_dotenv
|
|||||||
from loguru import logger
|
from loguru import logger
|
||||||
from runner import configure
|
from runner import configure
|
||||||
|
|
||||||
|
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||||
|
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.services.anthropic import AnthropicLLMService
|
from pipecat.services.anthropic.llm import AnthropicLLMService
|
||||||
from pipecat.services.cartesia import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
@@ -37,7 +39,12 @@ async def get_weather(function_name, tool_call_id, arguments, llm, context, resu
|
|||||||
|
|
||||||
async def get_image(function_name, tool_call_id, arguments, llm, context, result_callback):
|
async def get_image(function_name, tool_call_id, arguments, llm, context, result_callback):
|
||||||
question = arguments["question"]
|
question = arguments["question"]
|
||||||
await llm.request_image_frame(user_id=video_participant_id, text_content=question)
|
await llm.request_image_frame(
|
||||||
|
user_id=video_participant_id,
|
||||||
|
function_name=function_name,
|
||||||
|
tool_call_id=tool_call_id,
|
||||||
|
text_content=question,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
async def main():
|
async def main():
|
||||||
@@ -60,48 +67,40 @@ async def main():
|
|||||||
|
|
||||||
tts = CartesiaTTSService(
|
tts = CartesiaTTSService(
|
||||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||||
)
|
)
|
||||||
|
|
||||||
llm = AnthropicLLMService(
|
llm = AnthropicLLMService(
|
||||||
api_key=os.getenv("ANTHROPIC_API_KEY"),
|
api_key=os.getenv("ANTHROPIC_API_KEY"),
|
||||||
# model="claude-3-5-sonnet-20240620",
|
model="claude-3-7-sonnet-latest",
|
||||||
model="claude-3-5-sonnet-latest",
|
|
||||||
enable_prompt_caching_beta=True,
|
enable_prompt_caching_beta=True,
|
||||||
)
|
)
|
||||||
llm.register_function("get_weather", get_weather)
|
llm.register_function("get_weather", get_weather)
|
||||||
llm.register_function("get_image", get_image)
|
llm.register_function("get_image", get_image)
|
||||||
|
|
||||||
tools = [
|
weather_function = FunctionSchema(
|
||||||
{
|
name="get_weather",
|
||||||
"name": "get_weather",
|
description="Get the current weather",
|
||||||
"description": "Get the current weather in a given location",
|
properties={
|
||||||
"input_schema": {
|
"location": {
|
||||||
"type": "object",
|
"type": "string",
|
||||||
"properties": {
|
"description": "The city and state, e.g. San Francisco, CA",
|
||||||
"location": {
|
|
||||||
"type": "string",
|
|
||||||
"description": "The city and state, e.g. San Francisco, CA",
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"required": ["location"],
|
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
required=["location"],
|
||||||
"name": "get_image",
|
)
|
||||||
"description": "Get an image from the video stream.",
|
get_image_function = FunctionSchema(
|
||||||
"input_schema": {
|
name="get_image",
|
||||||
"type": "object",
|
description="Get an image from the video stream.",
|
||||||
"properties": {
|
properties={
|
||||||
"question": {
|
"question": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "The question that the user is asking about the image.",
|
"description": "The question that the user is asking about the image.",
|
||||||
}
|
}
|
||||||
},
|
|
||||||
"required": ["question"],
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
]
|
required=["question"],
|
||||||
|
)
|
||||||
|
tools = ToolsSchema(standard_tools=[weather_function, get_image_function])
|
||||||
|
|
||||||
# todo: test with very short initial user message
|
# todo: test with very short initial user message
|
||||||
|
|
||||||
|
|||||||
@@ -11,17 +11,18 @@ import sys
|
|||||||
import aiohttp
|
import aiohttp
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
from openai.types.chat import ChatCompletionToolParam
|
|
||||||
from runner import configure
|
from runner import configure
|
||||||
|
|
||||||
|
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||||
|
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.frames.frames import TTSSpeakFrame
|
from pipecat.frames.frames import TTSSpeakFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineTask
|
from pipecat.pipeline.task import PipelineTask
|
||||||
from pipecat.services.cartesia import CartesiaTTSService
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.services.openai import OpenAILLMContext
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.services.together import TogetherLLMService
|
from pipecat.services.together.llm import TogetherLLMService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
@@ -30,13 +31,8 @@ logger.remove(0)
|
|||||||
logger.add(sys.stderr, level="DEBUG")
|
logger.add(sys.stderr, level="DEBUG")
|
||||||
|
|
||||||
|
|
||||||
async def start_fetch_weather(function_name, llm, context):
|
|
||||||
"""Push a frame to the LLM; this is handy when the LLM response might take a while."""
|
|
||||||
await llm.push_frame(TTSSpeakFrame("Let me check on that."))
|
|
||||||
logger.debug(f"Starting fetch_weather_from_api with function_name: {function_name}")
|
|
||||||
|
|
||||||
|
|
||||||
async def fetch_weather_from_api(function_name, tool_call_id, args, llm, context, result_callback):
|
async def fetch_weather_from_api(function_name, tool_call_id, args, llm, context, result_callback):
|
||||||
|
await llm.push_frame(TTSSpeakFrame("Let me check on that."))
|
||||||
await result_callback({"conditions": "nice", "temperature": "75"})
|
await result_callback({"conditions": "nice", "temperature": "75"})
|
||||||
|
|
||||||
|
|
||||||
@@ -58,41 +54,34 @@ async def main():
|
|||||||
|
|
||||||
tts = CartesiaTTSService(
|
tts = CartesiaTTSService(
|
||||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||||
)
|
)
|
||||||
|
|
||||||
llm = TogetherLLMService(
|
llm = TogetherLLMService(
|
||||||
api_key=os.getenv("TOGETHER_API_KEY"),
|
api_key=os.getenv("TOGETHER_API_KEY"),
|
||||||
model="meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
|
model="meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
|
||||||
)
|
)
|
||||||
# Register a function_name of None to get all functions
|
# You can also register a function_name of None to get all functions
|
||||||
# sent to the same callback with an additional function_name parameter.
|
# sent to the same callback with an additional function_name parameter.
|
||||||
llm.register_function(None, fetch_weather_from_api, start_callback=start_fetch_weather)
|
llm.register_function("get_current_weather", fetch_weather_from_api)
|
||||||
|
|
||||||
tools = [
|
weather_function = FunctionSchema(
|
||||||
ChatCompletionToolParam(
|
name="get_current_weather",
|
||||||
type="function",
|
description="Get the current weather",
|
||||||
function={
|
properties={
|
||||||
"name": "get_current_weather",
|
"location": {
|
||||||
"description": "Get the current weather",
|
"type": "string",
|
||||||
"parameters": {
|
"description": "The city and state, e.g. San Francisco, CA",
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"location": {
|
|
||||||
"type": "string",
|
|
||||||
"description": "The city and state, e.g. San Francisco, CA",
|
|
||||||
},
|
|
||||||
"format": {
|
|
||||||
"type": "string",
|
|
||||||
"enum": ["celsius", "fahrenheit"],
|
|
||||||
"description": "The temperature unit to use. Infer this from the users location.",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
"required": ["location", "format"],
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
)
|
"format": {
|
||||||
]
|
"type": "string",
|
||||||
|
"enum": ["celsius", "fahrenheit"],
|
||||||
|
"description": "The temperature unit to use. Infer this from the user's location.",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
required=["location", "format"],
|
||||||
|
)
|
||||||
|
tools = ToolsSchema(standard_tools=[weather_function])
|
||||||
messages = [
|
messages = [
|
||||||
{
|
{
|
||||||
"role": "system",
|
"role": "system",
|
||||||
|
|||||||
@@ -11,15 +11,17 @@ import sys
|
|||||||
import aiohttp
|
import aiohttp
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
from openai.types.chat import ChatCompletionToolParam
|
|
||||||
from runner import configure
|
from runner import configure
|
||||||
|
|
||||||
|
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||||
|
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineTask
|
from pipecat.pipeline.task import PipelineTask
|
||||||
from pipecat.services.cartesia import CartesiaTTSService
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.services.openai import OpenAILLMContext, OpenAILLMService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
@@ -38,7 +40,12 @@ async def get_weather(function_name, tool_call_id, arguments, llm, context, resu
|
|||||||
async def get_image(function_name, tool_call_id, arguments, llm, context, result_callback):
|
async def get_image(function_name, tool_call_id, arguments, llm, context, result_callback):
|
||||||
logger.debug(f"!!! IN get_image {video_participant_id}, {arguments}")
|
logger.debug(f"!!! IN get_image {video_participant_id}, {arguments}")
|
||||||
question = arguments["question"]
|
question = arguments["question"]
|
||||||
await llm.request_image_frame(user_id=video_participant_id, text_content=question)
|
await llm.request_image_frame(
|
||||||
|
user_id=video_participant_id,
|
||||||
|
function_name=function_name,
|
||||||
|
tool_call_id=tool_call_id,
|
||||||
|
text_content=question,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
async def main():
|
async def main():
|
||||||
@@ -59,54 +66,41 @@ async def main():
|
|||||||
|
|
||||||
tts = CartesiaTTSService(
|
tts = CartesiaTTSService(
|
||||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||||
)
|
)
|
||||||
|
|
||||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||||
llm.register_function("get_weather", get_weather)
|
llm.register_function("get_weather", get_weather)
|
||||||
llm.register_function("get_image", get_image)
|
llm.register_function("get_image", get_image)
|
||||||
|
|
||||||
tools = [
|
weather_function = FunctionSchema(
|
||||||
ChatCompletionToolParam(
|
name="get_weather",
|
||||||
type="function",
|
description="Get the current weather",
|
||||||
function={
|
properties={
|
||||||
"name": "get_weather",
|
"location": {
|
||||||
"description": "Get the current weather",
|
"type": "string",
|
||||||
"parameters": {
|
"description": "The city and state, e.g. San Francisco, CA",
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"location": {
|
|
||||||
"type": "string",
|
|
||||||
"description": "The city and state, e.g. San Francisco, CA",
|
|
||||||
},
|
|
||||||
"format": {
|
|
||||||
"type": "string",
|
|
||||||
"enum": ["celsius", "fahrenheit"],
|
|
||||||
"description": "The temperature unit to use. Infer this from the users location.",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
"required": ["location", "format"],
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
),
|
"format": {
|
||||||
ChatCompletionToolParam(
|
"type": "string",
|
||||||
type="function",
|
"enum": ["celsius", "fahrenheit"],
|
||||||
function={
|
"description": "The temperature unit to use. Infer this from the user's location.",
|
||||||
"name": "get_image",
|
|
||||||
"description": "Get an image from the video stream.",
|
|
||||||
"parameters": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"question": {
|
|
||||||
"type": "string",
|
|
||||||
"description": "The question to ask the AI to generate an image of",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
"required": ["question"],
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
),
|
},
|
||||||
]
|
required=["location"],
|
||||||
|
)
|
||||||
|
get_image_function = FunctionSchema(
|
||||||
|
name="get_image",
|
||||||
|
description="Get an image from the video stream.",
|
||||||
|
properties={
|
||||||
|
"question": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "The question that the user is asking about the image.",
|
||||||
|
}
|
||||||
|
},
|
||||||
|
required=["question"],
|
||||||
|
)
|
||||||
|
tools = ToolsSchema(standard_tools=[weather_function, get_image_function])
|
||||||
|
|
||||||
system_prompt = """\
|
system_prompt = """\
|
||||||
You are a helpful assistant who converses with a user and answers questions. Respond concisely to general questions.
|
You are a helpful assistant who converses with a user and answers questions. Respond concisely to general questions.
|
||||||
@@ -153,7 +147,7 @@ indicate you should use the get_image tool are:
|
|||||||
await transport.capture_participant_transcription(participant["id"])
|
await transport.capture_participant_transcription(participant["id"])
|
||||||
await transport.capture_participant_video(video_participant_id, framerate=0)
|
await transport.capture_participant_video(video_participant_id, framerate=0)
|
||||||
# Kick off the conversation.
|
# Kick off the conversation.
|
||||||
await tts.say("Hi! Ask me about the weather in San Francisco.")
|
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||||
|
|
||||||
runner = PipelineRunner()
|
runner = PipelineRunner()
|
||||||
|
|
||||||
|
|||||||
@@ -13,14 +13,16 @@ from dotenv import load_dotenv
|
|||||||
from loguru import logger
|
from loguru import logger
|
||||||
from runner import configure
|
from runner import configure
|
||||||
|
|
||||||
|
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||||
|
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.frames.frames import TTSSpeakFrame
|
from pipecat.frames.frames import TTSSpeakFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.services.cartesia import CartesiaTTSService
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.services.google import GoogleLLMService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.services.openai import OpenAILLMContext
|
from pipecat.services.google.llm import GoogleLLMService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
@@ -31,13 +33,8 @@ logger.add(sys.stderr, level="DEBUG")
|
|||||||
video_participant_id = None
|
video_participant_id = None
|
||||||
|
|
||||||
|
|
||||||
async def start_fetch_weather(function_name, llm, context):
|
|
||||||
"""Push a frame to the LLM; this is handy when the LLM response might take a while."""
|
|
||||||
await llm.push_frame(TTSSpeakFrame("Let me check on that."))
|
|
||||||
logger.debug(f"Starting fetch_weather_from_api with function_name: {function_name}")
|
|
||||||
|
|
||||||
|
|
||||||
async def get_weather(function_name, tool_call_id, arguments, llm, context, result_callback):
|
async def get_weather(function_name, tool_call_id, arguments, llm, context, result_callback):
|
||||||
|
await llm.push_frame(TTSSpeakFrame("Let me check on that."))
|
||||||
location = arguments["location"]
|
location = arguments["location"]
|
||||||
await result_callback(f"The weather in {location} is currently 72 degrees and sunny.")
|
await result_callback(f"The weather in {location} is currently 72 degrees and sunny.")
|
||||||
|
|
||||||
@@ -45,7 +42,12 @@ async def get_weather(function_name, tool_call_id, arguments, llm, context, resu
|
|||||||
async def get_image(function_name, tool_call_id, arguments, llm, context, result_callback):
|
async def get_image(function_name, tool_call_id, arguments, llm, context, result_callback):
|
||||||
logger.debug(f"!!! IN get_image {video_participant_id}, {arguments}")
|
logger.debug(f"!!! IN get_image {video_participant_id}, {arguments}")
|
||||||
question = arguments["question"]
|
question = arguments["question"]
|
||||||
await llm.request_image_frame(user_id=video_participant_id, text_content=question)
|
await llm.request_image_frame(
|
||||||
|
user_id=video_participant_id,
|
||||||
|
function_name=function_name,
|
||||||
|
tool_call_id=tool_call_id,
|
||||||
|
text_content=question,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
async def main():
|
async def main():
|
||||||
@@ -66,52 +68,41 @@ async def main():
|
|||||||
|
|
||||||
tts = CartesiaTTSService(
|
tts = CartesiaTTSService(
|
||||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||||
)
|
)
|
||||||
|
|
||||||
llm = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY"), model="gemini-2.0-flash-001")
|
llm = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY"), model="gemini-2.0-flash-001")
|
||||||
llm.register_function("get_weather", get_weather, start_fetch_weather)
|
llm.register_function("get_weather", get_weather)
|
||||||
llm.register_function("get_image", get_image)
|
llm.register_function("get_image", get_image)
|
||||||
|
|
||||||
tools = [
|
weather_function = FunctionSchema(
|
||||||
{
|
name="get_weather",
|
||||||
"function_declarations": [
|
description="Get the current weather",
|
||||||
{
|
properties={
|
||||||
"name": "get_weather",
|
"location": {
|
||||||
"description": "Get the current weather",
|
"type": "string",
|
||||||
"parameters": {
|
"description": "The city and state, e.g. San Francisco, CA",
|
||||||
"type": "object",
|
},
|
||||||
"properties": {
|
"format": {
|
||||||
"location": {
|
"type": "string",
|
||||||
"type": "string",
|
"enum": ["celsius", "fahrenheit"],
|
||||||
"description": "The city and state, e.g. San Francisco, CA",
|
"description": "The temperature unit to use. Infer this from the user's location.",
|
||||||
},
|
},
|
||||||
"format": {
|
},
|
||||||
"type": "string",
|
required=["location", "format"],
|
||||||
"enum": ["celsius", "fahrenheit"],
|
)
|
||||||
"description": "The temperature unit to use. Infer this from the users location.",
|
get_image_function = FunctionSchema(
|
||||||
},
|
name="get_image",
|
||||||
},
|
description="Get an image from the video stream.",
|
||||||
"required": ["location", "format"],
|
properties={
|
||||||
},
|
"question": {
|
||||||
},
|
"type": "string",
|
||||||
{
|
"description": "The question that the user is asking about the image.",
|
||||||
"name": "get_image",
|
}
|
||||||
"description": "Get and image from the camera or video stream.",
|
},
|
||||||
"parameters": {
|
required=["question"],
|
||||||
"type": "object",
|
)
|
||||||
"properties": {
|
tools = ToolsSchema(standard_tools=[weather_function, get_image_function])
|
||||||
"question": {
|
|
||||||
"type": "string",
|
|
||||||
"description": "The question to to use when running inference on the acquired image.",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
"required": ["question"],
|
|
||||||
},
|
|
||||||
},
|
|
||||||
]
|
|
||||||
}
|
|
||||||
]
|
|
||||||
|
|
||||||
system_prompt = """\
|
system_prompt = """\
|
||||||
You are a helpful assistant who converses with a user and answers questions. Respond concisely to general questions.
|
You are a helpful assistant who converses with a user and answers questions. Respond concisely to general questions.
|
||||||
|
|||||||
@@ -11,17 +11,19 @@ import sys
|
|||||||
import aiohttp
|
import aiohttp
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
from openai.types.chat import ChatCompletionToolParam
|
|
||||||
from runner import configure
|
from runner import configure
|
||||||
|
|
||||||
|
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||||
|
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.frames.frames import TTSSpeakFrame
|
from pipecat.frames.frames import TTSSpeakFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.services.cartesia import CartesiaTTSService
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.services.groq import GroqLLMService, GroqSTTService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.services.openai import OpenAILLMContext
|
from pipecat.services.groq.llm import GroqLLMService
|
||||||
|
from pipecat.services.groq.stt import GroqSTTService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
@@ -30,13 +32,8 @@ logger.remove(0)
|
|||||||
logger.add(sys.stderr, level="DEBUG")
|
logger.add(sys.stderr, level="DEBUG")
|
||||||
|
|
||||||
|
|
||||||
async def start_fetch_weather(function_name, llm, context):
|
|
||||||
"""Push a frame to the LLM; this is handy when the LLM response might take a while."""
|
|
||||||
await llm.push_frame(TTSSpeakFrame("Let me check on that."))
|
|
||||||
logger.debug(f"Starting fetch_weather_from_api with function_name: {function_name}")
|
|
||||||
|
|
||||||
|
|
||||||
async def fetch_weather_from_api(function_name, tool_call_id, args, llm, context, result_callback):
|
async def fetch_weather_from_api(function_name, tool_call_id, args, llm, context, result_callback):
|
||||||
|
await llm.push_frame(TTSSpeakFrame("Let me check on that."))
|
||||||
await result_callback({"conditions": "nice", "temperature": "75"})
|
await result_callback({"conditions": "nice", "temperature": "75"})
|
||||||
|
|
||||||
|
|
||||||
@@ -60,38 +57,31 @@ async def main():
|
|||||||
|
|
||||||
tts = CartesiaTTSService(
|
tts = CartesiaTTSService(
|
||||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||||
)
|
)
|
||||||
|
|
||||||
llm = GroqLLMService(api_key=os.getenv("GROQ_API_KEY"), model="llama-3.3-70b-versatile")
|
llm = GroqLLMService(api_key=os.getenv("GROQ_API_KEY"), model="llama-3.3-70b-versatile")
|
||||||
# Register a function_name of None to get all functions
|
# You can also register a function_name of None to get all functions
|
||||||
# sent to the same callback with an additional function_name parameter.
|
# sent to the same callback with an additional function_name parameter.
|
||||||
llm.register_function(None, fetch_weather_from_api, start_callback=start_fetch_weather)
|
llm.register_function("get_current_weather", fetch_weather_from_api)
|
||||||
|
|
||||||
tools = [
|
weather_function = FunctionSchema(
|
||||||
ChatCompletionToolParam(
|
name="get_current_weather",
|
||||||
type="function",
|
description="Get the current weather",
|
||||||
function={
|
properties={
|
||||||
"name": "get_current_weather",
|
"location": {
|
||||||
"description": "Get the current weather",
|
"type": "string",
|
||||||
"parameters": {
|
"description": "The city and state, e.g. San Francisco, CA",
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"location": {
|
|
||||||
"type": "string",
|
|
||||||
"description": "The city and state, e.g. San Francisco, CA",
|
|
||||||
},
|
|
||||||
"unit": {
|
|
||||||
"type": "string",
|
|
||||||
"enum": ["celsius", "fahrenheit"],
|
|
||||||
"description": "The temperature unit to use. Infer this from the users location.",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
"required": ["location"],
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
)
|
"format": {
|
||||||
]
|
"type": "string",
|
||||||
|
"enum": ["celsius", "fahrenheit"],
|
||||||
|
"description": "The temperature unit to use. Infer this from the user's location.",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
required=["location"],
|
||||||
|
)
|
||||||
|
tools = ToolsSchema(standard_tools=[weather_function])
|
||||||
messages = [
|
messages = [
|
||||||
{
|
{
|
||||||
"role": "system",
|
"role": "system",
|
||||||
|
|||||||
@@ -11,17 +11,17 @@ import sys
|
|||||||
import aiohttp
|
import aiohttp
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
from openai.types.chat import ChatCompletionToolParam
|
|
||||||
from runner import configure
|
from runner import configure
|
||||||
|
|
||||||
|
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||||
|
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.frames.frames import TTSSpeakFrame
|
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.services.cartesia import CartesiaTTSService
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.services.grok import GrokLLMService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.services.openai import OpenAILLMContext
|
from pipecat.services.grok.llm import GrokLLMService
|
||||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
@@ -30,12 +30,6 @@ logger.remove(0)
|
|||||||
logger.add(sys.stderr, level="DEBUG")
|
logger.add(sys.stderr, level="DEBUG")
|
||||||
|
|
||||||
|
|
||||||
async def start_fetch_weather(function_name, llm, context):
|
|
||||||
"""Push a frame to the LLM; this is handy when the LLM response might take a while."""
|
|
||||||
await llm.push_frame(TTSSpeakFrame("Let me check on that."))
|
|
||||||
logger.debug(f"Starting fetch_weather_from_api with function_name: {function_name}")
|
|
||||||
|
|
||||||
|
|
||||||
async def fetch_weather_from_api(function_name, tool_call_id, args, llm, context, result_callback):
|
async def fetch_weather_from_api(function_name, tool_call_id, args, llm, context, result_callback):
|
||||||
await result_callback({"conditions": "nice", "temperature": "75"})
|
await result_callback({"conditions": "nice", "temperature": "75"})
|
||||||
|
|
||||||
@@ -58,38 +52,31 @@ async def main():
|
|||||||
|
|
||||||
tts = CartesiaTTSService(
|
tts = CartesiaTTSService(
|
||||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
|
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||||
)
|
)
|
||||||
|
|
||||||
llm = GrokLLMService(api_key=os.getenv("GROK_API_KEY"))
|
llm = GrokLLMService(api_key=os.getenv("GROK_API_KEY"))
|
||||||
# Register a function_name of None to get all functions
|
# You can also register a function_name of None to get all functions
|
||||||
# sent to the same callback with an additional function_name parameter.
|
# sent to the same callback with an additional function_name parameter.
|
||||||
llm.register_function(None, fetch_weather_from_api, start_callback=start_fetch_weather)
|
llm.register_function("get_current_weather", fetch_weather_from_api)
|
||||||
|
|
||||||
tools = [
|
weather_function = FunctionSchema(
|
||||||
ChatCompletionToolParam(
|
name="get_current_weather",
|
||||||
type="function",
|
description="Get the current weather",
|
||||||
function={
|
properties={
|
||||||
"name": "get_current_weather",
|
"location": {
|
||||||
"description": "Get the current weather",
|
"type": "string",
|
||||||
"parameters": {
|
"description": "The city and state, e.g. San Francisco, CA",
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"location": {
|
|
||||||
"type": "string",
|
|
||||||
"description": "The city and state, e.g. San Francisco, CA",
|
|
||||||
},
|
|
||||||
"format": {
|
|
||||||
"type": "string",
|
|
||||||
"enum": ["celsius", "fahrenheit"],
|
|
||||||
"description": "The temperature unit to use. Infer this from the users location.",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
"required": ["location", "format"],
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
)
|
"format": {
|
||||||
]
|
"type": "string",
|
||||||
|
"enum": ["celsius", "fahrenheit"],
|
||||||
|
"description": "The temperature unit to use. Infer this from the user's location.",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
required=["location", "format"],
|
||||||
|
)
|
||||||
|
tools = ToolsSchema(standard_tools=[weather_function])
|
||||||
messages = [
|
messages = [
|
||||||
{
|
{
|
||||||
"role": "system",
|
"role": "system",
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user